tesseract  4.1.0
stopper.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: stopper.c
3  ** Purpose: Stopping criteria for word classifier.
4  ** Author: Dan Johnson
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 
18 #include <cstdio>
19 #include <cstring>
20 #include <cctype>
21 #include <cmath>
22 
23 #include "stopper.h"
24 #include "ambigs.h"
25 #include "ccutil.h"
26 #include "dict.h"
27 #include "helpers.h"
28 #include "matchdefs.h"
29 #include "pageres.h"
30 #include "params.h"
31 #include "ratngs.h"
32 #include "unichar.h"
33 
34 /*----------------------------------------------------------------------------
35  Private Code
36 ----------------------------------------------------------------------------*/
37 
38 namespace tesseract {
39 
40 bool Dict::AcceptableChoice(const WERD_CHOICE& best_choice,
41  XHeightConsistencyEnum xheight_consistency) {
42  float CertaintyThreshold = stopper_nondict_certainty_base;
43  int WordSize;
44 
45  if (stopper_no_acceptable_choices) return false;
46 
47  if (best_choice.length() == 0) return false;
48 
49  bool no_dang_ambigs = !best_choice.dangerous_ambig_found();
50  bool is_valid_word = valid_word_permuter(best_choice.permuter(), false);
51  bool is_case_ok = case_ok(best_choice);
52 
53  if (stopper_debug_level >= 1) {
54  const char *xht = "UNKNOWN";
55  switch (xheight_consistency) {
56  case XH_GOOD: xht = "NORMAL"; break;
57  case XH_SUBNORMAL: xht = "SUBNORMAL"; break;
58  case XH_INCONSISTENT: xht = "INCONSISTENT"; break;
59  default: xht = "UNKNOWN";
60  }
61  tprintf("\nStopper: %s (word=%c, case=%c, xht_ok=%s=[%g,%g])\n",
62  best_choice.unichar_string().string(),
63  (is_valid_word ? 'y' : 'n'),
64  (is_case_ok ? 'y' : 'n'),
65  xht,
66  best_choice.min_x_height(),
67  best_choice.max_x_height());
68  }
69  // Do not accept invalid words in PASS1.
70  if (reject_offset_ <= 0.0f && !is_valid_word) return false;
71  if (is_valid_word && is_case_ok) {
72  WordSize = LengthOfShortestAlphaRun(best_choice);
73  WordSize -= stopper_smallword_size;
74  if (WordSize < 0)
75  WordSize = 0;
76  CertaintyThreshold += WordSize * stopper_certainty_per_char;
77  }
78 
79  if (stopper_debug_level >= 1)
80  tprintf("Stopper: Rating = %4.1f, Certainty = %4.1f, Threshold = %4.1f\n",
81  best_choice.rating(), best_choice.certainty(), CertaintyThreshold);
82 
83  if (no_dang_ambigs &&
84  best_choice.certainty() > CertaintyThreshold &&
85  xheight_consistency < XH_INCONSISTENT &&
86  UniformCertainties(best_choice)) {
87  return true;
88  } else {
89  if (stopper_debug_level >= 1) {
90  tprintf("AcceptableChoice() returned false"
91  " (no_dang_ambig:%d cert:%.4g thresh:%g uniform:%d)\n",
92  no_dang_ambigs, best_choice.certainty(),
93  CertaintyThreshold,
94  UniformCertainties(best_choice));
95  }
96  return false;
97  }
98 }
99 
// Decides whether the best choice stored in `word` should be accepted.
//
// Returns false when word->best_choice is null or empty, or when
// word->best_choices is not a singleton. Otherwise the certainty of the best
// choice is compared against a threshold that starts at
// stopper_nondict_certainty_base - reject_offset_.
//
// NOTE(review): this copy of the listing jumps from line 128 to line 130, so
// the second operand of the final `&&` is missing and the function is not
// well-formed as shown. Restore the lost line from the upstream file before
// compiling; it is deliberately not guessed here.
100 bool Dict::AcceptableResult(WERD_RES *word) const {
101 if (word->best_choice == nullptr) return false;
102 float CertaintyThreshold = stopper_nondict_certainty_base - reject_offset_;
103 int WordSize;
104 
// Optional trace of the properties that feed the decision.
105 if (stopper_debug_level >= 1) {
106 tprintf("\nRejecter: %s (word=%c, case=%c, unambig=%c, multiple=%c)\n",
107 word->best_choice->debug_string().string(),
108 (valid_word(*word->best_choice) ? 'y' : 'n'),
109 (case_ok(*word->best_choice) ? 'y' : 'n'),
110 word->best_choice->dangerous_ambig_found() ? 'n' : 'y',
111 word->best_choices.singleton() ? 'n' : 'y');
112 }
113 
// An empty best choice, or more than one entry in best_choices, is rejected.
114 if (word->best_choice->length() == 0 || !word->best_choices.singleton())
115 return false;
// For valid, well-cased words the threshold is adjusted by
// stopper_certainty_per_char for every character by which the shortest alpha
// run exceeds stopper_smallword_size (the excess is clamped at zero).
116 if (valid_word(*word->best_choice) && case_ok(*word->best_choice)) {
117 WordSize = LengthOfShortestAlphaRun(*word->best_choice);
118 WordSize -= stopper_smallword_size;
119 if (WordSize < 0)
120 WordSize = 0;
121 CertaintyThreshold += WordSize * stopper_certainty_per_char;
122 }
123 
124 if (stopper_debug_level >= 1)
125 tprintf("Rejecter: Certainty = %4.1f, Threshold = %4.1f ",
126 word->best_choice->certainty(), CertaintyThreshold);
127 
// Final decision: certainty must exceed the threshold AND satisfy the
// condition on the missing listing line 129 (see NOTE(review) above).
128 if (word->best_choice->certainty() > CertaintyThreshold &&
130 if (stopper_debug_level >= 1)
131 tprintf("ACCEPTED\n");
132 return true;
133 } else {
134 if (stopper_debug_level >= 1)
135 tprintf("REJECTED\n");
136 return false;
137 }
138 }
139 
141  DANGERR *fixpt,
142  bool fix_replaceable,
143  MATRIX *ratings) {
144  if (stopper_debug_level > 2) {
145  tprintf("\nRunning NoDangerousAmbig() for %s\n",
146  best_choice->debug_string().string());
147  }
148 
149  // Construct BLOB_CHOICE_LIST_VECTOR with ambiguities
150  // for each unichar id in BestChoice.
151  BLOB_CHOICE_LIST_VECTOR ambig_blob_choices;
152  int i;
153  bool ambigs_found = false;
154  // For each position in best_choice:
155  // -- choose AMBIG_SPEC_LIST that corresponds to unichar_id at best_choice[i]
156  // -- initialize wrong_ngram with a single unichar_id at best_choice[i]
157  // -- look for ambiguities corresponding to wrong_ngram in the list while
158  // adding the following unichar_ids from best_choice to wrong_ngram
159  //
160  // Repeat the above procedure twice: first time look through
161  // ambigs to be replaced and replace all the ambiguities found;
162  // second time look through dangerous ambiguities and construct
163 // ambig_blob_choices with a fake blob choice for each ambiguity
164  // and pass them to dawg_permute_and_select() to search for
165  // ambiguous words in the dictionaries.
166  //
167  // Note that during the execution of the for loop (on the first pass)
168  // if replacements are made the length of best_choice might change.
169  for (int pass = 0; pass < (fix_replaceable ? 2 : 1); ++pass) {
170  bool replace = (fix_replaceable && pass == 0);
171  const UnicharAmbigsVector &table = replace ?
173  if (!replace) {
174  // Initialize ambig_blob_choices with lists containing a single
175  // unichar id for the corresponding position in best_choice.
176  // best_choice consisting from only the original letters will
177  // have a rating of 0.0.
178  for (i = 0; i < best_choice->length(); ++i) {
179  auto *lst = new BLOB_CHOICE_LIST();
180  BLOB_CHOICE_IT lst_it(lst);
181  // TODO(rays/antonova) Put real xheights and y shifts here.
182  lst_it.add_to_end(new BLOB_CHOICE(best_choice->unichar_id(i),
183  0.0, 0.0, -1, 0, 1, 0, BCC_AMBIG));
184  ambig_blob_choices.push_back(lst);
185  }
186  }
187  UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE + 1];
188  int wrong_ngram_index;
189  int next_index;
190  int blob_index = 0;
191  for (i = 0; i < best_choice->length(); blob_index += best_choice->state(i),
192  ++i) {
193  UNICHAR_ID curr_unichar_id = best_choice->unichar_id(i);
194  if (stopper_debug_level > 2) {
195  tprintf("Looking for %s ngrams starting with %s:\n",
196  replace ? "replaceable" : "ambiguous",
197  getUnicharset().debug_str(curr_unichar_id).string());
198  }
199  int num_wrong_blobs = best_choice->state(i);
200  wrong_ngram_index = 0;
201  wrong_ngram[wrong_ngram_index] = curr_unichar_id;
202  if (curr_unichar_id == INVALID_UNICHAR_ID ||
203  curr_unichar_id >= table.size() ||
204  table[curr_unichar_id] == nullptr) {
205  continue; // there is no ambig spec for this unichar id
206  }
207  AmbigSpec_IT spec_it(table[curr_unichar_id]);
208  for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();) {
209  const AmbigSpec *ambig_spec = spec_it.data();
210  wrong_ngram[wrong_ngram_index+1] = INVALID_UNICHAR_ID;
211  int compare = UnicharIdArrayUtils::compare(wrong_ngram,
212  ambig_spec->wrong_ngram);
213  if (stopper_debug_level > 2) {
214  tprintf("candidate ngram: ");
216  tprintf("current ngram from spec: ");
218  tprintf("comparison result: %d\n", compare);
219  }
220  if (compare == 0) {
221  // Record the place where we found an ambiguity.
222  if (fixpt != nullptr) {
223  UNICHAR_ID leftmost_id = ambig_spec->correct_fragments[0];
224  fixpt->push_back(DANGERR_INFO(
225  blob_index, blob_index + num_wrong_blobs, replace,
226  getUnicharset().get_isngram(ambig_spec->correct_ngram_id),
227  leftmost_id));
228  if (stopper_debug_level > 1) {
229  tprintf("fixpt+=(%d %d %d %d %s)\n", blob_index,
230  blob_index + num_wrong_blobs, false,
231  getUnicharset().get_isngram(
232  ambig_spec->correct_ngram_id),
233  getUnicharset().id_to_unichar(leftmost_id));
234  }
235  }
236 
237  if (replace) {
238  if (stopper_debug_level > 2) {
239  tprintf("replace ambiguity with %s : ",
240  getUnicharset().id_to_unichar(
241  ambig_spec->correct_ngram_id));
243  ambig_spec->correct_fragments, getUnicharset());
244  }
245  ReplaceAmbig(i, ambig_spec->wrong_ngram_size,
246  ambig_spec->correct_ngram_id,
247  best_choice, ratings);
248  } else if (i > 0 || ambig_spec->type != CASE_AMBIG) {
249  // We found dang ambig - update ambig_blob_choices.
250  if (stopper_debug_level > 2) {
251  tprintf("found ambiguity: ");
253  ambig_spec->correct_fragments, getUnicharset());
254  }
255  ambigs_found = true;
256  for (int tmp_index = 0; tmp_index <= wrong_ngram_index;
257  ++tmp_index) {
258  // Add a blob choice for the corresponding fragment of the
259  // ambiguity. These fake blob choices are initialized with
260  // negative ratings (which are not possible for real blob
261  // choices), so that dawg_permute_and_select() considers any
262  // word not consisting of only the original letters a better
263  // choice and stops searching for alternatives once such a
264  // choice is found.
265  BLOB_CHOICE_IT bc_it(ambig_blob_choices[i+tmp_index]);
266  bc_it.add_to_end(new BLOB_CHOICE(
267  ambig_spec->correct_fragments[tmp_index], -1.0, 0.0,
268  -1, 0, 1, 0, BCC_AMBIG));
269  }
270  }
271  spec_it.forward();
272  } else if (compare == -1) {
273  if (wrong_ngram_index+1 < ambig_spec->wrong_ngram_size &&
274  ((next_index = wrong_ngram_index+1+i) < best_choice->length())) {
275  // Add the next unichar id to wrong_ngram and keep looking for
276  // more ambigs starting with curr_unichar_id in AMBIG_SPEC_LIST.
277  wrong_ngram[++wrong_ngram_index] =
278  best_choice->unichar_id(next_index);
279  num_wrong_blobs += best_choice->state(next_index);
280  } else {
281  break; // no more matching ambigs in this AMBIG_SPEC_LIST
282  }
283  } else {
284  spec_it.forward();
285  }
286  } // end searching AmbigSpec_LIST
287  } // end searching best_choice
288  } // end searching replace and dangerous ambigs
289 
290  // If any ambiguities were found permute the constructed ambig_blob_choices
291  // to see if an alternative dictionary word can be found.
292  if (ambigs_found) {
293  if (stopper_debug_level > 2) {
294  tprintf("\nResulting ambig_blob_choices:\n");
295  for (i = 0; i < ambig_blob_choices.length(); ++i) {
296  print_ratings_list("", ambig_blob_choices.get(i), getUnicharset());
297  tprintf("\n");
298  }
299  }
300  WERD_CHOICE *alt_word = dawg_permute_and_select(ambig_blob_choices, 0.0);
301  ambigs_found = (alt_word->rating() < 0.0);
302  if (ambigs_found) {
303  if (stopper_debug_level >= 1) {
304  tprintf ("Stopper: Possible ambiguous word = %s\n",
305  alt_word->debug_string().string());
306  }
307  if (fixpt != nullptr) {
308  // Note: Currently character choices combined from fragments can only
309 // be generated by NoDangerousAmbig(). This code should be updated if
310  // the capability to produce classifications combined from character
311  // fragments is added to other functions.
312  int orig_i = 0;
313  for (i = 0; i < alt_word->length(); ++i) {
314  const UNICHARSET &uchset = getUnicharset();
315  bool replacement_is_ngram =
316  uchset.get_isngram(alt_word->unichar_id(i));
317  UNICHAR_ID leftmost_id = alt_word->unichar_id(i);
318  if (replacement_is_ngram) {
319  // we have to extract the leftmost unichar from the ngram.
320  const char *str = uchset.id_to_unichar(leftmost_id);
321  int step = uchset.step(str);
322  if (step) leftmost_id = uchset.unichar_to_id(str, step);
323  }
324  int end_i = orig_i + alt_word->state(i);
325  if (alt_word->state(i) > 1 ||
326  (orig_i + 1 == end_i && replacement_is_ngram)) {
327  // Compute proper blob indices.
328  int blob_start = 0;
329  for (int j = 0; j < orig_i; ++j)
330  blob_start += best_choice->state(j);
331  int blob_end = blob_start;
332  for (int j = orig_i; j < end_i; ++j)
333  blob_end += best_choice->state(j);
334  fixpt->push_back(DANGERR_INFO(blob_start, blob_end, true,
335  replacement_is_ngram, leftmost_id));
336  if (stopper_debug_level > 1) {
337  tprintf("fixpt->dangerous+=(%d %d %d %d %s)\n", orig_i, end_i,
338  true, replacement_is_ngram,
339  uchset.id_to_unichar(leftmost_id));
340  }
341  }
342  orig_i += alt_word->state(i);
343  }
344  }
345  }
346  delete alt_word;
347  }
348  if (output_ambig_words_file_ != nullptr) {
349  fprintf(output_ambig_words_file_, "\n");
350  }
351 
352  ambig_blob_choices.delete_data_pointers();
353  return !ambigs_found;
354 }
355 
357 
359  reject_offset_ = 0.0;
360 }
361 
364 }
365 
366 void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size,
367  UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice,
368  MATRIX *ratings) {
369  int num_blobs_to_replace = 0;
370  int begin_blob_index = 0;
371  int i;
372  // Rating and certainty for the new BLOB_CHOICE are derived from the
373  // replaced choices.
374  float new_rating = 0.0f;
375  float new_certainty = 0.0f;
376  BLOB_CHOICE* old_choice = nullptr;
377  for (i = 0; i < wrong_ngram_begin_index + wrong_ngram_size; ++i) {
378  if (i >= wrong_ngram_begin_index) {
379  int num_blobs = werd_choice->state(i);
380  int col = begin_blob_index + num_blobs_to_replace;
381  int row = col + num_blobs - 1;
382  BLOB_CHOICE_LIST* choices = ratings->get(col, row);
383  ASSERT_HOST(choices != nullptr);
384  old_choice = FindMatchingChoice(werd_choice->unichar_id(i), choices);
385  ASSERT_HOST(old_choice != nullptr);
386  new_rating += old_choice->rating();
387  new_certainty += old_choice->certainty();
388  num_blobs_to_replace += num_blobs;
389  } else {
390  begin_blob_index += werd_choice->state(i);
391  }
392  }
393  new_certainty /= wrong_ngram_size;
394  // If there is no entry in the ratings matrix, add it.
395  MATRIX_COORD coord(begin_blob_index,
396  begin_blob_index + num_blobs_to_replace - 1);
397  if (!coord.Valid(*ratings)) {
398  ratings->IncreaseBandSize(coord.row - coord.col + 1);
399  }
400  if (ratings->get(coord.col, coord.row) == nullptr)
401  ratings->put(coord.col, coord.row, new BLOB_CHOICE_LIST);
402  BLOB_CHOICE_LIST* new_choices = ratings->get(coord.col, coord.row);
403  BLOB_CHOICE* choice = FindMatchingChoice(correct_ngram_id, new_choices);
404  if (choice != nullptr) {
405  // Already there. Upgrade if new rating better.
406  if (new_rating < choice->rating())
407  choice->set_rating(new_rating);
408  if (new_certainty < choice->certainty())
409  choice->set_certainty(new_certainty);
410  // DO NOT SORT!! It will mess up the iterator in LanguageModel::UpdateState.
411  } else {
412  // Need a new choice with the correct_ngram_id.
413  choice = new BLOB_CHOICE(*old_choice);
414  choice->set_unichar_id(correct_ngram_id);
415  choice->set_rating(new_rating);
416  choice->set_certainty(new_certainty);
417  choice->set_classifier(BCC_AMBIG);
418  choice->set_matrix_cell(coord.col, coord.row);
419  BLOB_CHOICE_IT it (new_choices);
420  it.add_to_end(choice);
421  }
422  // Remove current unichar from werd_choice. On the last iteration
423  // set the correct replacement unichar instead of removing a unichar.
424  for (int replaced_count = 0; replaced_count < wrong_ngram_size;
425  ++replaced_count) {
426  if (replaced_count + 1 == wrong_ngram_size) {
427  werd_choice->set_blob_choice(wrong_ngram_begin_index,
428  num_blobs_to_replace, choice);
429  } else {
430  werd_choice->remove_unichar_id(wrong_ngram_begin_index + 1);
431  }
432  }
433  if (stopper_debug_level >= 1) {
434  werd_choice->print("ReplaceAmbig() ");
435  tprintf("Modified blob_choices: ");
436  print_ratings_list("\n", new_choices, getUnicharset());
437  }
438 }
439 
440 int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const {
441  int shortest = INT32_MAX;
442  int curr_len = 0;
443  for (int w = 0; w < WordChoice.length(); ++w) {
444  if (WordChoice.unicharset()->get_isalpha(WordChoice.unichar_id(w))) {
445  curr_len++;
446  } else if (curr_len > 0) {
447  if (curr_len < shortest) shortest = curr_len;
448  curr_len = 0;
449  }
450  }
451  if (curr_len > 0 && curr_len < shortest) {
452  shortest = curr_len;
453  } else if (shortest == INT32_MAX) {
454  shortest = 0;
455  }
456  return shortest;
457 }
458 
460  float Certainty;
461  float WorstCertainty = FLT_MAX;
462  float CertaintyThreshold;
463  double TotalCertainty;
464  double TotalCertaintySquared;
465  double Variance;
466  float Mean, StdDev;
467  int word_length = word.length();
468 
469  if (word_length < 3)
470  return true;
471 
472  TotalCertainty = TotalCertaintySquared = 0.0;
473  for (int i = 0; i < word_length; ++i) {
474  Certainty = word.certainty(i);
475  TotalCertainty += Certainty;
476  TotalCertaintySquared += static_cast<double>(Certainty) * Certainty;
477  if (Certainty < WorstCertainty)
478  WorstCertainty = Certainty;
479  }
480 
481  // Subtract off worst certainty from statistics.
482  word_length--;
483  TotalCertainty -= WorstCertainty;
484  TotalCertaintySquared -= static_cast<double>(WorstCertainty) * WorstCertainty;
485 
486  Mean = TotalCertainty / word_length;
487  Variance = ((word_length * TotalCertaintySquared -
488  TotalCertainty * TotalCertainty) /
489  (word_length * (word_length - 1)));
490  if (Variance < 0.0)
491  Variance = 0.0;
492  StdDev = sqrt(Variance);
493 
494  CertaintyThreshold = Mean - stopper_allowable_character_badness * StdDev;
495  if (CertaintyThreshold > stopper_nondict_certainty_base)
496  CertaintyThreshold = stopper_nondict_certainty_base;
497 
498  if (word.certainty() < CertaintyThreshold) {
499  if (stopper_debug_level >= 1)
500  tprintf("Stopper: Non-uniform certainty = %4.1f"
501  " (m=%4.1f, s=%4.1f, t=%4.1f)\n",
502  word.certainty(), Mean, StdDev, CertaintyThreshold);
503  return false;
504  } else {
505  return true;
506  }
507 }
508 
509 } // namespace tesseract
void set_blob_choice(int index, int blob_count, const BLOB_CHOICE *blob_choice)
Definition: ratngs.cpp:312
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:362
#define MAX_AMBIG_SIZE
Definition: ambigs.h:30
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:787
float rating() const
Definition: ratngs.h:80
void set_rating(float newrat)
Definition: ratngs.h:148
int step(const char *str) const
Definition: unicharset.cpp:233
UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE+1]
Definition: ambigs.h:131
bool AcceptableResult(WERD_RES *word) const
Definition: stopper.cpp:100
const STRING & unichar_string() const
Definition: ratngs.h:541
float min_x_height() const
Definition: ratngs.h:336
WERD_CHOICE * dawg_permute_and_select(const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit)
Definition: permdawg.cpp:168
int length() const
Definition: ratngs.h:303
UNICHAR_ID correct_fragments[MAX_AMBIG_SIZE+1]
Definition: ambigs.h:132
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:103
WERD_CHOICE_LIST best_choices
Definition: pageres.h:242
UNICHAR_ID correct_ngram_id
Definition: ambigs.h:133
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
void set_matrix_cell(int col, int row)
Definition: ratngs.h:157
const UnicharAmbigsVector & replace_ambigs() const
Definition: ambigs.h:153
bool stopper_no_acceptable_choices
Definition: dict.h:631
static int compare(const UNICHAR_ID *ptr1, const UNICHAR_ID *ptr2)
Definition: ambigs.h:62
T get(ICOORD pos) const
Definition: matrix.h:231
double stopper_allowable_character_badness
Definition: dict.h:627
int stopper_debug_level
Definition: dict.h:628
XHeightConsistencyEnum
Definition: dict.h:74
void print() const
Definition: ratngs.h:580
int stopper_smallword_size
Definition: dict.h:623
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:180
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:491
float certainty() const
Definition: ratngs.h:83
void set_classifier(BlobChoiceClassifier classifier)
Definition: ratngs.h:167
static void print(const UNICHAR_ID array[], const UNICHARSET &unicharset)
Definition: ambigs.h:98
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:833
double stopper_nondict_certainty_base
Definition: dict.h:619
void EndDangerousAmbigs()
Definition: stopper.cpp:356
float rating() const
Definition: ratngs.h:327
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
void remove_unichar_id(int index)
Definition: ratngs.h:484
void put(ICOORD pos, const T &thing)
Definition: matrix.h:223
const char * string() const
Definition: strngs.cpp:194
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:49
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:358
bool AcceptableChoice(const WERD_CHOICE &best_choice, XHeightConsistencyEnum xheight_consistency)
Returns true if the given best_choice is good enough to stop.
Definition: stopper.cpp:40
int push_back(T object)
Definition: matrix.h:578
bool dangerous_ambig_found() const
Definition: ratngs.h:363
float certainty() const
Definition: ratngs.h:330
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
WERD_CHOICE * best_choice
Definition: pageres.h:234
#define ASSERT_HOST(x)
Definition: errcode.h:88
void ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, MATRIX *ratings)
Definition: stopper.cpp:366
const UNICHARSET & getUnicharset() const
Definition: dict.h:97
int case_ok(const WERD_CHOICE &word) const
Check a string to see if it matches a set of lexical rules.
Definition: context.cpp:46
bool Valid(const MATRIX &m) const
Definition: matrix.h:618
bool NoDangerousAmbig(WERD_CHOICE *BestChoice, DANGERR *fixpt, bool fix_replaceable, MATRIX *ratings)
Definition: stopper.cpp:140
int UNICHAR_ID
Definition: unichar.h:34
int UniformCertainties(const WERD_CHOICE &word)
Definition: stopper.cpp:459
AmbigType type
Definition: ambigs.h:134
double stopper_phase2_certainty_rejection_offset
Definition: dict.h:621
const UnicharAmbigsVector & dang_ambigs() const
Definition: ambigs.h:152
float max_x_height() const
Definition: ratngs.h:339
const STRING debug_string() const
Definition: ratngs.h:505
double stopper_certainty_per_char
Definition: dict.h:625
int size() const
Definition: genericvector.h:70
bool get_isngram(UNICHAR_ID unichar_id) const
Definition: unicharset.h:526
const UNICHARSET * unicharset() const
Definition: ratngs.h:300
int state(int index) const
Definition: ratngs.h:319
uint8_t permuter() const
Definition: ratngs.h:346
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
Definition: dict.h:465
int LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const
Returns the length of the shortest alpha run in WordChoice.
Definition: stopper.cpp:440
void set_certainty(float newrat)
Definition: ratngs.h:151
float Mean(PROTOTYPE *Proto, uint16_t Dimension)
Definition: cluster.cpp:602
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:145