// tesseract 3.04.01
00001 /********************************************************************** 00002 * File: ratngs.h (Formerly ratings.h) 00003 * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes. 00004 * Author: Ray Smith 00005 * Created: Thu Apr 23 11:40:38 BST 1992 00006 * 00007 * (C) Copyright 1992, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #ifndef RATNGS_H 00021 #define RATNGS_H 00022 00023 #include <assert.h> 00024 00025 #include "clst.h" 00026 #include "elst.h" 00027 #include "fontinfo.h" 00028 #include "genericvector.h" 00029 #include "matrix.h" 00030 #include "unichar.h" 00031 #include "unicharset.h" 00032 #include "werd.h" 00033 00034 class MATRIX; 00035 struct TBLOB; 00036 struct TWERD; 00037 00038 // Enum to describe the source of a BLOB_CHOICE to make it possible to determine 00039 // whether a blob has been classified by inspecting the BLOB_CHOICEs. 00040 enum BlobChoiceClassifier { 00041 BCC_STATIC_CLASSIFIER, // From the char_norm classifier. 00042 BCC_ADAPTED_CLASSIFIER, // From the adaptive classifier. 00043 BCC_SPECKLE_CLASSIFIER, // Backup for failed classification. 00044 BCC_AMBIG, // Generated by ambiguity detection. 00045 BCC_FAKE, // From some other process. 
00046 }; 00047 00048 class BLOB_CHOICE: public ELIST_LINK 00049 { 00050 public: 00051 BLOB_CHOICE() { 00052 unichar_id_ = UNICHAR_SPACE; 00053 fontinfo_id_ = -1; 00054 fontinfo_id2_ = -1; 00055 rating_ = 10.0; 00056 certainty_ = -1.0; 00057 script_id_ = -1; 00058 xgap_before_ = 0; 00059 xgap_after_ = 0; 00060 min_xheight_ = 0.0f; 00061 max_xheight_ = 0.0f; 00062 yshift_ = 0.0f; 00063 classifier_ = BCC_FAKE; 00064 } 00065 BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id 00066 float src_rating, // rating 00067 float src_cert, // certainty 00068 int script_id, // script 00069 float min_xheight, // min xheight in image pixel units 00070 float max_xheight, // max xheight allowed by this char 00071 float yshift, // the larger of y shift (top or bottom) 00072 BlobChoiceClassifier c); // adapted match or other 00073 BLOB_CHOICE(const BLOB_CHOICE &other); 00074 ~BLOB_CHOICE() {} 00075 00076 UNICHAR_ID unichar_id() const { 00077 return unichar_id_; 00078 } 00079 float rating() const { 00080 return rating_; 00081 } 00082 float certainty() const { 00083 return certainty_; 00084 } 00085 inT16 fontinfo_id() const { 00086 return fontinfo_id_; 00087 } 00088 inT16 fontinfo_id2() const { 00089 return fontinfo_id2_; 00090 } 00091 const GenericVector<tesseract::ScoredFont>& fonts() const { 00092 return fonts_; 00093 } 00094 void set_fonts(const GenericVector<tesseract::ScoredFont>& fonts) { 00095 fonts_ = fonts; 00096 int score1 = 0, score2 = 0; 00097 fontinfo_id_ = -1; 00098 fontinfo_id2_ = -1; 00099 for (int f = 0; f < fonts_.size(); ++f) { 00100 if (fonts_[f].score > score1) { 00101 score2 = score1; 00102 fontinfo_id2_ = fontinfo_id_; 00103 score1 = fonts_[f].score; 00104 fontinfo_id_ = fonts_[f].fontinfo_id; 00105 } else if (fonts_[f].score > score2) { 00106 score2 = fonts_[f].score; 00107 fontinfo_id2_ = fonts_[f].fontinfo_id; 00108 } 00109 } 00110 } 00111 int script_id() const { 00112 return script_id_; 00113 } 00114 const MATRIX_COORD& matrix_cell() { 00115 return 
matrix_cell_; 00116 } 00117 inT16 xgap_before() const { 00118 return xgap_before_; 00119 } 00120 inT16 xgap_after() const { 00121 return xgap_after_; 00122 } 00123 float min_xheight() const { 00124 return min_xheight_; 00125 } 00126 float max_xheight() const { 00127 return max_xheight_; 00128 } 00129 float yshift() const { 00130 return yshift_; 00131 } 00132 BlobChoiceClassifier classifier() const { 00133 return classifier_; 00134 } 00135 bool IsAdapted() const { 00136 return classifier_ == BCC_ADAPTED_CLASSIFIER; 00137 } 00138 bool IsClassified() const { 00139 return classifier_ == BCC_STATIC_CLASSIFIER || 00140 classifier_ == BCC_ADAPTED_CLASSIFIER || 00141 classifier_ == BCC_SPECKLE_CLASSIFIER; 00142 } 00143 00144 void set_unichar_id(UNICHAR_ID newunichar_id) { 00145 unichar_id_ = newunichar_id; 00146 } 00147 void set_rating(float newrat) { 00148 rating_ = newrat; 00149 } 00150 void set_certainty(float newrat) { 00151 certainty_ = newrat; 00152 } 00153 void set_script(int newscript_id) { 00154 script_id_ = newscript_id; 00155 } 00156 void set_matrix_cell(int col, int row) { 00157 matrix_cell_.col = col; 00158 matrix_cell_.row = row; 00159 } 00160 void set_xgap_before(inT16 gap) { 00161 xgap_before_ = gap; 00162 } 00163 void set_xgap_after(inT16 gap) { 00164 xgap_after_ = gap; 00165 } 00166 void set_classifier(BlobChoiceClassifier classifier) { 00167 classifier_ = classifier; 00168 } 00169 static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) { 00170 BLOB_CHOICE* choice = new BLOB_CHOICE; 00171 *choice = *src; 00172 return choice; 00173 } 00174 // Returns true if *this and other agree on the baseline and x-height 00175 // to within some tolerance based on a given estimate of the x-height. 
00176 bool PosAndSizeAgree(const BLOB_CHOICE& other, float x_height, 00177 bool debug) const; 00178 00179 void print(const UNICHARSET *unicharset) const { 00180 tprintf("r%.2f c%.2f x[%g,%g]: %d %s", 00181 rating_, certainty_, 00182 min_xheight_, max_xheight_, unichar_id_, 00183 (unicharset == NULL) ? "" : 00184 unicharset->debug_str(unichar_id_).string()); 00185 } 00186 void print_full() const { 00187 print(NULL); 00188 tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n", 00189 script_id_, fontinfo_id_, fontinfo_id2_, yshift_, classifier_); 00190 } 00191 // Sort function for sorting BLOB_CHOICEs in increasing order of rating. 00192 static int SortByRating(const void *p1, const void *p2) { 00193 const BLOB_CHOICE *bc1 = 00194 *reinterpret_cast<const BLOB_CHOICE * const *>(p1); 00195 const BLOB_CHOICE *bc2 = 00196 *reinterpret_cast<const BLOB_CHOICE * const *>(p2); 00197 return (bc1->rating_ < bc2->rating_) ? -1 : 1; 00198 } 00199 00200 private: 00201 UNICHAR_ID unichar_id_; // unichar id 00202 // Fonts and scores. Allowed to be empty. 00203 GenericVector<tesseract::ScoredFont> fonts_; 00204 inT16 fontinfo_id_; // char font information 00205 inT16 fontinfo_id2_; // 2nd choice font information 00206 // Rating is the classifier distance weighted by the length of the outline 00207 // in the blob. In terms of probability, classifier distance is -klog p such 00208 // that the resulting distance is in the range [0, 1] and then 00209 // rating = w (-k log p) where w is the weight for the length of the outline. 00210 // Sums of ratings may be compared meaningfully for words of different 00211 // segmentation. 00212 float rating_; // size related 00213 // Certainty is a number in [-20, 0] indicating the classifier certainty 00214 // of the choice. In terms of probability, certainty = 20 (k log p) where 00215 // k is defined as above to normalize -klog p to the range [0, 1]. 
00216 float certainty_; // absolute 00217 int script_id_; 00218 // Holds the position of this choice in the ratings matrix. 00219 // Used to location position in the matrix during path backtracking. 00220 MATRIX_COORD matrix_cell_; 00221 inT16 xgap_before_; 00222 inT16 xgap_after_; 00223 // X-height range (in image pixels) that this classification supports. 00224 float min_xheight_; 00225 float max_xheight_; 00226 // yshift_ - The vertical distance (in image pixels) the character is 00227 // shifted (up or down) from an acceptable y position. 00228 float yshift_; 00229 BlobChoiceClassifier classifier_; // What generated *this. 00230 }; 00231 00232 // Make BLOB_CHOICE listable. 00233 ELISTIZEH(BLOB_CHOICE) 00234 00235 // Return the BLOB_CHOICE in bc_list matching a given unichar_id, 00236 // or NULL if there is no match. 00237 BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list); 00238 00239 // Permuter codes used in WERD_CHOICEs. 00240 enum PermuterType { 00241 NO_PERM, // 0 00242 PUNC_PERM, // 1 00243 TOP_CHOICE_PERM, // 2 00244 LOWER_CASE_PERM, // 3 00245 UPPER_CASE_PERM, // 4 00246 NGRAM_PERM, // 5 00247 NUMBER_PERM, // 6 00248 USER_PATTERN_PERM, // 7 00249 SYSTEM_DAWG_PERM, // 8 00250 DOC_DAWG_PERM, // 9 00251 USER_DAWG_PERM, // 10 00252 FREQ_DAWG_PERM, // 11 00253 COMPOUND_PERM, // 12 00254 00255 NUM_PERMUTER_TYPES 00256 }; 00257 00258 namespace tesseract { 00259 // ScriptPos tells whether a character is subscript, superscript or normal. 00260 enum ScriptPos { 00261 SP_NORMAL, 00262 SP_SUBSCRIPT, 00263 SP_SUPERSCRIPT, 00264 SP_DROPCAP 00265 }; 00266 00267 const char *ScriptPosToString(tesseract::ScriptPos script_pos); 00268 00269 } // namespace tesseract. 
00270 00271 class WERD_CHOICE : public ELIST_LINK { 00272 public: 00273 static const float kBadRating; 00274 static const char *permuter_name(uinT8 permuter); 00275 00276 WERD_CHOICE(const UNICHARSET *unicharset) 00277 : unicharset_(unicharset) { this->init(8); } 00278 WERD_CHOICE(const UNICHARSET *unicharset, int reserved) 00279 : unicharset_(unicharset) { this->init(reserved); } 00280 WERD_CHOICE(const char *src_string, 00281 const char *src_lengths, 00282 float src_rating, 00283 float src_certainty, 00284 uinT8 src_permuter, 00285 const UNICHARSET &unicharset) 00286 : unicharset_(&unicharset) { 00287 this->init(src_string, src_lengths, src_rating, 00288 src_certainty, src_permuter); 00289 } 00290 WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset); 00291 WERD_CHOICE(const WERD_CHOICE &word) : ELIST_LINK(word), unicharset_(word.unicharset_) { 00292 this->init(word.length()); 00293 this->operator=(word); 00294 } 00295 ~WERD_CHOICE(); 00296 00297 const UNICHARSET *unicharset() const { 00298 return unicharset_; 00299 } 00300 inline int length() const { 00301 return length_; 00302 } 00303 float adjust_factor() const { 00304 return adjust_factor_; 00305 } 00306 void set_adjust_factor(float factor) { 00307 adjust_factor_ = factor; 00308 } 00309 inline const UNICHAR_ID *unichar_ids() const { 00310 return unichar_ids_; 00311 } 00312 inline UNICHAR_ID unichar_id(int index) const { 00313 assert(index < length_); 00314 return unichar_ids_[index]; 00315 } 00316 inline int state(int index) const { 00317 return state_[index]; 00318 } 00319 tesseract::ScriptPos BlobPosition(int index) const { 00320 if (index < 0 || index >= length_) 00321 return tesseract::SP_NORMAL; 00322 return script_pos_[index]; 00323 } 00324 inline float rating() const { 00325 return rating_; 00326 } 00327 inline float certainty() const { 00328 return certainty_; 00329 } 00330 inline float certainty(int index) const { 00331 return certainties_[index]; 00332 } 00333 inline float min_x_height() 
const { 00334 return min_x_height_; 00335 } 00336 inline float max_x_height() const { 00337 return max_x_height_; 00338 } 00339 inline void set_x_heights(float min_height, float max_height) { 00340 min_x_height_ = min_height; 00341 max_x_height_ = max_height; 00342 } 00343 inline uinT8 permuter() const { 00344 return permuter_; 00345 } 00346 const char *permuter_name() const; 00347 // Returns the BLOB_CHOICE_LIST corresponding to the given index in the word, 00348 // taken from the appropriate cell in the ratings MATRIX. 00349 // Borrowed pointer, so do not delete. 00350 BLOB_CHOICE_LIST* blob_choices(int index, MATRIX* ratings) const; 00351 00352 // Returns the MATRIX_COORD corresponding to the location in the ratings 00353 // MATRIX for the given index into the word. 00354 MATRIX_COORD MatrixCoord(int index) const; 00355 00356 inline void set_unichar_id(UNICHAR_ID unichar_id, int index) { 00357 assert(index < length_); 00358 unichar_ids_[index] = unichar_id; 00359 } 00360 bool dangerous_ambig_found() const { 00361 return dangerous_ambig_found_; 00362 } 00363 void set_dangerous_ambig_found_(bool value) { 00364 dangerous_ambig_found_ = value; 00365 } 00366 inline void set_rating(float new_val) { 00367 rating_ = new_val; 00368 } 00369 inline void set_certainty(float new_val) { 00370 certainty_ = new_val; 00371 } 00372 inline void set_permuter(uinT8 perm) { 00373 permuter_ = perm; 00374 } 00375 // Note: this function should only be used if all the fields 00376 // are populated manually with set_* functions (rather than 00377 // (copy)constructors and append_* functions). 
00378 inline void set_length(int len) { 00379 ASSERT_HOST(reserved_ >= len); 00380 length_ = len; 00381 } 00382 00384 inline void double_the_size() { 00385 if (reserved_ > 0) { 00386 unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy( 00387 reserved_, unichar_ids_); 00388 script_pos_ = GenericVector<tesseract::ScriptPos>::double_the_size_memcpy( 00389 reserved_, script_pos_); 00390 state_ = GenericVector<int>::double_the_size_memcpy( 00391 reserved_, state_); 00392 certainties_ = GenericVector<float>::double_the_size_memcpy( 00393 reserved_, certainties_); 00394 reserved_ *= 2; 00395 } else { 00396 unichar_ids_ = new UNICHAR_ID[1]; 00397 script_pos_ = new tesseract::ScriptPos[1]; 00398 state_ = new int[1]; 00399 certainties_ = new float[1]; 00400 reserved_ = 1; 00401 } 00402 } 00403 00406 inline void init(int reserved) { 00407 reserved_ = reserved; 00408 if (reserved > 0) { 00409 unichar_ids_ = new UNICHAR_ID[reserved]; 00410 script_pos_ = new tesseract::ScriptPos[reserved]; 00411 state_ = new int[reserved]; 00412 certainties_ = new float[reserved]; 00413 } else { 00414 unichar_ids_ = NULL; 00415 script_pos_ = NULL; 00416 state_ = NULL; 00417 certainties_ = NULL; 00418 } 00419 length_ = 0; 00420 adjust_factor_ = 1.0f; 00421 rating_ = 0.0; 00422 certainty_ = MAX_FLOAT32; 00423 min_x_height_ = 0.0f; 00424 max_x_height_ = MAX_FLOAT32; 00425 permuter_ = NO_PERM; 00426 unichars_in_script_order_ = false; // Tesseract is strict left-to-right. 
00427 dangerous_ambig_found_ = false; 00428 } 00429 00435 void init(const char *src_string, const char *src_lengths, 00436 float src_rating, float src_certainty, 00437 uinT8 src_permuter); 00438 00440 inline void make_bad() { 00441 length_ = 0; 00442 rating_ = kBadRating; 00443 certainty_ = -MAX_FLOAT32; 00444 } 00445 00449 inline void append_unichar_id_space_allocated( 00450 UNICHAR_ID unichar_id, int blob_count, 00451 float rating, float certainty) { 00452 assert(reserved_ > length_); 00453 length_++; 00454 this->set_unichar_id(unichar_id, blob_count, 00455 rating, certainty, length_-1); 00456 } 00457 00458 void append_unichar_id(UNICHAR_ID unichar_id, int blob_count, 00459 float rating, float certainty); 00460 00461 inline void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, 00462 float rating, float certainty, int index) { 00463 assert(index < length_); 00464 unichar_ids_[index] = unichar_id; 00465 state_[index] = blob_count; 00466 certainties_[index] = certainty; 00467 script_pos_[index] = tesseract::SP_NORMAL; 00468 rating_ += rating; 00469 if (certainty < certainty_) { 00470 certainty_ = certainty; 00471 } 00472 } 00473 // Sets the entries for the given index from the BLOB_CHOICE, assuming 00474 // unit fragment lengths, but setting the state for this index to blob_count. 00475 void set_blob_choice(int index, int blob_count, 00476 const BLOB_CHOICE* blob_choice); 00477 00478 bool contains_unichar_id(UNICHAR_ID unichar_id) const; 00479 void remove_unichar_ids(int index, int num); 00480 inline void remove_last_unichar_id() { --length_; } 00481 inline void remove_unichar_id(int index) { 00482 this->remove_unichar_ids(index, 1); 00483 } 00484 bool has_rtl_unichar_id() const; 00485 void reverse_and_mirror_unichar_ids(); 00486 00487 // Returns the half-open interval of unichar_id indices [start, end) which 00488 // enclose the core portion of this word -- the part after stripping 00489 // punctuation from the left and right. 
00490 void punct_stripped(int *start_core, int *end_core) const; 00491 00492 // Returns the indices [start, end) containing the core of the word, stripped 00493 // of any superscript digits on either side. (i.e., the non-footnote part 00494 // of the word). There is no guarantee that the output range is non-empty. 00495 void GetNonSuperscriptSpan(int *start, int *end) const; 00496 00497 // Return a copy of this WERD_CHOICE with the choices [start, end). 00498 // The result is useful only for checking against a dictionary. 00499 WERD_CHOICE shallow_copy(int start, int end) const; 00500 00501 void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const; 00502 const STRING debug_string() const { 00503 STRING word_str; 00504 for (int i = 0; i < length_; ++i) { 00505 word_str += unicharset_->debug_str(unichar_ids_[i]); 00506 word_str += " "; 00507 } 00508 return word_str; 00509 } 00510 00511 // Call this to override the default (strict left to right graphemes) 00512 // with the fact that some engine produces a "reading order" set of 00513 // Graphemes for each word. 00514 bool set_unichars_in_script_order(bool in_script_order) { 00515 return unichars_in_script_order_ = in_script_order; 00516 } 00517 00518 bool unichars_in_script_order() const { 00519 return unichars_in_script_order_; 00520 } 00521 00522 // Returns a UTF-8 string equivalent to the current choice 00523 // of UNICHAR IDs. 00524 const STRING &unichar_string() const { 00525 this->string_and_lengths(&unichar_string_, &unichar_lengths_); 00526 return unichar_string_; 00527 } 00528 00529 // Returns the lengths, one byte each, representing the number of bytes 00530 // required in the unichar_string for each UNICHAR_ID. 
00531 const STRING &unichar_lengths() const { 00532 this->string_and_lengths(&unichar_string_, &unichar_lengths_); 00533 return unichar_lengths_; 00534 } 00535 00536 // Sets up the script_pos_ member using the blobs_list to get the bln 00537 // bounding boxes, *this to get the unichars, and this->unicharset 00538 // to get the target positions. If small_caps is true, sub/super are not 00539 // considered, but dropcaps are. 00540 // NOTE: blobs_list should be the chopped_word blobs. (Fully segemented.) 00541 void SetScriptPositions(bool small_caps, TWERD* word); 00542 // Sets the script_pos_ member from some source positions with a given length. 00543 void SetScriptPositions(const tesseract::ScriptPos* positions, int length); 00544 // Sets all the script_pos_ positions to the given position. 00545 void SetAllScriptPositions(tesseract::ScriptPos position); 00546 00547 static tesseract::ScriptPos ScriptPositionOf(bool print_debug, 00548 const UNICHARSET& unicharset, 00549 const TBOX& blob_box, 00550 UNICHAR_ID unichar_id); 00551 00552 // Returns the "dominant" script ID for the word. By "dominant", the script 00553 // must account for at least half the characters. Otherwise, it returns 0. 00554 // Note that for Japanese, Hiragana and Katakana are simply treated as Han. 00555 int GetTopScriptID() const; 00556 00557 // Fixes the state_ for a chop at the given blob_posiiton. 00558 void UpdateStateForSplit(int blob_position); 00559 00560 // Returns the sum of all the state elements, being the total number of blobs. 00561 int TotalOfStates() const; 00562 00563 void print() const { this->print(""); } 00564 void print(const char *msg) const; 00565 // Prints the segmentation state with an introductory message. 00566 void print_state(const char *msg) const; 00567 00568 // Displays the segmentation state of *this (if not the same as the last 00569 // one displayed) and waits for a click in the window. 
00570 void DisplaySegmentation(TWERD* word); 00571 00572 WERD_CHOICE& operator+= ( // concatanate 00573 const WERD_CHOICE & second);// second on first 00574 00575 WERD_CHOICE& operator= (const WERD_CHOICE& source); 00576 00577 private: 00578 const UNICHARSET *unicharset_; 00579 // TODO(rays) Perhaps replace the multiple arrays with an array of structs? 00580 // unichar_ids_ is an array of classifier "results" that make up a word. 00581 // For each unichar_ids_[i], script_pos_[i] has the sub/super/normal position 00582 // of each unichar_id. 00583 // state_[i] indicates the number of blobs in WERD_RES::chopped_word that 00584 // were put together to make the classification results in the ith position 00585 // in unichar_ids_, and certainties_[i] is the certainty of the choice that 00586 // was used in this word. 00587 // == Change from before == 00588 // Previously there was fragment_lengths_ that allowed a word to be 00589 // artificially composed of multiple fragment results. Since the new 00590 // segmentation search doesn't do fragments, treatment of fragments has 00591 // been moved to a lower level, augmenting the ratings matrix with the 00592 // combined fragments, and allowing the language-model/segmentation-search 00593 // to deal with only the combined unichar_ids. 00594 UNICHAR_ID *unichar_ids_; // unichar ids that represent the text of the word 00595 tesseract::ScriptPos* script_pos_; // Normal/Sub/Superscript of each unichar. 00596 int* state_; // Number of blobs in each unichar. 00597 float* certainties_; // Certainty of each unichar. 00598 int reserved_; // size of the above arrays 00599 int length_; // word length 00600 // Factor that was used to adjust the rating. 00601 float adjust_factor_; 00602 // Rating is the sum of the ratings of the individual blobs in the word. 00603 float rating_; // size related 00604 // certainty is the min (worst) certainty of the individual blobs in the word. 
00605 float certainty_; // absolute 00606 // xheight computed from the result, or 0 if inconsistent. 00607 float min_x_height_; 00608 float max_x_height_; 00609 uinT8 permuter_; // permuter code 00610 00611 // Normally, the ratings_ matrix represents the recognition results in order 00612 // from left-to-right. However, some engines (say Cube) may return 00613 // recognition results in the order of the script's major reading direction 00614 // (for Arabic, that is right-to-left). 00615 bool unichars_in_script_order_; 00616 // True if NoDangerousAmbig found an ambiguity. 00617 bool dangerous_ambig_found_; 00618 00619 // The following variables are populated and passed by reference any 00620 // time unichar_string() or unichar_lengths() are called. 00621 mutable STRING unichar_string_; 00622 mutable STRING unichar_lengths_; 00623 }; 00624 00625 // Make WERD_CHOICE listable. 00626 ELISTIZEH(WERD_CHOICE) 00627 typedef GenericVector<BLOB_CHOICE_LIST *> BLOB_CHOICE_LIST_VECTOR; 00628 00629 // Utilities for comparing WERD_CHOICEs 00630 00631 bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, 00632 const WERD_CHOICE &word2); 00633 00634 // Utilities for debug printing. 00635 void print_ratings_list( 00636 const char *msg, // intro message 00637 BLOB_CHOICE_LIST *ratings, // list of results 00638 const UNICHARSET ¤t_unicharset // unicharset that can be used 00639 // for id-to-unichar conversion 00640 ); 00641 00642 #endif