tesseract 3.04.01

ccstruct/ratngs.h

Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        ratngs.h  (Formerly ratings.h)
00003  * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes.
00004  * Author:      Ray Smith
00005  * Created:     Thu Apr 23 11:40:38 BST 1992
00006  *
00007  * (C) Copyright 1992, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #ifndef           RATNGS_H
00021 #define           RATNGS_H
00022 
00023 #include <assert.h>
00024 
00025 #include "clst.h"
00026 #include "elst.h"
00027 #include "fontinfo.h"
00028 #include "genericvector.h"
00029 #include "matrix.h"
00030 #include "unichar.h"
00031 #include "unicharset.h"
00032 #include "werd.h"
00033 
// Forward declarations. The listing line numbers that prefixed these lines
// were not part of the source and have been dropped.
class MATRIX;
struct TBLOB;
struct TWERD;
00037 
// Enum to describe the source of a BLOB_CHOICE to make it possible to determine
// whether a blob has been classified by inspecting the BLOB_CHOICEs.
// Enumerators take the default values 0..4 in declaration order.
enum BlobChoiceClassifier {
  BCC_STATIC_CLASSIFIER,   // From the char_norm classifier.
  BCC_ADAPTED_CLASSIFIER,  // From the adaptive classifier.
  BCC_SPECKLE_CLASSIFIER,  // Backup for failed classification.
  BCC_AMBIG,               // Generated by ambiguity detection.
  BCC_FAKE                 // From some other process.
};
00047 
00048 class BLOB_CHOICE: public ELIST_LINK
00049 {
00050   public:
00051     BLOB_CHOICE() {
00052       unichar_id_ = UNICHAR_SPACE;
00053       fontinfo_id_ = -1;
00054       fontinfo_id2_ = -1;
00055       rating_ = 10.0;
00056       certainty_ = -1.0;
00057       script_id_ = -1;
00058       xgap_before_ = 0;
00059       xgap_after_ = 0;
00060       min_xheight_ = 0.0f;
00061       max_xheight_ = 0.0f;
00062       yshift_ = 0.0f;
00063       classifier_ = BCC_FAKE;
00064     }
00065     BLOB_CHOICE(UNICHAR_ID src_unichar_id,  // character id
00066                 float src_rating,          // rating
00067                 float src_cert,            // certainty
00068                 int script_id,             // script
00069                 float min_xheight,         // min xheight in image pixel units
00070                 float max_xheight,         // max xheight allowed by this char
00071                 float yshift,           // the larger of y shift (top or bottom)
00072                 BlobChoiceClassifier c);   // adapted match or other
00073     BLOB_CHOICE(const BLOB_CHOICE &other);
00074     ~BLOB_CHOICE() {}
00075 
00076     UNICHAR_ID unichar_id() const {
00077       return unichar_id_;
00078     }
00079     float rating() const {
00080       return rating_;
00081     }
00082     float certainty() const {
00083       return certainty_;
00084     }
00085     inT16 fontinfo_id() const {
00086       return fontinfo_id_;
00087     }
00088     inT16 fontinfo_id2() const {
00089       return fontinfo_id2_;
00090     }
00091     const GenericVector<tesseract::ScoredFont>& fonts() const {
00092       return fonts_;
00093     }
00094     void set_fonts(const GenericVector<tesseract::ScoredFont>& fonts) {
00095       fonts_ = fonts;
00096       int score1 = 0, score2 = 0;
00097       fontinfo_id_ = -1;
00098       fontinfo_id2_ = -1;
00099       for (int f = 0; f < fonts_.size(); ++f) {
00100         if (fonts_[f].score > score1) {
00101           score2 = score1;
00102           fontinfo_id2_ = fontinfo_id_;
00103           score1 = fonts_[f].score;
00104           fontinfo_id_ = fonts_[f].fontinfo_id;
00105         } else if (fonts_[f].score > score2) {
00106           score2 = fonts_[f].score;
00107           fontinfo_id2_ = fonts_[f].fontinfo_id;
00108         }
00109       }
00110     }
00111     int script_id() const {
00112       return script_id_;
00113     }
00114     const MATRIX_COORD& matrix_cell() {
00115       return matrix_cell_;
00116     }
00117     inT16 xgap_before() const {
00118       return xgap_before_;
00119     }
00120     inT16 xgap_after() const {
00121       return xgap_after_;
00122     }
00123     float min_xheight() const {
00124       return min_xheight_;
00125     }
00126     float max_xheight() const {
00127       return max_xheight_;
00128     }
00129     float yshift() const {
00130       return yshift_;
00131     }
00132     BlobChoiceClassifier classifier() const {
00133       return classifier_;
00134     }
00135     bool IsAdapted() const {
00136       return classifier_ == BCC_ADAPTED_CLASSIFIER;
00137     }
00138     bool IsClassified() const {
00139       return classifier_ == BCC_STATIC_CLASSIFIER ||
00140              classifier_ == BCC_ADAPTED_CLASSIFIER ||
00141              classifier_ == BCC_SPECKLE_CLASSIFIER;
00142     }
00143 
00144     void set_unichar_id(UNICHAR_ID newunichar_id) {
00145       unichar_id_ = newunichar_id;
00146     }
00147     void set_rating(float newrat) {
00148       rating_ = newrat;
00149     }
00150     void set_certainty(float newrat) {
00151       certainty_ = newrat;
00152     }
00153     void set_script(int newscript_id) {
00154       script_id_ = newscript_id;
00155     }
00156     void set_matrix_cell(int col, int row) {
00157       matrix_cell_.col = col;
00158       matrix_cell_.row = row;
00159     }
00160     void set_xgap_before(inT16 gap) {
00161       xgap_before_ = gap;
00162     }
00163     void set_xgap_after(inT16 gap) {
00164       xgap_after_ = gap;
00165     }
00166     void set_classifier(BlobChoiceClassifier classifier) {
00167       classifier_ = classifier;
00168     }
00169     static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) {
00170       BLOB_CHOICE* choice = new BLOB_CHOICE;
00171       *choice = *src;
00172       return choice;
00173     }
00174     // Returns true if *this and other agree on the baseline and x-height
00175     // to within some tolerance based on a given estimate of the x-height.
00176     bool PosAndSizeAgree(const BLOB_CHOICE& other, float x_height,
00177                          bool debug) const;
00178 
00179     void print(const UNICHARSET *unicharset) const {
00180       tprintf("r%.2f c%.2f x[%g,%g]: %d %s",
00181               rating_, certainty_,
00182               min_xheight_, max_xheight_, unichar_id_,
00183               (unicharset == NULL) ? "" :
00184               unicharset->debug_str(unichar_id_).string());
00185     }
00186     void print_full() const {
00187       print(NULL);
00188       tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n",
00189               script_id_, fontinfo_id_, fontinfo_id2_, yshift_, classifier_);
00190     }
00191     // Sort function for sorting BLOB_CHOICEs in increasing order of rating.
00192     static int SortByRating(const void *p1, const void *p2) {
00193       const BLOB_CHOICE *bc1 =
00194           *reinterpret_cast<const BLOB_CHOICE * const *>(p1);
00195       const BLOB_CHOICE *bc2 =
00196           *reinterpret_cast<const BLOB_CHOICE * const *>(p2);
00197       return (bc1->rating_ < bc2->rating_) ? -1 : 1;
00198     }
00199 
00200  private:
00201   UNICHAR_ID unichar_id_;          // unichar id
00202   // Fonts and scores. Allowed to be empty.
00203   GenericVector<tesseract::ScoredFont> fonts_;
00204   inT16 fontinfo_id_;              // char font information
00205   inT16 fontinfo_id2_;             // 2nd choice font information
00206   // Rating is the classifier distance weighted by the length of the outline
00207   // in the blob. In terms of probability, classifier distance is -klog p such
00208   // that the resulting distance is in the range [0, 1] and then
00209   // rating = w (-k log p) where w is the weight for the length of the outline.
00210   // Sums of ratings may be compared meaningfully for words of different
00211   // segmentation.
00212   float rating_;                  // size related
00213   // Certainty is a number in [-20, 0] indicating the classifier certainty
00214   // of the choice. In terms of probability, certainty = 20 (k log p) where
00215   // k is defined as above to normalize -klog p to the range [0, 1].
00216   float certainty_;               // absolute
00217   int script_id_;
00218   // Holds the position of this choice in the ratings matrix.
00219   // Used to location position in the matrix during path backtracking.
00220   MATRIX_COORD matrix_cell_;
00221   inT16 xgap_before_;
00222   inT16 xgap_after_;
00223   // X-height range (in image pixels) that this classification supports.
00224   float min_xheight_;
00225   float max_xheight_;
00226   // yshift_ - The vertical distance (in image pixels) the character is
00227   //           shifted (up or down) from an acceptable y position.
00228   float yshift_;
00229   BlobChoiceClassifier classifier_;  // What generated *this.
00230 };
00231 
00232 // Make BLOB_CHOICE listable.
00233 ELISTIZEH(BLOB_CHOICE)
00234 
00235 // Return the BLOB_CHOICE in bc_list matching a given unichar_id,
00236 // or NULL if there is no match.
00237 BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list);
00238 
// Permuter codes used in WERD_CHOICEs.
// The numeric values are written out explicitly (they match the default
// numbering) because WERD_CHOICE stores the code in a uinT8.
enum PermuterType {
  NO_PERM = 0,
  PUNC_PERM = 1,
  TOP_CHOICE_PERM = 2,
  LOWER_CASE_PERM = 3,
  UPPER_CASE_PERM = 4,
  NGRAM_PERM = 5,
  NUMBER_PERM = 6,
  USER_PATTERN_PERM = 7,
  SYSTEM_DAWG_PERM = 8,
  DOC_DAWG_PERM = 9,
  USER_DAWG_PERM = 10,
  FREQ_DAWG_PERM = 11,
  COMPOUND_PERM = 12,

  // Number of permuter codes above; keep this entry last.
  NUM_PERMUTER_TYPES
};
00257 
namespace tesseract {
// ScriptPos tells whether a character is subscript, superscript or normal.
// Enumerators take the default values 0..3 in declaration order.
enum ScriptPos {
  SP_NORMAL,       // On the normal text line.
  SP_SUBSCRIPT,    // Subscript.
  SP_SUPERSCRIPT,  // Superscript.
  SP_DROPCAP       // Drop capital.
};

// Defined out of line; presumably returns a printable name for script_pos.
const char *ScriptPosToString(tesseract::ScriptPos script_pos);

}  // namespace tesseract.
00270 
00271 class WERD_CHOICE : public ELIST_LINK {
00272  public:
00273   static const float kBadRating;
00274   static const char *permuter_name(uinT8 permuter);
00275 
00276   WERD_CHOICE(const UNICHARSET *unicharset)
00277     : unicharset_(unicharset) { this->init(8); }
00278   WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
00279     : unicharset_(unicharset) { this->init(reserved); }
00280   WERD_CHOICE(const char *src_string,
00281               const char *src_lengths,
00282               float src_rating,
00283               float src_certainty,
00284               uinT8 src_permuter,
00285               const UNICHARSET &unicharset)
00286     : unicharset_(&unicharset) {
00287     this->init(src_string, src_lengths, src_rating,
00288                src_certainty, src_permuter);
00289   }
00290   WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset);
00291   WERD_CHOICE(const WERD_CHOICE &word) : ELIST_LINK(word), unicharset_(word.unicharset_) {
00292     this->init(word.length());
00293     this->operator=(word);
00294   }
00295   ~WERD_CHOICE();
00296 
00297   const UNICHARSET *unicharset() const {
00298     return unicharset_;
00299   }
00300   inline int length() const {
00301     return length_;
00302   }
00303   float adjust_factor() const {
00304     return adjust_factor_;
00305   }
00306   void set_adjust_factor(float factor) {
00307     adjust_factor_ = factor;
00308   }
00309   inline const UNICHAR_ID *unichar_ids() const {
00310     return unichar_ids_;
00311   }
00312   inline UNICHAR_ID unichar_id(int index) const {
00313     assert(index < length_);
00314     return unichar_ids_[index];
00315   }
00316   inline int state(int index) const {
00317     return state_[index];
00318   }
00319   tesseract::ScriptPos BlobPosition(int index) const {
00320     if (index < 0 || index >= length_)
00321       return tesseract::SP_NORMAL;
00322     return script_pos_[index];
00323   }
00324   inline float rating() const {
00325     return rating_;
00326   }
00327   inline float certainty() const {
00328     return certainty_;
00329   }
00330   inline float certainty(int index) const {
00331     return certainties_[index];
00332   }
00333   inline float min_x_height() const {
00334     return min_x_height_;
00335   }
00336   inline float max_x_height() const {
00337     return max_x_height_;
00338   }
00339   inline void set_x_heights(float min_height, float max_height) {
00340     min_x_height_ = min_height;
00341     max_x_height_ = max_height;
00342   }
00343   inline uinT8 permuter() const {
00344     return permuter_;
00345   }
00346   const char *permuter_name() const;
00347   // Returns the BLOB_CHOICE_LIST corresponding to the given index in the word,
00348   // taken from the appropriate cell in the ratings MATRIX.
00349   // Borrowed pointer, so do not delete.
00350   BLOB_CHOICE_LIST* blob_choices(int index, MATRIX* ratings) const;
00351 
00352   // Returns the MATRIX_COORD corresponding to the location in the ratings
00353   // MATRIX for the given index into the word.
00354   MATRIX_COORD MatrixCoord(int index) const;
00355 
00356   inline void set_unichar_id(UNICHAR_ID unichar_id, int index) {
00357     assert(index < length_);
00358     unichar_ids_[index] = unichar_id;
00359   }
00360   bool dangerous_ambig_found() const {
00361     return dangerous_ambig_found_;
00362   }
00363   void set_dangerous_ambig_found_(bool value) {
00364     dangerous_ambig_found_ = value;
00365   }
00366   inline void set_rating(float new_val) {
00367     rating_ = new_val;
00368   }
00369   inline void set_certainty(float new_val) {
00370     certainty_ = new_val;
00371   }
00372   inline void set_permuter(uinT8 perm) {
00373     permuter_ = perm;
00374   }
00375   // Note: this function should only be used if all the fields
00376   // are populated manually with set_* functions (rather than
00377   // (copy)constructors and append_* functions).
00378   inline void set_length(int len) {
00379     ASSERT_HOST(reserved_ >= len);
00380     length_ = len;
00381   }
00382 
00384   inline void double_the_size() {
00385     if (reserved_ > 0) {
00386       unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy(
00387           reserved_, unichar_ids_);
00388       script_pos_ = GenericVector<tesseract::ScriptPos>::double_the_size_memcpy(
00389           reserved_, script_pos_);
00390       state_ = GenericVector<int>::double_the_size_memcpy(
00391           reserved_, state_);
00392       certainties_ = GenericVector<float>::double_the_size_memcpy(
00393           reserved_, certainties_);
00394       reserved_ *= 2;
00395     } else {
00396       unichar_ids_ = new UNICHAR_ID[1];
00397       script_pos_ = new tesseract::ScriptPos[1];
00398       state_ = new int[1];
00399       certainties_ = new float[1];
00400       reserved_ = 1;
00401     }
00402   }
00403 
00406   inline void init(int reserved) {
00407     reserved_ = reserved;
00408     if (reserved > 0) {
00409       unichar_ids_ = new UNICHAR_ID[reserved];
00410       script_pos_ = new tesseract::ScriptPos[reserved];
00411       state_ = new int[reserved];
00412       certainties_ = new float[reserved];
00413     } else {
00414       unichar_ids_ = NULL;
00415       script_pos_ = NULL;
00416       state_ = NULL;
00417       certainties_ = NULL;
00418     }
00419     length_ = 0;
00420     adjust_factor_ = 1.0f;
00421     rating_ = 0.0;
00422     certainty_ = MAX_FLOAT32;
00423     min_x_height_ = 0.0f;
00424     max_x_height_ = MAX_FLOAT32;
00425     permuter_ = NO_PERM;
00426     unichars_in_script_order_ = false;  // Tesseract is strict left-to-right.
00427     dangerous_ambig_found_ = false;
00428   }
00429 
00435   void init(const char *src_string, const char *src_lengths,
00436             float src_rating, float src_certainty,
00437             uinT8 src_permuter);
00438 
00440   inline void make_bad() {
00441     length_ = 0;
00442     rating_ = kBadRating;
00443     certainty_ = -MAX_FLOAT32;
00444   }
00445 
00449   inline void append_unichar_id_space_allocated(
00450       UNICHAR_ID unichar_id, int blob_count,
00451       float rating, float certainty) {
00452     assert(reserved_ > length_);
00453     length_++;
00454     this->set_unichar_id(unichar_id, blob_count,
00455                          rating, certainty, length_-1);
00456   }
00457 
00458   void append_unichar_id(UNICHAR_ID unichar_id, int blob_count,
00459                          float rating, float certainty);
00460 
00461   inline void set_unichar_id(UNICHAR_ID unichar_id, int blob_count,
00462                              float rating, float certainty, int index) {
00463     assert(index < length_);
00464     unichar_ids_[index] = unichar_id;
00465     state_[index] = blob_count;
00466     certainties_[index] = certainty;
00467     script_pos_[index] = tesseract::SP_NORMAL;
00468     rating_ += rating;
00469     if (certainty < certainty_) {
00470       certainty_ = certainty;
00471     }
00472   }
00473   // Sets the entries for the given index from the BLOB_CHOICE, assuming
00474   // unit fragment lengths, but setting the state for this index to blob_count.
00475   void set_blob_choice(int index, int blob_count,
00476                        const BLOB_CHOICE* blob_choice);
00477 
00478   bool contains_unichar_id(UNICHAR_ID unichar_id) const;
00479   void remove_unichar_ids(int index, int num);
00480   inline void remove_last_unichar_id() { --length_; }
00481   inline void remove_unichar_id(int index) {
00482     this->remove_unichar_ids(index, 1);
00483   }
00484   bool has_rtl_unichar_id() const;
00485   void reverse_and_mirror_unichar_ids();
00486 
00487   // Returns the half-open interval of unichar_id indices [start, end) which
00488   // enclose the core portion of this word -- the part after stripping
00489   // punctuation from the left and right.
00490   void punct_stripped(int *start_core, int *end_core) const;
00491 
00492   // Returns the indices [start, end) containing the core of the word, stripped
00493   // of any superscript digits on either side. (i.e., the non-footnote part
00494   // of the word). There is no guarantee that the output range is non-empty.
00495   void GetNonSuperscriptSpan(int *start, int *end) const;
00496 
00497   // Return a copy of this WERD_CHOICE with the choices [start, end).
00498   // The result is useful only for checking against a dictionary.
00499   WERD_CHOICE shallow_copy(int start, int end) const;
00500 
00501   void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const;
00502   const STRING debug_string() const {
00503     STRING word_str;
00504     for (int i = 0; i < length_; ++i) {
00505       word_str += unicharset_->debug_str(unichar_ids_[i]);
00506       word_str += " ";
00507     }
00508     return word_str;
00509   }
00510 
00511   // Call this to override the default (strict left to right graphemes)
00512   // with the fact that some engine produces a "reading order" set of
00513   // Graphemes for each word.
00514   bool set_unichars_in_script_order(bool in_script_order) {
00515     return unichars_in_script_order_ = in_script_order;
00516   }
00517 
00518   bool unichars_in_script_order() const {
00519     return unichars_in_script_order_;
00520   }
00521 
00522   // Returns a UTF-8 string equivalent to the current choice
00523   // of UNICHAR IDs.
00524   const STRING &unichar_string() const {
00525     this->string_and_lengths(&unichar_string_, &unichar_lengths_);
00526     return unichar_string_;
00527   }
00528 
00529   // Returns the lengths, one byte each, representing the number of bytes
00530   // required in the unichar_string for each UNICHAR_ID.
00531   const STRING &unichar_lengths() const {
00532     this->string_and_lengths(&unichar_string_, &unichar_lengths_);
00533     return unichar_lengths_;
00534   }
00535 
00536   // Sets up the script_pos_ member using the blobs_list to get the bln
00537   // bounding boxes, *this to get the unichars, and this->unicharset
00538   // to get the target positions. If small_caps is true, sub/super are not
00539   // considered, but dropcaps are.
00540   // NOTE: blobs_list should be the chopped_word blobs. (Fully segemented.)
00541   void SetScriptPositions(bool small_caps, TWERD* word);
00542   // Sets the script_pos_ member from some source positions with a given length.
00543   void SetScriptPositions(const tesseract::ScriptPos* positions, int length);
00544   // Sets all the script_pos_ positions to the given position.
00545   void SetAllScriptPositions(tesseract::ScriptPos position);
00546 
00547   static tesseract::ScriptPos ScriptPositionOf(bool print_debug,
00548                                                const UNICHARSET& unicharset,
00549                                                const TBOX& blob_box,
00550                                                UNICHAR_ID unichar_id);
00551 
00552   // Returns the "dominant" script ID for the word.  By "dominant", the script
00553   // must account for at least half the characters.  Otherwise, it returns 0.
00554   // Note that for Japanese, Hiragana and Katakana are simply treated as Han.
00555   int GetTopScriptID() const;
00556 
00557   // Fixes the state_ for a chop at the given blob_posiiton.
00558   void UpdateStateForSplit(int blob_position);
00559 
00560   // Returns the sum of all the state elements, being the total number of blobs.
00561   int TotalOfStates() const;
00562 
00563   void print() const { this->print(""); }
00564   void print(const char *msg) const;
00565   // Prints the segmentation state with an introductory message.
00566   void print_state(const char *msg) const;
00567 
00568   // Displays the segmentation state of *this (if not the same as the last
00569   // one displayed) and waits for a click in the window.
00570   void DisplaySegmentation(TWERD* word);
00571 
00572   WERD_CHOICE& operator+= (     // concatanate
00573     const WERD_CHOICE & second);// second on first
00574 
00575   WERD_CHOICE& operator= (const WERD_CHOICE& source);
00576 
00577  private:
00578   const UNICHARSET *unicharset_;
00579   // TODO(rays) Perhaps replace the multiple arrays with an array of structs?
00580   // unichar_ids_ is an array of classifier "results" that make up a word.
00581   // For each unichar_ids_[i], script_pos_[i] has the sub/super/normal position
00582   // of each unichar_id.
00583   // state_[i] indicates the number of blobs in WERD_RES::chopped_word that
00584   // were put together to make the classification results in the ith position
00585   // in unichar_ids_, and certainties_[i] is the certainty of the choice that
00586   // was used in this word.
00587   // == Change from before ==
00588   // Previously there was fragment_lengths_ that allowed a word to be
00589   // artificially composed of multiple fragment results. Since the new
00590   // segmentation search doesn't do fragments, treatment of fragments has
00591   // been moved to a lower level, augmenting the ratings matrix with the
00592   // combined fragments, and allowing the language-model/segmentation-search
00593   // to deal with only the combined unichar_ids.
00594   UNICHAR_ID *unichar_ids_;  // unichar ids that represent the text of the word
00595   tesseract::ScriptPos* script_pos_;  // Normal/Sub/Superscript of each unichar.
00596   int* state_;               // Number of blobs in each unichar.
00597   float* certainties_;       // Certainty of each unichar.
00598   int reserved_;             // size of the above arrays
00599   int length_;               // word length
00600   // Factor that was used to adjust the rating.
00601   float adjust_factor_;
00602   // Rating is the sum of the ratings of the individual blobs in the word.
00603   float rating_;             // size related
00604   // certainty is the min (worst) certainty of the individual blobs in the word.
00605   float certainty_;          // absolute
00606   // xheight computed from the result, or 0 if inconsistent.
00607   float min_x_height_;
00608   float max_x_height_;
00609   uinT8 permuter_;           // permuter code
00610 
00611   // Normally, the ratings_ matrix represents the recognition results in order
00612   // from left-to-right.  However, some engines (say Cube) may return
00613   // recognition results in the order of the script's major reading direction
00614   // (for Arabic, that is right-to-left).
00615   bool unichars_in_script_order_;
00616   // True if NoDangerousAmbig found an ambiguity.
00617   bool dangerous_ambig_found_;
00618 
00619   // The following variables are populated and passed by reference any
00620   // time unichar_string() or unichar_lengths() are called.
00621   mutable STRING unichar_string_;
00622   mutable STRING unichar_lengths_;
00623 };
00624 
00625 // Make WERD_CHOICE listable.
00626 ELISTIZEH(WERD_CHOICE)
00627 typedef GenericVector<BLOB_CHOICE_LIST *> BLOB_CHOICE_LIST_VECTOR;
00628 
00629 // Utilities for comparing WERD_CHOICEs
00630 
00631 bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1,
00632                                        const WERD_CHOICE &word2);
00633 
00634 // Utilities for debug printing.
00635 void print_ratings_list(
00636     const char *msg,                      // intro message
00637     BLOB_CHOICE_LIST *ratings,            // list of results
00638     const UNICHARSET &current_unicharset  // unicharset that can be used
00639                                           // for id-to-unichar conversion
00640     );
00641 
00642 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines