// tesseract 3.04.01
00001 00002 // File: ltrresultiterator.h 00003 // Description: Iterator for tesseract results in strict left-to-right 00004 // order that avoids using tesseract internal data structures. 00005 // Author: Ray Smith 00006 // Created: Fri Feb 26 11:01:06 PST 2010 00007 // 00008 // (C) Copyright 2010, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__ 00022 #define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__ 00023 00024 #include "platform.h" 00025 #include "pageiterator.h" 00026 #include "unichar.h" 00027 00028 class BLOB_CHOICE_IT; 00029 class WERD_RES; 00030 00031 namespace tesseract { 00032 00033 class Tesseract; 00034 00035 // Class to iterate over tesseract results, providing access to all levels 00036 // of the page hierarchy, without including any tesseract headers or having 00037 // to handle any tesseract structures. 00038 // WARNING! This class points to data held within the TessBaseAPI class, and 00039 // therefore can only be used while the TessBaseAPI class still exists and 00040 // has not been subjected to a call of Init, SetImage, Recognize, Clear, End 00041 // DetectOS, or anything else that changes the internal PAGE_RES. 00042 // See apitypes.h for the definition of PageIteratorLevel. 00043 // See also base class PageIterator, which contains the bulk of the interface. 
00044 // LTRResultIterator adds text-specific methods for access to OCR output. 00045 00046 class TESS_API LTRResultIterator : public PageIterator { 00047 friend class ChoiceIterator; 00048 public: 00049 // page_res and tesseract come directly from the BaseAPI. 00050 // The rectangle parameters are copied indirectly from the Thresholder, 00051 // via the BaseAPI. They represent the coordinates of some rectangle in an 00052 // original image (in top-left-origin coordinates) and therefore the top-left 00053 // needs to be added to any output boxes in order to specify coordinates 00054 // in the original image. See TessBaseAPI::SetRectangle. 00055 // The scale and scaled_yres are in case the Thresholder scaled the image 00056 // rectangle prior to thresholding. Any coordinates in tesseract's image 00057 // must be divided by scale before adding (rect_left, rect_top). 00058 // The scaled_yres indicates the effective resolution of the binary image 00059 // that tesseract has been given by the Thresholder. 00060 // After the constructor, Begin has already been called. 00061 LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, 00062 int scale, int scaled_yres, 00063 int rect_left, int rect_top, 00064 int rect_width, int rect_height); 00065 virtual ~LTRResultIterator(); 00066 00067 // LTRResultIterators may be copied! This makes it possible to iterate over 00068 // all the objects at a lower level, while maintaining an iterator to 00069 // objects at a higher level. These constructors DO NOT CALL Begin, so 00070 // iterations will continue from the location of src. 00071 // TODO: For now the copy constructor and operator= only need the base class 00072 // versions, but if new data members are added, don't forget to add them! 00073 00074 // ============= Moving around within the page ============. 00075 00076 // See PageIterator. 00077 00078 // ============= Accessing data ==============. 
00079 00080 // Returns the null terminated UTF-8 encoded text string for the current 00081 // object at the given level. Use delete [] to free after use. 00082 char* GetUTF8Text(PageIteratorLevel level) const; 00083 00084 // Set the string inserted at the end of each text line. "\n" by default. 00085 void SetLineSeparator(const char *new_line); 00086 00087 // Set the string inserted at the end of each paragraph. "\n" by default. 00088 void SetParagraphSeparator(const char *new_para); 00089 00090 // Returns the mean confidence of the current object at the given level. 00091 // The number should be interpreted as a percent probability. (0.0f-100.0f) 00092 float Confidence(PageIteratorLevel level) const; 00093 00094 // Returns the attributes of the current row. 00095 void RowAttributes(float* row_height, 00096 float* descenders, 00097 float* ascenders) const; 00098 00099 // ============= Functions that refer to words only ============. 00100 00101 // Returns the font attributes of the current word. If iterating at a higher 00102 // level object than words, eg textlines, then this will return the 00103 // attributes of the first word in that textline. 00104 // The actual return value is a string representing a font name. It points 00105 // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as 00106 // the iterator itself, ie rendered invalid by various members of 00107 // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. 00108 // Pointsize is returned in printers points (1/72 inch.) 00109 const char* WordFontAttributes(bool* is_bold, 00110 bool* is_italic, 00111 bool* is_underlined, 00112 bool* is_monospace, 00113 bool* is_serif, 00114 bool* is_smallcaps, 00115 int* pointsize, 00116 int* font_id) const; 00117 00118 // Return the name of the language used to recognize this word. 00119 // On error, NULL. Do not delete this pointer. 
00120 const char* WordRecognitionLanguage() const; 00121 00122 // Return the overall directionality of this word. 00123 StrongScriptDirection WordDirection() const; 00124 00125 // Returns true if the current word was found in a dictionary. 00126 bool WordIsFromDictionary() const; 00127 00128 // Returns true if the current word is numeric. 00129 bool WordIsNumeric() const; 00130 00131 // Returns true if the word contains blamer information. 00132 bool HasBlamerInfo() const; 00133 00134 // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle 00135 // of the current word. 00136 const void *GetParamsTrainingBundle() const; 00137 00138 // Returns a pointer to the string with blamer information for this word. 00139 // Assumes that the word's blamer_bundle is not NULL. 00140 const char *GetBlamerDebug() const; 00141 00142 // Returns a pointer to the string with misadaption information for this word. 00143 // Assumes that the word's blamer_bundle is not NULL. 00144 const char *GetBlamerMisadaptionDebug() const; 00145 00146 // Returns true if a truth string was recorded for the current word. 00147 bool HasTruthString() const; 00148 00149 // Returns true if the given string is equivalent to the truth string for 00150 // the current word. 00151 bool EquivalentToTruth(const char *str) const; 00152 00153 // Returns a null terminated UTF-8 encoded truth string for the current word. 00154 // Use delete [] to free after use. 00155 char* WordTruthUTF8Text() const; 00156 00157 // Returns a null terminated UTF-8 encoded normalized OCR string for the 00158 // current word. Use delete [] to free after use. 00159 char* WordNormedUTF8Text() const; 00160 00161 // Returns a pointer to serialized choice lattice. 00162 // Fills lattice_size with the number of bytes in lattice data. 00163 const char *WordLattice(int *lattice_size) const; 00164 00165 // ============= Functions that refer to symbols only ============. 
00166 00167 // Returns true if the current symbol is a superscript. 00168 // If iterating at a higher level object than symbols, eg words, then 00169 // this will return the attributes of the first symbol in that word. 00170 bool SymbolIsSuperscript() const; 00171 // Returns true if the current symbol is a subscript. 00172 // If iterating at a higher level object than symbols, eg words, then 00173 // this will return the attributes of the first symbol in that word. 00174 bool SymbolIsSubscript() const; 00175 // Returns true if the current symbol is a dropcap. 00176 // If iterating at a higher level object than symbols, eg words, then 00177 // this will return the attributes of the first symbol in that word. 00178 bool SymbolIsDropcap() const; 00179 00180 protected: 00181 const char *line_separator_; 00182 const char *paragraph_separator_; 00183 }; 00184 00185 // Class to iterate over the classifier choices for a single RIL_SYMBOL. 00186 class ChoiceIterator { 00187 public: 00188 // Construction is from a LTRResultIterator that points to the symbol of 00189 // interest. The ChoiceIterator allows a one-shot iteration over the 00190 // choices for this symbol and after that is is useless. 00191 explicit ChoiceIterator(const LTRResultIterator& result_it); 00192 ~ChoiceIterator(); 00193 00194 // Moves to the next choice for the symbol and returns false if there 00195 // are none left. 00196 bool Next(); 00197 00198 // ============= Accessing data ==============. 00199 00200 // Returns the null terminated UTF-8 encoded text string for the current 00201 // choice. 00202 // NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an 00203 // internal structure and should NOT be delete[]ed to free after use. 00204 const char* GetUTF8Text() const; 00205 00206 // Returns the confidence of the current choice. 00207 // The number should be interpreted as a percent probability. 
(0.0f-100.0f) 00208 float Confidence() const; 00209 00210 private: 00211 // Pointer to the WERD_RES object owned by the API. 00212 WERD_RES* word_res_; 00213 // Iterator over the blob choices. 00214 BLOB_CHOICE_IT* choice_it_; 00215 }; 00216 00217 } // namespace tesseract. 00218 00219 #endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__