|
tesseract 3.04.01
|
00001 00002 // File: ltrresultiterator.cpp 00003 // Description: Iterator for tesseract results in strict left-to-right 00004 // order that avoids using tesseract internal data structures. 00005 // Author: Ray Smith 00006 // Created: Fri Feb 26 14:32:09 PST 2010 00007 // 00008 // (C) Copyright 2010, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #include "ltrresultiterator.h" 00022 00023 #include "allheaders.h" 00024 #include "pageres.h" 00025 #include "strngs.h" 00026 #include "tesseractclass.h" 00027 00028 namespace tesseract { 00029 00030 LTRResultIterator::LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, 00031 int scale, int scaled_yres, 00032 int rect_left, int rect_top, 00033 int rect_width, int rect_height) 00034 : PageIterator(page_res, tesseract, scale, scaled_yres, 00035 rect_left, rect_top, rect_width, rect_height), 00036 line_separator_("\n"), 00037 paragraph_separator_("\n") { 00038 } 00039 00040 LTRResultIterator::~LTRResultIterator() { 00041 } 00042 00043 // Returns the null terminated UTF-8 encoded text string for the current 00044 // object at the given level. Use delete [] to free after use. 00045 char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const { 00046 if (it_->word() == NULL) return NULL; // Already at the end! 00047 STRING text; 00048 PAGE_RES_IT res_it(*it_); 00049 WERD_CHOICE* best_choice = res_it.word()->best_choice; 00050 ASSERT_HOST(best_choice != NULL); 00051 if (level == RIL_SYMBOL) { 00052 text = res_it.word()->BestUTF8(blob_index_, false); 00053 } else if (level == RIL_WORD) { 00054 text = best_choice->unichar_string(); 00055 } else { 00056 bool eol = false; // end of line? 00057 bool eop = false; // end of paragraph? 00058 do { // for each paragraph in a block 00059 do { // for each text line in a paragraph 00060 do { // for each word in a text line 00061 best_choice = res_it.word()->best_choice; 00062 ASSERT_HOST(best_choice != NULL); 00063 text += best_choice->unichar_string(); 00064 text += " "; 00065 res_it.forward(); 00066 eol = res_it.row() != res_it.prev_row(); 00067 } while (!eol); 00068 text.truncate_at(text.length() - 1); 00069 text += line_separator_; 00070 eop = res_it.block() != res_it.prev_block() || 00071 res_it.row()->row->para() != res_it.prev_row()->row->para(); 00072 } while (level != RIL_TEXTLINE && !eop); 00073 if (eop) text += paragraph_separator_; 00074 } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block()); 00075 } 00076 int length = text.length() + 1; 00077 char* result = new char[length]; 00078 strncpy(result, text.string(), length); 00079 return result; 00080 } 00081 00082 // Set the string inserted at the end of each text line. "\n" by default. 00083 void LTRResultIterator::SetLineSeparator(const char *new_line) { 00084 line_separator_ = new_line; 00085 } 00086 00087 // Set the string inserted at the end of each paragraph. "\n" by default. 00088 void LTRResultIterator::SetParagraphSeparator(const char *new_para) { 00089 paragraph_separator_ = new_para; 00090 } 00091 00092 // Returns the mean confidence of the current object at the given level. 00093 // The number should be interpreted as a percent probability. (0.0f-100.0f) 00094 float LTRResultIterator::Confidence(PageIteratorLevel level) const { 00095 if (it_->word() == NULL) return 0.0f; // Already at the end! 00096 float mean_certainty = 0.0f; 00097 int certainty_count = 0; 00098 PAGE_RES_IT res_it(*it_); 00099 WERD_CHOICE* best_choice = res_it.word()->best_choice; 00100 ASSERT_HOST(best_choice != NULL); 00101 switch (level) { 00102 case RIL_BLOCK: 00103 do { 00104 best_choice = res_it.word()->best_choice; 00105 ASSERT_HOST(best_choice != NULL); 00106 mean_certainty += best_choice->certainty(); 00107 ++certainty_count; 00108 res_it.forward(); 00109 } while (res_it.block() == res_it.prev_block()); 00110 break; 00111 case RIL_PARA: 00112 do { 00113 best_choice = res_it.word()->best_choice; 00114 ASSERT_HOST(best_choice != NULL); 00115 mean_certainty += best_choice->certainty(); 00116 ++certainty_count; 00117 res_it.forward(); 00118 } while (res_it.block() == res_it.prev_block() && 00119 res_it.row()->row->para() == res_it.prev_row()->row->para()); 00120 break; 00121 case RIL_TEXTLINE: 00122 do { 00123 best_choice = res_it.word()->best_choice; 00124 ASSERT_HOST(best_choice != NULL); 00125 mean_certainty += best_choice->certainty(); 00126 ++certainty_count; 00127 res_it.forward(); 00128 } while (res_it.row() == res_it.prev_row()); 00129 break; 00130 case RIL_WORD: 00131 mean_certainty += best_choice->certainty(); 00132 ++certainty_count; 00133 break; 00134 case RIL_SYMBOL: 00135 mean_certainty += best_choice->certainty(blob_index_); 00136 ++certainty_count; 00137 } 00138 if (certainty_count > 0) { 00139 mean_certainty /= certainty_count; 00140 float confidence = 100 + 5 * mean_certainty; 00141 if (confidence < 0.0f) confidence = 0.0f; 00142 if (confidence > 100.0f) confidence = 100.0f; 00143 return confidence; 00144 } 00145 return 0.0f; 00146 } 00147 00148 void LTRResultIterator::RowAttributes(float* row_height, 00149 float* descenders, 00150 float* ascenders) const { 00151 *row_height = it_->row()->row->x_height() + it_->row()-> row->ascenders() 00152 - it_->row()->row->descenders(); 00153 *descenders = it_->row()->row->descenders(); 00154 *ascenders = it_->row()->row->ascenders(); 00155 } 00156 00157 // Returns the font attributes of the current word. If iterating at a higher 00158 // level object than words, eg textlines, then this will return the 00159 // attributes of the first word in that textline. 00160 // The actual return value is a string representing a font name. It points 00161 // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as 00162 // the iterator itself, ie rendered invalid by various members of 00163 // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. 00164 // Pointsize is returned in printers points (1/72 inch.) 00165 const char* LTRResultIterator::WordFontAttributes(bool* is_bold, 00166 bool* is_italic, 00167 bool* is_underlined, 00168 bool* is_monospace, 00169 bool* is_serif, 00170 bool* is_smallcaps, 00171 int* pointsize, 00172 int* font_id) const { 00173 if (it_->word() == NULL) return NULL; // Already at the end! 00174 if (it_->word()->fontinfo == NULL) { 00175 *font_id = -1; 00176 return NULL; // No font information. 00177 } 00178 const FontInfo& font_info = *it_->word()->fontinfo; 00179 *font_id = font_info.universal_id; 00180 *is_bold = font_info.is_bold(); 00181 *is_italic = font_info.is_italic(); 00182 *is_underlined = false; // TODO(rays) fix this! 00183 *is_monospace = font_info.is_fixed_pitch(); 00184 *is_serif = font_info.is_serif(); 00185 *is_smallcaps = it_->word()->small_caps; 00186 float row_height = it_->row()->row->x_height() + 00187 it_->row()->row->ascenders() - it_->row()->row->descenders(); 00188 // Convert from pixels to printers points. 00189 *pointsize = scaled_yres_ > 0 00190 ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5) 00191 : 0; 00192 00193 return font_info.name; 00194 } 00195 00196 // Returns the name of the language used to recognize this word. 00197 const char* LTRResultIterator::WordRecognitionLanguage() const { 00198 if (it_->word() == NULL || it_->word()->tesseract == NULL) return NULL; 00199 return it_->word()->tesseract->lang.string(); 00200 } 00201 00202 // Return the overall directionality of this word. 00203 StrongScriptDirection LTRResultIterator::WordDirection() const { 00204 if (it_->word() == NULL) return DIR_NEUTRAL; 00205 bool has_rtl = it_->word()->AnyRtlCharsInWord(); 00206 bool has_ltr = it_->word()->AnyLtrCharsInWord(); 00207 if (has_rtl && !has_ltr) 00208 return DIR_RIGHT_TO_LEFT; 00209 if (has_ltr && !has_rtl) 00210 return DIR_LEFT_TO_RIGHT; 00211 if (!has_ltr && !has_rtl) 00212 return DIR_NEUTRAL; 00213 return DIR_MIX; 00214 } 00215 00216 // Returns true if the current word was found in a dictionary. 00217 bool LTRResultIterator::WordIsFromDictionary() const { 00218 if (it_->word() == NULL) return false; // Already at the end! 00219 int permuter = it_->word()->best_choice->permuter(); 00220 return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM || 00221 permuter == USER_DAWG_PERM; 00222 } 00223 00224 // Returns true if the current word is numeric. 00225 bool LTRResultIterator::WordIsNumeric() const { 00226 if (it_->word() == NULL) return false; // Already at the end! 00227 int permuter = it_->word()->best_choice->permuter(); 00228 return permuter == NUMBER_PERM; 00229 } 00230 00231 // Returns true if the word contains blamer information. 00232 bool LTRResultIterator::HasBlamerInfo() const { 00233 return it_->word() != NULL && it_->word()->blamer_bundle != NULL && 00234 it_->word()->blamer_bundle->HasDebugInfo(); 00235 } 00236 00237 // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle 00238 // of the current word. 00239 const void *LTRResultIterator::GetParamsTrainingBundle() const { 00240 return (it_->word() != NULL && it_->word()->blamer_bundle != NULL) ? 00241 &(it_->word()->blamer_bundle->params_training_bundle()) : NULL; 00242 } 00243 00244 // Returns the pointer to the string with blamer information for this word. 00245 // Assumes that the word's blamer_bundle is not NULL. 00246 const char *LTRResultIterator::GetBlamerDebug() const { 00247 return it_->word()->blamer_bundle->debug().string(); 00248 } 00249 00250 // Returns the pointer to the string with misadaption information for this word. 00251 // Assumes that the word's blamer_bundle is not NULL. 00252 const char *LTRResultIterator::GetBlamerMisadaptionDebug() const { 00253 return it_->word()->blamer_bundle->misadaption_debug().string(); 00254 } 00255 00256 // Returns true if a truth string was recorded for the current word. 00257 bool LTRResultIterator::HasTruthString() const { 00258 if (it_->word() == NULL) return false; // Already at the end! 00259 if (it_->word()->blamer_bundle == NULL || 00260 it_->word()->blamer_bundle->NoTruth()) { 00261 return false; // no truth information for this word 00262 } 00263 return true; 00264 } 00265 00266 // Returns true if the given string is equivalent to the truth string for 00267 // the current word. 00268 bool LTRResultIterator::EquivalentToTruth(const char *str) const { 00269 if (!HasTruthString()) return false; 00270 ASSERT_HOST(it_->word()->uch_set != NULL); 00271 WERD_CHOICE str_wd(str, *(it_->word()->uch_set)); 00272 return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd); 00273 } 00274 00275 // Returns the null terminated UTF-8 encoded truth string for the current word. 00276 // Use delete [] to free after use. 00277 char* LTRResultIterator::WordTruthUTF8Text() const { 00278 if (!HasTruthString()) return NULL; 00279 STRING truth_text = it_->word()->blamer_bundle->TruthString(); 00280 int length = truth_text.length() + 1; 00281 char* result = new char[length]; 00282 strncpy(result, truth_text.string(), length); 00283 return result; 00284 } 00285 00286 // Returns the null terminated UTF-8 encoded normalized OCR string for the 00287 // current word. Use delete [] to free after use. 00288 char* LTRResultIterator::WordNormedUTF8Text() const { 00289 if (it_->word() == NULL) return NULL; // Already at the end! 00290 STRING ocr_text; 00291 WERD_CHOICE* best_choice = it_->word()->best_choice; 00292 const UNICHARSET *unicharset = it_->word()->uch_set; 00293 ASSERT_HOST(best_choice != NULL); 00294 for (int i = 0; i < best_choice->length(); ++i) { 00295 ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i)); 00296 } 00297 int length = ocr_text.length() + 1; 00298 char* result = new char[length]; 00299 strncpy(result, ocr_text.string(), length); 00300 return result; 00301 } 00302 00303 // Returns a pointer to serialized choice lattice. 00304 // Fills lattice_size with the number of bytes in lattice data. 00305 const char *LTRResultIterator::WordLattice(int *lattice_size) const { 00306 if (it_->word() == NULL) return NULL; // Already at the end! 00307 if (it_->word()->blamer_bundle == NULL) return NULL; 00308 *lattice_size = it_->word()->blamer_bundle->lattice_size(); 00309 return it_->word()->blamer_bundle->lattice_data(); 00310 } 00311 00312 // Returns true if the current symbol is a superscript. 00313 // If iterating at a higher level object than symbols, eg words, then 00314 // this will return the attributes of the first symbol in that word. 00315 bool LTRResultIterator::SymbolIsSuperscript() const { 00316 if (cblob_it_ == NULL && it_->word() != NULL) 00317 return it_->word()->best_choice->BlobPosition(blob_index_) == 00318 SP_SUPERSCRIPT; 00319 return false; 00320 } 00321 00322 // Returns true if the current symbol is a subscript. 00323 // If iterating at a higher level object than symbols, eg words, then 00324 // this will return the attributes of the first symbol in that word. 00325 bool LTRResultIterator::SymbolIsSubscript() const { 00326 if (cblob_it_ == NULL && it_->word() != NULL) 00327 return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT; 00328 return false; 00329 } 00330 00331 // Returns true if the current symbol is a dropcap. 00332 // If iterating at a higher level object than symbols, eg words, then 00333 // this will return the attributes of the first symbol in that word. 00334 bool LTRResultIterator::SymbolIsDropcap() const { 00335 if (cblob_it_ == NULL && it_->word() != NULL) 00336 return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP; 00337 return false; 00338 } 00339 00340 ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) { 00341 ASSERT_HOST(result_it.it_->word() != NULL); 00342 word_res_ = result_it.it_->word(); 00343 BLOB_CHOICE_LIST* choices = NULL; 00344 if (word_res_->ratings != NULL) 00345 choices = word_res_->GetBlobChoices(result_it.blob_index_); 00346 if (choices != NULL && !choices->empty()) { 00347 choice_it_ = new BLOB_CHOICE_IT(choices); 00348 choice_it_->mark_cycle_pt(); 00349 } else { 00350 choice_it_ = NULL; 00351 } 00352 } 00353 00354 ChoiceIterator::~ChoiceIterator() { 00355 delete choice_it_; 00356 } 00357 00358 // Moves to the next choice for the symbol and returns false if there 00359 // are none left. 00360 bool ChoiceIterator::Next() { 00361 if (choice_it_ == NULL) 00362 return false; 00363 choice_it_->forward(); 00364 return !choice_it_->cycled_list(); 00365 } 00366 00367 // Returns the null terminated UTF-8 encoded text string for the current 00368 // choice. Do NOT use delete [] to free after use. 00369 const char* ChoiceIterator::GetUTF8Text() const { 00370 if (choice_it_ == NULL) 00371 return NULL; 00372 UNICHAR_ID id = choice_it_->data()->unichar_id(); 00373 return word_res_->uch_set->id_to_unichar_ext(id); 00374 } 00375 00376 // Returns the confidence of the current choice. 00377 // The number should be interpreted as a percent probability. (0.0f-100.0f) 00378 float ChoiceIterator::Confidence() const { 00379 if (choice_it_ == NULL) 00380 return 0.0f; 00381 float confidence = 100 + 5 * choice_it_->data()->certainty(); 00382 if (confidence < 0.0f) confidence = 0.0f; 00383 if (confidence > 100.0f) confidence = 100.0f; 00384 return confidence; 00385 } 00386 00387 00388 } // namespace tesseract.