tesseract 3.04.01

ccmain/ltrresultiterator.cpp

Go to the documentation of this file.
00001 
00002 // File:        ltrresultiterator.cpp
00003 // Description: Iterator for tesseract results in strict left-to-right
00004 //              order that avoids using tesseract internal data structures.
00005 // Author:      Ray Smith
00006 // Created:     Fri Feb 26 14:32:09 PST 2010
00007 //
00008 // (C) Copyright 2010, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #include "ltrresultiterator.h"
00022 
00023 #include "allheaders.h"
00024 #include "pageres.h"
00025 #include "strngs.h"
00026 #include "tesseractclass.h"
00027 
00028 namespace tesseract {
00029 
00030 LTRResultIterator::LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract,
00031                                      int scale, int scaled_yres,
00032                                      int rect_left, int rect_top,
00033                                      int rect_width, int rect_height)
00034   : PageIterator(page_res, tesseract, scale, scaled_yres,
00035                  rect_left, rect_top, rect_width, rect_height),
00036     line_separator_("\n"),
00037     paragraph_separator_("\n") {
00038 }
00039 
00040 LTRResultIterator::~LTRResultIterator() {
00041 }
00042 
00043 // Returns the null terminated UTF-8 encoded text string for the current
00044 // object at the given level. Use delete [] to free after use.
00045 char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
00046   if (it_->word() == NULL) return NULL;  // Already at the end!
00047   STRING text;
00048   PAGE_RES_IT res_it(*it_);
00049   WERD_CHOICE* best_choice = res_it.word()->best_choice;
00050   ASSERT_HOST(best_choice != NULL);
00051   if (level == RIL_SYMBOL) {
00052     text = res_it.word()->BestUTF8(blob_index_, false);
00053   } else if (level == RIL_WORD) {
00054     text = best_choice->unichar_string();
00055   } else {
00056     bool eol = false;  // end of line?
00057     bool eop = false;  // end of paragraph?
00058     do {  // for each paragraph in a block
00059       do {  // for each text line in a paragraph
00060         do {  // for each word in a text line
00061           best_choice = res_it.word()->best_choice;
00062           ASSERT_HOST(best_choice != NULL);
00063           text += best_choice->unichar_string();
00064           text += " ";
00065           res_it.forward();
00066           eol = res_it.row() != res_it.prev_row();
00067         } while (!eol);
00068         text.truncate_at(text.length() - 1);
00069         text += line_separator_;
00070         eop = res_it.block() != res_it.prev_block() ||
00071             res_it.row()->row->para() != res_it.prev_row()->row->para();
00072       } while (level != RIL_TEXTLINE && !eop);
00073       if (eop) text += paragraph_separator_;
00074     } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
00075   }
00076   int length = text.length() + 1;
00077   char* result = new char[length];
00078   strncpy(result, text.string(), length);
00079   return result;
00080 }
00081 
00082 // Set the string inserted at the end of each text line. "\n" by default.
00083 void LTRResultIterator::SetLineSeparator(const char *new_line) {
00084   line_separator_ = new_line;
00085 }
00086 
00087 // Set the string inserted at the end of each paragraph. "\n" by default.
00088 void LTRResultIterator::SetParagraphSeparator(const char *new_para) {
00089   paragraph_separator_ = new_para;
00090 }
00091 
00092 // Returns the mean confidence of the current object at the given level.
00093 // The number should be interpreted as a percent probability. (0.0f-100.0f)
00094 float LTRResultIterator::Confidence(PageIteratorLevel level) const {
00095   if (it_->word() == NULL) return 0.0f;  // Already at the end!
00096   float mean_certainty = 0.0f;
00097   int certainty_count = 0;
00098   PAGE_RES_IT res_it(*it_);
00099   WERD_CHOICE* best_choice = res_it.word()->best_choice;
00100   ASSERT_HOST(best_choice != NULL);
00101   switch (level) {
00102     case RIL_BLOCK:
00103       do {
00104         best_choice = res_it.word()->best_choice;
00105         ASSERT_HOST(best_choice != NULL);
00106         mean_certainty += best_choice->certainty();
00107         ++certainty_count;
00108         res_it.forward();
00109       } while (res_it.block() == res_it.prev_block());
00110       break;
00111     case RIL_PARA:
00112       do {
00113         best_choice = res_it.word()->best_choice;
00114         ASSERT_HOST(best_choice != NULL);
00115         mean_certainty += best_choice->certainty();
00116         ++certainty_count;
00117         res_it.forward();
00118       } while (res_it.block() == res_it.prev_block() &&
00119                res_it.row()->row->para() == res_it.prev_row()->row->para());
00120       break;
00121     case RIL_TEXTLINE:
00122       do {
00123         best_choice = res_it.word()->best_choice;
00124         ASSERT_HOST(best_choice != NULL);
00125         mean_certainty += best_choice->certainty();
00126         ++certainty_count;
00127         res_it.forward();
00128       } while (res_it.row() == res_it.prev_row());
00129       break;
00130     case RIL_WORD:
00131       mean_certainty += best_choice->certainty();
00132      ++certainty_count;
00133       break;
00134     case RIL_SYMBOL:
00135       mean_certainty += best_choice->certainty(blob_index_);
00136       ++certainty_count;
00137   }
00138   if (certainty_count > 0) {
00139     mean_certainty /= certainty_count;
00140     float confidence = 100 + 5 * mean_certainty;
00141     if (confidence < 0.0f) confidence = 0.0f;
00142     if (confidence > 100.0f) confidence = 100.0f;
00143     return confidence;
00144   }
00145   return 0.0f;
00146 }
00147 
00148 void LTRResultIterator::RowAttributes(float* row_height,
00149                                       float* descenders,
00150                                       float* ascenders) const {
00151     *row_height = it_->row()->row->x_height() + it_->row()-> row->ascenders()
00152                   - it_->row()->row->descenders();
00153     *descenders = it_->row()->row->descenders();
00154     *ascenders = it_->row()->row->ascenders();
00155 }
00156 
00157 // Returns the font attributes of the current word. If iterating at a higher
00158 // level object than words, eg textlines, then this will return the
00159 // attributes of the first word in that textline.
00160 // The actual return value is a string representing a font name. It points
00161 // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
00162 // the iterator itself, ie rendered invalid by various members of
00163 // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
00164 // Pointsize is returned in printers points (1/72 inch.)
00165 const char* LTRResultIterator::WordFontAttributes(bool* is_bold,
00166                                                   bool* is_italic,
00167                                                   bool* is_underlined,
00168                                                   bool* is_monospace,
00169                                                   bool* is_serif,
00170                                                   bool* is_smallcaps,
00171                                                   int* pointsize,
00172                                                   int* font_id) const {
00173   if (it_->word() == NULL) return NULL;  // Already at the end!
00174   if (it_->word()->fontinfo == NULL) {
00175     *font_id = -1;
00176     return NULL;  // No font information.
00177   }
00178   const FontInfo& font_info = *it_->word()->fontinfo;
00179   *font_id = font_info.universal_id;
00180   *is_bold = font_info.is_bold();
00181   *is_italic = font_info.is_italic();
00182   *is_underlined = false;  // TODO(rays) fix this!
00183   *is_monospace = font_info.is_fixed_pitch();
00184   *is_serif = font_info.is_serif();
00185   *is_smallcaps = it_->word()->small_caps;
00186   float row_height = it_->row()->row->x_height() +
00187       it_->row()->row->ascenders() - it_->row()->row->descenders();
00188   // Convert from pixels to printers points.
00189   *pointsize = scaled_yres_ > 0
00190       ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
00191       : 0;
00192 
00193   return font_info.name;
00194 }
00195 
00196 // Returns the name of the language used to recognize this word.
00197 const char* LTRResultIterator::WordRecognitionLanguage() const {
00198   if (it_->word() == NULL || it_->word()->tesseract == NULL) return NULL;
00199   return it_->word()->tesseract->lang.string();
00200 }
00201 
00202 // Return the overall directionality of this word.
00203 StrongScriptDirection LTRResultIterator::WordDirection() const {
00204   if (it_->word() == NULL) return DIR_NEUTRAL;
00205   bool has_rtl = it_->word()->AnyRtlCharsInWord();
00206   bool has_ltr = it_->word()->AnyLtrCharsInWord();
00207   if (has_rtl && !has_ltr)
00208     return DIR_RIGHT_TO_LEFT;
00209   if (has_ltr && !has_rtl)
00210     return DIR_LEFT_TO_RIGHT;
00211   if (!has_ltr && !has_rtl)
00212     return DIR_NEUTRAL;
00213   return DIR_MIX;
00214 }
00215 
00216 // Returns true if the current word was found in a dictionary.
00217 bool LTRResultIterator::WordIsFromDictionary() const {
00218   if (it_->word() == NULL) return false;  // Already at the end!
00219   int permuter = it_->word()->best_choice->permuter();
00220   return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM ||
00221          permuter == USER_DAWG_PERM;
00222 }
00223 
00224 // Returns true if the current word is numeric.
00225 bool LTRResultIterator::WordIsNumeric() const {
00226   if (it_->word() == NULL) return false;  // Already at the end!
00227   int permuter = it_->word()->best_choice->permuter();
00228   return permuter == NUMBER_PERM;
00229 }
00230 
00231 // Returns true if the word contains blamer information.
00232 bool LTRResultIterator::HasBlamerInfo() const {
00233   return it_->word() != NULL && it_->word()->blamer_bundle != NULL &&
00234          it_->word()->blamer_bundle->HasDebugInfo();
00235 }
00236 
00237 // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
00238 // of the current word.
00239 const void *LTRResultIterator::GetParamsTrainingBundle() const {
00240   return (it_->word() != NULL && it_->word()->blamer_bundle != NULL) ?
00241       &(it_->word()->blamer_bundle->params_training_bundle()) : NULL;
00242 }
00243 
00244 // Returns the pointer to the string with blamer information for this word.
00245 // Assumes that the word's blamer_bundle is not NULL.
00246 const char *LTRResultIterator::GetBlamerDebug() const {
00247   return it_->word()->blamer_bundle->debug().string();
00248 }
00249 
00250 // Returns the pointer to the string with misadaption information for this word.
00251 // Assumes that the word's blamer_bundle is not NULL.
00252 const char *LTRResultIterator::GetBlamerMisadaptionDebug() const {
00253   return it_->word()->blamer_bundle->misadaption_debug().string();
00254 }
00255 
00256 // Returns true if a truth string was recorded for the current word.
00257 bool LTRResultIterator::HasTruthString() const {
00258   if (it_->word() == NULL) return false;  // Already at the end!
00259   if (it_->word()->blamer_bundle == NULL ||
00260       it_->word()->blamer_bundle->NoTruth()) {
00261     return false;  // no truth information for this word
00262   }
00263   return true;
00264 }
00265 
00266 // Returns true if the given string is equivalent to the truth string for
00267 // the current word.
00268 bool LTRResultIterator::EquivalentToTruth(const char *str) const {
00269   if (!HasTruthString()) return false;
00270   ASSERT_HOST(it_->word()->uch_set != NULL);
00271   WERD_CHOICE str_wd(str, *(it_->word()->uch_set));
00272   return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd);
00273 }
00274 
00275 // Returns the null terminated UTF-8 encoded truth string for the current word.
00276 // Use delete [] to free after use.
00277 char* LTRResultIterator::WordTruthUTF8Text() const {
00278   if (!HasTruthString()) return NULL;
00279   STRING truth_text = it_->word()->blamer_bundle->TruthString();
00280   int length = truth_text.length() + 1;
00281   char* result = new char[length];
00282   strncpy(result, truth_text.string(), length);
00283   return result;
00284 }
00285 
00286 // Returns the null terminated UTF-8 encoded normalized OCR string for the
00287 // current word. Use delete [] to free after use.
00288 char* LTRResultIterator::WordNormedUTF8Text() const {
00289   if (it_->word() == NULL) return NULL;  // Already at the end!
00290   STRING ocr_text;
00291   WERD_CHOICE* best_choice = it_->word()->best_choice;
00292   const UNICHARSET *unicharset = it_->word()->uch_set;
00293   ASSERT_HOST(best_choice != NULL);
00294   for (int i = 0; i < best_choice->length(); ++i) {
00295     ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
00296   }
00297   int length = ocr_text.length() + 1;
00298   char* result = new char[length];
00299   strncpy(result, ocr_text.string(), length);
00300   return result;
00301 }
00302 
00303 // Returns a pointer to serialized choice lattice.
00304 // Fills lattice_size with the number of bytes in lattice data.
00305 const char *LTRResultIterator::WordLattice(int *lattice_size) const {
00306   if (it_->word() == NULL) return NULL;  // Already at the end!
00307   if (it_->word()->blamer_bundle == NULL) return NULL;
00308   *lattice_size = it_->word()->blamer_bundle->lattice_size();
00309   return it_->word()->blamer_bundle->lattice_data();
00310 }
00311 
00312 // Returns true if the current symbol is a superscript.
00313 // If iterating at a higher level object than symbols, eg words, then
00314 // this will return the attributes of the first symbol in that word.
00315 bool LTRResultIterator::SymbolIsSuperscript() const {
00316   if (cblob_it_ == NULL && it_->word() != NULL)
00317     return it_->word()->best_choice->BlobPosition(blob_index_) ==
00318         SP_SUPERSCRIPT;
00319   return false;
00320 }
00321 
00322 // Returns true if the current symbol is a subscript.
00323 // If iterating at a higher level object than symbols, eg words, then
00324 // this will return the attributes of the first symbol in that word.
00325 bool LTRResultIterator::SymbolIsSubscript() const {
00326   if (cblob_it_ == NULL && it_->word() != NULL)
00327     return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT;
00328   return false;
00329 }
00330 
00331 // Returns true if the current symbol is a dropcap.
00332 // If iterating at a higher level object than symbols, eg words, then
00333 // this will return the attributes of the first symbol in that word.
00334 bool LTRResultIterator::SymbolIsDropcap() const {
00335   if (cblob_it_ == NULL && it_->word() != NULL)
00336     return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP;
00337   return false;
00338 }
00339 
00340 ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) {
00341   ASSERT_HOST(result_it.it_->word() != NULL);
00342   word_res_ = result_it.it_->word();
00343   BLOB_CHOICE_LIST* choices = NULL;
00344   if (word_res_->ratings != NULL)
00345     choices = word_res_->GetBlobChoices(result_it.blob_index_);
00346   if (choices != NULL && !choices->empty()) {
00347     choice_it_ = new BLOB_CHOICE_IT(choices);
00348     choice_it_->mark_cycle_pt();
00349   } else {
00350     choice_it_ = NULL;
00351   }
00352 }
00353 
00354 ChoiceIterator::~ChoiceIterator() {
00355   delete choice_it_;
00356 }
00357 
00358 // Moves to the next choice for the symbol and returns false if there
00359 // are none left.
00360 bool ChoiceIterator::Next() {
00361   if (choice_it_ == NULL)
00362     return false;
00363   choice_it_->forward();
00364   return !choice_it_->cycled_list();
00365 }
00366 
00367 // Returns the null terminated UTF-8 encoded text string for the current
00368 // choice. Do NOT use delete [] to free after use.
00369 const char* ChoiceIterator::GetUTF8Text() const {
00370   if (choice_it_ == NULL)
00371     return NULL;
00372   UNICHAR_ID id = choice_it_->data()->unichar_id();
00373   return word_res_->uch_set->id_to_unichar_ext(id);
00374 }
00375 
00376 // Returns the confidence of the current choice.
00377 // The number should be interpreted as a percent probability. (0.0f-100.0f)
00378 float ChoiceIterator::Confidence() const {
00379   if (choice_it_ == NULL)
00380     return 0.0f;
00381   float confidence = 100 + 5 * choice_it_->data()->certainty();
00382   if (confidence < 0.0f) confidence = 0.0f;
00383   if (confidence > 100.0f) confidence = 100.0f;
00384   return confidence;
00385 }
00386 
00387 
00388 }  // namespace tesseract.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines