tesseract  4.1.0
tesseract::LTRResultIterator Class Reference

#include <ltrresultiterator.h>

Inheritance diagram for tesseract::LTRResultIterator:
tesseract::PageIterator tesseract::ResultIterator tesseract::MutableIterator

Public Member Functions

 LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
 
 ~LTRResultIterator () override
 
char * GetUTF8Text (PageIteratorLevel level) const
 
void SetLineSeparator (const char *new_line)
 
void SetParagraphSeparator (const char *new_para)
 
float Confidence (PageIteratorLevel level) const
 
void RowAttributes (float *row_height, float *descenders, float *ascenders) const
 
const char * WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
 
const char * WordRecognitionLanguage () const
 
StrongScriptDirection WordDirection () const
 
bool WordIsFromDictionary () const
 
int BlanksBeforeWord () const
 
bool WordIsNumeric () const
 
bool HasBlamerInfo () const
 
const void * GetParamsTrainingBundle () const
 
const char * GetBlamerDebug () const
 
const char * GetBlamerMisadaptionDebug () const
 
bool HasTruthString () const
 
bool EquivalentToTruth (const char *str) const
 
char * WordTruthUTF8Text () const
 
char * WordNormedUTF8Text () const
 
const char * WordLattice (int *lattice_size) const
 
bool SymbolIsSuperscript () const
 
bool SymbolIsSubscript () const
 
bool SymbolIsDropcap () const
 
- Public Member Functions inherited from tesseract::PageIterator
 PageIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
 
virtual ~PageIterator ()
 
 PageIterator (const PageIterator &src)
 
const PageIterator & operator= (const PageIterator &src)
 
bool PositionedAtSameWord (const PAGE_RES_IT *other) const
 
virtual void Begin ()
 
virtual void RestartParagraph ()
 
bool IsWithinFirstTextlineOfParagraph () const
 
virtual void RestartRow ()
 
virtual bool Next (PageIteratorLevel level)
 
virtual bool IsAtBeginningOf (PageIteratorLevel level) const
 
virtual bool IsAtFinalElement (PageIteratorLevel level, PageIteratorLevel element) const
 
int Cmp (const PageIterator &other) const
 
void SetBoundingBoxComponents (bool include_upper_dots, bool include_lower_dots)
 
bool BoundingBox (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
 
bool BoundingBox (PageIteratorLevel level, int padding, int *left, int *top, int *right, int *bottom) const
 
bool BoundingBoxInternal (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
 
bool Empty (PageIteratorLevel level) const
 
PolyBlockType BlockType () const
 
Pta * BlockPolygon () const
 
Pix * GetBinaryImage (PageIteratorLevel level) const
 
Pix * GetImage (PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const
 
bool Baseline (PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
 
void Orientation (tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
 
void ParagraphInfo (tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const
 
bool SetWordBlamerBundle (BlamerBundle *blamer_bundle)
 

Protected Attributes

const char * line_separator_
 
const char * paragraph_separator_
 
- Protected Attributes inherited from tesseract::PageIterator
PAGE_RES * page_res_
 
Tesseract * tesseract_
 
PAGE_RES_IT * it_
 
WERD * word_
 
int word_length_
 
int blob_index_
 
C_BLOB_IT * cblob_it_
 
bool include_upper_dots_
 
bool include_lower_dots_
 
int scale_
 
int scaled_yres_
 
int rect_left_
 
int rect_top_
 
int rect_width_
 
int rect_height_
 

Friends

class ChoiceIterator
 

Additional Inherited Members

- Protected Member Functions inherited from tesseract::PageIterator
TESS_LOCAL void BeginWord (int offset)
 

Detailed Description

Definition at line 48 of file ltrresultiterator.h.

Constructor & Destructor Documentation

tesseract::LTRResultIterator::LTRResultIterator ( PAGE_RES * page_res,
Tesseract * tesseract,
int  scale,
int  scaled_yres,
int  rect_left,
int  rect_top,
int  rect_width,
int  rect_height 
)

Definition at line 29 of file ltrresultiterator.cpp.

33  : PageIterator(page_res, tesseract, scale, scaled_yres, rect_left, rect_top,
34  rect_width, rect_height),
35  line_separator_("\n"),
36  paragraph_separator_("\n") {}
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
tesseract::LTRResultIterator::~LTRResultIterator ( )
override, default

Member Function Documentation

int tesseract::LTRResultIterator::BlanksBeforeWord ( ) const

Definition at line 235 of file ltrresultiterator.cpp.

235  {
236  if (it_->word() == nullptr) return 1;
237  return it_->word()->word->space();
238 }
WERD_RES * word() const
Definition: pageres.h:755
uint8_t space()
Definition: werd.h:99
WERD * word
Definition: pageres.h:188
float tesseract::LTRResultIterator::Confidence ( PageIteratorLevel  level) const

Definition at line 94 of file ltrresultiterator.cpp.

94  {
95  if (it_->word() == nullptr) return 0.0f; // Already at the end!
96  float mean_certainty = 0.0f;
97  int certainty_count = 0;
98  PAGE_RES_IT res_it(*it_);
99  WERD_CHOICE* best_choice = res_it.word()->best_choice;
100  ASSERT_HOST(best_choice != nullptr);
101  switch (level) {
102  case RIL_BLOCK:
103  do {
104  best_choice = res_it.word()->best_choice;
105  ASSERT_HOST(best_choice != nullptr);
106  mean_certainty += best_choice->certainty();
107  ++certainty_count;
108  res_it.forward();
109  } while (res_it.block() == res_it.prev_block());
110  break;
111  case RIL_PARA:
112  do {
113  best_choice = res_it.word()->best_choice;
114  ASSERT_HOST(best_choice != nullptr);
115  mean_certainty += best_choice->certainty();
116  ++certainty_count;
117  res_it.forward();
118  } while (res_it.block() == res_it.prev_block() &&
119  res_it.row()->row->para() == res_it.prev_row()->row->para());
120  break;
121  case RIL_TEXTLINE:
122  do {
123  best_choice = res_it.word()->best_choice;
124  ASSERT_HOST(best_choice != nullptr);
125  mean_certainty += best_choice->certainty();
126  ++certainty_count;
127  res_it.forward();
128  } while (res_it.row() == res_it.prev_row());
129  break;
130  case RIL_WORD:
131  mean_certainty += best_choice->certainty();
132  ++certainty_count;
133  break;
134  case RIL_SYMBOL:
135  mean_certainty += best_choice->certainty(blob_index_);
136  ++certainty_count;
137  }
138  if (certainty_count > 0) {
139  mean_certainty /= certainty_count;
140  float confidence = 100 + 5 * mean_certainty;
141  if (confidence < 0.0f) confidence = 0.0f;
142  if (confidence > 100.0f) confidence = 100.0f;
143  return confidence;
144  }
145  return 0.0f;
146 }
WERD_RES * word() const
Definition: pageres.h:755
float certainty() const
Definition: ratngs.h:330
#define ASSERT_HOST(x)
Definition: errcode.h:88
bool tesseract::LTRResultIterator::EquivalentToTruth ( const char *  str) const

Definition at line 285 of file ltrresultiterator.cpp.

285  {
286  if (!HasTruthString()) return false;
287  ASSERT_HOST(it_->word()->uch_set != nullptr);
288  WERD_CHOICE str_wd(str, *(it_->word()->uch_set));
289  return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd);
290 }
const UNICHARSET * uch_set
Definition: pageres.h:205
WERD_RES * word() const
Definition: pageres.h:755
#define ASSERT_HOST(x)
Definition: errcode.h:88
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:116
BlamerBundle * blamer_bundle
Definition: pageres.h:245
const char * tesseract::LTRResultIterator::GetBlamerDebug ( ) const

Definition at line 263 of file ltrresultiterator.cpp.

263  {
264  return it_->word()->blamer_bundle->debug().string();
265 }
const char * string() const
Definition: strngs.cpp:194
const STRING & debug() const
Definition: blamer.h:128
WERD_RES * word() const
Definition: pageres.h:755
BlamerBundle * blamer_bundle
Definition: pageres.h:245
const char * tesseract::LTRResultIterator::GetBlamerMisadaptionDebug ( ) const

Definition at line 269 of file ltrresultiterator.cpp.

269  {
270  return it_->word()->blamer_bundle->misadaption_debug().string();
271 }
const char * string() const
Definition: strngs.cpp:194
WERD_RES * word() const
Definition: pageres.h:755
const STRING & misadaption_debug() const
Definition: blamer.h:131
BlamerBundle * blamer_bundle
Definition: pageres.h:245
const void * tesseract::LTRResultIterator::GetParamsTrainingBundle ( ) const

Definition at line 255 of file ltrresultiterator.cpp.

255  {
256  return (it_->word() != nullptr && it_->word()->blamer_bundle != nullptr)
257  ? &(it_->word()->blamer_bundle->params_training_bundle())
258  : nullptr;
259 }
const tesseract::ParamsTrainingBundle & params_training_bundle() const
Definition: blamer.h:162
WERD_RES * word() const
Definition: pageres.h:755
BlamerBundle * blamer_bundle
Definition: pageres.h:245
char * tesseract::LTRResultIterator::GetUTF8Text ( PageIteratorLevel  level) const

Definition at line 45 of file ltrresultiterator.cpp.

45  {
46  if (it_->word() == nullptr) return nullptr; // Already at the end!
47  STRING text;
48  PAGE_RES_IT res_it(*it_);
49  WERD_CHOICE* best_choice = res_it.word()->best_choice;
50  ASSERT_HOST(best_choice != nullptr);
51  if (level == RIL_SYMBOL) {
52  text = res_it.word()->BestUTF8(blob_index_, false);
53  } else if (level == RIL_WORD) {
54  text = best_choice->unichar_string();
55  } else {
56  bool eol = false; // end of line?
57  bool eop = false; // end of paragraph?
58  do { // for each paragraph in a block
59  do { // for each text line in a paragraph
60  do { // for each word in a text line
61  best_choice = res_it.word()->best_choice;
62  ASSERT_HOST(best_choice != nullptr);
63  text += best_choice->unichar_string();
64  text += " ";
65  res_it.forward();
66  eol = res_it.row() != res_it.prev_row();
67  } while (!eol);
68  text.truncate_at(text.length() - 1);
69  text += line_separator_;
70  eop = res_it.block() != res_it.prev_block() ||
71  res_it.row()->row->para() != res_it.prev_row()->row->para();
72  } while (level != RIL_TEXTLINE && !eop);
73  if (eop) text += paragraph_separator_;
74  } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
75  }
76  int length = text.length() + 1;
77  char* result = new char[length];
78  strncpy(result, text.string(), length);
79  return result;
80 }
Definition: strngs.h:45
const STRING & unichar_string() const
Definition: ratngs.h:541
int32_t length() const
Definition: strngs.cpp:189
void truncate_at(int32_t index)
Definition: strngs.cpp:265
const char * string() const
Definition: strngs.cpp:194
WERD_RES * word() const
Definition: pageres.h:755
#define ASSERT_HOST(x)
Definition: errcode.h:88
bool tesseract::LTRResultIterator::HasBlamerInfo ( ) const

Definition at line 248 of file ltrresultiterator.cpp.

248  {
249  return it_->word() != nullptr && it_->word()->blamer_bundle != nullptr &&
250  it_->word()->blamer_bundle->HasDebugInfo();
251 }
WERD_RES * word() const
Definition: pageres.h:755
bool HasDebugInfo() const
Definition: blamer.h:125
BlamerBundle * blamer_bundle
Definition: pageres.h:245
bool tesseract::LTRResultIterator::HasTruthString ( ) const

Definition at line 274 of file ltrresultiterator.cpp.

274  {
275  if (it_->word() == nullptr) return false; // Already at the end!
276  if (it_->word()->blamer_bundle == nullptr ||
277  it_->word()->blamer_bundle->NoTruth()) {
278  return false; // no truth information for this word
279  }
280  return true;
281 }
bool NoTruth() const
Definition: blamer.h:121
WERD_RES * word() const
Definition: pageres.h:755
BlamerBundle * blamer_bundle
Definition: pageres.h:245
void tesseract::LTRResultIterator::RowAttributes ( float *  row_height,
float *  descenders,
float *  ascenders 
) const

Definition at line 148 of file ltrresultiterator.cpp.

149  {
150  *row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() -
151  it_->row()->row->descenders();
152  *descenders = it_->row()->row->descenders();
153  *ascenders = it_->row()->row->ascenders();
154 }
float x_height() const
Definition: ocrrow.h:64
ROW_RES * row() const
Definition: pageres.h:758
float descenders() const
Definition: ocrrow.h:85
ROW * row
Definition: pageres.h:142
float ascenders() const
Definition: ocrrow.h:82
void tesseract::LTRResultIterator::SetLineSeparator ( const char *  new_line)

Definition at line 83 of file ltrresultiterator.cpp.

83  {
84  line_separator_ = new_line;
85 }
void tesseract::LTRResultIterator::SetParagraphSeparator ( const char *  new_para)

Definition at line 88 of file ltrresultiterator.cpp.

88  {
89  paragraph_separator_ = new_para;
90 }
bool tesseract::LTRResultIterator::SymbolIsDropcap ( ) const

Definition at line 351 of file ltrresultiterator.cpp.

351  {
352  if (cblob_it_ == nullptr && it_->word() != nullptr)
353  return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP;
354  return false;
355 }
WERD_RES * word() const
Definition: pageres.h:755
WERD_CHOICE * best_choice
Definition: pageres.h:234
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:322
bool tesseract::LTRResultIterator::SymbolIsSubscript ( ) const

Definition at line 342 of file ltrresultiterator.cpp.

342  {
343  if (cblob_it_ == nullptr && it_->word() != nullptr)
344  return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT;
345  return false;
346 }
WERD_RES * word() const
Definition: pageres.h:755
WERD_CHOICE * best_choice
Definition: pageres.h:234
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:322
bool tesseract::LTRResultIterator::SymbolIsSuperscript ( ) const

Definition at line 332 of file ltrresultiterator.cpp.

332  {
333  if (cblob_it_ == nullptr && it_->word() != nullptr)
334  return it_->word()->best_choice->BlobPosition(blob_index_) ==
335  SP_SUPERSCRIPT;
336  return false;
337 }
WERD_RES * word() const
Definition: pageres.h:755
WERD_CHOICE * best_choice
Definition: pageres.h:234
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:322
StrongScriptDirection tesseract::LTRResultIterator::WordDirection ( ) const

Definition at line 216 of file ltrresultiterator.cpp.

216  {
217  if (it_->word() == nullptr) return DIR_NEUTRAL;
218  bool has_rtl = it_->word()->AnyRtlCharsInWord();
219  bool has_ltr = it_->word()->AnyLtrCharsInWord();
220  if (has_rtl && !has_ltr) return DIR_RIGHT_TO_LEFT;
221  if (has_ltr && !has_rtl) return DIR_LEFT_TO_RIGHT;
222  if (!has_ltr && !has_rtl) return DIR_NEUTRAL;
223  return DIR_MIX;
224 }
bool AnyLtrCharsInWord() const
Definition: pageres.h:408
WERD_RES * word() const
Definition: pageres.h:755
bool AnyRtlCharsInWord() const
Definition: pageres.h:392
const char * tesseract::LTRResultIterator::WordFontAttributes ( bool *  is_bold,
bool *  is_italic,
bool *  is_underlined,
bool *  is_monospace,
bool *  is_serif,
bool *  is_smallcaps,
int *  pointsize,
int *  font_id 
) const

Definition at line 164 of file ltrresultiterator.cpp.

166  {
167  const char* result = nullptr;
168 
169  if (it_->word() == nullptr) {
170  // Already at the end!
171  *pointsize = 0;
172  } else {
173  float row_height = it_->row()->row->x_height() +
174  it_->row()->row->ascenders() -
175  it_->row()->row->descenders();
176  // Convert from pixels to printers points.
177  *pointsize =
178  scaled_yres_ > 0
179  ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
180  : 0;
181  const FontInfo* font_info = it_->word()->fontinfo;
182  if (font_info) {
183  // Font information available.
184  *font_id = font_info->universal_id;
185  *is_bold = font_info->is_bold();
186  *is_italic = font_info->is_italic();
187  *is_underlined = false; // TODO(rays) fix this!
188  *is_monospace = font_info->is_fixed_pitch();
189  *is_serif = font_info->is_serif();
190  *is_smallcaps = it_->word()->small_caps;
191  result = font_info->name;
192  }
193  }
194 
195  if (!result) {
196  *is_bold = false;
197  *is_italic = false;
198  *is_underlined = false;
199  *is_monospace = false;
200  *is_serif = false;
201  *is_smallcaps = false;
202  *font_id = -1;
203  }
204 
205  return result;
206 }
float x_height() const
Definition: ocrrow.h:64
constexpr int kPointsPerInch
Definition: publictypes.h:33
bool is_italic() const
Definition: fontinfo.h:111
ROW_RES * row() const
Definition: pageres.h:758
float descenders() const
Definition: ocrrow.h:85
bool small_caps
Definition: pageres.h:298
const FontInfo * fontinfo
Definition: pageres.h:303
WERD_RES * word() const
Definition: pageres.h:755
int32_t universal_id
Definition: fontinfo.h:123
bool is_fixed_pitch() const
Definition: fontinfo.h:113
bool is_serif() const
Definition: fontinfo.h:114
ROW * row
Definition: pageres.h:142
bool is_bold() const
Definition: fontinfo.h:112
float ascenders() const
Definition: ocrrow.h:82
bool tesseract::LTRResultIterator::WordIsFromDictionary ( ) const

Definition at line 227 of file ltrresultiterator.cpp.

227  {
228  if (it_->word() == nullptr) return false; // Already at the end!
229  int permuter = it_->word()->best_choice->permuter();
230  return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM ||
231  permuter == USER_DAWG_PERM;
232 }
WERD_RES * word() const
Definition: pageres.h:755
WERD_CHOICE * best_choice
Definition: pageres.h:234
uint8_t permuter() const
Definition: ratngs.h:346
bool tesseract::LTRResultIterator::WordIsNumeric ( ) const

Definition at line 241 of file ltrresultiterator.cpp.

241  {
242  if (it_->word() == nullptr) return false; // Already at the end!
243  int permuter = it_->word()->best_choice->permuter();
244  return permuter == NUMBER_PERM;
245 }
WERD_RES * word() const
Definition: pageres.h:755
WERD_CHOICE * best_choice
Definition: pageres.h:234
uint8_t permuter() const
Definition: ratngs.h:346
const char * tesseract::LTRResultIterator::WordLattice ( int *  lattice_size) const

Definition at line 322 of file ltrresultiterator.cpp.

322  {
323  if (it_->word() == nullptr) return nullptr; // Already at the end!
324  if (it_->word()->blamer_bundle == nullptr) return nullptr;
325  *lattice_size = it_->word()->blamer_bundle->lattice_size();
326  return it_->word()->blamer_bundle->lattice_data();
327 }
const char * lattice_data() const
Definition: blamer.h:150
WERD_RES * word() const
Definition: pageres.h:755
int lattice_size() const
Definition: blamer.h:153
BlamerBundle * blamer_bundle
Definition: pageres.h:245
char * tesseract::LTRResultIterator::WordNormedUTF8Text ( ) const

Definition at line 305 of file ltrresultiterator.cpp.

305  {
306  if (it_->word() == nullptr) return nullptr; // Already at the end!
307  STRING ocr_text;
308  WERD_CHOICE* best_choice = it_->word()->best_choice;
309  const UNICHARSET* unicharset = it_->word()->uch_set;
310  ASSERT_HOST(best_choice != nullptr);
311  for (int i = 0; i < best_choice->length(); ++i) {
312  ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
313  }
314  int length = ocr_text.length() + 1;
315  char* result = new char[length];
316  strncpy(result, ocr_text.string(), length);
317  return result;
318 }
Definition: strngs.h:45
int32_t length() const
Definition: strngs.cpp:189
int length() const
Definition: ratngs.h:303
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:828
const UNICHARSET * uch_set
Definition: pageres.h:205
const char * string() const
Definition: strngs.cpp:194
WERD_RES * word() const
Definition: pageres.h:755
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
WERD_CHOICE * best_choice
Definition: pageres.h:234
#define ASSERT_HOST(x)
Definition: errcode.h:88
const char * tesseract::LTRResultIterator::WordRecognitionLanguage ( ) const

Definition at line 209 of file ltrresultiterator.cpp.

209  {
210  if (it_->word() == nullptr || it_->word()->tesseract == nullptr)
211  return nullptr;
212  return it_->word()->tesseract->lang.string();
213 }
tesseract::Tesseract * tesseract
Definition: pageres.h:281
const char * string() const
Definition: strngs.cpp:194
WERD_RES * word() const
Definition: pageres.h:755
STRING lang
Definition: ccutil.h:69
char * tesseract::LTRResultIterator::WordTruthUTF8Text ( ) const

Definition at line 294 of file ltrresultiterator.cpp.

294  {
295  if (!HasTruthString()) return nullptr;
296  STRING truth_text = it_->word()->blamer_bundle->TruthString();
297  int length = truth_text.length() + 1;
298  char* result = new char[length];
299  strncpy(result, truth_text.string(), length);
300  return result;
301 }
STRING TruthString() const
Definition: blamer.h:112
Definition: strngs.h:45
int32_t length() const
Definition: strngs.cpp:189
const char * string() const
Definition: strngs.cpp:194
WERD_RES * word() const
Definition: pageres.h:755
BlamerBundle * blamer_bundle
Definition: pageres.h:245

Friends And Related Function Documentation

friend class ChoiceIterator
friend

Definition at line 49 of file ltrresultiterator.h.

Member Data Documentation

const char* tesseract::LTRResultIterator::line_separator_
protected

Definition at line 186 of file ltrresultiterator.h.

const char* tesseract::LTRResultIterator::paragraph_separator_
protected

Definition at line 187 of file ltrresultiterator.h.


The documentation for this class was generated from the following files: