22 #include "allheaders.h" 30 int scale,
int scaled_yres,
int rect_left,
31 int rect_top,
int rect_width,
33 :
PageIterator(page_res, tesseract, scale, scaled_yres, rect_left, rect_top,
34 rect_width, rect_height),
35 line_separator_(
"\n"),
36 paragraph_separator_(
"\n") {}
46 if (
it_->
word() ==
nullptr)
return nullptr;
76 int length = text.
length() + 1;
77 char* result =
new char[length];
78 strncpy(result, text.
string(), length);
95 if (
it_->
word() ==
nullptr)
return 0.0f;
96 float mean_certainty = 0.0f;
97 int certainty_count = 0;
106 mean_certainty += best_choice->
certainty();
115 mean_certainty += best_choice->
certainty();
125 mean_certainty += best_choice->
certainty();
131 mean_certainty += best_choice->
certainty();
138 if (certainty_count > 0) {
139 mean_certainty /= certainty_count;
140 float confidence = 100 + 5 * mean_certainty;
141 if (confidence < 0.0f) confidence = 0.0f;
142 if (confidence > 100.0f) confidence = 100.0f;
149 float* ascenders)
const {
165 bool* is_bold,
bool* is_italic,
bool* is_underlined,
bool* is_monospace,
166 bool* is_serif,
bool* is_smallcaps,
int* pointsize,
int* font_id)
const {
167 const char* result =
nullptr;
185 *is_bold = font_info->
is_bold();
187 *is_underlined =
false;
191 result = font_info->
name;
198 *is_underlined =
false;
199 *is_monospace =
false;
201 *is_smallcaps =
false;
228 if (
it_->
word() ==
nullptr)
return false;
236 if (
it_->
word() ==
nullptr)
return 1;
242 if (
it_->
word() ==
nullptr)
return false;
275 if (
it_->
word() ==
nullptr)
return false;
297 int length = truth_text.
length() + 1;
298 char* result =
new char[length];
299 strncpy(result, truth_text.
string(), length);
306 if (
it_->
word() ==
nullptr)
return nullptr;
311 for (
int i = 0; i < best_choice->
length(); ++i) {
314 int length = ocr_text.
length() + 1;
315 char* result =
new char[length];
316 strncpy(result, ocr_text.
string(), length);
323 if (
it_->
word() ==
nullptr)
return nullptr;
359 word_res_ = result_it.
it_->
word();
360 BLOB_CHOICE_LIST* choices =
nullptr;
361 if (word_res_->ratings !=
nullptr)
363 if (choices !=
nullptr && !choices->empty()) {
364 choice_it_ =
new BLOB_CHOICE_IT(choices);
365 choice_it_->mark_cycle_pt();
367 choice_it_ =
nullptr;
375 if (choice_it_ ==
nullptr)
return false;
376 choice_it_->forward();
377 return !choice_it_->cycled_list();
383 if (choice_it_ ==
nullptr)
return nullptr;
384 UNICHAR_ID id = choice_it_->data()->unichar_id();
385 return word_res_->uch_set->id_to_unichar_ext(
id);
395 if (choice_it_ ==
nullptr)
return 0.0f;
396 float confidence = 100 + 5 * choice_it_->data()->certainty();
397 if (confidence < 0.0f) confidence = 0.0f;
398 if (confidence > 100.0f) confidence = 100.0f;
STRING TruthString() const
const char * WordLattice(int *lattice_size) const
const char * paragraph_separator_
constexpr int kPointsPerInch
const void * GetParamsTrainingBundle() const
void SetParagraphSeparator(const char *new_para)
bool SymbolIsDropcap() const
ROW_RES * prev_row() const
const STRING & unichar_string() const
ChoiceIterator(const LTRResultIterator &result_it)
const char * WordRecognitionLanguage() const
const char * GetUTF8Text() const
BLOCK_RES * block() const
bool SymbolIsSubscript() const
StrongScriptDirection WordDirection() const
void truncate_at(int32_t index)
tesseract::Tesseract * tesseract
bool WordIsFromDictionary() const
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
bool AnyLtrCharsInWord() const
const char * GetBlamerMisadaptionDebug() const
const UNICHARSET * uch_set
void RowAttributes(float *row_height, float *descenders, float *ascenders) const
char * WordNormedUTF8Text() const
char * WordTruthUTF8Text() const
const tesseract::ParamsTrainingBundle & params_training_bundle() const
~LTRResultIterator() override
BLOCK_RES * prev_block() const
const FontInfo * fontinfo
const char * string() const
const char * lattice_data() const
const STRING & debug() const
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
const char * GetBlamerDebug() const
const char * BestUTF8(int blob_index, bool in_rtl_context) const
bool HasBlamerInfo() const
bool is_fixed_pitch() const
bool AnyRtlCharsInWord() const
bool HasTruthString() const
UNICHAR_ID unichar_id(int index) const
WERD_CHOICE * best_choice
void SetLineSeparator(const char *new_line)
bool SymbolIsSuperscript() const
char * GetUTF8Text(PageIteratorLevel level) const
int BlanksBeforeWord() const
const char * WordFontAttributes(bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
bool EquivalentToTruth(const char *str) const
const STRING & misadaption_debug() const
tesseract::ScriptPos BlobPosition(int index) const
bool HasDebugInfo() const
float Confidence(PageIteratorLevel level) const
const char * line_separator_
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
bool WordIsNumeric() const
BlamerBundle * blamer_bundle