25 #include <sys/types.h> 97 prev_word_best_choice =
nullptr;
103 PAGE_RES(
bool merge_similar_words,
104 BLOCK_LIST *block_list,
107 ~PAGE_RES () =
default;
150 ROW_RES(
bool merge_similar_words,
ROW *the_row);
223 std::vector<std::vector<std::pair<const char*, float>>>
timesteps;
362 const char*
BestUTF8(
int blob_index,
bool in_rtl_context)
const {
363 if (blob_index < 0 || best_choice ==
nullptr ||
364 blob_index >= best_choice->
length())
367 if (id < 0 || id >= uch_set->
size())
370 if (in_rtl_context && mirrored > 0)
376 if (blob_index < 0 || blob_index >= raw_choice->
length())
379 if (id < 0 || id >= uch_set->
size())
385 if (best_choice ==
nullptr ||
386 blob_index >= best_choice->
length() ||
393 if (uch_set ==
nullptr || best_choice ==
nullptr || best_choice->
length() < 1)
395 for (
int id = 0;
id < best_choice->
length();
id++) {
397 if (unichar_id < 0 || unichar_id >= uch_set->
size())
409 if (uch_set ==
nullptr || best_choice ==
nullptr || best_choice->
length() < 1)
411 for (
int id = 0;
id < best_choice->
length();
id++) {
413 if (unichar_id < 0 || unichar_id >= uch_set->
size())
430 void InitNonPointers();
434 void ClearWordChoices();
441 void CopySimpleFields(
const WERD_RES& source);
447 void InitForRetryRecognition(
const WERD_RES& source);
464 bool SetupForRecognition(
const UNICHARSET& unicharset_in,
467 const TBOX* norm_box,
bool numeric_mode,
468 bool use_body_size,
bool allow_detailed_fx,
474 void SetupBasicsFromChoppedWord(
const UNICHARSET &unicharset_in);
481 void SetupWordScript(
const UNICHARSET& unicharset_in);
484 void SetupBlamerBundle();
487 void SetupBlobWidthsAndGaps();
492 void InsertSeam(
int blob_number,
SEAM* seam);
496 bool AlternativeChoiceAdjustmentsWorseThan(
float threshold)
const;
504 bool StatesAllValid();
508 void DebugWordChoices(
bool debug,
const char* word_to_debug);
511 void DebugTopChoice(
const char* msg)
const;
515 void FilterWordChoices(
int debug_level);
532 void ComputeAdaptionThresholds(
float certainty_scale,
547 bool LogNewCookedChoice(
int max_num_choices,
bool debug,
551 void PrintBestChoices()
const;
555 int GetBlobsWidth(
int start_blob,
int last_blob);
557 int GetBlobsGap(
int blob_index);
568 BLOB_CHOICE_LIST* GetBlobChoices(
int index)
const;
578 void ConsumeWordResults(
WERD_RES* word);
586 void RebuildBestState();
590 void CloneChoppedToRebuild();
597 void SetScriptPositions();
609 void FakeClassifyWord(
int blob_count,
BLOB_CHOICE** choices);
616 void BestChoiceToCorrectText();
623 bool ConditionalBlobMerge(
629 void MergeAdjacentBlobs(
int index);
641 bool HyphenBoxesOverlap(
const TBOX& box1,
const TBOX& box2);
647 void merge_tess_fails();
669 bool PiecesAllNatural(
int start,
int count)
const;
683 page_res = the_page_res;
690 return word_res == other.word_res && row_res == other.row_res &&
691 block_res == other.block_res;
694 bool operator !=(
const PAGE_RES_IT &other)
const {
return !(*
this == other); }
703 return start_page(
false);
706 return start_page(
true);
708 WERD_RES *start_page(
bool empty_ok);
729 void DeleteCurrentWord();
733 void MakeCurrentWordFuzzy();
736 return internal_forward(
false,
false);
740 return internal_forward(
false,
true);
747 return prev_word_res;
753 return prev_block_res;
765 return next_word_res;
771 return next_block_res;
773 void rej_stat_word();
774 void ResetWordIterator();
777 WERD_RES *internal_forward(
bool new_block,
bool empty_ok);
791 BLOCK_RES_IT block_res_it;
792 ROW_RES_IT row_res_it;
793 WERD_RES_IT word_res_it;
796 WERD_RES_IT wr_it_of_current_word;
797 WERD_RES_IT wr_it_of_next_word;
ROW_RES * next_row() const
bool UnicharsInReadingOrder() const
WERD_RES * restart_page()
std::vector< std::vector< std::pair< const char *, float > > > timesteps
WERD_RES_LIST word_res_list
GenericVector< int > blame_reasons
GenericVector< int > blob_widths
UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const
UNICHARSET::Direction SymbolDirection(int blob_index) const
GenericVector< STRING > correct_text
ROW_RES * prev_row() const
int32_t whole_word_rej_count
GenericVector< STRING > misadaption_log
GenericVector< SEAM * > seam_array
ROW_RES_LIST row_res_list
BLOCK_RES * block() const
WERD_CHOICE_LIST best_choices
Direction get_direction(UNICHAR_ID unichar_id) const
const char * id_to_unichar(UNICHAR_ID id) const
WERD_RES * restart_page_with_empties()
void init_to_size(int size, const T &t)
tesseract::BoxWord * bln_boxes
const char * id_to_unichar_ext(UNICHAR_ID id) const
tesseract::Tesseract * tesseract
BLOCK_RES * next_block() const
PAGE_RES_IT(PAGE_RES *the_page_res)
bool AnyLtrCharsInWord() const
const UNICHARSET * uch_set
ELISTIZEH(BLOCK_RES) CLISTIZEH(BLOCK_RES) class ROW_RES
const FontInfo * fontinfo2
CRUNCH_MODE unlv_crunch_mode
int8_t fontinfo_id2_count
WERD_RES * next_word() const
bool unichars_in_script_order() const
BLOCK_RES * prev_block() const
static WERD_RES * deep_copy(const WERD_RES *src)
const FontInfo * fontinfo
tesseract::BoxWord * box_word
void copy_on(WERD_RES *word_res)
const char * BestUTF8(int blob_index, bool in_rtl_context) const
bool AnyRtlCharsInWord() const
UNICHAR_ID unichar_id(int index) const
WERD_RES(const WERD_RES &source)
WERD_CHOICE * best_choice
WERD_RES * prev_word() const
bool flag(WERD_FLAGS mask) const
GenericVector< int > best_state
GenericVector< int > blob_gaps
WERD_CHOICE ** prev_word_best_choice
WERD_RES * forward_with_empties()
BLOCK_RES_LIST block_res_list
CLISTIZEH(STRING) CLISTIZE(STRING) namespace tesseract
const char * RawUTF8(int blob_index) const
MATRIX * DeepCopy() const
BlamerBundle * blamer_bundle