tesseract  4.1.0
WERD_RES Class Reference

#include <pageres.h>

Inheritance diagram for WERD_RES:
ELIST_LINK

Public Member Functions

 WERD_RES ()
 
 WERD_RES (WERD *the_word)
 
 WERD_RES (const WERD_RES &source)
 
 ~WERD_RES ()
 
const char * BestUTF8 (int blob_index, bool in_rtl_context) const
 
const char * RawUTF8 (int blob_index) const
 
UNICHARSET::Direction SymbolDirection (int blob_index) const
 
bool AnyRtlCharsInWord () const
 
bool AnyLtrCharsInWord () const
 
bool UnicharsInReadingOrder () const
 
void InitNonPointers ()
 
void InitPointers ()
 
void Clear ()
 
void ClearResults ()
 
void ClearWordChoices ()
 
void ClearRatings ()
 
WERD_RES & operator= (const WERD_RES &source)
 
void CopySimpleFields (const WERD_RES &source)
 
void InitForRetryRecognition (const WERD_RES &source)
 
bool SetupForRecognition (const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block)
 
void SetupBasicsFromChoppedWord (const UNICHARSET &unicharset_in)
 
void SetupFake (const UNICHARSET &uch)
 
void SetupWordScript (const UNICHARSET &unicharset_in)
 
void SetupBlamerBundle ()
 
void SetupBlobWidthsAndGaps ()
 
void InsertSeam (int blob_number, SEAM *seam)
 
bool AlternativeChoiceAdjustmentsWorseThan (float threshold) const
 
bool IsAmbiguous ()
 
bool StatesAllValid ()
 
void DebugWordChoices (bool debug, const char *word_to_debug)
 
void DebugTopChoice (const char *msg) const
 
void FilterWordChoices (int debug_level)
 
void ComputeAdaptionThresholds (float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
 
bool LogNewRawChoice (WERD_CHOICE *word_choice)
 
bool LogNewCookedChoice (int max_num_choices, bool debug, WERD_CHOICE *word_choice)
 
void PrintBestChoices () const
 
int GetBlobsWidth (int start_blob, int last_blob)
 
int GetBlobsGap (int blob_index)
 
BLOB_CHOICE * GetBlobChoice (int index) const
 
BLOB_CHOICE_LIST * GetBlobChoices (int index) const
 
void ConsumeWordResults (WERD_RES *word)
 
void ReplaceBestChoice (WERD_CHOICE *choice)
 
void RebuildBestState ()
 
void CloneChoppedToRebuild ()
 
void SetupBoxWord ()
 
void SetScriptPositions ()
 
void SetAllScriptPositions (tesseract::ScriptPos position)
 
void FakeClassifyWord (int blob_count, BLOB_CHOICE **choices)
 
void FakeWordFromRatings (PermuterType permuter)
 
void BestChoiceToCorrectText ()
 
bool ConditionalBlobMerge (TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb)
 
void MergeAdjacentBlobs (int index)
 
UNICHAR_ID BothQuotes (UNICHAR_ID id1, UNICHAR_ID id2)
 
void fix_quotes ()
 
UNICHAR_ID BothHyphens (UNICHAR_ID id1, UNICHAR_ID id2)
 
bool HyphenBoxesOverlap (const TBOX &box1, const TBOX &box2)
 
void fix_hyphens ()
 
UNICHAR_ID BothSpaces (UNICHAR_ID id1, UNICHAR_ID id2)
 
void merge_tess_fails ()
 
void copy_on (WERD_RES *word_res)
 
bool PiecesAllNatural (int start, int count) const
 
- Public Member Functions inherited from ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Static Public Member Functions

static WERD_RES * deep_copy (const WERD_RES *src)
 

Public Attributes

WERD * word
 
tesseract::BoxWord * bln_boxes
 
ROW * blob_row
 
DENORM denorm
 
const UNICHARSET * uch_set
 
TWERD * chopped_word
 
GenericVector< SEAM * > seam_array
 
GenericVector< int > blob_widths
 
GenericVector< int > blob_gaps
 
std::vector< std::vector< std::pair< const char *, float > > > timesteps
 
MATRIX * ratings
 
WERD_CHOICE * best_choice
 
WERD_CHOICE * raw_choice
 
WERD_CHOICE_LIST best_choices
 
BlamerBundle * blamer_bundle
 
TWERD * rebuild_word
 
tesseract::BoxWord * box_word
 
GenericVector< int > best_state
 
GenericVector< STRING > correct_text
 
tesseract::Tesseract * tesseract
 
WERD_CHOICE * ep_choice
 
REJMAP reject_map
 
bool tess_failed
 
bool tess_accepted
 
bool tess_would_adapt
 
bool done
 
bool small_caps
 
bool odd_size
 
int8_t italic
 
int8_t bold
 
const FontInfo * fontinfo
 
const FontInfo * fontinfo2
 
int8_t fontinfo_id_count
 
int8_t fontinfo_id2_count
 
bool guessed_x_ht
 
bool guessed_caps_ht
 
CRUNCH_MODE unlv_crunch_mode
 
float x_height
 
float caps_height
 
float baseline_shift
 
float space_certainty
 
bool combination
 
bool part_of_combo
 
bool reject_spaces
 

Detailed Description

Definition at line 168 of file pageres.h.

Constructor & Destructor Documentation

WERD_RES::WERD_RES ( )
inline

Definition at line 337 of file pageres.h.

337  {
338  InitNonPointers();
339  InitPointers();
340  }
void InitPointers()
Definition: pageres.cpp:1126
void InitNonPointers()
Definition: pageres.cpp:1098
WERD_RES::WERD_RES ( WERD * the_word)
inline

Definition at line 341 of file pageres.h.

341  {
342  InitNonPointers();
343  InitPointers();
344  word = the_word;
345  }
void InitPointers()
Definition: pageres.cpp:1126
void InitNonPointers()
Definition: pageres.cpp:1098
WERD * word
Definition: pageres.h:188
WERD_RES::WERD_RES ( const WERD_RES & source)
inline

Definition at line 348 of file pageres.h.

348  : ELIST_LINK(source) {
349  // combination is used in function Clear which is called from operator=.
350  combination = false;
351  InitPointers();
352  *this = source; // see operator=
353  }
bool combination
Definition: pageres.h:333
void InitPointers()
Definition: pageres.cpp:1126
ELIST_LINK()
Definition: elst.h:90
WERD_RES::~WERD_RES ( )

Definition at line 1094 of file pageres.cpp.

1094  {
1095  Clear();
1096 }
void Clear()
Definition: pageres.cpp:1141

Member Function Documentation

bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan ( float  threshold) const

Definition at line 443 of file pageres.cpp.

443  {
444  // The choices are not changed by this iteration.
445  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
446  for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) {
447  WERD_CHOICE* choice = wc_it.data();
448  if (choice->adjust_factor() <= threshold)
449  return false;
450  }
451  return true;
452 }
float adjust_factor() const
Definition: ratngs.h:306
WERD_CHOICE_LIST best_choices
Definition: pageres.h:242
bool WERD_RES::AnyLtrCharsInWord ( ) const
inline

Definition at line 408 of file pageres.h.

408  {
409  if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1)
410  return false;
411  for (int id = 0; id < best_choice->length(); id++) {
412  int unichar_id = best_choice->unichar_id(id);
413  if (unichar_id < 0 || unichar_id >= uch_set->size())
414  continue; // Ignore illegal chars.
415  UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
416  if (dir == UNICHARSET::U_LEFT_TO_RIGHT ||
418  return true;
419  }
420  return false;
421  }
int length() const
Definition: ratngs.h:303
int size() const
Definition: unicharset.h:341
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:690
const UNICHARSET * uch_set
Definition: pageres.h:205
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
WERD_CHOICE * best_choice
Definition: pageres.h:234
bool WERD_RES::AnyRtlCharsInWord ( ) const
inline

Definition at line 392 of file pageres.h.

392  {
393  if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1)
394  return false;
395  for (int id = 0; id < best_choice->length(); id++) {
396  int unichar_id = best_choice->unichar_id(id);
397  if (unichar_id < 0 || unichar_id >= uch_set->size())
398  continue; // Ignore illegal chars.
399  UNICHARSET::Direction dir =
400  uch_set->get_direction(unichar_id);
401  if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
403  return true;
404  }
405  return false;
406  }
int length() const
Definition: ratngs.h:303
int size() const
Definition: unicharset.h:341
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:690
const UNICHARSET * uch_set
Definition: pageres.h:205
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
WERD_CHOICE * best_choice
Definition: pageres.h:234
void WERD_RES::BestChoiceToCorrectText ( )

Definition at line 927 of file pageres.cpp.

927  {
929  ASSERT_HOST(best_choice != nullptr);
930  for (int i = 0; i < best_choice->length(); ++i) {
931  UNICHAR_ID choice_id = best_choice->unichar_id(i);
932  const char* blob_choice = uch_set->id_to_unichar(choice_id);
933  correct_text.push_back(STRING(blob_choice));
934  }
935 }
Definition: strngs.h:45
GenericVector< STRING > correct_text
Definition: pageres.h:274
int length() const
Definition: ratngs.h:303
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
const UNICHARSET * uch_set
Definition: pageres.h:205
int push_back(T object)
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
WERD_CHOICE * best_choice
Definition: pageres.h:234
#define ASSERT_HOST(x)
Definition: errcode.h:88
int UNICHAR_ID
Definition: unichar.h:34
const char* WERD_RES::BestUTF8 ( int  blob_index,
bool  in_rtl_context 
) const
inline

Definition at line 362 of file pageres.h.

362  {
363  if (blob_index < 0 || best_choice == nullptr ||
364  blob_index >= best_choice->length())
365  return nullptr;
366  UNICHAR_ID id = best_choice->unichar_id(blob_index);
367  if (id < 0 || id >= uch_set->size())
368  return nullptr;
369  UNICHAR_ID mirrored = uch_set->get_mirror(id);
370  if (in_rtl_context && mirrored > 0)
371  id = mirrored;
372  return uch_set->id_to_unichar_ext(id);
373  }
UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const
Definition: unicharset.h:697
int length() const
Definition: ratngs.h:303
int size() const
Definition: unicharset.h:341
const char * id_to_unichar_ext(UNICHAR_ID id) const
Definition: unicharset.cpp:299
const UNICHARSET * uch_set
Definition: pageres.h:205
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
WERD_CHOICE * best_choice
Definition: pageres.h:234
int UNICHAR_ID
Definition: unichar.h:34
UNICHAR_ID WERD_RES::BothHyphens ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1034 of file pageres.cpp.

1034  {
1035  const char *ch = uch_set->id_to_unichar(id1);
1036  const char *next_ch = uch_set->id_to_unichar(id2);
1037  if (strlen(ch) == 1 && strlen(next_ch) == 1 &&
1038  (*ch == '-' || *ch == '~') && (*next_ch == '-' || *next_ch == '~'))
1039  return uch_set->unichar_to_id("-");
1040  return INVALID_UNICHAR_ID;
1041 }
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
const UNICHARSET * uch_set
Definition: pageres.h:205
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
UNICHAR_ID WERD_RES::BothQuotes ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1012 of file pageres.cpp.

1012  {
1013  const char *ch = uch_set->id_to_unichar(id1);
1014  const char *next_ch = uch_set->id_to_unichar(id2);
1015  if (is_simple_quote(ch, strlen(ch)) &&
1016  is_simple_quote(next_ch, strlen(next_ch)))
1017  return uch_set->unichar_to_id("\"");
1018  return INVALID_UNICHAR_ID;
1019 }
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
const UNICHARSET * uch_set
Definition: pageres.h:205
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
UNICHAR_ID WERD_RES::BothSpaces ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1063 of file pageres.cpp.

1063  {
1064  if (id1 == id2 && id1 == uch_set->unichar_to_id(" "))
1065  return id1;
1066  else
1067  return INVALID_UNICHAR_ID;
1068 }
const UNICHARSET * uch_set
Definition: pageres.h:205
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
void WERD_RES::Clear ( )

Definition at line 1141 of file pageres.cpp.

1141  {
1142  if (combination) {
1143  delete word;
1144  }
1145  word = nullptr;
1146  delete blamer_bundle;
1147  blamer_bundle = nullptr;
1148  ClearResults();
1149 }
bool combination
Definition: pageres.h:333
void ClearResults()
Definition: pageres.cpp:1151
WERD * word
Definition: pageres.h:188
BlamerBundle * blamer_bundle
Definition: pageres.h:245
void WERD_RES::ClearRatings ( )

Definition at line 1184 of file pageres.cpp.

1184  {
1185  if (ratings != nullptr) {
1186  ratings->delete_matrix_pointers();
1187  delete ratings;
1188  ratings = nullptr;
1189  }
1190 }
void delete_matrix_pointers()
Definition: matrix.h:458
MATRIX * ratings
Definition: pageres.h:230
void WERD_RES::ClearResults ( )

Definition at line 1151 of file pageres.cpp.

1151  {
1152  done = false;
1153  fontinfo = nullptr;
1154  fontinfo2 = nullptr;
1155  fontinfo_id_count = 0;
1156  fontinfo_id2_count = 0;
1157  delete bln_boxes;
1158  bln_boxes = nullptr;
1159  blob_row = nullptr;
1160  delete chopped_word;
1161  chopped_word = nullptr;
1162  delete rebuild_word;
1163  rebuild_word = nullptr;
1164  delete box_word;
1165  box_word = nullptr;
1166  best_state.clear();
1167  correct_text.clear();
1169  seam_array.clear();
1170  blob_widths.clear();
1171  blob_gaps.clear();
1172  ClearRatings();
1173  ClearWordChoices();
1174  if (blamer_bundle != nullptr) blamer_bundle->ClearResults();
1175 }
void delete_data_pointers()
void ClearWordChoices()
Definition: pageres.cpp:1176
bool done
Definition: pageres.h:297
TWERD * rebuild_word
Definition: pageres.h:259
GenericVector< int > blob_widths
Definition: pageres.h:218
GenericVector< STRING > correct_text
Definition: pageres.h:274
GenericVector< SEAM * > seam_array
Definition: pageres.h:216
tesseract::BoxWord * bln_boxes
Definition: pageres.h:197
const FontInfo * fontinfo2
Definition: pageres.h:304
int8_t fontinfo_id2_count
Definition: pageres.h:306
const FontInfo * fontinfo
Definition: pageres.h:303
tesseract::BoxWord * box_word
Definition: pageres.h:265
ROW * blob_row
Definition: pageres.h:199
void ClearResults()
Definition: blamer.h:185
int8_t fontinfo_id_count
Definition: pageres.h:305
TWERD * chopped_word
Definition: pageres.h:214
GenericVector< int > best_state
Definition: pageres.h:270
GenericVector< int > blob_gaps
Definition: pageres.h:221
void ClearRatings()
Definition: pageres.cpp:1184
BlamerBundle * blamer_bundle
Definition: pageres.h:245
void WERD_RES::ClearWordChoices ( )

Definition at line 1176 of file pageres.cpp.

1176  {
1177  best_choice = nullptr;
1178  delete raw_choice;
1179  raw_choice = nullptr;
1180  best_choices.clear();
1181  delete ep_choice;
1182  ep_choice = nullptr;
1183 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:242
WERD_CHOICE * ep_choice
Definition: pageres.h:285
WERD_CHOICE * best_choice
Definition: pageres.h:234
WERD_CHOICE * raw_choice
Definition: pageres.h:239
void WERD_RES::CloneChoppedToRebuild ( )

Definition at line 839 of file pageres.cpp.

839  {
840  delete rebuild_word;
842  SetupBoxWord();
843  int word_len = box_word->length();
844  best_state.reserve(word_len);
845  correct_text.reserve(word_len);
846  for (int i = 0; i < word_len; ++i) {
849  }
850 }
TWERD * rebuild_word
Definition: pageres.h:259
Definition: blobs.h:397
Definition: strngs.h:45
void SetupBoxWord()
Definition: pageres.cpp:853
GenericVector< STRING > correct_text
Definition: pageres.h:274
int length() const
Definition: boxword.h:83
void reserve(int size)
tesseract::BoxWord * box_word
Definition: pageres.h:265
int push_back(T object)
TWERD * chopped_word
Definition: pageres.h:214
GenericVector< int > best_state
Definition: pageres.h:270
void WERD_RES::ComputeAdaptionThresholds ( float  certainty_scale,
float  min_rating,
float  max_rating,
float  rating_margin,
float *  thresholds 
)

Definition at line 565 of file pageres.cpp.

569  {
570  int chunk = 0;
571  int end_chunk = best_choice->state(0);
572  int end_raw_chunk = raw_choice->state(0);
573  int raw_blob = 0;
574  for (int i = 0; i < best_choice->length(); i++, thresholds++) {
575  float avg_rating = 0.0f;
576  int num_error_chunks = 0;
577 
578  // For each chunk in best choice blob i, count non-matching raw results.
579  while (chunk < end_chunk) {
580  if (chunk >= end_raw_chunk) {
581  ++raw_blob;
582  end_raw_chunk += raw_choice->state(raw_blob);
583  }
584  if (best_choice->unichar_id(i) !=
585  raw_choice->unichar_id(raw_blob)) {
586  avg_rating += raw_choice->certainty(raw_blob);
587  ++num_error_chunks;
588  }
589  ++chunk;
590  }
591 
592  if (num_error_chunks > 0) {
593  avg_rating /= num_error_chunks;
594  *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin);
595  } else {
596  *thresholds = max_rating;
597  }
598 
599  if (*thresholds > max_rating)
600  *thresholds = max_rating;
601  if (*thresholds < min_rating)
602  *thresholds = min_rating;
603  }
604 }
int length() const
Definition: ratngs.h:303
float certainty() const
Definition: ratngs.h:330
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
WERD_CHOICE * best_choice
Definition: pageres.h:234
WERD_CHOICE * raw_choice
Definition: pageres.h:239
int state(int index) const
Definition: ratngs.h:319
bool WERD_RES::ConditionalBlobMerge ( TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *  class_cb,
TessResultCallback2< bool, const TBOX &, const TBOX & > *  box_cb 
)

Definition at line 942 of file pageres.cpp.

944  {
945  ASSERT_HOST(best_choice->length() == 0 || ratings != nullptr);
946  bool modified = false;
947  for (int i = 0; i + 1 < best_choice->length(); ++i) {
948  UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i),
949  best_choice->unichar_id(i+1));
950  if (new_id != INVALID_UNICHAR_ID &&
951  (box_cb == nullptr || box_cb->Run(box_word->BlobBox(i),
952  box_word->BlobBox(i + 1)))) {
953  // Raw choice should not be fixed.
954  best_choice->set_unichar_id(new_id, i);
955  modified = true;
957  const MATRIX_COORD& coord = best_choice->MatrixCoord(i);
958  if (!coord.Valid(*ratings)) {
959  ratings->IncreaseBandSize(coord.row + 1 - coord.col);
960  }
961  BLOB_CHOICE_LIST* blob_choices = GetBlobChoices(i);
962  if (FindMatchingChoice(new_id, blob_choices) == nullptr) {
963  // Insert a fake result.
964  auto* blob_choice = new BLOB_CHOICE;
965  blob_choice->set_unichar_id(new_id);
966  BLOB_CHOICE_IT bc_it(blob_choices);
967  bc_it.add_before_then_move(blob_choice);
968  }
969  }
970  }
971  delete class_cb;
972  delete box_cb;
973  return modified;
974 }
MATRIX_COORD MatrixCoord(int index) const
Definition: ratngs.cpp:302
int length() const
Definition: ratngs.h:303
const TBOX & BlobBox(int index) const
Definition: boxword.h:84
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:180
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:49
void set_unichar_id(UNICHAR_ID unichar_id, int index)
Definition: ratngs.h:359
tesseract::BoxWord * box_word
Definition: pageres.h:265
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
WERD_CHOICE * best_choice
Definition: pageres.h:234
MATRIX * ratings
Definition: pageres.h:230
#define ASSERT_HOST(x)
Definition: errcode.h:88
bool Valid(const MATRIX &m) const
Definition: matrix.h:618
int UNICHAR_ID
Definition: unichar.h:34
virtual R Run(A1, A2)=0
void MergeAdjacentBlobs(int index)
Definition: pageres.cpp:978
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:763
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:145
void WERD_RES::ConsumeWordResults ( WERD_RES * word)

Definition at line 769 of file pageres.cpp.

769  {
770  denorm = word->denorm;
771  blob_row = word->blob_row;
772  MovePointerData(&chopped_word, &word->chopped_word);
773  MovePointerData(&rebuild_word, &word->rebuild_word);
774  MovePointerData(&box_word, &word->box_word);
776  seam_array = word->seam_array;
777  word->seam_array.clear();
778  best_state.move(&word->best_state);
780  blob_widths.move(&word->blob_widths);
781  blob_gaps.move(&word->blob_gaps);
782  if (ratings != nullptr) ratings->delete_matrix_pointers();
783  MovePointerData(&ratings, &word->ratings);
784  best_choice = word->best_choice;
785  MovePointerData(&raw_choice, &word->raw_choice);
786  best_choices.clear();
787  WERD_CHOICE_IT wc_it(&best_choices);
788  wc_it.add_list_after(&word->best_choices);
789  reject_map = word->reject_map;
790  if (word->blamer_bundle != nullptr) {
791  assert(blamer_bundle != nullptr);
793  }
794  CopySimpleFields(*word);
795 }
void delete_data_pointers()
TWERD * rebuild_word
Definition: pageres.h:259
GenericVector< int > blob_widths
Definition: pageres.h:218
void CopyResults(const BlamerBundle &other)
Definition: blamer.h:206
GenericVector< STRING > correct_text
Definition: pageres.h:274
GenericVector< SEAM * > seam_array
Definition: pageres.h:216
void delete_matrix_pointers()
Definition: matrix.h:458
WERD_CHOICE_LIST best_choices
Definition: pageres.h:242
REJMAP reject_map
Definition: pageres.h:286
tesseract::BoxWord * box_word
Definition: pageres.h:265
ROW * blob_row
Definition: pageres.h:199
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:253
WERD_CHOICE * best_choice
Definition: pageres.h:234
MATRIX * ratings
Definition: pageres.h:230
void move(GenericVector< T > *from)
TWERD * chopped_word
Definition: pageres.h:214
GenericVector< int > best_state
Definition: pageres.h:270
GenericVector< int > blob_gaps
Definition: pageres.h:221
WERD_CHOICE * raw_choice
Definition: pageres.h:239
DENORM denorm
Definition: pageres.h:203
BlamerBundle * blamer_bundle
Definition: pageres.h:245
void WERD_RES::copy_on ( WERD_RES * word_res)
inline

Definition at line 661 of file pageres.h.

661  { //from this word
662  word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL));
663  word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL));
664  word->copy_on(word_res->word);
665  }
start of line
Definition: werd.h:32
void copy_on(WERD *other)
Definition: werd.cpp:221
end of line
Definition: werd.h:33
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117
WERD * word
Definition: pageres.h:188
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118
void WERD_RES::CopySimpleFields ( const WERD_RES & source)

Definition at line 253 of file pageres.cpp.

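253  {
254  tess_failed = source.tess_failed;
255  tess_accepted = source.tess_accepted;
256  tess_would_adapt = source.tess_would_adapt;
257  done = source.done;
258  unlv_crunch_mode = source.unlv_crunch_mode;
259  small_caps = source.small_caps;
260  odd_size = source.odd_size;
261  italic = source.italic;
262  bold = source.bold;
263  fontinfo = source.fontinfo;
264  fontinfo2 = source.fontinfo2;
265  fontinfo_id_count = source.fontinfo_id_count;
266  fontinfo_id2_count = source.fontinfo_id2_count;
267  x_height = source.x_height;
268  caps_height = source.caps_height;
269  baseline_shift = source.baseline_shift;
270  guessed_x_ht = source.guessed_x_ht;
271  guessed_caps_ht = source.guessed_caps_ht;
272  reject_spaces = source.reject_spaces;
273  uch_set = source.uch_set;
274  tesseract = source.tesseract;
275 }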
float baseline_shift
Definition: pageres.h:312
bool tess_failed
Definition: pageres.h:287
bool done
Definition: pageres.h:297
bool guessed_x_ht
Definition: pageres.h:307
int8_t bold
Definition: pageres.h:301
bool tess_accepted
Definition: pageres.h:295
tesseract::Tesseract * tesseract
Definition: pageres.h:281
bool odd_size
Definition: pageres.h:299
bool tess_would_adapt
Definition: pageres.h:296
const UNICHARSET * uch_set
Definition: pageres.h:205
const FontInfo * fontinfo2
Definition: pageres.h:304
bool guessed_caps_ht
Definition: pageres.h:308
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:309
int8_t fontinfo_id2_count
Definition: pageres.h:306
bool small_caps
Definition: pageres.h:298
const FontInfo * fontinfo
Definition: pageres.h:303
int8_t fontinfo_id_count
Definition: pageres.h:305
float caps_height
Definition: pageres.h:311
float x_height
Definition: pageres.h:310
bool reject_spaces
Definition: pageres.h:335
int8_t italic
Definition: pageres.h:300
void WERD_RES::DebugTopChoice ( const char *  msg) const

Definition at line 503 of file pageres.cpp.

503  {
504  tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ",
505  tess_accepted, tess_would_adapt, done);
506  if (best_choice == nullptr)
507  tprintf("<Null choice>\n");
508  else
509  best_choice->print(msg);
510 }
bool done
Definition: pageres.h:297
bool tess_accepted
Definition: pageres.h:295
bool tess_would_adapt
Definition: pageres.h:296
void print() const
Definition: ratngs.h:580
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
WERD_CHOICE * best_choice
Definition: pageres.h:234
void WERD_RES::DebugWordChoices ( bool  debug,
const char *  word_to_debug 
)

Definition at line 484 of file pageres.cpp.

484  {
485  if (debug ||
486  (word_to_debug != nullptr && *word_to_debug != '\0' && best_choice != nullptr &&
487  best_choice->unichar_string() == STRING(word_to_debug))) {
488  if (raw_choice != nullptr)
489  raw_choice->print("\nBest Raw Choice");
490 
491  WERD_CHOICE_IT it(&best_choices);
492  int index = 0;
493  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
494  WERD_CHOICE* choice = it.data();
495  STRING label;
496  label.add_str_int("\nCooked Choice #", index);
497  choice->print(label.string());
498  }
499  }
500 }
Definition: strngs.h:45
void add_str_int(const char *str, int number)
Definition: strngs.cpp:377
const STRING & unichar_string() const
Definition: ratngs.h:541
WERD_CHOICE_LIST best_choices
Definition: pageres.h:242
void print() const
Definition: ratngs.h:580
const char * string() const
Definition: strngs.cpp:194
WERD_CHOICE * best_choice
Definition: pageres.h:234
WERD_CHOICE * raw_choice
Definition: pageres.h:239
static WERD_RES* WERD_RES::deep_copy ( const WERD_RES * src)
inline static

Definition at line 650 of file pageres.h.

650  {
651  auto* result = new WERD_RES(*src);
652  // That didn't copy the ratings, but we want a copy if there is one to
653  // begin with.
654  if (src->ratings != nullptr)
655  result->ratings = src->ratings->DeepCopy();
656  return result;
657  }
WERD_RES()
Definition: pageres.h:337
MATRIX * ratings
Definition: pageres.h:230
MATRIX * DeepCopy() const
Definition: matrix.cpp:94
void WERD_RES::FakeClassifyWord ( int  blob_count,
BLOB_CHOICE **  choices 
)

Definition at line 881 of file pageres.cpp.

881  {
882  // Setup the WERD_RES.
883  ASSERT_HOST(box_word != nullptr);
884  ASSERT_HOST(blob_count == box_word->length());
886  ClearRatings();
887  ratings = new MATRIX(blob_count, 1);
888  for (int c = 0; c < blob_count; ++c) {
889  auto* choice_list = new BLOB_CHOICE_LIST;
890  BLOB_CHOICE_IT choice_it(choice_list);
891  choice_it.add_after_then_move(choices[c]);
892  ratings->put(c, c, choice_list);
893  }
895  reject_map.initialise(blob_count);
896  best_state.init_to_size(blob_count, 1);
897  done = true;
898 }
void ClearWordChoices()
Definition: pageres.cpp:1176
bool done
Definition: pageres.h:297
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:902
void init_to_size(int size, const T &t)
int length() const
Definition: boxword.h:83
REJMAP reject_map
Definition: pageres.h:286
void put(ICOORD pos, const T &thing)
Definition: matrix.h:223
void initialise(int16_t length)
Definition: rejctmap.cpp:273
tesseract::BoxWord * box_word
Definition: pageres.h:265
Definition: matrix.h:578
MATRIX * ratings
Definition: pageres.h:230
#define ASSERT_HOST(x)
Definition: errcode.h:88
GenericVector< int > best_state
Definition: pageres.h:270
void ClearRatings()
Definition: pageres.cpp:1184
void WERD_RES::FakeWordFromRatings ( PermuterType  permuter)

Definition at line 902 of file pageres.cpp.

902  {
903  int num_blobs = ratings->dimension();
904  auto* word_choice = new WERD_CHOICE(uch_set, num_blobs);
905  word_choice->set_permuter(permuter);
906  for (int b = 0; b < num_blobs; ++b) {
907  UNICHAR_ID unichar_id = UNICHAR_SPACE;
908  float rating = INT32_MAX;
909  float certainty = -INT32_MAX;
910  BLOB_CHOICE_LIST* choices = ratings->get(b, b);
911  if (choices != nullptr && !choices->empty()) {
912  BLOB_CHOICE_IT bc_it(choices);
913  BLOB_CHOICE* choice = bc_it.data();
914  unichar_id = choice->unichar_id();
915  rating = choice->rating();
916  certainty = choice->certainty();
917  }
918  word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating,
919  certainty);
920  }
921  LogNewRawChoice(word_choice);
922  // Ownership of word_choice taken by word here.
923  LogNewCookedChoice(1, false, word_choice);
924 }
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77
float rating() const
Definition: ratngs.h:80
T get(ICOORD pos) const
Definition: matrix.h:231
const UNICHARSET * uch_set
Definition: pageres.h:205
float certainty() const
Definition: ratngs.h:83
bool LogNewRawChoice(WERD_CHOICE *word_choice)
Definition: pageres.cpp:608
MATRIX * ratings
Definition: pageres.h:230
int dimension() const
Definition: matrix.h:536
int UNICHAR_ID
Definition: unichar.h:34
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
Definition: pageres.cpp:624
void WERD_RES::FilterWordChoices ( int  debug_level)

Definition at line 517 of file pageres.cpp.

517  {
518  if (best_choice == nullptr || best_choices.singleton())
519  return;
520 
521  if (debug_level >= 2)
522  best_choice->print("\nFiltering against best choice");
523  WERD_CHOICE_IT it(&best_choices);
524  int index = 0;
525  for (it.forward(); !it.at_first(); it.forward(), ++index) {
526  WERD_CHOICE* choice = it.data();
527  float threshold = StopperAmbigThreshold(best_choice->adjust_factor(),
528  choice->adjust_factor());
529  // i, j index the blob choice in choice, best_choice.
530  // chunk is an index into the chopped_word blobs (AKA chunks).
531  // Since the two words may use different segmentations of the chunks, we
532  // iterate over the chunks to find out whether a comparable blob
533  // classification is much worse than the best result.
534  int i = 0, j = 0, chunk = 0;
535  // Each iteration of the while deals with 1 chunk. On entry choice_chunk
536  // and best_chunk are the indices of the first chunk in the NEXT blob,
537  // i.e. we don't have to increment i, j while chunk < choice_chunk and
538  // best_chunk respectively.
539  int choice_chunk = choice->state(0), best_chunk = best_choice->state(0);
540  while (i < choice->length() && j < best_choice->length()) {
541  if (choice->unichar_id(i) != best_choice->unichar_id(j) &&
542  choice->certainty(i) - best_choice->certainty(j) < threshold) {
543  if (debug_level >= 2) {
544  choice->print("WorstCertaintyDiffWorseThan");
545  tprintf(
546  "i %d j %d Choice->Blob[i].Certainty %.4g"
547  " WorstOtherChoiceCertainty %g Threshold %g\n",
548  i, j, choice->certainty(i), best_choice->certainty(j), threshold);
549  tprintf("Discarding bad choice #%d\n", index);
550  }
551  delete it.extract();
552  break;
553  }
554  ++chunk;
555  // If needed, advance choice_chunk to keep up with chunk.
556  while (choice_chunk < chunk && ++i < choice->length())
557  choice_chunk += choice->state(i);
558  // If needed, advance best_chunk to keep up with chunk.
559  while (best_chunk < chunk && ++j < best_choice->length())
560  best_chunk += best_choice->state(j);
561  }
562  }
563 }
float adjust_factor() const
Definition: ratngs.h:306
WERD_CHOICE_LIST best_choices
Definition: pageres.h:242
void print() const
Definition: ratngs.h:580
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
float certainty() const
Definition: ratngs.h:330
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
WERD_CHOICE * best_choice
Definition: pageres.h:234
int state(int index) const
Definition: ratngs.h:319
void WERD_RES::fix_hyphens ( )

Definition at line 1051 of file pageres.cpp.

1051  {
1052  if (!uch_set->contains_unichar("-") ||
1054  return; // Don't create it if it is disallowed.
1055 
1059 }
bool HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2)
Definition: pageres.cpp:1045
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:878
const UNICHARSET * uch_set
Definition: pageres.h:205
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb)
Definition: pageres.cpp:942
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:671
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258
UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1034
void WERD_RES::fix_quotes ( )

Definition at line 1022 of file pageres.cpp.

1022  {
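1023  if (!uch_set->contains_unichar("\"") ||
1024  !uch_set->get_enabled(uch_set->unichar_to_id("\"")))
1025  return; // Don't create it if it is disallowed.
1026 
1027  ConditionalBlobMerge(
1028  NewPermanentTessCallback(this, &WERD_RES::BothQuotes),
1029  nullptr);
1030 }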
UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1012
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:878
const UNICHARSET * uch_set
Definition: pageres.h:205
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb)
Definition: pageres.cpp:942
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:671
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258
BLOB_CHOICE * WERD_RES::GetBlobChoice ( int  index) const

Definition at line 754 of file pageres.cpp.

754  {
755  if (index < 0 || index >= best_choice->length()) return nullptr;
756  BLOB_CHOICE_LIST* choices = GetBlobChoices(index);
757  return FindMatchingChoice(best_choice->unichar_id(index), choices);
758 }
int length() const
Definition: ratngs.h:303
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:180
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
WERD_CHOICE * best_choice
Definition: pageres.h:234
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:763
BLOB_CHOICE_LIST * WERD_RES::GetBlobChoices ( int  index) const

Definition at line 763 of file pageres.cpp.

763  {
764  return best_choice->blob_choices(index, ratings);
765 }
WERD_CHOICE * best_choice
Definition: pageres.h:234
MATRIX * ratings
Definition: pageres.h:230
BLOB_CHOICE_LIST * blob_choices(int index, MATRIX *ratings) const
Definition: ratngs.cpp:290
int WERD_RES::GetBlobsGap ( int  blob_index)

Definition at line 744 of file pageres.cpp.

744  {
745  if (blob_index < 0 || blob_index >= blob_gaps.size())
746  return 0;
747  return blob_gaps[blob_index];
748 }
GenericVector< int > blob_gaps
Definition: pageres.h:221
int size() const
Definition: genericvector.h:70
int WERD_RES::GetBlobsWidth ( int  start_blob,
int  last_blob 
)

Definition at line 734 of file pageres.cpp.

734  {
735  int result = 0;
736  for (int b = start_blob; b <= last_blob; ++b) {
737  result += blob_widths[b];
738  if (b < last_blob)
739  result += blob_gaps[b];
740  }
741  return result;
742 }
GenericVector< int > blob_widths
Definition: pageres.h:218
GenericVector< int > blob_gaps
Definition: pageres.h:221
bool WERD_RES::HyphenBoxesOverlap ( const TBOX & box1,
const TBOX & box2 
)

Definition at line 1045 of file pageres.cpp.

1045  {
1046  return box1.right() >= box2.left();
1047 }
int16_t right() const
Definition: rect.h:79
int16_t left() const
Definition: rect.h:72
void WERD_RES::InitForRetryRecognition ( const WERD_RES & source)

Definition at line 281 of file pageres.cpp.

281  {
282  word = source.word;
283  CopySimpleFields(source);
284  if (source.blamer_bundle != nullptr) {
285  blamer_bundle = new BlamerBundle();
286  blamer_bundle->CopyTruth(*source.blamer_bundle);
287  }
288 }
void CopyTruth(const BlamerBundle &other)
Definition: blamer.h:199
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:253
WERD * word
Definition: pageres.h:188
BlamerBundle * blamer_bundle
Definition: pageres.h:245
void WERD_RES::InitNonPointers ( )

Definition at line 1098 of file pageres.cpp.

1098  {
1099  tess_failed = false;
1100  tess_accepted = false;
1101  tess_would_adapt = false;
1102  done = false;
1103  unlv_crunch_mode = CR_NONE;
1104  small_caps = false;
1105  odd_size = false;
1106  italic = false;
1107  bold = false;
1108  // The fontinfos and tesseract count as non-pointers as they point to
1109  // data owned elsewhere.
1110  fontinfo = nullptr;
1111  fontinfo2 = nullptr;
1112  tesseract = nullptr;
1113  fontinfo_id_count = 0;
1114  fontinfo_id2_count = 0;
1115  x_height = 0.0;
1116  caps_height = 0.0;
1117  baseline_shift = 0.0f;
1118  space_certainty = 0.0f;
1119  guessed_x_ht = true;
1120  guessed_caps_ht = true;
1121  combination = false;
1122  part_of_combo = false;
1123  reject_spaces = false;
1124 }
float baseline_shift
Definition: pageres.h:312
bool tess_failed
Definition: pageres.h:287
bool done
Definition: pageres.h:297
bool guessed_x_ht
Definition: pageres.h:307
float space_certainty
Definition: pageres.h:315
bool combination
Definition: pageres.h:333
int8_t bold
Definition: pageres.h:301
bool tess_accepted
Definition: pageres.h:295
bool odd_size
Definition: pageres.h:299
bool tess_would_adapt
Definition: pageres.h:296
const FontInfo * fontinfo2
Definition: pageres.h:304
bool guessed_caps_ht
Definition: pageres.h:308
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:309
int8_t fontinfo_id2_count
Definition: pageres.h:306
bool small_caps
Definition: pageres.h:298
const FontInfo * fontinfo
Definition: pageres.h:303
int8_t fontinfo_id_count
Definition: pageres.h:305
bool part_of_combo
Definition: pageres.h:334
float caps_height
Definition: pageres.h:311
float x_height
Definition: pageres.h:310
bool reject_spaces
Definition: pageres.h:335
int8_t italic
Definition: pageres.h:300
void WERD_RES::InitPointers ( )

Definition at line 1126 of file pageres.cpp.

1126  {
1127  word = nullptr;
1128  bln_boxes = nullptr;
1129  blob_row = nullptr;
1130  uch_set = nullptr;
1131  chopped_word = nullptr;
1132  rebuild_word = nullptr;
1133  box_word = nullptr;
1134  ratings = nullptr;
1135  best_choice = nullptr;
1136  raw_choice = nullptr;
1137  ep_choice = nullptr;
1138  blamer_bundle = nullptr;
1139 }
TWERD * rebuild_word
Definition: pageres.h:259
WERD_CHOICE * ep_choice
Definition: pageres.h:285
tesseract::BoxWord * bln_boxes
Definition: pageres.h:197
const UNICHARSET * uch_set
Definition: pageres.h:205
tesseract::BoxWord * box_word
Definition: pageres.h:265
ROW * blob_row
Definition: pageres.h:199
WERD_CHOICE * best_choice
Definition: pageres.h:234
MATRIX * ratings
Definition: pageres.h:230
TWERD * chopped_word
Definition: pageres.h:214
WERD_CHOICE * raw_choice
Definition: pageres.h:239
WERD * word
Definition: pageres.h:188
BlamerBundle * blamer_bundle
Definition: pageres.h:245
void WERD_RES::InsertSeam ( int  blob_number,
SEAM * seam 
)

Definition at line 422 of file pageres.cpp.

422  {
423  // Insert the seam into the SEAMS array.
424  seam->PrepareToInsertSeam(seam_array, chopped_word->blobs, blob_number, true);
425  seam_array.insert(seam, blob_number);
426  if (ratings != nullptr) {
427  // Expand the ratings matrix.
428  ratings = ratings->ConsumeAndMakeBigger(blob_number);
429  // Fix all the segmentation states.
430  if (raw_choice != nullptr)
431  raw_choice->UpdateStateForSplit(blob_number);
432  WERD_CHOICE_IT wc_it(&best_choices);
433  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
434  WERD_CHOICE* choice = wc_it.data();
435  choice->UpdateStateForSplit(blob_number);
436  }
437  SetupBlobWidthsAndGaps();
438  }
439 }
void UpdateStateForSplit(int blob_position)
Definition: ratngs.cpp:699
GenericVector< TBLOB * > blobs
Definition: blobs.h:438
GenericVector< SEAM * > seam_array
Definition: pageres.h:216
WERD_CHOICE_LIST best_choices
Definition: pageres.h:242
void SetupBlobWidthsAndGaps()
Definition: pageres.cpp:404
MATRIX * ConsumeAndMakeBigger(int ind)
Definition: matrix.cpp:58
bool PrepareToInsertSeam(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int insert_index, bool modify)
Definition: seam.cpp:76
void insert(const T &t, int index)
MATRIX * ratings
Definition: pageres.h:230
TWERD * chopped_word
Definition: pageres.h:214
WERD_CHOICE * raw_choice
Definition: pageres.h:239
bool WERD_RES::IsAmbiguous ( )

Definition at line 456 of file pageres.cpp.

456  {
457  return !best_choices.singleton() || best_choice->dangerous_ambig_found();
458 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:242
bool dangerous_ambig_found() const
Definition: ratngs.h:363
WERD_CHOICE * best_choice
Definition: pageres.h:234
bool WERD_RES::LogNewCookedChoice ( int  max_num_choices,
bool  debug,
WERD_CHOICE * word_choice 
)

Definition at line 624 of file pageres.cpp.

625  {
626  if (best_choice != nullptr) {
627  // Throw out obviously bad choices to save some work.
628  // TODO(rays) Get rid of this! This piece of code produces different
629  // results according to the order in which words are found, which is an
630  // undesirable behavior. It would be better to keep all the choices and
631  // prune them later when more information is available.
632  float max_certainty_delta =
633  StopperAmbigThreshold(best_choice->adjust_factor(),
634  word_choice->adjust_factor());
635  if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
636  max_certainty_delta = -kStopperAmbiguityThresholdOffset;
637  if (word_choice->certainty() - best_choice->certainty() <
638  max_certainty_delta) {
639  if (debug) {
640  STRING bad_string;
641  word_choice->string_and_lengths(&bad_string, nullptr);
642  tprintf("Discarding choice \"%s\" with an overly low certainty"
643  " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
644  bad_string.string(), word_choice->certainty(),
645  best_choice->certainty(),
646  max_certainty_delta + best_choice->certainty());
647  }
648  delete word_choice;
649  return false;
650  }
651  }
652 
653  // Insert in the list in order of increasing rating, but knock out worse
654  // string duplicates.
655  WERD_CHOICE_IT it(&best_choices);
656  const STRING& new_str = word_choice->unichar_string();
657  bool inserted = false;
658  int num_choices = 0;
659  if (!it.empty()) {
660  do {
661  WERD_CHOICE* choice = it.data();
662  if (choice->rating() > word_choice->rating() && !inserted) {
663  // Time to insert.
664  it.add_before_stay_put(word_choice);
665  inserted = true;
666  if (num_choices == 0)
667  best_choice = word_choice; // This is the new best.
668  ++num_choices;
669  }
670  if (choice->unichar_string() == new_str) {
671  if (inserted) {
672  // New is better.
673  delete it.extract();
674  } else {
675  // Old is better.
676  if (debug) {
677  tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n",
678  new_str.string(), word_choice->rating(), choice->rating());
679  }
680  delete word_choice;
681  return false;
682  }
683  } else {
684  ++num_choices;
685  if (num_choices > max_num_choices)
686  delete it.extract();
687  }
688  it.forward();
689  } while (!it.at_first());
690  }
691  if (!inserted && num_choices < max_num_choices) {
692  it.add_to_end(word_choice);
693  inserted = true;
694  if (num_choices == 0)
695  best_choice = word_choice; // This is the new best.
696  }
697  if (debug) {
698  if (inserted)
699  tprintf("New %s", best_choice == word_choice ? "Best" : "Secondary");
700  else
701  tprintf("Poor");
702  word_choice->print(" Word Choice");
703  }
704  if (!inserted) {
705  delete word_choice;
706  return false;
707  }
708  return true;
709 }
float adjust_factor() const
Definition: ratngs.h:306
Definition: strngs.h:45
const STRING & unichar_string() const
Definition: ratngs.h:541
WERD_CHOICE_LIST best_choices
Definition: pageres.h:242
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:449
void print() const
Definition: ratngs.h:580
float rating() const
Definition: ratngs.h:327
const char * string() const
Definition: strngs.cpp:194
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
float certainty() const
Definition: ratngs.h:330
WERD_CHOICE * best_choice
Definition: pageres.h:234
bool WERD_RES::LogNewRawChoice ( WERD_CHOICE * word_choice)

Definition at line 608 of file pageres.cpp.

608  {
609  if (raw_choice == nullptr || word_choice->rating() < raw_choice->rating()) {
610  delete raw_choice;
611  raw_choice = new WERD_CHOICE(*word_choice);
612  raw_choice->set_permuter(TOP_CHOICE_PERM);
613  return true;
614  }
615  return false;
616 }
void set_permuter(uint8_t perm)
Definition: ratngs.h:375
float rating() const
Definition: ratngs.h:327
WERD_CHOICE * raw_choice
Definition: pageres.h:239
void WERD_RES::merge_tess_fails ( )

Definition at line 1071 of file pageres.cpp.

1071  {
1072  if (ConditionalBlobMerge(
1073  NewPermanentTessCallback(this, &WERD_RES::BothSpaces), nullptr)) {
1074  int len = best_choice->length();
1075  ASSERT_HOST(reject_map.length() == len);
1076  ASSERT_HOST(box_word->length() == len);
1077  }
1078 }
int32_t length() const
Definition: rejctmap.h:223
int length() const
Definition: ratngs.h:303
UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1063
int length() const
Definition: boxword.h:83
REJMAP reject_map
Definition: pageres.h:286
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb)
Definition: pageres.cpp:942
tesseract::BoxWord * box_word
Definition: pageres.h:265
WERD_CHOICE * best_choice
Definition: pageres.h:234
#define ASSERT_HOST(x)
Definition: errcode.h:88
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258
void WERD_RES::MergeAdjacentBlobs ( int  index)

Definition at line 978 of file pageres.cpp.

978  {
979  if (reject_map.length() == best_choice->length())
980  reject_map.remove_pos(index);
981  best_choice->remove_unichar_id(index + 1);
982  rebuild_word->MergeBlobs(index, index + 2);
983  box_word->MergeBoxes(index, index + 2);
984  if (index + 1 < best_state.length()) {
985  best_state[index] += best_state[index + 1];
986  best_state.remove(index + 1);
987  }
988 }
void MergeBlobs(int start, int end)
Definition: blobs.cpp:876
void MergeBoxes(int start, int end)
Definition: boxword.cpp:131
void remove_pos(int16_t pos)
Definition: rejctmap.cpp:309
TWERD * rebuild_word
Definition: pageres.h:259
int length() const
Definition: genericvector.h:84
int32_t length() const
Definition: rejctmap.h:223
int length() const
Definition: ratngs.h:303
REJMAP reject_map
Definition: pageres.h:286
void remove(int index)
void remove_unichar_id(int index)
Definition: ratngs.h:484
tesseract::BoxWord * box_word
Definition: pageres.h:265
WERD_CHOICE * best_choice
Definition: pageres.h:234
GenericVector< int > best_state
Definition: pageres.h:270
WERD_RES & WERD_RES::operator= ( const WERD_RES & source)

Definition at line 190 of file pageres.cpp.

190  {
191  this->ELIST_LINK::operator=(source);
192  Clear();
193  if (source.combination) {
194  word = new WERD;
195  *word = *(source.word); // deep copy
196  } else {
197  word = source.word; // pt to same word
198  }
199  if (source.bln_boxes != nullptr)
200  bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
201  if (source.chopped_word != nullptr)
202  chopped_word = new TWERD(*source.chopped_word);
203  if (source.rebuild_word != nullptr)
204  rebuild_word = new TWERD(*source.rebuild_word);
205  // TODO(rays) Do we ever need to copy the seam_array?
206  blob_row = source.blob_row;
207  denorm = source.denorm;
208  if (source.box_word != nullptr)
209  box_word = new tesseract::BoxWord(*source.box_word);
210  best_state = source.best_state;
211  correct_text = source.correct_text;
212  blob_widths = source.blob_widths;
213  blob_gaps = source.blob_gaps;
214  // None of the uses of operator= require the ratings matrix to be copied,
215  // so don't as it would be really slow.
216 
217  // Copy the cooked choices.
218  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&source.best_choices));
219  WERD_CHOICE_IT wc_dest_it(&best_choices);
220  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
221  const WERD_CHOICE *choice = wc_it.data();
222  wc_dest_it.add_after_then_move(new WERD_CHOICE(*choice));
223  }
224  if (!wc_dest_it.empty()) {
225  wc_dest_it.move_to_first();
226  best_choice = wc_dest_it.data();
227  } else {
228  best_choice = nullptr;
229  }
230 
231  if (source.raw_choice != nullptr) {
232  raw_choice = new WERD_CHOICE(*source.raw_choice);
233  } else {
234  raw_choice = nullptr;
235  }
236  if (source.ep_choice != nullptr) {
237  ep_choice = new WERD_CHOICE(*source.ep_choice);
238  } else {
239  ep_choice = nullptr;
240  }
241  reject_map = source.reject_map;
242  combination = source.combination;
243  part_of_combo = source.part_of_combo;
244  CopySimpleFields(source);
245  if (source.blamer_bundle != nullptr) {
246  blamer_bundle = new BlamerBundle(*(source.blamer_bundle));
247  }
248  return *this;
249 }
Definition: werd.h:56
void operator=(const ELIST_LINK &)
Definition: elst.h:99
TWERD * rebuild_word
Definition: pageres.h:259
Definition: blobs.h:397
GenericVector< int > blob_widths
Definition: pageres.h:218
GenericVector< STRING > correct_text
Definition: pageres.h:274
WERD_CHOICE_LIST best_choices
Definition: pageres.h:242
WERD_CHOICE * ep_choice
Definition: pageres.h:285
bool combination
Definition: pageres.h:333
tesseract::BoxWord * bln_boxes
Definition: pageres.h:197
REJMAP reject_map
Definition: pageres.h:286
tesseract::BoxWord * box_word
Definition: pageres.h:265
ROW * blob_row
Definition: pageres.h:199
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:253
void Clear()
Definition: pageres.cpp:1141
WERD_CHOICE * best_choice
Definition: pageres.h:234
bool part_of_combo
Definition: pageres.h:334
TWERD * chopped_word
Definition: pageres.h:214
GenericVector< int > best_state
Definition: pageres.h:270
GenericVector< int > blob_gaps
Definition: pageres.h:221
WERD_CHOICE * raw_choice
Definition: pageres.h:239
WERD * word
Definition: pageres.h:188
DENORM denorm
Definition: pageres.h:203
BlamerBundle * blamer_bundle
Definition: pageres.h:245
bool WERD_RES::PiecesAllNatural ( int  start,
int  count 
) const

Definition at line 1082 of file pageres.cpp.

1082  {
1083  // all seams must have no splits.
1084  for (int index = start; index < start + count - 1; ++index) {
1085  if (index >= 0 && index < seam_array.size()) {
1086  SEAM* seam = seam_array[index];
1087  if (seam != nullptr && seam->HasAnySplits()) return false;
1088  }
1089  }
1090  return true;
1091 }
GenericVector< SEAM * > seam_array
Definition: pageres.h:216
bool HasAnySplits() const
Definition: seam.h:61
Definition: seam.h:38
int size() const
Definition: genericvector.h:70
int count(LIST var_list)
Definition: oldlist.cpp:96
void WERD_RES::PrintBestChoices ( ) const

Definition at line 721 of file pageres.cpp.

721  {
722  STRING alternates_str;
723  WERD_CHOICE_IT it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
724  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
725  if (!it.at_first()) alternates_str += "\", \"";
726  alternates_str += it.data()->unichar_string();
727  }
728  tprintf("Alternates for \"%s\": {\"%s\"}\n",
729  best_choice->unichar_string().string(), alternates_str.string());
730 }
Definition: strngs.h:45
const STRING & unichar_string() const
Definition: ratngs.h:541
WERD_CHOICE_LIST best_choices
Definition: pageres.h:242
const char * string() const
Definition: strngs.cpp:194
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
WERD_CHOICE * best_choice
Definition: pageres.h:234
const char* WERD_RES::RawUTF8 ( int  blob_index) const
inline

Definition at line 375 of file pageres.h.

375  {
376  if (blob_index < 0 || blob_index >= raw_choice->length())
377  return nullptr;
378  UNICHAR_ID id = raw_choice->unichar_id(blob_index);
379  if (id < 0 || id >= uch_set->size())
380  return nullptr;
381  return uch_set->id_to_unichar(id);
382  }
int length() const
Definition: ratngs.h:303
int size() const
Definition: unicharset.h:341
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
const UNICHARSET * uch_set
Definition: pageres.h:205
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
int UNICHAR_ID
Definition: unichar.h:34
WERD_CHOICE * raw_choice
Definition: pageres.h:239
void WERD_RES::RebuildBestState ( )

Definition at line 812 of file pageres.cpp.

812  {
813  ASSERT_HOST(best_choice != nullptr);
814  delete rebuild_word;
815  rebuild_word = new TWERD;
816  if (seam_array.empty())
817  start_seam_list(chopped_word, &seam_array);
818  best_state.truncate(0);
819  int start = 0;
820  for (int i = 0; i < best_choice->length(); ++i) {
821  int length = best_choice->state(i);
822  best_state.push_back(length);
823  if (length > 1) {
824  SEAM::JoinPieces(seam_array, chopped_word->blobs, start,
825  start + length - 1);
826  }
827  TBLOB* blob = chopped_word->blobs[start];
828  rebuild_word->blobs.push_back(new TBLOB(*blob));
829  if (length > 1) {
830  SEAM::BreakPieces(seam_array, chopped_word->blobs, start,
831  start + length - 1);
832  }
833  start += length;
834  }
835 }
TWERD * rebuild_word
Definition: pageres.h:259
Definition: blobs.h:397
GenericVector< TBLOB * > blobs
Definition: blobs.h:438
static void JoinPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:210
GenericVector< SEAM * > seam_array
Definition: pageres.h:216
static void BreakPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:188
int length() const
Definition: ratngs.h:303
Definition: blobs.h:263
void truncate(int size)
bool empty() const
Definition: genericvector.h:89
int push_back(T object)
void start_seam_list(TWERD *word, GenericVector< SEAM * > *seam_array)
Definition: seam.cpp:263
WERD_CHOICE * best_choice
Definition: pageres.h:234
#define ASSERT_HOST(x)
Definition: errcode.h:88
TWERD * chopped_word
Definition: pageres.h:214
GenericVector< int > best_state
Definition: pageres.h:270
int state(int index) const
Definition: ratngs.h:319
void WERD_RES::ReplaceBestChoice ( WERD_CHOICE * choice)

Definition at line 799 of file pageres.cpp.

799  {
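800  best_choice = choice;
801  RebuildBestState();
802  SetupBoxWord();
803  // Make up a fake reject map of the right length to keep the
804  // rejection pass happy.
805  reject_map.initialise(best_state.length());
806  done = tess_accepted = tess_would_adapt = true;
807  SetScriptPositions();
808 }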
bool done
Definition: pageres.h:297
int length() const
Definition: genericvector.h:84
void SetupBoxWord()
Definition: pageres.cpp:853
bool tess_accepted
Definition: pageres.h:295
bool tess_would_adapt
Definition: pageres.h:296
REJMAP reject_map
Definition: pageres.h:286
void initialise(int16_t length)
Definition: rejctmap.cpp:273
WERD_CHOICE * best_choice
Definition: pageres.h:234
void RebuildBestState()
Definition: pageres.cpp:812
GenericVector< int > best_state
Definition: pageres.h:270
void SetScriptPositions()
Definition: pageres.cpp:862
void WERD_RES::SetAllScriptPositions ( tesseract::ScriptPos  position)

Definition at line 869 of file pageres.cpp.

869  {
870  raw_choice->SetAllScriptPositions(position);
871  WERD_CHOICE_IT wc_it(&best_choices);
872  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward())
873  wc_it.data()->SetAllScriptPositions(position);
874 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:242
WERD_CHOICE * raw_choice
Definition: pageres.h:239
void SetAllScriptPositions(tesseract::ScriptPos position)
Definition: ratngs.cpp:623
void WERD_RES::SetScriptPositions ( )

Definition at line 862 of file pageres.cpp.

862  {
863  best_choice->SetScriptPositions(small_caps, chopped_word);
864 }
void SetScriptPositions(bool small_caps, TWERD *word, int debug=0)
Definition: ratngs.cpp:550
bool small_caps
Definition: pageres.h:298
WERD_CHOICE * best_choice
Definition: pageres.h:234
TWERD * chopped_word
Definition: pageres.h:214
void WERD_RES::SetupBasicsFromChoppedWord ( const UNICHARSET & unicharset_in)

Definition at line 347 of file pageres.cpp.

347  {
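348  bln_boxes = tesseract::BoxWord::CopyFromNormalized(chopped_word);
349  start_seam_list(chopped_word, &seam_array);
350  SetupBlobWidthsAndGaps();
351  ClearWordChoices();
352 }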
void ClearWordChoices()
Definition: pageres.cpp:1176
GenericVector< SEAM * > seam_array
Definition: pageres.h:216
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:56
void SetupBlobWidthsAndGaps()
Definition: pageres.cpp:404
tesseract::BoxWord * bln_boxes
Definition: pageres.h:197
void start_seam_list(TWERD *word, GenericVector< SEAM * > *seam_array)
Definition: seam.cpp:263
TWERD * chopped_word
Definition: pageres.h:214
void WERD_RES::SetupBlamerBundle ( )

Definition at line 397 of file pageres.cpp.

397  {
398  if (blamer_bundle != nullptr) {
399  blamer_bundle->SetupNormTruthWord(denorm);
400  }
401 }
void SetupNormTruthWord(const DENORM &denorm)
Definition: blamer.cpp:150
DENORM denorm
Definition: pageres.h:203
BlamerBundle * blamer_bundle
Definition: pageres.h:245
void WERD_RES::SetupBlobWidthsAndGaps ( )

Definition at line 404 of file pageres.cpp.

404  {
405  blob_widths.truncate(0);
406  blob_gaps.truncate(0);
407  int num_blobs = chopped_word->NumBlobs();
408  for (int b = 0; b < num_blobs; ++b) {
409  TBLOB *blob = chopped_word->blobs[b];
410  TBOX box = blob->bounding_box();
411  blob_widths.push_back(box.width());
412  if (b + 1 < num_blobs) {
413  blob_gaps.push_back(
414  chopped_word->blobs[b + 1]->bounding_box().left() - box.right());
415  }
416  }
417 }
Definition: rect.h:34
GenericVector< int > blob_widths
Definition: pageres.h:218
GenericVector< TBLOB * > blobs
Definition: blobs.h:438
TBOX bounding_box() const
Definition: blobs.cpp:472
Definition: blobs.h:263
void truncate(int size)
int push_back(T object)
int16_t width() const
Definition: rect.h:115
int16_t right() const
Definition: rect.h:79
int NumBlobs() const
Definition: blobs.h:427
TWERD * chopped_word
Definition: pageres.h:214
GenericVector< int > blob_gaps
Definition: pageres.h:221
void WERD_RES::SetupBoxWord ( )

Definition at line 853 of file pageres.cpp.

853  {
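854  delete box_word;
855  rebuild_word->ComputeBoundingBoxes();
856  box_word = tesseract::BoxWord::CopyFromNormalized(rebuild_word);
857  box_word->ClipToOriginalWord(denorm.block(), word);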
858 }
const BLOCK * block() const
Definition: normalis.h:273
TWERD * rebuild_word
Definition: pageres.h:259
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:56
void ClipToOriginalWord(const BLOCK *block, WERD *original_word)
Definition: boxword.cpp:92
tesseract::BoxWord * box_word
Definition: pageres.h:265
WERD * word
Definition: pageres.h:188
void ComputeBoundingBoxes()
Definition: blobs.cpp:859
DENORM denorm
Definition: pageres.h:203
void WERD_RES::SetupFake ( const UNICHARSET & uch)

Definition at line 356 of file pageres.cpp.

356  {
357  ClearResults();
358  SetupWordScript(unicharset_in);
359  chopped_word = new TWERD;
360  rebuild_word = new TWERD;
361  bln_boxes = new tesseract::BoxWord;
362  box_word = new tesseract::BoxWord;
363  int blob_count = word->cblob_list()->length();
364  if (blob_count > 0) {
365  auto** fake_choices = new BLOB_CHOICE*[blob_count];
366  // For non-text blocks, just pass any blobs through to the box_word
367  // and call the word failed with a fake classification.
368  C_BLOB_IT b_it(word->cblob_list());
369  int blob_id = 0;
370  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
371  TBOX box = b_it.data()->bounding_box();
372  box_word->InsertBox(box_word->length(), box);
373  fake_choices[blob_id++] = new BLOB_CHOICE;
374  }
375  FakeClassifyWord(blob_count, fake_choices);
376  delete [] fake_choices;
377  } else {
378  auto* word = new WERD_CHOICE(&unicharset_in);
379  word->make_bad();
380  LogNewRawChoice(word);
381  // Ownership of word is taken by *this WERD_RES in LogNewCookedChoice.
382  LogNewCookedChoice(1, false, word);
383  }
384  tess_failed = true;
385  done = true;
386 }
bool tess_failed
Definition: pageres.h:287
bool done
Definition: pageres.h:297
Definition: rect.h:34
TWERD * rebuild_word
Definition: pageres.h:259
Definition: blobs.h:397
tesseract::BoxWord * bln_boxes
Definition: pageres.h:197
int length() const
Definition: boxword.h:83
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:388
bool LogNewRawChoice(WERD_CHOICE *word_choice)
Definition: pageres.cpp:608
void FakeClassifyWord(int blob_count, BLOB_CHOICE **choices)
Definition: pageres.cpp:881
tesseract::BoxWord * box_word
Definition: pageres.h:265
void ClearResults()
Definition: pageres.cpp:1151
TWERD * chopped_word
Definition: pageres.h:214
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:148
WERD * word
Definition: pageres.h:188
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
Definition: pageres.cpp:624
C_BLOB_LIST * cblob_list()
Definition: werd.h:95
bool WERD_RES::SetupForRecognition ( const UNICHARSET & unicharset_in,
tesseract::Tesseract * tesseract,
Pix *  pix,
int  norm_mode,
const TBOX * norm_box,
bool  numeric_mode,
bool  use_body_size,
bool  allow_detailed_fx,
ROW * row,
const BLOCK * block 
)

Definition at line 306 of file pageres.cpp.

313  {
314  auto norm_mode_hint =
315  static_cast<tesseract::OcrEngineMode>(norm_mode);
316  tesseract = tess;
317  POLY_BLOCK* pb = block != nullptr ? block->pdblk.poly_block() : nullptr;
318  if ((norm_mode_hint != tesseract::OEM_LSTM_ONLY &&
319  word->cblob_list()->empty()) ||
320  (pb != nullptr && !pb->IsText())) {
321  // Empty words occur when all the blobs have been moved to the rej_blobs
322  // list, which seems to occur frequently in junk.
323  SetupFake(unicharset_in);
324  word->set_flag(W_REP_CHAR, false);
325  return false;
326  }
327  ClearResults();
328  SetupWordScript(unicharset_in);
329  chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
330  float word_xheight = use_body_size && row != nullptr && row->body_size() > 0.0f
331  ? row->body_size() : x_height;
332  chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
333  word_xheight, baseline_shift, numeric_mode,
334  norm_mode_hint, norm_box, &denorm);
335  blob_row = row;
336  SetupBasicsFromChoppedWord(unicharset_in);
337  SetupBlamerBundle();
338  int num_blobs = chopped_word->NumBlobs();
339  ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks);
340  tess_failed = false;
341  return true;
342 }
float baseline_shift
Definition: pageres.h:312
bool tess_failed
Definition: pageres.h:287
static TWERD * PolygonalCopy(bool allow_detailed_fx, WERD *src)
Definition: blobs.cpp:780
void SetupFake(const UNICHARSET &uch)
Definition: pageres.cpp:356
POLY_BLOCK * poly_block() const
Definition: pdblock.h:56
repeated character
Definition: werd.h:38
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:388
white on black
Definition: werd.h:41
ROW * blob_row
Definition: pageres.h:199
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:191
Definition: matrix.h:578
int NumBlobs() const
Definition: blobs.h:427
void ClearResults()
Definition: pageres.cpp:1151
float body_size() const
Definition: ocrrow.h:73
MATRIX * ratings
Definition: pageres.h:230
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117
TWERD * chopped_word
Definition: pageres.h:214
void BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, const TBOX *norm_box, DENORM *word_denorm)
Definition: blobs.cpp:794
void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:347
bool IsText() const
Definition: polyblk.h:49
void SetupBlamerBundle()
Definition: pageres.cpp:397
float x_height
Definition: pageres.h:310
const int kWordrecMaxNumJoinChunks
Definition: pageres.cpp:53
WERD * word
Definition: pageres.h:188
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118
DENORM denorm
Definition: pageres.h:203
C_BLOB_LIST * cblob_list()
Definition: werd.h:95
void WERD_RES::SetupWordScript ( const UNICHARSET & unicharset_in)

Definition at line 388 of file pageres.cpp.

388  {
389  uch_set = &uch;
390  int script = uch.default_sid();
391  word->set_script_id(script);
392  word->set_flag(W_SCRIPT_HAS_XHEIGHT, uch.script_has_xheight());
393  word->set_flag(W_SCRIPT_IS_LATIN, script == uch.latin_sid());
394 }
int default_sid() const
Definition: unicharset.h:894
W_SCRIPT_IS_LATIN: Special case latin for y. splitting.
Definition: werd.h:36
const UNICHARSET * uch_set
Definition: pageres.h:205
W_SCRIPT_HAS_XHEIGHT: x-height concept makes sense.
Definition: werd.h:35
WERD * word
Definition: pageres.h:188
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118
void set_script_id(int id)
Definition: werd.h:104
bool WERD_RES::StatesAllValid ( )

Definition at line 462 of file pageres.cpp.

462  {
463  int ratings_dim = ratings->dimension();
464  if (raw_choice->TotalOfStates() != ratings_dim) {
465  tprintf("raw_choice has total of states = %d vs ratings dim of %d\n",
466  raw_choice->TotalOfStates(), ratings_dim);
467  return false;
468  }
469  WERD_CHOICE_IT it(&best_choices);
470  int index = 0;
471  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
472  WERD_CHOICE* choice = it.data();
473  if (choice->TotalOfStates() != ratings_dim) {
474  tprintf("Cooked #%d has total of states = %d vs ratings dim of %d\n",
475  index, choice->TotalOfStates(), ratings_dim);
476  return false;
477  }
478  }
479  return true;
480 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:242
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
MATRIX * ratings
Definition: pageres.h:230
int dimension() const
Definition: matrix.h:536
WERD_CHOICE * raw_choice
Definition: pageres.h:239
int TotalOfStates() const
Definition: ratngs.cpp:711
UNICHARSET::Direction WERD_RES::SymbolDirection ( int  blob_index) const
inline

Definition at line 384 of file pageres.h.

384  {
385  if (best_choice == nullptr ||
386  blob_index >= best_choice->length() ||
387  blob_index < 0)
388  return UNICHARSET::U_OTHER_NEUTRAL;
389  return uch_set->get_direction(best_choice->unichar_id(blob_index));
390  }
int length() const
Definition: ratngs.h:303
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:690
const UNICHARSET * uch_set
Definition: pageres.h:205
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
WERD_CHOICE * best_choice
Definition: pageres.h:234
bool WERD_RES::UnicharsInReadingOrder ( ) const
inline

Definition at line 426 of file pageres.h.

426  {
427  return best_choice->unichars_in_script_order();
428  }
bool unichars_in_script_order() const
Definition: ratngs.h:535
WERD_CHOICE * best_choice
Definition: pageres.h:234

Member Data Documentation

float WERD_RES::baseline_shift

Definition at line 312 of file pageres.h.

WERD_CHOICE* WERD_RES::best_choice

Definition at line 234 of file pageres.h.

WERD_CHOICE_LIST WERD_RES::best_choices

Definition at line 242 of file pageres.h.

GenericVector<int> WERD_RES::best_state

Definition at line 270 of file pageres.h.

BlamerBundle* WERD_RES::blamer_bundle

Definition at line 245 of file pageres.h.

tesseract::BoxWord* WERD_RES::bln_boxes

Definition at line 197 of file pageres.h.

GenericVector<int> WERD_RES::blob_gaps

Definition at line 221 of file pageres.h.

ROW* WERD_RES::blob_row

Definition at line 199 of file pageres.h.

GenericVector<int> WERD_RES::blob_widths

Definition at line 218 of file pageres.h.

int8_t WERD_RES::bold

Definition at line 301 of file pageres.h.

tesseract::BoxWord* WERD_RES::box_word

Definition at line 265 of file pageres.h.

float WERD_RES::caps_height

Definition at line 311 of file pageres.h.

TWERD* WERD_RES::chopped_word

Definition at line 214 of file pageres.h.

bool WERD_RES::combination

Definition at line 333 of file pageres.h.

GenericVector<STRING> WERD_RES::correct_text

Definition at line 274 of file pageres.h.

DENORM WERD_RES::denorm

Definition at line 203 of file pageres.h.

bool WERD_RES::done

Definition at line 297 of file pageres.h.

WERD_CHOICE* WERD_RES::ep_choice

Definition at line 285 of file pageres.h.

const FontInfo* WERD_RES::fontinfo

Definition at line 303 of file pageres.h.

const FontInfo* WERD_RES::fontinfo2

Definition at line 304 of file pageres.h.

int8_t WERD_RES::fontinfo_id2_count

Definition at line 306 of file pageres.h.

int8_t WERD_RES::fontinfo_id_count

Definition at line 305 of file pageres.h.

bool WERD_RES::guessed_caps_ht

Definition at line 308 of file pageres.h.

bool WERD_RES::guessed_x_ht

Definition at line 307 of file pageres.h.

int8_t WERD_RES::italic

Definition at line 300 of file pageres.h.

bool WERD_RES::odd_size

Definition at line 299 of file pageres.h.

bool WERD_RES::part_of_combo

Definition at line 334 of file pageres.h.

MATRIX* WERD_RES::ratings

Definition at line 230 of file pageres.h.

WERD_CHOICE* WERD_RES::raw_choice

Definition at line 239 of file pageres.h.

TWERD* WERD_RES::rebuild_word

Definition at line 259 of file pageres.h.

REJMAP WERD_RES::reject_map

Definition at line 286 of file pageres.h.

bool WERD_RES::reject_spaces

Definition at line 335 of file pageres.h.

GenericVector<SEAM*> WERD_RES::seam_array

Definition at line 216 of file pageres.h.

bool WERD_RES::small_caps

Definition at line 298 of file pageres.h.

float WERD_RES::space_certainty

Definition at line 315 of file pageres.h.

bool WERD_RES::tess_accepted

Definition at line 295 of file pageres.h.

bool WERD_RES::tess_failed

Definition at line 287 of file pageres.h.

bool WERD_RES::tess_would_adapt

Definition at line 296 of file pageres.h.

tesseract::Tesseract* WERD_RES::tesseract

Definition at line 281 of file pageres.h.

std::vector<std::vector<std::pair<const char*, float> > > WERD_RES::timesteps

Definition at line 223 of file pageres.h.

const UNICHARSET* WERD_RES::uch_set

Definition at line 205 of file pageres.h.

CRUNCH_MODE WERD_RES::unlv_crunch_mode

Definition at line 309 of file pageres.h.

WERD* WERD_RES::word

Definition at line 188 of file pageres.h.

float WERD_RES::x_height

Definition at line 310 of file pageres.h.


The documentation for this class was generated from the following files: