19 #ifndef TESSERACT_WORDREC_WORDREC_H_ 20 #define TESSERACT_WORDREC_WORDREC_H_ 22 #ifdef DISABLED_LEGACY_ENGINE 29 namespace tesseract {
class TessdataManager; }
35 class Wordrec :
public Classify {
48 void program_editup(
const char *textbase, TessdataManager *init_classifier,
49 TessdataManager *init_dict);
60 #else // DISABLED_LEGACY_ENGINE not defined 63 #include "config_auto.h" 92 namespace tesseract {
class LMPainPoints; }
93 namespace tesseract {
class TessdataManager; }
94 namespace tesseract {
struct BestChoiceBundle; }
115 : classified_row_(-1),
116 revisit_whole_column_(false),
117 column_classified_(false) {}
122 column_classified_ =
true;
128 classified_row_ = row;
134 revisit_whole_column_ =
true;
139 classified_row_ = -1;
140 revisit_whole_column_ =
false;
141 column_classified_ =
false;
147 return revisit_whole_column_ || column_classified_ || classified_row_ >= 0;
151 return row == classified_row_ || column_classified_;
155 return revisit_whole_column_ || column_classified_ ? -1 : classified_row_;
167 bool revisit_whole_column_;
171 bool column_classified_;
196 "Merge the fragments in the ratings matrix and delete them " 201 "force associator to run regardless of what enable_assoc is." 202 "This is used for CJK where component grouping is necessary.");
205 "Use information from fragments to guide chopping process");
222 "above which we don't care that a chop is not near the center.");
230 "include fixed-pitch heuristics in char segmentation");
233 "Max number of broken pieces to associate");
235 "Only run OCR for words that had truth recorded in BlamerBundle");
240 "Maximum number of pain points stored in the queue");
242 "Maximum number of pain point classifications per word.");
244 "Maximum character width-to-height ratio");
246 "Save alternative paths found during chopping " 247 "and segmentation search");
265 const WERD_CHOICE_LIST &best_choices,
268 (this->*
fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
370 EDGEPT_CLIST *new_points);
379 WERD_RES *word_res,
int *blob_number);
382 bool split_next_to_fragment,
398 float rating_ceiling,
399 bool split_next_to_fragment);
417 EDGEPT_CLIST *new_points,
434 const char* description,
452 int16_t num_frag_parts,
455 BLOB_CHOICE_LIST *choice_lists);
460 int16_t num_frag_parts,
461 BLOB_CHOICE_LIST *choice_lists,
472 BLOB_CHOICE_LIST *filtered_choices);
486 const WERD_CHOICE_LIST &best_choices,
492 return (language_model_->AcceptableChoiceFound() ||
493 num_futile_classifications >=
523 float rating_cert_scale,
535 const char* pain_point_type,
558 #endif // DISABLED_LEGACY_ENGINE 560 #endif // TESSERACT_WORDREC_WORDREC_H_ void try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
#define BOOL_VAR_H(name, val, comment)
void combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
void merge_and_put_fragment_lists(int16_t row, int16_t column, int16_t num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings)
void SetColumnClassified()
PRIORITY point_priority(EDGEPT *point)
std::unique_ptr< LanguageModel > language_model_
bool merge_fragments_in_matrix
bool wordrec_enable_assoc
void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams)
Bundle together all the things pertaining to the best choice/state.
void InitialSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3)
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending)
int segsearch_max_pain_points
int segsearch_max_futile_classifications
bool SegSearchDone(int num_futile_classifications)
int chop_min_outline_area
PRIORITY grade_split_length(SPLIT *split)
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
int select_blob_to_split_from_fixpt(DANGERR *fixpt)
void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
WERD_CHOICE * prev_word_best_choice_
SEAM * pick_good_seam(TBLOB *blob)
double chop_split_dist_knob
bool assume_fixed_pitch_char_segment
double chop_sharpness_knob
int chop_min_outline_points
#define INT_VAR_H(name, val, comment)
bool fragments_guide_chopper
void prioritize_points(TESSLINE *outline, PointHeap *points)
BLOB_CHOICE_LIST * call_matcher(TBLOB *blob)
void try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
void add_point_to_list(PointHeap *point_heap, EDGEPT *point)
#define double_VAR_H(name, val, comment)
double tessedit_certainty_threshold
SEAM * chop_overlapping_blob(const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number)
void chop_word_main(WERD_RES *word)
void DoSegSearch(WERD_RES *word_res)
~Wordrec() override=default
void program_editdown(int32_t elasped_time)
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt)
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
SEAM * improve_one_blob(const GenericVector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number)
int chop_centered_maxwidth
virtual BLOB_CHOICE_LIST * classify_piece(const GenericVector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
int wordrec_max_join_chunks
void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices)
BLOB_CHOICE_LIST * classify_blob(TBLOB *blob, const char *string, C_COL color, BlamerBundle *blamer_bundle)
EDGEPT * pick_close_point(EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist)
void SetBlobClassified(int row)
void new_min_point(EDGEPT *local_min, PointHeap *points)
#define ELISTIZEH(CLASSNAME)
SEAM * attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
bool is_inside_angle(EDGEPT *pt)
void get_fragment_lists(int16_t current_frag, int16_t current_row, int16_t start, int16_t num_frag_parts, int16_t num_blobs, MATRIX *ratings, BLOB_CHOICE_LIST *choice_lists)
void program_editup(const char *textbase, TessdataManager *init_classifier, TessdataManager *init_dict)
bool IsRowJustClassified(int row) const
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
double segsearch_max_char_wh_ratio
int repair_unchopped_blobs
int dict_word(const WERD_CHOICE &word)
void merge_fragments(MATRIX *ratings, int16_t num_blobs)
PRIORITY grade_sharpness(SPLIT *split)
bool wordrec_debug_blamer
void FillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
void new_max_point(EDGEPT *local_max, PointHeap *points)
SEAM * chop_one_blob(const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, int *blob_number)
void cc_recog(WERD_RES *word)
bool wordrec_skip_no_truth_words
int segsearch_debug_level
double wordrec_worst_state
void InitBlamerForSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug)
SEAM * chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending)
void SaveAltChoices(const LIST &best_choices, WERD_RES *word)
double chop_width_change_knob
void RevisitWholeColumn()
int select_blob_to_split(const GenericVector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
GenericVector< int > blame_reasons_
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)