tesseract  4.1.0
wordrec.cpp
Go to the documentation of this file.
1 // File: wordrec.cpp
3 // Description: wordrec class.
4 // Author: Samuel Charron
5 //
6 // (C) Copyright 2006, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #include "wordrec.h"
20 
21 #ifdef DISABLED_LEGACY_ENGINE
22 
23 #include "params.h"
24 
25 
// Reduced build variant: this namespace block is compiled only when
// DISABLED_LEGACY_ENGINE is defined (see the enclosing #ifdef / #else).
// It hands two boolean parameters, wordrec_debug_blamer and
// wordrec_run_blamer (both defaulting to false), to BOOL_MEMBER together
// with params(), and its body resets prev_word_best_choice_ to nullptr.
// NOTE(review): the embedded listing numbers jump from 26 to 28, so one
// line of the original file is missing here. Given the trailing ") {" and
// the "}" below, it is presumably the constructor header that opens this
// initializer list -- confirm against the upstream file.
26 namespace tesseract {
28  // control parameters
29 
30  BOOL_MEMBER(wordrec_debug_blamer, false,
31  "Print blamer debug messages", params()),
32 
33  BOOL_MEMBER(wordrec_run_blamer, false,
34  "Try to set the blame for errors", params()) {
// No previous word has been recognised yet.
35  prev_word_best_choice_ = nullptr;
36 }
37 
38 } // namespace tesseract
39 
40 #else // DISABLED_LEGACY_ENGINE not defined
41 
42 #include "language_model.h"
43 #include "params.h"
44 
45 
// Full build variant: this namespace block is compiled only when
// DISABLED_LEGACY_ENGINE is NOT defined (the #else branch).
// It is a comma-separated list of BOOL_MEMBER / INT_MEMBER / double_MEMBER
// entries, each giving a parameter name, a default value, a description
// string and a params() vector, followed by pass2_ok_split(0.0f) and a body
// that resets prev_word_best_choice_ and fill_lattice_ to nullptr.
// NOTE(review): this listing is incomplete. The embedded line numbers skip
// 47, 49, 56, 62, 109, 112, 114, 116, 119, 121, 123, 125, 128, 132, 134 and
// 140, so the opening line of the definition and the first line of several
// entries (the macro name, parameter name and default value) are missing.
// Their contents cannot be read from here; the per-gap notes below name the
// parameter each orphaned description presumably belongs to, judged only
// from the symbol index that follows the listing -- confirm all of them
// against the upstream file before relying on them.
46 namespace tesseract {
// NOTE(review): listing line 47 is missing -- presumably the constructor
// header that opens this initializer list; confirm.
48  // control parameters
// NOTE(review): listing line 49 is missing -- presumably the opener for
// merge_fragments_in_matrix; default value not visible.
50  "Merge the fragments in the ratings matrix and delete them"
51  " after merging", params()),
52  BOOL_MEMBER(wordrec_no_block, false, "Don't output block information",
53  params()),
54  BOOL_MEMBER(wordrec_enable_assoc, true, "Associator Enable",
55  params()),
// NOTE(review): listing line 56 is missing -- presumably the opener for
// force_word_assoc; default value not visible. This entry is the only one
// that spells its vector as CCUtil::params() rather than params().
57  "force associator to run regardless of what enable_assoc is."
58  " This is used for CJK where component grouping is necessary.",
59  CCUtil::params()),
60  double_MEMBER(wordrec_worst_state, 1.0, "Worst segmentation state",
61  params()),
// NOTE(review): listing line 62 is missing -- presumably the opener for
// fragments_guide_chopper; default value not visible.
63  "Use information from fragments to guide chopping process",
64  params()),
65  INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped",
66  params()),
67  double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit",
68  params()),
69  INT_MEMBER(chop_debug, 0, "Chop debug",
70  params()),
// The chop_* boolean entries below write their defaults as 1 / 0 rather
// than true / false.
71  BOOL_MEMBER(chop_enable, 1, "Chop enable",
72  params()),
73  BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep",
74  params()),
75  INT_MEMBER(chop_split_length, 10000, "Split Length",
76  params()),
77  INT_MEMBER(chop_same_distance, 2, "Same distance",
78  params()),
79  INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline",
80  params()),
81  INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile",
82  params()),
83  BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params()),
84  INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend",
85  params()),
86  INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area",
87  params()),
88  double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment",
89  params()),
90  double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment",
91  params()),
92  double_MEMBER(chop_center_knob, 0.15, "Split center adjustment",
93  params()),
94  INT_MEMBER(chop_centered_maxwidth, 90, "Width of (smaller) chopped blobs "
95  "above which we don't care that a chop is not near the center.",
96  params()),
97  double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment",
98  params()),
99  double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment",
100  params()),
101  double_MEMBER(chop_ok_split, 100.0, "OK split limit",
102  params()),
103  double_MEMBER(chop_good_split, 50.0, "Good split limit",
104  params()),
105  INT_MEMBER(chop_x_y_weight, 3, "X / Y length weight",
106  params()),
107  INT_MEMBER(segment_adjust_debug, 0, "Segmentation adjustment debug",
108  params()),
// NOTE(review): listing line 109 is missing -- presumably the opener for
// assume_fixed_pitch_char_segment; default value not visible.
110  "include fixed-pitch heuristics in char segmentation",
111  params()),
// NOTE(review): listing line 112 is missing -- presumably the opener for
// wordrec_debug_level.
113  "Debug level for wordrec", params()),
// NOTE(review): listing line 114 is missing -- presumably the opener for
// wordrec_max_join_chunks.
115  "Max number of broken pieces to associate", params()),
// NOTE(review): listing line 116 is missing -- presumably the opener for
// wordrec_skip_no_truth_words.
117  "Only run OCR for words that had truth recorded in BlamerBundle",
118  params()),
// NOTE(review): listing lines 119 and 121 are missing. The two descriptions
// below match the wordrec_debug_blamer and wordrec_run_blamer entries of the
// DISABLED_LEGACY_ENGINE branch earlier in this file, so they presumably
// belong to the same two parameters here.
120  "Print blamer debug messages", params()),
122  "Try to set the blame for errors", params()),
// NOTE(review): listing line 123 is missing -- presumably the opener for
// segsearch_debug_level.
124  "SegSearch debug level", params()),
// NOTE(review): listing line 125 is missing -- presumably the opener for
// segsearch_max_pain_points.
126  "Maximum number of pain points stored in the queue",
127  params()),
// NOTE(review): listing line 128 is missing -- presumably the opener for
// segsearch_max_futile_classifications.
129  "Maximum number of pain point classifications per chunk that"
130  " did not result in finding a better word choice.",
131  params()),
// NOTE(review): listing line 132 is missing -- presumably the opener for
// segsearch_max_char_wh_ratio.
133  "Maximum character width-to-height ratio", params()),
// NOTE(review): listing line 134 is missing -- presumably the opener for
// save_alt_choices.
135  "Save alternative paths found during chopping"
136  " and segmentation search",
137  params()),
// Last initializer: pass2_ok_split starts at 0.0f.
138  pass2_ok_split(0.0f) {
// No previous word has been recognised yet.
139  prev_word_best_choice_ = nullptr;
// NOTE(review): listing line 140 is missing. The fragment below closes a
// call that takes the address of getDict(); the symbol index lists
// language_model_ and get_fontinfo_table(), so this is presumably where
// language_model_ is set up -- confirm.
141  &(getDict())));
// No lattice-filling member function is selected by default.
142  fill_lattice_ = nullptr;
143 }
144 
145 } // namespace tesseract
146 
147 #endif // DISABLED_LEGACY_ENGINE
ParamsVectors * params()
Definition: ccutil.h:65
std::unique_ptr< LanguageModel > language_model_
Definition: wordrec.h:476
bool merge_fragments_in_matrix
Definition: wordrec.h:197
bool wordrec_enable_assoc
Definition: wordrec.h:199
bool wordrec_no_block
Definition: wordrec.h:198
int segsearch_max_pain_points
Definition: wordrec.h:240
int segsearch_max_futile_classifications
Definition: wordrec.h:242
int chop_min_outline_area
Definition: wordrec.h:217
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:485
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:324
bool wordrec_run_blamer
Definition: wordrec.h:237
int chop_seam_pile_size
Definition: wordrec.h:214
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:481
virtual Dict & getDict()
Definition: classify.h:107
double chop_overlap_knob
Definition: wordrec.h:219
double chop_split_dist_knob
Definition: wordrec.h:218
bool assume_fixed_pitch_char_segment
Definition: wordrec.h:230
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:318
double chop_sharpness_knob
Definition: wordrec.h:223
int chop_min_outline_points
Definition: wordrec.h:213
bool fragments_guide_chopper
Definition: wordrec.h:205
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:315
bool chop_new_seam_pile
Definition: wordrec.h:215
double tessedit_certainty_threshold
Definition: wordrec.h:207
double chop_good_split
Definition: wordrec.h:226
double chop_center_knob
Definition: wordrec.h:220
int chop_centered_maxwidth
Definition: wordrec.h:222
int wordrec_max_join_chunks
Definition: wordrec.h:233
int wordrec_debug_level
Definition: wordrec.h:231
int segment_adjust_debug
Definition: wordrec.h:228
double chop_ok_split
Definition: wordrec.h:225
bool save_alt_choices
Definition: wordrec.h:247
bool chop_vertical_creep
Definition: wordrec.h:210
UnicityTable< FontInfo > & get_fontinfo_table()
Definition: classify.h:386
double segsearch_max_char_wh_ratio
Definition: wordrec.h:244
int repair_unchopped_blobs
Definition: wordrec.h:206
bool wordrec_debug_blamer
Definition: wordrec.h:236
bool wordrec_skip_no_truth_words
Definition: wordrec.h:235
int segsearch_debug_level
Definition: wordrec.h:238
double wordrec_worst_state
Definition: wordrec.h:203
int chop_same_distance
Definition: wordrec.h:212
double chop_width_change_knob
Definition: wordrec.h:224
bool force_word_assoc
Definition: wordrec.h:202
PRIORITY pass2_ok_split
Definition: wordrec.h:477