tesseract  4.1.0
classify.cpp
Go to the documentation of this file.
1 // File: classify.cpp
3 // Description: classify class.
4 // Author: Samuel Charron
5 //
6 // (C) Copyright 2006, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #include "classify.h"
20 
21 #ifdef DISABLED_LEGACY_ENGINE
22 
23 #include <string.h>
24 
25 namespace tesseract {
26 
28  :
29  INT_MEMBER(classify_debug_level, 0, "Classify debug level",
30  this->params()),
31 
32  BOOL_MEMBER(classify_bln_numeric_mode, 0,
33 "Assume the input is numbers [0-9].", this->params()),
34 
35  double_MEMBER(classify_max_rating_ratio, 1.5,
36  "Veto ratio between classifier ratings", this->params()),
37 
38  double_MEMBER(classify_max_certainty_margin, 5.5,
39  "Veto difference between classifier certainties",
40  this->params()),
41 
42  dict_(this) {}
43 
45 
46 } // namespace tesseract
47 
48 #else // DISABLED_LEGACY_ENGINE not defined
49 
50 #include "fontinfo.h"
51 #include "intproto.h"
52 #include "mfoutline.h"
53 #include "scrollview.h"
54 #include "shapeclassifier.h"
55 #include "shapetable.h"
56 #include "unicity_table.h"
57 #include <cstring>
58 
59 namespace tesseract {
61  : BOOL_MEMBER(allow_blob_division, true, "Use divisible blobs chopping",
62  this->params()),
64  "Prioritize blob division over chopping", this->params()),
65  BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier",
66  this->params()),
67  INT_MEMBER(classify_debug_level, 0, "Classify debug level",
68  this->params()),
69  INT_MEMBER(classify_norm_method, character, "Normalization Method ...",
70  this->params()),
72  "Character Normalization Range ...", this->params()),
73  double_MEMBER(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...",
74  this->params()), /* PREV DEFAULT 0.1 */
76  "Max char x-norm scale ...",
77  this->params()), /* PREV DEFAULT 0.3 */
78  double_MEMBER(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...",
79  this->params()), /* PREV DEFAULT 0.1 */
81  "Max char y-norm scale ...",
82  this->params()), /* PREV DEFAULT 0.3 */
84  "Veto ratio between classifier ratings", this->params()),
86  "Veto difference between classifier certainties",
87  this->params()),
88  BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching",
89  this->params()),
90  BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching",
91  this->params()),
93  "Enable adaptive classifier", this->params()),
95  "Use pre-adapted classifier templates", this->params()),
97  "Save adapted templates to a file", this->params()),
98  BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger",
99  this->params()),
101  "Non-linear stroke-density normalization", this->params()),
102  INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()),
103  INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()),
104  INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ",
105  this->params()),
106  double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)",
107  this->params()),
108  double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)",
109  this->params()),
110  double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)",
111  this->params()),
112  double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)",
113  this->params()),
114  double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)",
115  this->params()),
116  double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. noise blob length",
117  this->params()),
118  INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes",
119  this->params()),
121  "Reliable Config Threshold", this->params()),
123  "Enable adaption even if the ambiguities have not been seen",
124  this->params()),
126  "Maximum angle delta for prototype clustering",
127  this->params()),
129  "Penalty to apply when a non-alnum is vertically out of "
130  "its expected textline position",
131  this->params()),
132  double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()),
133  double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor",
134  this->params()),
136  "Scale factor for features not used", this->params()),
139  "Prune poor adapted results this much worse than best result",
140  this->params()),
142  "Threshold at which classify_adapted_pruning_factor starts",
143  this->params()),
145  "Threshold for good protos during adaptive 0-255",
146  this->params()),
148  "Threshold for good features during adaptive 0-255",
149  this->params()),
151  "Do not include character fragments in the"
152  " results of the classifier",
153  this->params()),
155  -3.0,
156  "Exclude fragments that do not look like whole"
157  " characters from training and adaption",
158  this->params()),
160  "Bring up graphical debugging windows for fragments training",
161  this->params()),
163  "Use two different windows for debugging the matching: "
164  "One for the protos and one for the features.",
165  this->params()),
166  STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning",
167  this->params()),
169  "Class Pruner Threshold 0-255", this->params()),
171  "Class Pruner Multiplier 0-255: ", this->params()),
173  "Class Pruner CutoffStrength: ", this->params()),
175  "Integer Matcher Multiplier 0-255: ", this->params()),
176  EnableLearning(true),
178  "Don't adapt to i/I at beginning of word", this->params()),
180  "Assume the input is numbers [0-9].", this->params()),
181  double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size",
182  this->params()),
184  "Penalty to add to worst rating for noise", this->params()),
186  shape_table_(nullptr),
187  dict_(this),
188  static_classifier_(nullptr) {
189  fontinfo_table_.set_compare_callback(
191  fontinfo_table_.set_clear_callback(
193  fontset_table_.set_compare_callback(
195  fontset_table_.set_clear_callback(
197  AdaptedTemplates = nullptr;
198  BackupAdaptedTemplates = nullptr;
199  PreTrainedTemplates = nullptr;
200  AllProtosOn = nullptr;
201  AllConfigsOn = nullptr;
202  AllConfigsOff = nullptr;
203  TempProtoMask = nullptr;
204  NormProtos = nullptr;
205 
206  NumAdaptationsFailed = 0;
207 
208  learn_debug_win_ = nullptr;
209  learn_fragmented_word_debug_win_ = nullptr;
210  learn_fragments_debug_win_ = nullptr;
211 }
212 
215  delete learn_debug_win_;
216  delete learn_fragmented_word_debug_win_;
217  delete learn_fragments_debug_win_;
218 }
219 
220 
221 // Takes ownership of the given classifier, and uses it for future calls
222 // to CharNormClassifier.
224  delete static_classifier_;
225  static_classifier_ = static_classifier;
226 }
227 
228 // Moved from speckle.cpp
229 // Adds a noise classification result that is a bit worse than the worst
230 // current result, or the worst possible result if no current results.
231 void Classify::AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices) {
232  BLOB_CHOICE_IT bc_it(choices);
233  // If there is no classifier result, we will use the worst possible certainty
234  // and corresponding rating.
235  float certainty = -getDict().certainty_scale;
236  float rating = rating_scale * blob_length;
237  if (!choices->empty() && blob_length > 0) {
238  bc_it.move_to_last();
239  BLOB_CHOICE* worst_choice = bc_it.data();
240  // Add speckle_rating_penalty to worst rating, matching old value.
241  rating = worst_choice->rating() + speckle_rating_penalty;
242  // Compute the rating to correspond to the certainty. (Used to be kept
243  // the same, but that messes up the language model search.)
244  certainty = -rating * getDict().certainty_scale /
245  (rating_scale * blob_length);
246  }
247  auto* blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty,
248  -1, 0.0f, FLT_MAX, 0,
250  bc_it.add_to_end(blob_choice);
251 }
252 
253 // Returns true if the blob is small enough to be a large speckle.
254 bool Classify::LargeSpeckle(const TBLOB &blob) {
255  double speckle_size = kBlnXHeight * speckle_large_max_size;
256  TBOX bbox = blob.bounding_box();
257  return bbox.width() < speckle_size && bbox.height() < speckle_size;
258 }
259 
260 } // namespace tesseract
261 
262 #endif // def DISABLED_LEGACY_ENGINE
ParamsVectors * params()
Definition: ccutil.h:65
const int kBlnXHeight
Definition: normalis.h:24
int matcher_permanent_classes_min
Definition: classify.h:466
bool classify_save_adapted_templates
Definition: classify.h:453
bool classify_enable_learning
Definition: classify.h:429
Definition: rect.h:34
bool classify_enable_adaptive_matcher
Definition: classify.h:449
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:528
double matcher_bad_match_pad
Definition: classify.h:463
float rating() const
Definition: ratngs.h:80
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:324
IntegerMatcher im_
Definition: classify.h:543
virtual Dict & getDict()
Definition: classify.h:107
double speckle_rating_penalty
Definition: classify.h:543
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:318
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:517
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:513
TBOX bounding_box() const
Definition: blobs.cpp:472
double matcher_rating_margin
Definition: classify.h:464
int matcher_min_examples_for_prototyping
Definition: classify.h:468
double classify_max_norm_scale_x
Definition: classify.h:438
double classify_max_norm_scale_y
Definition: classify.h:440
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:315
Definition: blobs.h:263
bool classify_nonlinear_norm
Definition: classify.h:456
NORM_PROTOS * NormProtos
Definition: classify.h:526
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
Definition: classify.cpp:231
BIT_VECTOR TempProtoMask
Definition: classify.h:523
bool prioritize_division
Definition: classify.h:428
UnicityTable< FontSet > fontset_table_
Definition: classify.h:536
char * classify_learn_debug_str
Definition: classify.h:499
double matcher_good_threshold
Definition: classify.h:460
void SetStaticClassifier(ShapeClassifier *static_classifier)
Definition: classify.cpp:223
ShapeTable * shape_table_
Definition: classify.h:552
int classify_integer_matcher_multiplier
Definition: classify.h:509
int16_t height() const
Definition: rect.h:108
bool classify_enable_adaptive_debugger
Definition: classify.h:454
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:470
double classify_min_norm_scale_y
Definition: classify.h:439
double classify_misfit_junk_penalty
Definition: classify.h:475
BIT_VECTOR AllConfigsOn
Definition: classify.h:521
double classify_adapted_pruning_threshold
Definition: classify.h:483
void FontSetDeleteCallback(FontSet fs)
Definition: fontinfo.cpp:147
bool LargeSpeckle(const TBLOB &blob)
Definition: classify.cpp:254
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:493
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
Definition: fontinfo.cpp:119
bool classify_use_pre_adapted_templates
Definition: classify.h:451
double speckle_large_max_size
Definition: classify.h:541
int16_t width() const
Definition: rect.h:115
double matcher_avg_noise_size
Definition: classify.h:465
double matcher_clustering_max_angle_delta
Definition: classify.h:472
bool matcher_debug_separate_windows
Definition: classify.h:498
~Classify() override
Definition: classify.cpp:213
double classify_max_certainty_margin
Definition: classify.h:444
void FontInfoDeleteCallback(FontInfo f)
Definition: fontinfo.cpp:138
int classify_adapt_feature_threshold
Definition: classify.h:487
bool allow_blob_division
Definition: classify.h:423
double matcher_perfect_threshold
Definition: classify.h:462
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:459
bool CompareFontSet(const FontSet &fs1, const FontSet &fs2)
Definition: fontinfo.cpp:127
BIT_VECTOR AllConfigsOff
Definition: classify.h:522
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:509
bool disable_character_fragments
Definition: classify.h:490
#define STRING_MEMBER(name, val, comment, vec)
Definition: params.h:321
int classify_cp_cutoff_strength
Definition: classify.h:507
double classify_min_norm_scale_x
Definition: classify.h:437
int classify_class_pruner_threshold
Definition: classify.h:503
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258
double classify_max_rating_ratio
Definition: classify.h:442
int classify_class_pruner_multiplier
Definition: classify.h:505
int classify_learning_debug_level
Definition: classify.h:459
double classify_char_norm_range
Definition: classify.h:436
double tessedit_class_miss_scale
Definition: classify.h:479
BIT_VECTOR AllProtosOn
Definition: classify.h:520
double certainty_scale
Definition: dict.h:617
double matcher_reliable_adaptive_result
Definition: classify.h:461
double classify_adapted_pruning_factor
Definition: classify.h:481
bool classify_bln_numeric_mode
Definition: classify.h:540
bool classify_debug_character_fragments
Definition: classify.h:495
double certainty_scale
Definition: classify.h:477
int classify_adapt_proto_threshold
Definition: classify.h:485