tesseract  4.1.0
intmatcher.h
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: intmatcher.h
3  ** Purpose: Interface to high level generic classifier routines.
4  ** Author: Robert Moss
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 #ifndef INTMATCHER_H
18 #define INTMATCHER_H
19 
20 #include "params.h"
21 
22 // Character fragments could be present in the trained templates
23 // but turned on/off on a language-by-language basis or depending
24 // on particular properties of the corpus (e.g. when we expect the
25 // images to have low exposure).
27  "Do not include character fragments in the"
28  " results of the classifier");
29 
31  "Integer Matcher Multiplier 0-255: ");
32 
33 
37 #include "intproto.h"
38 
// Forward declaration only. In the declarations visible in this header,
// tesseract::UnicharRating is used solely through pointers (the Result
// parameters of IntegerMatcher::Match and IntegerMatcher::FindBestMatch),
// so an incomplete type is sufficient here.
39 namespace tesseract {
40 struct UnicharRating;
41 }
42 
// Default constructor: initializes Rating to 0.0f and Class to 0.
44  CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}
45 
46  float Rating;
48 };
49 
50 /*----------------------------------------------------------------------------
51  Variables
52 -----------------------------------------------------------------------------*/
53 
55  "Threshold for good protos during adaptive 0-255: ");
56 
58  "Threshold for good features during adaptive 0-255: ");
59 
// Bit count and entry count used for IntegerMatcher::similarity_evidence_table_
// (the array is dimensioned with SE_TABLE_SIZE).
// The values are consistent with SE_TABLE_SIZE == 1 << SE_TABLE_BITS
// (1 << 9 == 512). NOTE(review): nothing here enforces that relationship, so
// change the two together.
64 #define SE_TABLE_BITS 9
65 #define SE_TABLE_SIZE 512
66 
// Evidence arrays (data members). NOTE(review): the enclosing struct's opening
// line is missing from this listing; presumably it is ScratchEvidence, the type
// passed to IntegerMatcher's private helpers — TODO confirm.
// One byte per configuration slot (MAX_NUM_CONFIGS entries).
68  uint8_t feature_evidence_[MAX_NUM_CONFIGS];
// One int per configuration slot; by its name a running sum of feature
// evidence — confirm against the implementation.
69  int sum_feature_evidence_[MAX_NUM_CONFIGS];
// Two-dimensional byte table, MAX_NUM_PROTOS rows by MAX_PROTO_INDEX columns.
70  uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX];
71 
// Member function declarations operating on the evidence arrays above; no
// bodies are given in this header.
// NOTE(review): the one-line descriptions below are inferred from names and
// signatures only — confirm against the implementation before relying on them.
// Presumably resets the evidence arrays for the given class template.
72  void Clear(const INT_CLASS class_template);
// Presumably resets only the feature evidence (not the proto evidence).
73  void ClearFeatureEvidence(const INT_CLASS class_template);
// Presumably scales sum_feature_evidence_ using NumFeatures.
74  void NormalizeSums(INT_CLASS ClassTemplate, int16_t NumFeatures);
// Presumably accumulates proto evidence into the per-config sums, restricted
// by ConfigMask (a BIT_VECTOR, i.e. uint32_t* per the page's cross-reference).
75  void UpdateSumOfProtoEvidences(
76  INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask);
77 };
78 
79 
81  public:
82  // Integer Matcher Theta Fudge (0-255).
83  static const int kIntThetaFudge = 128;
84  // Bits in Similarity to Evidence Lookup (8-9).
// Same value as the SE_TABLE_BITS macro (9).
85  static const int kEvidenceTableBits = 9;
86  // Integer Evidence Truncation Bits (8-14).
87  static const int kIntEvidenceTruncBits = 14;
88  // Similarity to Evidence Table Exponential Multiplier.
// Declared without an initializer here; the value must come from an
// out-of-class definition (not visible in this header).
89  static const float kSEExponentialMultiplier;
90  // Center of Similarity Curve.
// Declared without an initializer here; the value must come from an
// out-of-class definition (not visible in this header).
91  static const float kSimilarityCenter;
92 
// Constructs a matcher from a pointer to an integer parameter object
// (tesseract::IntParam). NOTE(review): presumably the pointer is stored in
// classify_debug_level_ and must therefore outlive the matcher — confirm
// against the implementation.
93  IntegerMatcher(tesseract::IntParam *classify_debug_level);
94 
// Public matching entry point (declaration only).
// Features is a pointer-to-const, so the features are not modified through
// it; Result is a non-const tesseract::UnicharRating pointer, presumably
// where the outcome of the match is written. NumFeatures is presumably the
// number of elements reachable through Features — TODO confirm.
// ProtoMask and ConfigMask are BIT_VECTORs (uint32_t* per the page's
// cross-reference).
// NOTE(review): how the masks, AdaptFeatureThreshold, Debug and
// SeparateDebugWindows influence the match is not visible in this header.
95  void Match(INT_CLASS ClassTemplate,
96  BIT_VECTOR ProtoMask,
97  BIT_VECTOR ConfigMask,
98  int16_t NumFeatures,
99  const INT_FEATURE_STRUCT* Features,
100  tesseract::UnicharRating* Result,
101  int AdaptFeatureThreshold,
102  int Debug,
103  bool SeparateDebugWindows);
104 
105  // Applies the CN normalization factor to the given rating and returns
106  // the modified rating.
// NOTE(review): the formula combining blob_length, normalization_factor and
// matcher_multiplier is not visible in this header; see the implementation.
107  float ApplyCNCorrection(float rating, int blob_length,
108  int normalization_factor, int matcher_multiplier);
109 
// Declaration only. Features has type INT_FEATURE_ARRAY, an array typedef
// (INT_FEATURE_STRUCT[MAX_NUM_INT_FEATURES] per the page's cross-reference),
// so it is passed as a pointer to the first element. ProtoArray is a
// non-const PROTO_ID (int16_t) pointer.
// NOTE(review): presumably ProtoArray is an output buffer of proto ids that
// meet AdaptProtoThreshold and the int return value is the number written —
// inferred from the names only; confirm against the implementation.
110  int FindGoodProtos(INT_CLASS ClassTemplate,
111  BIT_VECTOR ProtoMask,
112  BIT_VECTOR ConfigMask,
113  int16_t NumFeatures,
114  INT_FEATURE_ARRAY Features,
115  PROTO_ID *ProtoArray,
116  int AdaptProtoThreshold,
117  int Debug);
118 
// Declaration only. Same parameter layout as FindGoodProtos, except that the
// pointer parameter is FeatureArray, a non-const FEATURE_ID (uint8_t per the
// page's cross-reference) pointer, and the threshold is AdaptFeatureThreshold.
// NOTE(review): presumably FeatureArray is an output buffer of feature ids
// that fall below the threshold and the int return value is the number
// written — inferred from the names only; confirm against the implementation.
119  int FindBadFeatures(INT_CLASS ClassTemplate,
120  BIT_VECTOR ProtoMask,
121  BIT_VECTOR ConfigMask,
122  int16_t NumFeatures,
123  INT_FEATURE_ARRAY Features,
124  FEATURE_ID *FeatureArray,
125  int AdaptFeatureThreshold,
126  int Debug);
127 
128  private:
// Private helper (declaration only) that handles one feature at a time:
// Feature is a pointer-to-const single INT_FEATURE_STRUCT and FeatureNum its
// index; evidence is a non-const ScratchEvidence pointer, so the evidence
// tables can be modified through it.
// NOTE(review): the meaning of the int return value is not visible in this
// header — see the implementation.
129  int UpdateTablesForFeature(
130  INT_CLASS ClassTemplate,
131  BIT_VECTOR ProtoMask,
132  BIT_VECTOR ConfigMask,
133  int FeatureNum,
134  const INT_FEATURE_STRUCT* Feature,
135  ScratchEvidence *evidence,
136  int Debug);
137 
// Private helper (declaration only). Takes the evidence tables by const
// reference, so they are read-only here, and a non-const
// tesseract::UnicharRating pointer, presumably filled in with the best match.
// NOTE(review): the meaning of the int return value is not visible in this
// header — see the implementation.
138  int FindBestMatch(INT_CLASS ClassTemplate,
139  const ScratchEvidence &tables,
140  tesseract::UnicharRating* Result);
141 
142 #ifndef GRAPHICS_DISABLED
// Debug-only helper: declared inside the enclosing #ifndef GRAPHICS_DISABLED
// region, so it does not exist when GRAPHICS_DISABLED is defined.
// Takes the evidence tables by const reference (read-only).
// NOTE(review): what is reported, and how Debug selects it, is not visible in
// this header.
143  void DebugFeatureProtoError(
144  INT_CLASS ClassTemplate,
145  BIT_VECTOR ProtoMask,
146  BIT_VECTOR ConfigMask,
147  const ScratchEvidence &tables,
148  int16_t NumFeatures,
149  int Debug);
150 
// Debug-only helper: declared inside the enclosing #ifndef GRAPHICS_DISABLED
// region, so it does not exist when GRAPHICS_DISABLED is defined.
// Takes the evidence tables by const reference (read-only).
// NOTE(review): presumably draws per-proto evidence, optionally in separate
// windows — inferred from the names; confirm against the implementation.
151  void DisplayProtoDebugInfo(
152  INT_CLASS ClassTemplate,
153  BIT_VECTOR ConfigMask,
154  const ScratchEvidence &tables,
155  bool SeparateDebugWindows);
156 
// Debug-only helper: declared inside the enclosing #ifndef GRAPHICS_DISABLED
// region, so it does not exist when GRAPHICS_DISABLED is defined.
// Features is a pointer-to-const, so the features are not modified through it.
// NOTE(review): presumably draws per-feature evidence, optionally in separate
// windows — inferred from the names; confirm against the implementation.
157  void DisplayFeatureDebugInfo(
158  INT_CLASS ClassTemplate,
159  BIT_VECTOR ProtoMask,
160  BIT_VECTOR ConfigMask,
161  int16_t NumFeatures,
162  const INT_FEATURE_STRUCT* Features,
163  int AdaptFeatureThreshold,
164  int Debug,
165  bool SeparateDebugWindows);
166 #endif
167 
168  private:
// Pointer to an integer parameter object; same type as the constructor's
// classify_debug_level argument (presumably that pointer — TODO confirm).
169  tesseract::IntParam *classify_debug_level_;
// Byte lookup table with SE_TABLE_SIZE (512) entries.
170  uint8_t similarity_evidence_table_[SE_TABLE_SIZE];
// 32-bit masks and shift counts. NOTE(review): presumably precomputed from
// kEvidenceTableBits / kIntEvidenceTruncBits when the matcher is constructed;
// their derivation is not visible in this header.
171  uint32_t evidence_table_mask_;
172  uint32_t mult_trunc_shift_bits_;
173  uint32_t table_trunc_shift_bits_;
174  uint32_t evidence_mult_mask_;
175 };
176 
177 #endif
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:297
static const float kSimilarityCenter
Definition: intmatcher.h:91
#define MAX_NUM_CONFIGS
Definition: intproto.h:47
bool disable_character_fragments
#define INT_VAR_H(name, val, comment)
Definition: params.h:295
int classify_adapt_proto_thresh
#define SE_TABLE_SIZE
Definition: intmatcher.h:65
static const float kSEExponentialMultiplier
Definition: intmatcher.h:89
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:152
#define MAX_PROTO_INDEX
Definition: intproto.h:44
CLASS_ID Class
Definition: intmatcher.h:47
#define MAX_NUM_PROTOS
Definition: intproto.h:48
uint8_t FEATURE_ID
Definition: matchdefs.h:46
uint32_t * BIT_VECTOR
Definition: bitvec.h:28
int classify_integer_matcher_multiplier
int classify_adapt_feature_thresh
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:34
int16_t PROTO_ID
Definition: matchdefs.h:40