|
tesseract 3.04.01
|
00001 /****************************************************************************** 00002 ** Filename: intmatcher.h 00003 ** Purpose: Interface to high level generic classifier routines. 00004 ** Author: Robert Moss 00005 ** History: Wed Feb 13 15:24:15 MST 1991, RWM, Created. 00006 ** 00007 ** (c) Copyright Hewlett-Packard Company, 1988. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 ******************************************************************************/ 00018 #ifndef INTMATCHER_H 00019 #define INTMATCHER_H 00020 00021 #include "params.h" 00022 00023 // Character fragments could be present in the trained templaes 00024 // but turned on/off on the language-by-language basis or depending 00025 // on particular properties of the corpus (e.g. when we expect the 00026 // images to have low exposure). 00027 extern BOOL_VAR_H(disable_character_fragments, FALSE, 00028 "Do not include character fragments in the" 00029 " results of the classifier"); 00030 00031 extern INT_VAR_H(classify_integer_matcher_multiplier, 10, 00032 "Integer Matcher Multiplier 0-255: "); 00033 00034 00038 #include "intproto.h" 00039 #include "cutoffs.h" 00040 00041 namespace tesseract { 00042 struct UnicharRating; 00043 } 00044 00045 struct CP_RESULT_STRUCT { 00046 CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {} 00047 00048 FLOAT32 Rating; 00049 CLASS_ID Class; 00050 }; 00051 00052 /*---------------------------------------------------------------------------- 00053 Variables 00054 -----------------------------------------------------------------------------*/ 00055 00056 extern INT_VAR_H(classify_adapt_proto_thresh, 230, 00057 "Threshold for good protos during adaptive 0-255: "); 00058 00059 extern INT_VAR_H(classify_adapt_feature_thresh, 230, 00060 "Threshold for good features during adaptive 0-255: "); 00061 00066 #define SE_TABLE_BITS 9 00067 #define SE_TABLE_SIZE 512 00068 00069 struct ScratchEvidence { 00070 uinT8 feature_evidence_[MAX_NUM_CONFIGS]; 00071 int sum_feature_evidence_[MAX_NUM_CONFIGS]; 00072 uinT8 proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; 00073 00074 void Clear(const INT_CLASS class_template); 00075 void ClearFeatureEvidence(const INT_CLASS class_template); 00076 void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures, 00077 inT32 used_features); 00078 void UpdateSumOfProtoEvidences( 00079 INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures); 00080 }; 00081 00082 00083 class IntegerMatcher { 00084 public: 00085 // Integer Matcher Theta Fudge (0-255). 00086 static const int kIntThetaFudge = 128; 00087 // Bits in Similarity to Evidence Lookup (8-9). 00088 static const int kEvidenceTableBits = 9; 00089 // Integer Evidence Truncation Bits (8-14). 00090 static const int kIntEvidenceTruncBits = 14; 00091 // Similarity to Evidence Table Exponential Multiplier. 00092 static const float kSEExponentialMultiplier; 00093 // Center of Similarity Curve. 00094 static const float kSimilarityCenter; 00095 00096 IntegerMatcher() : classify_debug_level_(0) {} 00097 00098 void Init(tesseract::IntParam *classify_debug_level); 00099 00100 void Match(INT_CLASS ClassTemplate, 00101 BIT_VECTOR ProtoMask, 00102 BIT_VECTOR ConfigMask, 00103 inT16 NumFeatures, 00104 const INT_FEATURE_STRUCT* Features, 00105 tesseract::UnicharRating* Result, 00106 int AdaptFeatureThreshold, 00107 int Debug, 00108 bool SeparateDebugWindows); 00109 00110 // Applies the CN normalization factor to the given rating and returns 00111 // the modified rating. 00112 float ApplyCNCorrection(float rating, int blob_length, 00113 int normalization_factor, int matcher_multiplier); 00114 00115 int FindGoodProtos(INT_CLASS ClassTemplate, 00116 BIT_VECTOR ProtoMask, 00117 BIT_VECTOR ConfigMask, 00118 uinT16 BlobLength, 00119 inT16 NumFeatures, 00120 INT_FEATURE_ARRAY Features, 00121 PROTO_ID *ProtoArray, 00122 int AdaptProtoThreshold, 00123 int Debug); 00124 00125 int FindBadFeatures(INT_CLASS ClassTemplate, 00126 BIT_VECTOR ProtoMask, 00127 BIT_VECTOR ConfigMask, 00128 uinT16 BlobLength, 00129 inT16 NumFeatures, 00130 INT_FEATURE_ARRAY Features, 00131 FEATURE_ID *FeatureArray, 00132 int AdaptFeatureThreshold, 00133 int Debug); 00134 00135 private: 00136 int UpdateTablesForFeature( 00137 INT_CLASS ClassTemplate, 00138 BIT_VECTOR ProtoMask, 00139 BIT_VECTOR ConfigMask, 00140 int FeatureNum, 00141 const INT_FEATURE_STRUCT* Feature, 00142 ScratchEvidence *evidence, 00143 int Debug); 00144 00145 int FindBestMatch(INT_CLASS ClassTemplate, 00146 const ScratchEvidence &tables, 00147 tesseract::UnicharRating* Result); 00148 00149 #ifndef GRAPHICS_DISABLED 00150 void DebugFeatureProtoError( 00151 INT_CLASS ClassTemplate, 00152 BIT_VECTOR ProtoMask, 00153 BIT_VECTOR ConfigMask, 00154 const ScratchEvidence &tables, 00155 inT16 NumFeatures, 00156 int Debug); 00157 00158 void DisplayProtoDebugInfo( 00159 INT_CLASS ClassTemplate, 00160 BIT_VECTOR ProtoMask, 00161 BIT_VECTOR ConfigMask, 00162 const ScratchEvidence &tables, 00163 bool SeparateDebugWindows); 00164 00165 void DisplayFeatureDebugInfo( 00166 INT_CLASS ClassTemplate, 00167 BIT_VECTOR ProtoMask, 00168 BIT_VECTOR ConfigMask, 00169 inT16 NumFeatures, 00170 const INT_FEATURE_STRUCT* Features, 00171 int AdaptFeatureThreshold, 00172 int Debug, 00173 bool SeparateDebugWindows); 00174 #endif 00175 00176 00177 private: 00178 uinT8 similarity_evidence_table_[SE_TABLE_SIZE]; 00179 uinT32 evidence_table_mask_; 00180 uinT32 mult_trunc_shift_bits_; 00181 uinT32 table_trunc_shift_bits_; 00182 tesseract::IntParam *classify_debug_level_; 00183 uinT32 evidence_mult_mask_; 00184 }; 00185 00189 void IMDebugConfiguration(INT_FEATURE FeatureNum, 00190 uinT16 ActualProtoNum, 00191 uinT8 Evidence, 00192 BIT_VECTOR ConfigMask, 00193 uinT32 ConfigWord); 00194 00195 void IMDebugConfigurationSum(INT_FEATURE FeatureNum, 00196 uinT8 *FeatureEvidence, 00197 inT32 ConfigCount); 00198 00199 void HeapSort (int n, register int ra[], register int rb[]); 00200 00204 #endif