tesseract 3.04.01

classify/intmatcher.h

Go to the documentation of this file.
00001 /******************************************************************************
00002  **     Filename:    intmatcher.h
00003  **     Purpose:     Interface to high level generic classifier routines.
00004  **     Author:      Robert Moss
00005  **     History:     Wed Feb 13 15:24:15 MST 1991, RWM, Created.
00006  **
00007  **     (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00018 #ifndef   INTMATCHER_H
00019 #define   INTMATCHER_H
00020 
00021 #include "params.h"
00022 
00023 // Character fragments could be present in the trained templates
00024 // but turned on/off on a language-by-language basis or depending
00025 // on particular properties of the corpus (e.g. when we expect the
00026 // images to have low exposure).
00027 extern BOOL_VAR_H(disable_character_fragments, FALSE,
00028                   "Do not include character fragments in the"
00029                   " results of the classifier");
00030 
// Integer parameter for the integer matcher; its help text gives a 0-255
// range and it is declared here with the value 10.
// NOTE(review): IntegerMatcher::ApplyCNCorrection takes a matcher_multiplier
// argument; presumably this parameter is what callers pass there -- confirm.
00031 extern INT_VAR_H(classify_integer_matcher_multiplier, 10,
00032                  "Integer Matcher Multiplier  0-255:   ");
00033 
00034 
00038 #include "intproto.h"
00039 #include "cutoffs.h"
00040 
00041 namespace tesseract {
00042 struct UnicharRating;
00043 }
00044 
// Pairs a floating-point rating with a class id.
// NOTE(review): the CP_ prefix presumably stands for "class pruner" -- confirm
// against the code that fills this struct.
00045 struct CP_RESULT_STRUCT {
        // Default state: rating 0.0 and class id 0.
00046   CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}
00047 
00048   FLOAT32 Rating;
00049   CLASS_ID Class;
00050 };
00051 
00052 /*----------------------------------------------------------------------------
00053             Variables
00054 -----------------------------------------------------------------------------*/
00055 
// Threshold parameters for adaptive classification. Both help strings give a
// 0-255 range and both are declared here with the value 230.
// NOTE(review): presumably these feed the AdaptProtoThreshold and
// AdaptFeatureThreshold arguments of IntegerMatcher::FindGoodProtos and
// IntegerMatcher::FindBadFeatures -- confirm at the call sites.
00056 extern INT_VAR_H(classify_adapt_proto_thresh, 230,
00057                  "Threshold for good protos during adaptive 0-255:   ");
00058 
00059 extern INT_VAR_H(classify_adapt_feature_thresh, 230,
00060                  "Threshold for good features during adaptive 0-255:   ");
00061 
// Size parameters for the similarity-to-evidence lookup table.
// SE_TABLE_SIZE (512) equals 1 << SE_TABLE_BITS (9) and is the array bound of
// IntegerMatcher::similarity_evidence_table_.
// NOTE(review): the two values are maintained by hand; change them together.
00066 #define  SE_TABLE_BITS    9
00067 #define  SE_TABLE_SIZE  512
00068 
// Evidence buffers that IntegerMatcher hands to its private helpers
// (UpdateTablesForFeature, FindBestMatch and the debug display routines).
// All arrays are fixed-size members, so the struct owns no heap storage itself.
00069 struct ScratchEvidence {
        // One 8-bit evidence value per configuration slot.
00070   uinT8 feature_evidence_[MAX_NUM_CONFIGS];
        // One integer accumulator per configuration slot.
00071   int sum_feature_evidence_[MAX_NUM_CONFIGS];
        // 8-bit evidence values indexed by [proto][proto index].
00072   uinT8 proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX];
00073 
        // NOTE(review): the method bodies are not in this header. The
        // descriptions below are inferred from names and signatures only --
        // confirm against the definitions.
        // Clear / ClearFeatureEvidence: presumably reset the buffers (all of
        // them, or only feature_evidence_) for the given class template.
00074   void Clear(const INT_CLASS class_template);
00075   void ClearFeatureEvidence(const INT_CLASS class_template);
        // NormalizeSums: presumably rescales sum_feature_evidence_ using the
        // feature counts passed in.
00076   void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures,
00077                      inT32 used_features);
        // UpdateSumOfProtoEvidences: presumably folds proto_evidence_ into
        // the per-configuration sums, restricted by ConfigMask.
00078   void UpdateSumOfProtoEvidences(
00079     INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures);
00080 };
00081 
00082 
// Integer matcher interface. It declares the public matching entry points
// (Match, FindGoodProtos, FindBadFeatures, ApplyCNCorrection), the private
// helpers that work on a ScratchEvidence, and the lookup table and mask/shift
// state used by the matcher.
// NOTE(review): only declarations are visible in this header. Statements below
// about what a method does are inferred from its name and signature and are
// marked as such -- confirm against the definitions.
00083 class IntegerMatcher {
00084  public:
00085   // Integer Matcher Theta Fudge (0-255).
00086   static const int kIntThetaFudge = 128;
00087   // Bits in Similarity to Evidence Lookup (8-9).
        // NOTE(review): same value as the SE_TABLE_BITS macro (9); confirm
        // whether the two are required to stay in sync.
00088   static const int kEvidenceTableBits = 9;
00089   // Integer Evidence Truncation Bits (8-14).
00090   static const int kIntEvidenceTruncBits = 14;
00091   // Similarity to Evidence Table Exponential Multiplier.
        // Declared without an initializer here; the value is supplied by an
        // out-of-class definition.
00092   static const float kSEExponentialMultiplier;
00093   // Center of Similarity Curve.
        // Declared without an initializer here; the value is supplied by an
        // out-of-class definition.
00094   static const float kSimilarityCenter;
00095 
        // Sets classify_debug_level_ to null and initializes nothing else.
        // NOTE(review): the lookup table and the mask/shift members are left
        // uninitialized here; presumably Init() must be called before any
        // matching -- confirm.
00096   IntegerMatcher() : classify_debug_level_(0) {}
00097 
        // Takes a pointer to the debug-level parameter.
        // NOTE(review): presumably stores it in classify_debug_level_ and
        // fills the table and mask/shift members -- confirm.
00098   void Init(tesseract::IntParam *classify_debug_level);
00099 
        // Takes a class template, proto and config masks, and a read-only
        // array of features with its count; Result is a non-const output
        // pointer.
        // NOTE(review): presumably rates the features against the template
        // and writes the rating into *Result -- confirm.
00100   void Match(INT_CLASS ClassTemplate,
00101              BIT_VECTOR ProtoMask,
00102              BIT_VECTOR ConfigMask,
00103              inT16 NumFeatures,
00104              const INT_FEATURE_STRUCT* Features,
00105              tesseract::UnicharRating* Result,
00106              int AdaptFeatureThreshold,
00107              int Debug,
00108              bool SeparateDebugWindows);
00109 
00110   // Applies the CN normalization factor to the given rating and returns
00111   // the modified rating.
00112   float ApplyCNCorrection(float rating, int blob_length,
00113                           int normalization_factor, int matcher_multiplier);
00114 
        // NOTE(review): presumably writes the ids of protos that pass
        // AdaptProtoThreshold into ProtoArray and returns how many were
        // written -- confirm.
00115   int FindGoodProtos(INT_CLASS ClassTemplate,
00116                      BIT_VECTOR ProtoMask,
00117                      BIT_VECTOR ConfigMask,
00118                      uinT16 BlobLength,
00119                      inT16 NumFeatures,
00120                      INT_FEATURE_ARRAY Features,
00121                      PROTO_ID *ProtoArray,
00122                      int AdaptProtoThreshold,
00123                      int Debug);
00124 
        // NOTE(review): presumably writes the ids of features that fail
        // AdaptFeatureThreshold into FeatureArray and returns how many were
        // written -- confirm.
00125   int FindBadFeatures(INT_CLASS ClassTemplate,
00126                       BIT_VECTOR ProtoMask,
00127                       BIT_VECTOR ConfigMask,
00128                       uinT16 BlobLength,
00129                       inT16 NumFeatures,
00130                       INT_FEATURE_ARRAY Features,
00131                       FEATURE_ID *FeatureArray,
00132                       int AdaptFeatureThreshold,
00133                       int Debug);
00134 
00135  private:
        // Takes one feature (by index and by pointer) plus a mutable
        // ScratchEvidence.
        // NOTE(review): presumably accumulates that feature's evidence into
        // *evidence; the meaning of the int return value is not visible
        // here -- confirm.
00136   int UpdateTablesForFeature(
00137       INT_CLASS ClassTemplate,
00138       BIT_VECTOR ProtoMask,
00139       BIT_VECTOR ConfigMask,
00140       int FeatureNum,
00141       const INT_FEATURE_STRUCT* Feature,
00142       ScratchEvidence *evidence,
00143       int Debug);
00144 
        // Reads the evidence tables (const reference); Result is a non-const
        // output pointer.
        // NOTE(review): presumably selects the best-scoring configuration;
        // the meaning of the int return value is not visible here -- confirm.
00145   int FindBestMatch(INT_CLASS ClassTemplate,
00146                     const ScratchEvidence &tables,
00147                     tesseract::UnicharRating* Result);
00148 
        // The three debug/display helpers below are declared only when
        // GRAPHICS_DISABLED is not defined.
00149 #ifndef GRAPHICS_DISABLED
00150   void DebugFeatureProtoError(
00151       INT_CLASS ClassTemplate,
00152       BIT_VECTOR ProtoMask,
00153       BIT_VECTOR ConfigMask,
00154       const ScratchEvidence &tables,
00155       inT16 NumFeatures,
00156       int Debug);
00157 
00158   void DisplayProtoDebugInfo(
00159       INT_CLASS ClassTemplate,
00160       BIT_VECTOR ProtoMask,
00161       BIT_VECTOR ConfigMask,
00162       const ScratchEvidence &tables,
00163       bool SeparateDebugWindows);
00164 
00165   void DisplayFeatureDebugInfo(
00166       INT_CLASS ClassTemplate,
00167       BIT_VECTOR ProtoMask,
00168       BIT_VECTOR ConfigMask,
00169       inT16 NumFeatures,
00170       const INT_FEATURE_STRUCT* Features,
00171       int AdaptFeatureThreshold,
00172       int Debug,
00173       bool SeparateDebugWindows);
00174 #endif
00175 
00176 
        // This access specifier repeats the `private:` above; it changes
        // nothing.
00177  private:
        // Lookup table with SE_TABLE_SIZE (512) 8-bit entries.
00178   uinT8 similarity_evidence_table_[SE_TABLE_SIZE];
        // Mask and shift values used by the matcher.
        // NOTE(review): presumably derived from the k* constants above when
        // Init() runs -- confirm.
00179   uinT32 evidence_table_mask_;
00180   uinT32 mult_trunc_shift_bits_;
00181   uinT32 table_trunc_shift_bits_;
        // Pointer to the debug-level parameter; null after construction.
00182   tesseract::IntParam *classify_debug_level_;
00183   uinT32 evidence_mult_mask_;
00184 };
00185 
// Free-function debug helpers for the integer matcher, declared here and
// defined elsewhere.
// NOTE(review): both take a parameter named FeatureNum with type INT_FEATURE,
// whereas IntegerMatcher::UpdateTablesForFeature declares FeatureNum as int.
// Confirm that these prototypes match their definitions; a mismatch would
// silently declare separate overloads.
// NOTE(review): presumably IMDebugConfiguration reports the evidence one proto
// contributes for one feature -- confirm.
00189 void IMDebugConfiguration(INT_FEATURE FeatureNum,
00190                           uinT16 ActualProtoNum,
00191                           uinT8 Evidence,
00192                           BIT_VECTOR ConfigMask,
00193                           uinT32 ConfigWord);
00194 
// NOTE(review): presumably reports the per-configuration evidence in
// FeatureEvidence for ConfigCount configurations -- confirm.
00195 void IMDebugConfigurationSum(INT_FEATURE FeatureNum,
00196                              uinT8 *FeatureEvidence,
00197                              inT32 ConfigCount);
00198 
// Sorting helper, declared here and defined elsewhere.
// NOTE(review): presumably sorts ra[] and permutes rb[] alongside it, with n
// giving the element count -- confirm against the definition, including
// whether the arrays are indexed from 0 or from 1.
// The `register` storage-class specifier has been dropped from the array
// parameters: it is deprecated since C++11 and ill-formed from C++17 onward.
// It was never part of the function's type, so existing definitions and
// callers still match this prototype.
00199 void HeapSort(int n, int ra[], int rb[]);
00200 
00204 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines