|
tesseract 3.04.01
|
00001 /****************************************************************************** 00002 ** Filename: intmatcher.c 00003 ** Purpose: Generic high level classification routines. 00004 ** Author: Robert Moss 00005 ** History: Wed Feb 13 17:35:28 MST 1991, RWM, Created. 00006 ** Mon Mar 11 16:33:02 MST 1991, RWM, Modified to add 00007 ** support for adaptive matching. 00008 ** (c) Copyright Hewlett-Packard Company, 1988. 00009 ** Licensed under the Apache License, Version 2.0 (the "License"); 00010 ** you may not use this file except in compliance with the License. 00011 ** You may obtain a copy of the License at 00012 ** http://www.apache.org/licenses/LICENSE-2.0 00013 ** Unless required by applicable law or agreed to in writing, software 00014 ** distributed under the License is distributed on an "AS IS" BASIS, 00015 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 ** See the License for the specific language governing permissions and 00017 ** limitations under the License. 00018 ******************************************************************************/ 00019 00020 // Include automatically generated configuration file if running autoconf. 00021 #ifdef HAVE_CONFIG_H 00022 #include "config_auto.h" 00023 #endif 00024 00025 /*---------------------------------------------------------------------------- 00026 Include Files and Type Defines 00027 ----------------------------------------------------------------------------*/ 00028 #include "intmatcher.h" 00029 00030 #include "fontinfo.h" 00031 #include "intproto.h" 00032 #include "callcpp.h" 00033 #include "scrollview.h" 00034 #include "float2int.h" 00035 #include "globals.h" 00036 #include "helpers.h" 00037 #include "classify.h" 00038 #include "shapetable.h" 00039 #include <math.h> 00040 00041 using tesseract::ScoredFont; 00042 using tesseract::UnicharRating; 00043 00044 /*---------------------------------------------------------------------------- 00045 Global Data Definitions and Declarations 00046 ----------------------------------------------------------------------------*/ 00047 // Parameters of the sigmoid used to convert similarity to evidence in the 00048 // similarity_evidence_table_ that is used to convert distance metric to an 00049 // 8 bit evidence value in the secondary matcher. (See IntMatcher::Init). 00050 const float IntegerMatcher::kSEExponentialMultiplier = 0.0; 00051 const float IntegerMatcher::kSimilarityCenter = 0.0075; 00052 00053 #define offset_table_entries \ 00054 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \ 00055 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \ 00056 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, \ 00057 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, \ 00058 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \ 00059 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \ 00060 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, \ 00061 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, \ 00062 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \ 00063 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \ 00064 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 00065 00066 #define INTMATCHER_OFFSET_TABLE_SIZE 256 00067 00068 #define next_table_entries \ 00069 0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e, \ 00070 0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a, \ 00071 0x18, 0x1c, 0x1c, 0x1e, 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, \ 00072 0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, 0x20, 0x30, 0x30, 0x32, \ 00073 0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e, \ 00074 0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a, \ 00075 0x48, 0x4c, 0x4c, 0x4e, 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, \ 00076 0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, 0x40, 0x60, 0x60, 0x62, \ 00077 0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e, \ 00078 0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a, \ 00079 0x78, 0x7c, 0x7c, 0x7e, 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, \ 00080 0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, 0x80, 0x90, 0x90, 0x92, \ 00081 0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e, \ 00082 0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa, \ 00083 0xa8, 0xac, 0xac, 0xae, 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, \ 00084 0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, 0x80, 0xc0, 0xc0, 0xc2, \ 00085 0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce, \ 00086 0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda, \ 00087 0xd8, 0xdc, 0xdc, 0xde, 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, \ 00088 0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, 0xe0, 0xf0, 0xf0, 0xf2, \ 00089 0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe 00090 00091 // See http://b/19318793 (#6) for a complete discussion. Merging arrays 00092 // offset_table and next_table helps improve performance of PIE code. 00093 static const uinT8 data_table[512] = {offset_table_entries, next_table_entries}; 00094 00095 static const uinT8* const offset_table = &data_table[0]; 00096 static const uinT8* const next_table = 00097 &data_table[INTMATCHER_OFFSET_TABLE_SIZE]; 00098 00099 namespace tesseract { 00100 00101 // Encapsulation of the intermediate data and computations made by the class 00102 // pruner. The class pruner implements a simple linear classifier on binary 00103 // features by heavily quantizing the feature space, and applying 00104 // NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in 00105 // weights is compensated by a non-constant bias that is dependent on the 00106 // number of features present. 00107 class ClassPruner { 00108 public: 00109 ClassPruner(int max_classes) { 00110 // The unrolled loop in ComputeScores means that the array sizes need to 00111 // be rounded up so that the array is big enough to accommodate the extra 00112 // entries accessed by the unrolling. Each pruner word is of sized 00113 // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are 00114 // BITS_PER_WERD / NUM_BITS_PER_CLASS entries. 00115 // See ComputeScores. 00116 max_classes_ = max_classes; 00117 rounded_classes_ = RoundUp( 00118 max_classes, WERDS_PER_CP_VECTOR * BITS_PER_WERD / NUM_BITS_PER_CLASS); 00119 class_count_ = new int[rounded_classes_]; 00120 norm_count_ = new int[rounded_classes_]; 00121 sort_key_ = new int[rounded_classes_ + 1]; 00122 sort_index_ = new int[rounded_classes_ + 1]; 00123 for (int i = 0; i < rounded_classes_; i++) { 00124 class_count_[i] = 0; 00125 } 00126 pruning_threshold_ = 0; 00127 num_features_ = 0; 00128 num_classes_ = 0; 00129 } 00130 00131 ~ClassPruner() { 00132 delete []class_count_; 00133 delete []norm_count_; 00134 delete []sort_key_; 00135 delete []sort_index_; 00136 } 00137 00140 void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates, 00141 int num_features, const INT_FEATURE_STRUCT* features) { 00142 num_features_ = num_features; 00143 int num_pruners = int_templates->NumClassPruners; 00144 for (int f = 0; f < num_features; ++f) { 00145 const INT_FEATURE_STRUCT* feature = &features[f]; 00146 // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS. 00147 int x = feature->X * NUM_CP_BUCKETS >> 8; 00148 int y = feature->Y * NUM_CP_BUCKETS >> 8; 00149 int theta = feature->Theta * NUM_CP_BUCKETS >> 8; 00150 int class_id = 0; 00151 // Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so 00152 // we need a collection of them, indexed by pruner_set. 00153 for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) { 00154 // Look up quantized feature in a 3-D array, an array of weights for 00155 // each class. 00156 const uinT32* pruner_word_ptr = 00157 int_templates->ClassPruners[pruner_set]->p[x][y][theta]; 00158 for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) { 00159 uinT32 pruner_word = *pruner_word_ptr++; 00160 // This inner loop is unrolled to speed up the ClassPruner. 00161 // Currently gcc would not unroll it unless it is set to O3 00162 // level of optimization or -funroll-loops is specified. 00163 /* 00164 uinT32 class_mask = (1 << NUM_BITS_PER_CLASS) - 1; 00165 for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) { 00166 class_count_[class_id++] += pruner_word & class_mask; 00167 pruner_word >>= NUM_BITS_PER_CLASS; 00168 } 00169 */ 00170 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00171 pruner_word >>= NUM_BITS_PER_CLASS; 00172 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00173 pruner_word >>= NUM_BITS_PER_CLASS; 00174 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00175 pruner_word >>= NUM_BITS_PER_CLASS; 00176 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00177 pruner_word >>= NUM_BITS_PER_CLASS; 00178 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00179 pruner_word >>= NUM_BITS_PER_CLASS; 00180 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00181 pruner_word >>= NUM_BITS_PER_CLASS; 00182 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00183 pruner_word >>= NUM_BITS_PER_CLASS; 00184 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00185 pruner_word >>= NUM_BITS_PER_CLASS; 00186 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00187 pruner_word >>= NUM_BITS_PER_CLASS; 00188 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00189 pruner_word >>= NUM_BITS_PER_CLASS; 00190 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00191 pruner_word >>= NUM_BITS_PER_CLASS; 00192 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00193 pruner_word >>= NUM_BITS_PER_CLASS; 00194 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00195 pruner_word >>= NUM_BITS_PER_CLASS; 00196 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00197 pruner_word >>= NUM_BITS_PER_CLASS; 00198 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00199 pruner_word >>= NUM_BITS_PER_CLASS; 00200 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK; 00201 } 00202 } 00203 } 00204 } 00205 00211 void AdjustForExpectedNumFeatures(const uinT16* expected_num_features, 00212 int cutoff_strength) { 00213 for (int class_id = 0; class_id < max_classes_; ++class_id) { 00214 if (num_features_ < expected_num_features[class_id]) { 00215 int deficit = expected_num_features[class_id] - num_features_; 00216 class_count_[class_id] -= class_count_[class_id] * deficit / 00217 (num_features_ * cutoff_strength + deficit); 00218 } 00219 } 00220 } 00221 00224 void DisableDisabledClasses(const UNICHARSET& unicharset) { 00225 for (int class_id = 0; class_id < max_classes_; ++class_id) { 00226 if (!unicharset.get_enabled(class_id)) 00227 class_count_[class_id] = 0; // This char is disabled! 00228 } 00229 } 00230 00232 void DisableFragments(const UNICHARSET& unicharset) { 00233 for (int class_id = 0; class_id < max_classes_; ++class_id) { 00234 // Do not include character fragments in the class pruner 00235 // results if disable_character_fragments is true. 00236 if (unicharset.get_fragment(class_id)) { 00237 class_count_[class_id] = 0; 00238 } 00239 } 00240 } 00241 00246 void NormalizeForXheight(int norm_multiplier, 00247 const uinT8* normalization_factors) { 00248 for (int class_id = 0; class_id < max_classes_; class_id++) { 00249 norm_count_[class_id] = class_count_[class_id] - 00250 ((norm_multiplier * normalization_factors[class_id]) >> 8); 00251 } 00252 } 00253 00255 void NoNormalization() { 00256 for (int class_id = 0; class_id < max_classes_; class_id++) { 00257 norm_count_[class_id] = class_count_[class_id]; 00258 } 00259 } 00260 00264 void PruneAndSort(int pruning_factor, int keep_this, 00265 bool max_of_non_fragments, const UNICHARSET& unicharset) { 00266 int max_count = 0; 00267 for (int c = 0; c < max_classes_; ++c) { 00268 if (norm_count_[c] > max_count && 00269 // This additional check is added in order to ensure that 00270 // the classifier will return at least one non-fragmented 00271 // character match. 00272 // TODO(daria): verify that this helps accuracy and does not 00273 // hurt performance. 00274 (!max_of_non_fragments || !unicharset.get_fragment(c))) { 00275 max_count = norm_count_[c]; 00276 } 00277 } 00278 // Prune Classes. 00279 pruning_threshold_ = (max_count * pruning_factor) >> 8; 00280 // Select Classes. 00281 if (pruning_threshold_ < 1) 00282 pruning_threshold_ = 1; 00283 num_classes_ = 0; 00284 for (int class_id = 0; class_id < max_classes_; class_id++) { 00285 if (norm_count_[class_id] >= pruning_threshold_ || 00286 class_id == keep_this) { 00287 ++num_classes_; 00288 sort_index_[num_classes_] = class_id; 00289 sort_key_[num_classes_] = norm_count_[class_id]; 00290 } 00291 } 00292 00293 // Sort Classes using Heapsort Algorithm. 00294 if (num_classes_ > 1) 00295 HeapSort(num_classes_, sort_key_, sort_index_); 00296 } 00297 00299 void DebugMatch(const Classify& classify, 00300 const INT_TEMPLATES_STRUCT* int_templates, 00301 const INT_FEATURE_STRUCT* features) const { 00302 int num_pruners = int_templates->NumClassPruners; 00303 int max_num_classes = int_templates->NumClasses; 00304 for (int f = 0; f < num_features_; ++f) { 00305 const INT_FEATURE_STRUCT* feature = &features[f]; 00306 tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta); 00307 // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS. 00308 int x = feature->X * NUM_CP_BUCKETS >> 8; 00309 int y = feature->Y * NUM_CP_BUCKETS >> 8; 00310 int theta = feature->Theta * NUM_CP_BUCKETS >> 8; 00311 int class_id = 0; 00312 for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) { 00313 // Look up quantized feature in a 3-D array, an array of weights for 00314 // each class. 00315 const uinT32* pruner_word_ptr = 00316 int_templates->ClassPruners[pruner_set]->p[x][y][theta]; 00317 for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) { 00318 uinT32 pruner_word = *pruner_word_ptr++; 00319 for (int word_class = 0; word_class < 16 && 00320 class_id < max_num_classes; ++word_class, ++class_id) { 00321 if (norm_count_[class_id] >= pruning_threshold_) { 00322 tprintf(" %s=%d,", 00323 classify.ClassIDToDebugStr(int_templates, 00324 class_id, 0).string(), 00325 pruner_word & CLASS_PRUNER_CLASS_MASK); 00326 } 00327 pruner_word >>= NUM_BITS_PER_CLASS; 00328 } 00329 } 00330 tprintf("\n"); 00331 } 00332 } 00333 } 00334 00336 void SummarizeResult(const Classify& classify, 00337 const INT_TEMPLATES_STRUCT* int_templates, 00338 const uinT16* expected_num_features, 00339 int norm_multiplier, 00340 const uinT8* normalization_factors) const { 00341 tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_); 00342 for (int i = 0; i < num_classes_; ++i) { 00343 int class_id = sort_index_[num_classes_ - i]; 00344 STRING class_string = classify.ClassIDToDebugStr(int_templates, 00345 class_id, 0); 00346 tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n", 00347 class_string.string(), 00348 class_count_[class_id], 00349 expected_num_features[class_id], 00350 (norm_multiplier * normalization_factors[class_id]) >> 8, 00351 sort_key_[num_classes_ - i], 00352 100.0 - 100.0 * sort_key_[num_classes_ - i] / 00353 (CLASS_PRUNER_CLASS_MASK * num_features_)); 00354 } 00355 } 00356 00359 int SetupResults(GenericVector<CP_RESULT_STRUCT>* results) const { 00360 CP_RESULT_STRUCT empty; 00361 results->init_to_size(num_classes_, empty); 00362 for (int c = 0; c < num_classes_; ++c) { 00363 (*results)[c].Class = sort_index_[num_classes_ - c]; 00364 (*results)[c].Rating = 1.0 - sort_key_[num_classes_ - c] / 00365 (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_); 00366 } 00367 return num_classes_; 00368 } 00369 00370 private: 00372 int *class_count_; 00375 int *norm_count_; 00377 int *sort_key_; 00379 int *sort_index_; 00381 int max_classes_; 00383 int rounded_classes_; 00385 int pruning_threshold_; 00387 int num_features_; 00389 int num_classes_; 00390 }; 00391 00392 /*---------------------------------------------------------------------------- 00393 Public Code 00394 ----------------------------------------------------------------------------*/ 00409 int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates, 00410 int num_features, int keep_this, 00411 const INT_FEATURE_STRUCT* features, 00412 const uinT8* normalization_factors, 00413 const uinT16* expected_num_features, 00414 GenericVector<CP_RESULT_STRUCT>* results) { 00415 ClassPruner pruner(int_templates->NumClasses); 00416 // Compute initial match scores for all classes. 00417 pruner.ComputeScores(int_templates, num_features, features); 00418 // Adjust match scores for number of expected features. 00419 pruner.AdjustForExpectedNumFeatures(expected_num_features, 00420 classify_cp_cutoff_strength); 00421 // Apply disabled classes in unicharset - only works without a shape_table. 00422 if (shape_table_ == NULL) 00423 pruner.DisableDisabledClasses(unicharset); 00424 // If fragments are disabled, remove them, also only without a shape table. 00425 if (disable_character_fragments && shape_table_ == NULL) 00426 pruner.DisableFragments(unicharset); 00427 00428 // If we have good x-heights, apply the given normalization factors. 00429 if (normalization_factors != NULL) { 00430 pruner.NormalizeForXheight(classify_class_pruner_multiplier, 00431 normalization_factors); 00432 } else { 00433 pruner.NoNormalization(); 00434 } 00435 // Do the actual pruning and sort the short-list. 00436 pruner.PruneAndSort(classify_class_pruner_threshold, keep_this, 00437 shape_table_ == NULL, unicharset); 00438 00439 if (classify_debug_level > 2) { 00440 pruner.DebugMatch(*this, int_templates, features); 00441 } 00442 if (classify_debug_level > 1) { 00443 pruner.SummarizeResult(*this, int_templates, expected_num_features, 00444 classify_class_pruner_multiplier, 00445 normalization_factors); 00446 } 00447 // Convert to the expected output format. 00448 return pruner.SetupResults(results); 00449 } 00450 00451 } // namespace tesseract 00452 00472 void IntegerMatcher::Match(INT_CLASS ClassTemplate, 00473 BIT_VECTOR ProtoMask, 00474 BIT_VECTOR ConfigMask, 00475 inT16 NumFeatures, 00476 const INT_FEATURE_STRUCT* Features, 00477 UnicharRating* Result, 00478 int AdaptFeatureThreshold, 00479 int Debug, 00480 bool SeparateDebugWindows) { 00481 ScratchEvidence *tables = new ScratchEvidence(); 00482 int Feature; 00483 int BestMatch; 00484 00485 if (MatchDebuggingOn (Debug)) 00486 cprintf ("Integer Matcher -------------------------------------------\n"); 00487 00488 tables->Clear(ClassTemplate); 00489 Result->feature_misses = 0; 00490 00491 for (Feature = 0; Feature < NumFeatures; Feature++) { 00492 int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, 00493 Feature, &Features[Feature], 00494 tables, Debug); 00495 // Count features that were missed over all configs. 00496 if (csum == 0) 00497 ++Result->feature_misses; 00498 } 00499 00500 #ifndef GRAPHICS_DISABLED 00501 if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) { 00502 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, 00503 NumFeatures, Debug); 00504 } 00505 00506 if (DisplayProtoMatchesOn(Debug)) { 00507 DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask, 00508 *tables, SeparateDebugWindows); 00509 } 00510 00511 if (DisplayFeatureMatchesOn(Debug)) { 00512 DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures, 00513 Features, AdaptFeatureThreshold, Debug, 00514 SeparateDebugWindows); 00515 } 00516 #endif 00517 00518 tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask, NumFeatures); 00519 tables->NormalizeSums(ClassTemplate, NumFeatures, NumFeatures); 00520 00521 BestMatch = FindBestMatch(ClassTemplate, *tables, Result); 00522 00523 #ifndef GRAPHICS_DISABLED 00524 if (PrintMatchSummaryOn(Debug)) 00525 Result->Print(); 00526 00527 if (MatchDebuggingOn(Debug)) 00528 cprintf("Match Complete --------------------------------------------\n"); 00529 #endif 00530 00531 delete tables; 00532 } 00533 00554 int IntegerMatcher::FindGoodProtos( 00555 INT_CLASS ClassTemplate, 00556 BIT_VECTOR ProtoMask, 00557 BIT_VECTOR ConfigMask, 00558 uinT16 BlobLength, 00559 inT16 NumFeatures, 00560 INT_FEATURE_ARRAY Features, 00561 PROTO_ID *ProtoArray, 00562 int AdaptProtoThreshold, 00563 int Debug) { 00564 ScratchEvidence *tables = new ScratchEvidence(); 00565 int NumGoodProtos = 0; 00566 00567 /* DEBUG opening heading */ 00568 if (MatchDebuggingOn (Debug)) 00569 cprintf 00570 ("Find Good Protos -------------------------------------------\n"); 00571 00572 tables->Clear(ClassTemplate); 00573 00574 for (int Feature = 0; Feature < NumFeatures; Feature++) 00575 UpdateTablesForFeature( 00576 ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]), 00577 tables, Debug); 00578 00579 #ifndef GRAPHICS_DISABLED 00580 if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug)) 00581 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, 00582 NumFeatures, Debug); 00583 #endif 00584 00585 /* Average Proto Evidences & Find Good Protos */ 00586 for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) { 00587 /* Compute Average for Actual Proto */ 00588 int Temp = 0; 00589 for (int i = 0; i < ClassTemplate->ProtoLengths[proto]; i++) 00590 Temp += tables->proto_evidence_[proto][i]; 00591 00592 Temp /= ClassTemplate->ProtoLengths[proto]; 00593 00594 /* Find Good Protos */ 00595 if (Temp >= AdaptProtoThreshold) { 00596 *ProtoArray = proto; 00597 ProtoArray++; 00598 NumGoodProtos++; 00599 } 00600 } 00601 00602 if (MatchDebuggingOn (Debug)) 00603 cprintf ("Match Complete --------------------------------------------\n"); 00604 delete tables; 00605 00606 return NumGoodProtos; 00607 } 00608 00609 00625 int IntegerMatcher::FindBadFeatures( 00626 INT_CLASS ClassTemplate, 00627 BIT_VECTOR ProtoMask, 00628 BIT_VECTOR ConfigMask, 00629 uinT16 BlobLength, 00630 inT16 NumFeatures, 00631 INT_FEATURE_ARRAY Features, 00632 FEATURE_ID *FeatureArray, 00633 int AdaptFeatureThreshold, 00634 int Debug) { 00635 ScratchEvidence *tables = new ScratchEvidence(); 00636 int NumBadFeatures = 0; 00637 00638 /* DEBUG opening heading */ 00639 if (MatchDebuggingOn(Debug)) 00640 cprintf("Find Bad Features -------------------------------------------\n"); 00641 00642 tables->Clear(ClassTemplate); 00643 00644 for (int Feature = 0; Feature < NumFeatures; Feature++) { 00645 UpdateTablesForFeature( 00646 ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature], 00647 tables, Debug); 00648 00649 /* Find Best Evidence for Current Feature */ 00650 int best = 0; 00651 for (int i = 0; i < ClassTemplate->NumConfigs; i++) 00652 if (tables->feature_evidence_[i] > best) 00653 best = tables->feature_evidence_[i]; 00654 00655 /* Find Bad Features */ 00656 if (best < AdaptFeatureThreshold) { 00657 *FeatureArray = Feature; 00658 FeatureArray++; 00659 NumBadFeatures++; 00660 } 00661 } 00662 00663 #ifndef GRAPHICS_DISABLED 00664 if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) 00665 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, 00666 NumFeatures, Debug); 00667 #endif 00668 00669 if (MatchDebuggingOn(Debug)) 00670 cprintf("Match Complete --------------------------------------------\n"); 00671 00672 delete tables; 00673 return NumBadFeatures; 00674 } 00675 00676 00677 void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) { 00678 classify_debug_level_ = classify_debug_level; 00679 00680 /* Initialize table for evidence to similarity lookup */ 00681 for (int i = 0; i < SE_TABLE_SIZE; i++) { 00682 uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS); 00683 double Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0; 00684 double evidence = Similarity / kSimilarityCenter; 00685 evidence = 255.0 / (evidence * evidence + 1.0); 00686 00687 if (kSEExponentialMultiplier > 0.0) { 00688 double scale = 1.0 - exp(-kSEExponentialMultiplier) * 00689 exp(kSEExponentialMultiplier * ((double) i / SE_TABLE_SIZE)); 00690 evidence *= ClipToRange(scale, 0.0, 1.0); 00691 } 00692 00693 similarity_evidence_table_[i] = (uinT8) (evidence + 0.5); 00694 } 00695 00696 /* Initialize evidence computation variables */ 00697 evidence_table_mask_ = 00698 ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits); 00699 mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits); 00700 table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1)); 00701 evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1); 00702 } 00703 00704 00705 /*---------------------------------------------------------------------------- 00706 Private Code 00707 ----------------------------------------------------------------------------*/ 00708 void ScratchEvidence::Clear(const INT_CLASS class_template) { 00709 memset(sum_feature_evidence_, 0, 00710 class_template->NumConfigs * sizeof(sum_feature_evidence_[0])); 00711 memset(proto_evidence_, 0, 00712 class_template->NumProtos * sizeof(proto_evidence_[0])); 00713 } 00714 00715 void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) { 00716 memset(feature_evidence_, 0, 00717 class_template->NumConfigs * sizeof(feature_evidence_[0])); 00718 } 00719 00720 00721 00728 void IMDebugConfiguration(int FeatureNum, 00729 uinT16 ActualProtoNum, 00730 uinT8 Evidence, 00731 BIT_VECTOR ConfigMask, 00732 uinT32 ConfigWord) { 00733 cprintf ("F = %3d, P = %3d, E = %3d, Configs = ", 00734 FeatureNum, (int) ActualProtoNum, (int) Evidence); 00735 while (ConfigWord) { 00736 if (ConfigWord & 1) 00737 cprintf ("1"); 00738 else 00739 cprintf ("0"); 00740 ConfigWord >>= 1; 00741 } 00742 cprintf ("\n"); 00743 } 00744 00745 00752 void IMDebugConfigurationSum(int FeatureNum, 00753 uinT8 *FeatureEvidence, 00754 inT32 ConfigCount) { 00755 cprintf("F=%3d, C=", FeatureNum); 00756 for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) { 00757 cprintf("%4d", FeatureEvidence[ConfigNum]); 00758 } 00759 cprintf("\n"); 00760 } 00761 00773 int IntegerMatcher::UpdateTablesForFeature( 00774 INT_CLASS ClassTemplate, 00775 BIT_VECTOR ProtoMask, 00776 BIT_VECTOR ConfigMask, 00777 int FeatureNum, 00778 const INT_FEATURE_STRUCT* Feature, 00779 ScratchEvidence *tables, 00780 int Debug) { 00781 uinT32 ConfigWord; 00782 uinT32 ProtoWord; 00783 uinT32 ProtoNum; 00784 uinT32 ActualProtoNum; 00785 uinT8 proto_byte; 00786 inT32 proto_word_offset; 00787 inT32 proto_offset; 00788 uinT8 config_byte; 00789 inT32 config_offset; 00790 PROTO_SET ProtoSet; 00791 uinT32 *ProtoPrunerPtr; 00792 INT_PROTO Proto; 00793 int ProtoSetIndex; 00794 uinT8 Evidence; 00795 uinT32 XFeatureAddress; 00796 uinT32 YFeatureAddress; 00797 uinT32 ThetaFeatureAddress; 00798 uinT8 *UINT8Pointer; 00799 int ProtoIndex; 00800 uinT8 Temp; 00801 int *IntPointer; 00802 int ConfigNum; 00803 inT32 M3; 00804 inT32 A3; 00805 uinT32 A4; 00806 00807 tables->ClearFeatureEvidence(ClassTemplate); 00808 00809 /* Precompute Feature Address offset for Proto Pruning */ 00810 XFeatureAddress = ((Feature->X >> 2) << 1); 00811 YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1); 00812 ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1); 00813 00814 for (ProtoSetIndex = 0, ActualProtoNum = 0; 00815 ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) { 00816 ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; 00817 ProtoPrunerPtr = (uinT32 *) ((*ProtoSet).ProtoPruner); 00818 for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET; 00819 ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum += 00820 (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) { 00821 /* Prune Protos of current Proto Set */ 00822 ProtoWord = *(ProtoPrunerPtr + XFeatureAddress); 00823 ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress); 00824 ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress); 00825 ProtoWord &= *ProtoMask; 00826 00827 if (ProtoWord != 0) { 00828 proto_byte = ProtoWord & 0xff; 00829 ProtoWord >>= 8; 00830 proto_word_offset = 0; 00831 while (ProtoWord != 0 || proto_byte != 0) { 00832 while (proto_byte == 0) { 00833 proto_byte = ProtoWord & 0xff; 00834 ProtoWord >>= 8; 00835 proto_word_offset += 8; 00836 } 00837 proto_offset = offset_table[proto_byte] + proto_word_offset; 00838 proto_byte = next_table[proto_byte]; 00839 Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]); 00840 ConfigWord = Proto->Configs[0]; 00841 A3 = (((Proto->A * (Feature->X - 128)) << 1) 00842 - (Proto->B * (Feature->Y - 128)) + (Proto->C << 9)); 00843 M3 = 00844 (((inT8) (Feature->Theta - Proto->Angle)) * kIntThetaFudge) << 1; 00845 00846 if (A3 < 0) 00847 A3 = ~A3; 00848 if (M3 < 0) 00849 M3 = ~M3; 00850 A3 >>= mult_trunc_shift_bits_; 00851 M3 >>= mult_trunc_shift_bits_; 00852 if (A3 > evidence_mult_mask_) 00853 A3 = evidence_mult_mask_; 00854 if (M3 > evidence_mult_mask_) 00855 M3 = evidence_mult_mask_; 00856 00857 A4 = (A3 * A3) + (M3 * M3); 00858 A4 >>= table_trunc_shift_bits_; 00859 if (A4 > evidence_table_mask_) 00860 Evidence = 0; 00861 else 00862 Evidence = similarity_evidence_table_[A4]; 00863 00864 if (PrintFeatureMatchesOn (Debug)) 00865 IMDebugConfiguration (FeatureNum, 00866 ActualProtoNum + proto_offset, 00867 Evidence, ConfigMask, ConfigWord); 00868 00869 ConfigWord &= *ConfigMask; 00870 00871 UINT8Pointer = tables->feature_evidence_ - 8; 00872 config_byte = 0; 00873 while (ConfigWord != 0 || config_byte != 0) { 00874 while (config_byte == 0) { 00875 config_byte = ConfigWord & 0xff; 00876 ConfigWord >>= 8; 00877 UINT8Pointer += 8; 00878 } 00879 config_offset = offset_table[config_byte]; 00880 config_byte = next_table[config_byte]; 00881 if (Evidence > UINT8Pointer[config_offset]) 00882 UINT8Pointer[config_offset] = Evidence; 00883 } 00884 00885 UINT8Pointer = 00886 &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]); 00887 for (ProtoIndex = 00888 ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset]; 00889 ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) { 00890 if (Evidence > *UINT8Pointer) { 00891 Temp = *UINT8Pointer; 00892 *UINT8Pointer = Evidence; 00893 Evidence = Temp; 00894 } 00895 else if (Evidence == 0) 00896 break; 00897 } 00898 } 00899 } 00900 } 00901 } 00902 00903 if (PrintFeatureMatchesOn(Debug)) { 00904 IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_, 00905 ClassTemplate->NumConfigs); 00906 } 00907 00908 IntPointer = tables->sum_feature_evidence_; 00909 UINT8Pointer = tables->feature_evidence_; 00910 int SumOverConfigs = 0; 00911 for (ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) { 00912 int evidence = *UINT8Pointer++; 00913 SumOverConfigs += evidence; 00914 *IntPointer++ += evidence; 00915 } 00916 return SumOverConfigs; 00917 } 00918 00919 00926 #ifndef GRAPHICS_DISABLED 00927 void IntegerMatcher::DebugFeatureProtoError( 00928 INT_CLASS ClassTemplate, 00929 BIT_VECTOR ProtoMask, 00930 BIT_VECTOR ConfigMask, 00931 const ScratchEvidence& tables, 00932 inT16 NumFeatures, 00933 int Debug) { 00934 FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS]; 00935 int ConfigNum; 00936 uinT32 ConfigWord; 00937 int ProtoSetIndex; 00938 uinT16 ProtoNum; 00939 uinT8 ProtoWordNum; 00940 PROTO_SET ProtoSet; 00941 uinT16 ActualProtoNum; 00942 00943 if (PrintMatchSummaryOn(Debug)) { 00944 cprintf("Configuration Mask:\n"); 00945 for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) 00946 cprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1)); 00947 cprintf("\n"); 00948 00949 cprintf("Feature Error for Configurations:\n"); 00950 for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) { 00951 cprintf( 00952 " %5.1f", 00953 100.0 * (1.0 - 00954 (FLOAT32) tables.sum_feature_evidence_[ConfigNum] 00955 / NumFeatures / 256.0)); 00956 } 00957 cprintf("\n\n\n"); 00958 } 00959 00960 if (PrintMatchSummaryOn (Debug)) { 00961 cprintf ("Proto Mask:\n"); 00962 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; 00963 ProtoSetIndex++) { 00964 ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); 00965 for (ProtoWordNum = 0; ProtoWordNum < 2; 00966 ProtoWordNum++, ProtoMask++) { 00967 ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); 00968 for (ProtoNum = 0; 00969 ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1)) 00970 && (ActualProtoNum < ClassTemplate->NumProtos)); 00971 ProtoNum++, ActualProtoNum++) 00972 cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1)); 00973 cprintf ("\n"); 00974 } 00975 } 00976 cprintf ("\n"); 00977 } 00978 00979 for (int i = 0; i < ClassTemplate->NumConfigs; i++) 00980 ProtoConfigs[i] = 0; 00981 00982 if (PrintProtoMatchesOn (Debug)) { 00983 cprintf ("Proto Evidence:\n"); 00984 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; 00985 ProtoSetIndex++) { 00986 ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; 00987 ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); 00988 for (ProtoNum = 0; 00989 ((ProtoNum < PROTOS_PER_PROTO_SET) && 00990 (ActualProtoNum < ClassTemplate->NumProtos)); 00991 ProtoNum++, ActualProtoNum++) { 00992 cprintf ("P %3d =", ActualProtoNum); 00993 int temp = 0; 00994 for (int j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) { 00995 uinT8 data = tables.proto_evidence_[ActualProtoNum][j]; 00996 cprintf(" %d", data); 00997 temp += data; 00998 } 00999 01000 cprintf(" = %6.4f%%\n", 01001 temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]); 01002 01003 ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0]; 01004 ConfigNum = 0; 01005 while (ConfigWord) { 01006 cprintf ("%5d", ConfigWord & 1 ? temp : 0); 01007 if (ConfigWord & 1) 01008 ProtoConfigs[ConfigNum] += temp; 01009 ConfigNum++; 01010 ConfigWord >>= 1; 01011 } 01012 cprintf("\n"); 01013 } 01014 } 01015 } 01016 01017 if (PrintMatchSummaryOn (Debug)) { 01018 cprintf ("Proto Error for Configurations:\n"); 01019 for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) 01020 cprintf (" %5.1f", 01021 100.0 * (1.0 - 01022 ProtoConfigs[ConfigNum] / 01023 ClassTemplate->ConfigLengths[ConfigNum] / 256.0)); 01024 cprintf ("\n\n"); 01025 } 01026 01027 if (PrintProtoMatchesOn (Debug)) { 01028 cprintf ("Proto Sum for Configurations:\n"); 01029 for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) 01030 cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0); 01031 cprintf ("\n\n"); 01032 01033 cprintf ("Proto Length for Configurations:\n"); 01034 for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) 01035 cprintf (" %4.1f", 01036 (float) ClassTemplate->ConfigLengths[ConfigNum]); 01037 cprintf ("\n\n"); 01038 } 01039 01040 } 01041 01042 void IntegerMatcher::DisplayProtoDebugInfo( 01043 INT_CLASS ClassTemplate, 01044 BIT_VECTOR ProtoMask, 01045 BIT_VECTOR ConfigMask, 01046 const ScratchEvidence& tables, 01047 bool SeparateDebugWindows) { 01048 uinT16 ProtoNum; 01049 uinT16 ActualProtoNum; 01050 PROTO_SET ProtoSet; 01051 int ProtoSetIndex; 01052 01053 InitIntMatchWindowIfReqd(); 01054 if (SeparateDebugWindows) { 01055 InitFeatureDisplayWindowIfReqd(); 01056 InitProtoDisplayWindowIfReqd(); 01057 } 01058 01059 01060 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; 01061 ProtoSetIndex++) { 01062 ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; 01063 ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET; 01064 for (ProtoNum = 0; 01065 ((ProtoNum < PROTOS_PER_PROTO_SET) && 01066 (ActualProtoNum < ClassTemplate->NumProtos)); 01067 ProtoNum++, ActualProtoNum++) { 01068 /* Compute Average for Actual Proto */ 01069 int temp = 0; 01070 for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++) 01071 temp += tables.proto_evidence_[ActualProtoNum][i]; 01072 01073 temp /= ClassTemplate->ProtoLengths[ActualProtoNum]; 01074 01075 if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) { 01076 DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0); 01077 } 01078 } 01079 } 01080 } 01081 01082 01083 void IntegerMatcher::DisplayFeatureDebugInfo( 01084 INT_CLASS ClassTemplate, 01085 BIT_VECTOR ProtoMask, 01086 BIT_VECTOR ConfigMask, 01087 inT16 NumFeatures, 01088 const INT_FEATURE_STRUCT* Features, 01089 int AdaptFeatureThreshold, 01090 int Debug, 01091 bool SeparateDebugWindows) { 01092 ScratchEvidence *tables = new ScratchEvidence(); 01093 01094 tables->Clear(ClassTemplate); 01095 01096 InitIntMatchWindowIfReqd(); 01097 if (SeparateDebugWindows) { 01098 InitFeatureDisplayWindowIfReqd(); 01099 InitProtoDisplayWindowIfReqd(); 01100 } 01101 01102 for (int Feature = 0; Feature < NumFeatures; Feature++) { 01103 UpdateTablesForFeature( 01104 ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature], 01105 tables, 0); 01106 01107 /* Find Best Evidence for Current Feature */ 01108 int best = 0; 01109 for (int i = 0; i < ClassTemplate->NumConfigs; i++) 01110 if (tables->feature_evidence_[i] > best) 01111 best = tables->feature_evidence_[i]; 01112 01113 /* Update display for current feature */ 01114 if (ClipMatchEvidenceOn(Debug)) { 01115 if (best < AdaptFeatureThreshold) 01116 DisplayIntFeature(&Features[Feature], 0.0); 01117 else 01118 DisplayIntFeature(&Features[Feature], 1.0); 01119 } else { 01120 DisplayIntFeature(&Features[Feature], best / 255.0); 01121 } 01122 } 01123 01124 delete tables; 01125 } 01126 #endif 01127 01131 void ScratchEvidence::UpdateSumOfProtoEvidences( 01132 INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures) { 01133 01134 int *IntPointer; 01135 uinT32 ConfigWord; 01136 int ProtoSetIndex; 01137 uinT16 ProtoNum; 01138 PROTO_SET ProtoSet; 01139 int NumProtos; 01140 uinT16 ActualProtoNum; 01141 01142 NumProtos = ClassTemplate->NumProtos; 01143 01144 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; 01145 ProtoSetIndex++) { 01146 ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; 01147 ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); 01148 for (ProtoNum = 0; 01149 ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos)); 01150 ProtoNum++, ActualProtoNum++) { 01151 int temp = 0; 01152 for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++) 01153 temp += proto_evidence_[ActualProtoNum] [i]; 01154 01155 ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0]; 01156 ConfigWord &= *ConfigMask; 01157 IntPointer = sum_feature_evidence_; 01158 while (ConfigWord) { 01159 if (ConfigWord & 1) 01160 *IntPointer += temp; 01161 IntPointer++; 01162 ConfigWord >>= 1; 01163 } 01164 } 01165 } 01166 } 01167 01168 01169 01174 void ScratchEvidence::NormalizeSums( 01175 INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features) { 01176 01177 for (int i = 0; i < ClassTemplate->NumConfigs; i++) { 01178 sum_feature_evidence_[i] = (sum_feature_evidence_[i] << 8) / 01179 (NumFeatures + ClassTemplate->ConfigLengths[i]); 01180 } 01181 } 01182 01183 01191 int IntegerMatcher::FindBestMatch( 01192 INT_CLASS class_template, 01193 const ScratchEvidence &tables, 01194 UnicharRating* result) { 01195 int best_match = 0; 01196 result->config = 0; 01197 result->fonts.truncate(0); 01198 result->fonts.reserve(class_template->NumConfigs); 01199 01200 /* Find best match */ 01201 for (int c = 0; c < class_template->NumConfigs; ++c) { 01202 int rating = tables.sum_feature_evidence_[c]; 01203 if (*classify_debug_level_ > 2) 01204 tprintf("Config %d, rating=%d\n", c, rating); 01205 if (rating > best_match) { 01206 result->config = c; 01207 best_match = rating; 01208 } 01209 result->fonts.push_back(ScoredFont(c, rating)); 01210 } 01211 01212 // Compute confidence on a Probability scale. 01213 result->rating = best_match / 65536.0f; 01214 01215 return best_match; 01216 } 01217 01222 float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length, 01223 int normalization_factor, 01224 int matcher_multiplier) { 01225 return (rating * blob_length + 01226 matcher_multiplier * normalization_factor / 256.0) / 01227 (blob_length + matcher_multiplier); 01228 } 01229 01241 void 01242 HeapSort (int n, register int ra[], register int rb[]) { 01243 int i, rra, rrb; 01244 int l, j, ir; 01245 01246 l = (n >> 1) + 1; 01247 ir = n; 01248 for (;;) { 01249 if (l > 1) { 01250 rra = ra[--l]; 01251 rrb = rb[l]; 01252 } 01253 else { 01254 rra = ra[ir]; 01255 rrb = rb[ir]; 01256 ra[ir] = ra[1]; 01257 rb[ir] = rb[1]; 01258 if (--ir == 1) { 01259 ra[1] = rra; 01260 rb[1] = rrb; 01261 return; 01262 } 01263 } 01264 i = l; 01265 j = l << 1; 01266 while (j <= ir) { 01267 if (j < ir && ra[j] < ra[j + 1]) 01268 ++j; 01269 if (rra < ra[j]) { 01270 ra[i] = ra[j]; 01271 rb[i] = rb[j]; 01272 j += (i = j); 01273 } 01274 else 01275 j = ir + 1; 01276 } 01277 ra[i] = rra; 01278 rb[i] = rrb; 01279 } 01280 }