|
tesseract 3.04.01
|
00001 // Copyright 2008 Google Inc. All Rights Reserved. 00002 // Author: scharron@google.com (Samuel Charron) 00003 // 00004 // Licensed under the Apache License, Version 2.0 (the "License"); 00005 // you may not use this file except in compliance with the License. 00006 // You may obtain a copy of the License at 00007 // http://www.apache.org/licenses/LICENSE-2.0 00008 // Unless required by applicable law or agreed to in writing, software 00009 // distributed under the License is distributed on an "AS IS" BASIS, 00010 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00011 // See the License for the specific language governing permissions and 00012 // limitations under the License. 00013 00014 #include "commontraining.h" 00015 00016 #include "allheaders.h" 00017 #include "ccutil.h" 00018 #include "classify.h" 00019 #include "cluster.h" 00020 #include "clusttool.h" 00021 #include "efio.h" 00022 #include "emalloc.h" 00023 #include "featdefs.h" 00024 #include "fontinfo.h" 00025 #include "freelist.h" 00026 #include "globals.h" 00027 #include "intfeaturespace.h" 00028 #include "mastertrainer.h" 00029 #include "mf.h" 00030 #include "ndminx.h" 00031 #include "oldlist.h" 00032 #include "params.h" 00033 #include "shapetable.h" 00034 #include "tessdatamanager.h" 00035 #include "tessopt.h" 00036 #include "tprintf.h" 00037 #include "unicity_table.h" 00038 00039 #include <math.h> 00040 00041 using tesseract::CCUtil; 00042 using tesseract::FontInfo; 00043 using tesseract::IntFeatureSpace; 00044 using tesseract::ParamUtils; 00045 using tesseract::ShapeTable; 00046 00047 // Global Variables. 00048 00049 // global variable to hold configuration parameters to control clustering 00050 // -M 0.625 -B 0.05 -I 1.0 -C 1e-6. 00051 CLUSTERCONFIG Config = { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 }; 00052 FEATURE_DEFS_STRUCT feature_defs; 00053 CCUtil ccutil; 00054 00055 INT_PARAM_FLAG(debug_level, 0, "Level of Trainer debugging"); 00056 INT_PARAM_FLAG(load_images, 0, "Load images with tr files"); 00057 STRING_PARAM_FLAG(configfile, "", "File to load more configs from"); 00058 STRING_PARAM_FLAG(D, "", "Directory to write output files to"); 00059 STRING_PARAM_FLAG(F, "font_properties", "File listing font properties"); 00060 STRING_PARAM_FLAG(X, "", "File listing font xheights"); 00061 STRING_PARAM_FLAG(U, "unicharset", "File to load unicharset from"); 00062 STRING_PARAM_FLAG(O, "", "File to write unicharset to"); 00063 STRING_PARAM_FLAG(T, "", "File to load trainer from"); 00064 STRING_PARAM_FLAG(output_trainer, "", "File to write trainer to"); 00065 STRING_PARAM_FLAG(test_ch, "", "UTF8 test character string"); 00066 DOUBLE_PARAM_FLAG(clusterconfig_min_samples_fraction, Config.MinSamples, 00067 "Min number of samples per proto as % of total"); 00068 DOUBLE_PARAM_FLAG(clusterconfig_max_illegal, Config.MaxIllegal, 00069 "Max percentage of samples in a cluster which have more" 00070 " than 1 feature in that cluster"); 00071 DOUBLE_PARAM_FLAG(clusterconfig_independence, Config.Independence, 00072 "Desired independence between dimensions"); 00073 DOUBLE_PARAM_FLAG(clusterconfig_confidence, Config.Confidence, 00074 "Desired confidence in prototypes created"); 00075 00088 void ParseArguments(int* argc, char ***argv) { 00089 STRING usage; 00090 if (*argc) { 00091 usage += (*argv)[0]; 00092 } 00093 usage += " [.tr files ...]"; 00094 tesseract::ParseCommandLineFlags(usage.c_str(), argc, argv, true); 00095 // Record the index of the first non-flag argument to 1, since we set 00096 // remove_flags to true when parsing the flags. 00097 tessoptind = 1; 00098 // Set some global values based on the flags. 00099 Config.MinSamples = 00100 MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_min_samples_fraction))); 00101 Config.MaxIllegal = 00102 MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_max_illegal))); 00103 Config.Independence = 00104 MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_independence))); 00105 Config.Confidence = 00106 MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_confidence))); 00107 // Set additional parameters from config file if specified. 00108 if (!FLAGS_configfile.empty()) { 00109 tesseract::ParamUtils::ReadParamsFile( 00110 FLAGS_configfile.c_str(), 00111 tesseract::SET_PARAM_CONSTRAINT_NON_INIT_ONLY, 00112 ccutil.params()); 00113 } 00114 } 00115 00116 namespace tesseract { 00117 // Helper loads shape table from the given file. 00118 ShapeTable* LoadShapeTable(const STRING& file_prefix) { 00119 ShapeTable* shape_table = NULL; 00120 STRING shape_table_file = file_prefix; 00121 shape_table_file += kShapeTableFileSuffix; 00122 FILE* shape_fp = fopen(shape_table_file.string(), "rb"); 00123 if (shape_fp != NULL) { 00124 shape_table = new ShapeTable; 00125 if (!shape_table->DeSerialize(false, shape_fp)) { 00126 delete shape_table; 00127 shape_table = NULL; 00128 tprintf("Error: Failed to read shape table %s\n", 00129 shape_table_file.string()); 00130 } else { 00131 int num_shapes = shape_table->NumShapes(); 00132 tprintf("Read shape table %s of %d shapes\n", 00133 shape_table_file.string(), num_shapes); 00134 } 00135 fclose(shape_fp); 00136 } else { 00137 tprintf("Warning: No shape table file present: %s\n", 00138 shape_table_file.string()); 00139 } 00140 return shape_table; 00141 } 00142 00143 // Helper to write the shape_table. 00144 void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table) { 00145 STRING shape_table_file = file_prefix; 00146 shape_table_file += kShapeTableFileSuffix; 00147 FILE* fp = fopen(shape_table_file.string(), "wb"); 00148 if (fp != NULL) { 00149 if (!shape_table.Serialize(fp)) { 00150 fprintf(stderr, "Error writing shape table: %s\n", 00151 shape_table_file.string()); 00152 } 00153 fclose(fp); 00154 } else { 00155 fprintf(stderr, "Error creating shape table: %s\n", 00156 shape_table_file.string()); 00157 } 00158 } 00159 00175 MasterTrainer* LoadTrainingData(int argc, const char* const * argv, 00176 bool replication, 00177 ShapeTable** shape_table, 00178 STRING* file_prefix) { 00179 InitFeatureDefs(&feature_defs); 00180 InitIntegerFX(); 00181 *file_prefix = ""; 00182 if (!FLAGS_D.empty()) { 00183 *file_prefix += FLAGS_D.c_str(); 00184 *file_prefix += "/"; 00185 } 00186 // If we are shape clustering (NULL shape_table) or we successfully load 00187 // a shape_table written by a previous shape clustering, then 00188 // shape_analysis will be true, meaning that the MasterTrainer will replace 00189 // some members of the unicharset with their fragments. 00190 bool shape_analysis = false; 00191 if (shape_table != NULL) { 00192 *shape_table = LoadShapeTable(*file_prefix); 00193 if (*shape_table != NULL) 00194 shape_analysis = true; 00195 } else { 00196 shape_analysis = true; 00197 } 00198 MasterTrainer* trainer = new MasterTrainer(NM_CHAR_ANISOTROPIC, 00199 shape_analysis, 00200 replication, 00201 FLAGS_debug_level); 00202 IntFeatureSpace fs; 00203 fs.Init(kBoostXYBuckets, kBoostXYBuckets, kBoostDirBuckets); 00204 if (FLAGS_T.empty()) { 00205 trainer->LoadUnicharset(FLAGS_U.c_str()); 00206 // Get basic font information from font_properties. 00207 if (!FLAGS_F.empty()) { 00208 if (!trainer->LoadFontInfo(FLAGS_F.c_str())) { 00209 delete trainer; 00210 return NULL; 00211 } 00212 } 00213 if (!FLAGS_X.empty()) { 00214 if (!trainer->LoadXHeights(FLAGS_X.c_str())) { 00215 delete trainer; 00216 return NULL; 00217 } 00218 } 00219 trainer->SetFeatureSpace(fs); 00220 const char* page_name; 00221 // Load training data from .tr files on the command line. 00222 while ((page_name = GetNextFilename(argc, argv)) != NULL) { 00223 tprintf("Reading %s ...\n", page_name); 00224 trainer->ReadTrainingSamples(page_name, feature_defs, false); 00225 00226 // If there is a file with [lang].[fontname].exp[num].fontinfo present, 00227 // read font spacing information in to fontinfo_table. 00228 int pagename_len = strlen(page_name); 00229 char *fontinfo_file_name = new char[pagename_len + 7]; 00230 strncpy(fontinfo_file_name, page_name, pagename_len - 2); // remove "tr" 00231 strcpy(fontinfo_file_name + pagename_len - 2, "fontinfo"); // +"fontinfo" 00232 trainer->AddSpacingInfo(fontinfo_file_name); 00233 delete[] fontinfo_file_name; 00234 00235 // Load the images into memory if required by the classifier. 00236 if (FLAGS_load_images) { 00237 STRING image_name = page_name; 00238 // Chop off the tr and replace with tif. Extension must be tif! 00239 image_name.truncate_at(image_name.length() - 2); 00240 image_name += "tif"; 00241 trainer->LoadPageImages(image_name.string()); 00242 } 00243 } 00244 trainer->PostLoadCleanup(); 00245 // Write the master trainer if required. 00246 if (!FLAGS_output_trainer.empty()) { 00247 FILE* fp = fopen(FLAGS_output_trainer.c_str(), "wb"); 00248 if (fp == NULL) { 00249 tprintf("Can't create saved trainer data!\n"); 00250 } else { 00251 trainer->Serialize(fp); 00252 fclose(fp); 00253 } 00254 } 00255 } else { 00256 bool success = false; 00257 tprintf("Loading master trainer from file:%s\n", 00258 FLAGS_T.c_str()); 00259 FILE* fp = fopen(FLAGS_T.c_str(), "rb"); 00260 if (fp == NULL) { 00261 tprintf("Can't read file %s to initialize master trainer\n", 00262 FLAGS_T.c_str()); 00263 } else { 00264 success = trainer->DeSerialize(false, fp); 00265 fclose(fp); 00266 } 00267 if (!success) { 00268 tprintf("Deserialize of master trainer failed!\n"); 00269 delete trainer; 00270 return NULL; 00271 } 00272 trainer->SetFeatureSpace(fs); 00273 } 00274 trainer->PreTrainingSetup(); 00275 if (!FLAGS_O.empty() && 00276 !trainer->unicharset().save_to_file(FLAGS_O.c_str())) { 00277 fprintf(stderr, "Failed to save unicharset to file %s\n", FLAGS_O.c_str()); 00278 delete trainer; 00279 return NULL; 00280 } 00281 if (shape_table != NULL) { 00282 // If we previously failed to load a shapetable, then shape clustering 00283 // wasn't run so make a flat one now. 00284 if (*shape_table == NULL) { 00285 *shape_table = new ShapeTable; 00286 trainer->SetupFlatShapeTable(*shape_table); 00287 tprintf("Flat shape table summary: %s\n", 00288 (*shape_table)->SummaryStr().string()); 00289 } 00290 (*shape_table)->set_unicharset(trainer->unicharset()); 00291 } 00292 return trainer; 00293 } 00294 00295 } // namespace tesseract. 00296 00297 /*---------------------------------------------------------------------------*/ 00310 const char *GetNextFilename(int argc, const char* const * argv) { 00311 if (tessoptind < argc) 00312 return argv[tessoptind++]; 00313 else 00314 return NULL; 00315 } /* GetNextFilename */ 00316 00317 00318 00319 /*---------------------------------------------------------------------------*/ 00331 LABELEDLIST FindList ( 00332 LIST List, 00333 char *Label) 00334 { 00335 LABELEDLIST LabeledList; 00336 00337 iterate (List) 00338 { 00339 LabeledList = (LABELEDLIST) first_node (List); 00340 if (strcmp (LabeledList->Label, Label) == 0) 00341 return (LabeledList); 00342 } 00343 return (NULL); 00344 00345 } /* FindList */ 00346 00347 /*---------------------------------------------------------------------------*/ 00357 LABELEDLIST NewLabeledList ( 00358 const char *Label) 00359 { 00360 LABELEDLIST LabeledList; 00361 00362 LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE)); 00363 LabeledList->Label = (char*)Emalloc (strlen (Label)+1); 00364 strcpy (LabeledList->Label, Label); 00365 LabeledList->List = NIL_LIST; 00366 LabeledList->SampleCount = 0; 00367 LabeledList->font_sample_count = 0; 00368 return (LabeledList); 00369 00370 } /* NewLabeledList */ 00371 00372 /*---------------------------------------------------------------------------*/ 00373 // TODO(rays) This is now used only by cntraining. Convert cntraining to use 00374 // the new method or get rid of it entirely. 00394 void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs, 00395 const char *feature_name, int max_samples, 00396 UNICHARSET* unicharset, 00397 FILE* file, LIST* training_samples) { 00398 char buffer[2048]; 00399 char unichar[UNICHAR_LEN + 1]; 00400 LABELEDLIST char_sample; 00401 FEATURE_SET feature_samples; 00402 CHAR_DESC char_desc; 00403 int i; 00404 int feature_type = ShortNameToFeatureType(feature_defs, feature_name); 00405 // Zero out the font_sample_count for all the classes. 00406 LIST it = *training_samples; 00407 iterate(it) { 00408 char_sample = reinterpret_cast<LABELEDLIST>(first_node(it)); 00409 char_sample->font_sample_count = 0; 00410 } 00411 00412 while (fgets(buffer, 2048, file) != NULL) { 00413 if (buffer[0] == '\n') 00414 continue; 00415 00416 sscanf(buffer, "%*s %s", unichar); 00417 if (unicharset != NULL && !unicharset->contains_unichar(unichar)) { 00418 unicharset->unichar_insert(unichar); 00419 if (unicharset->size() > MAX_NUM_CLASSES) { 00420 tprintf("Error: Size of unicharset in training is " 00421 "greater than MAX_NUM_CLASSES\n"); 00422 exit(1); 00423 } 00424 } 00425 char_sample = FindList(*training_samples, unichar); 00426 if (char_sample == NULL) { 00427 char_sample = NewLabeledList(unichar); 00428 *training_samples = push(*training_samples, char_sample); 00429 } 00430 char_desc = ReadCharDescription(feature_defs, file); 00431 feature_samples = char_desc->FeatureSets[feature_type]; 00432 if (char_sample->font_sample_count < max_samples || max_samples <= 0) { 00433 char_sample->List = push(char_sample->List, feature_samples); 00434 char_sample->SampleCount++; 00435 char_sample->font_sample_count++; 00436 } else { 00437 FreeFeatureSet(feature_samples); 00438 } 00439 for (i = 0; i < char_desc->NumFeatureSets; i++) { 00440 if (feature_type != i) 00441 FreeFeatureSet(char_desc->FeatureSets[i]); 00442 } 00443 free(char_desc); 00444 } 00445 } // ReadTrainingSamples 00446 00447 00448 /*---------------------------------------------------------------------------*/ 00458 void FreeTrainingSamples(LIST CharList) { 00459 LABELEDLIST char_sample; 00460 FEATURE_SET FeatureSet; 00461 LIST FeatureList; 00462 00463 00464 iterate(CharList) { /* iterate through all of the fonts */ 00465 char_sample = (LABELEDLIST) first_node(CharList); 00466 FeatureList = char_sample->List; 00467 iterate(FeatureList) { /* iterate through all of the classes */ 00468 FeatureSet = (FEATURE_SET) first_node(FeatureList); 00469 FreeFeatureSet(FeatureSet); 00470 } 00471 FreeLabeledList(char_sample); 00472 } 00473 destroy(CharList); 00474 } /* FreeTrainingSamples */ 00475 00476 /*---------------------------------------------------------------------------*/ 00487 void FreeLabeledList(LABELEDLIST LabeledList) { 00488 destroy(LabeledList->List); 00489 free(LabeledList->Label); 00490 free(LabeledList); 00491 } /* FreeLabeledList */ 00492 00493 /*---------------------------------------------------------------------------*/ 00507 CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs, 00508 LABELEDLIST char_sample, 00509 const char* program_feature_type) { 00510 uinT16 N; 00511 int i, j; 00512 FLOAT32 *Sample = NULL; 00513 CLUSTERER *Clusterer; 00514 inT32 CharID; 00515 LIST FeatureList = NULL; 00516 FEATURE_SET FeatureSet = NULL; 00517 00518 int desc_index = ShortNameToFeatureType(FeatureDefs, program_feature_type); 00519 N = FeatureDefs.FeatureDesc[desc_index]->NumParams; 00520 Clusterer = MakeClusterer(N, FeatureDefs.FeatureDesc[desc_index]->ParamDesc); 00521 00522 FeatureList = char_sample->List; 00523 CharID = 0; 00524 iterate(FeatureList) { 00525 FeatureSet = (FEATURE_SET) first_node(FeatureList); 00526 for (i = 0; i < FeatureSet->MaxNumFeatures; i++) { 00527 if (Sample == NULL) 00528 Sample = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); 00529 for (j = 0; j < N; j++) 00530 Sample[j] = FeatureSet->Features[i]->Params[j]; 00531 MakeSample (Clusterer, Sample, CharID); 00532 } 00533 CharID++; 00534 } 00535 if ( Sample != NULL ) free( Sample ); 00536 return( Clusterer ); 00537 00538 } /* SetUpForClustering */ 00539 00540 /*------------------------------------------------------------------------*/ 00541 void MergeInsignificantProtos(LIST ProtoList, const char* label, 00542 CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { 00543 PROTOTYPE *Prototype; 00544 bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0; 00545 00546 LIST pProtoList = ProtoList; 00547 iterate(pProtoList) { 00548 Prototype = (PROTOTYPE *) first_node (pProtoList); 00549 if (Prototype->Significant || Prototype->Merged) 00550 continue; 00551 FLOAT32 best_dist = 0.125; 00552 PROTOTYPE* best_match = NULL; 00553 // Find the nearest alive prototype. 00554 LIST list_it = ProtoList; 00555 iterate(list_it) { 00556 PROTOTYPE* test_p = (PROTOTYPE *) first_node (list_it); 00557 if (test_p != Prototype && !test_p->Merged) { 00558 FLOAT32 dist = ComputeDistance(Clusterer->SampleSize, 00559 Clusterer->ParamDesc, 00560 Prototype->Mean, test_p->Mean); 00561 if (dist < best_dist) { 00562 best_match = test_p; 00563 best_dist = dist; 00564 } 00565 } 00566 } 00567 if (best_match != NULL && !best_match->Significant) { 00568 if (debug) 00569 tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n", 00570 best_match->NumSamples, Prototype->NumSamples, 00571 best_match->Mean[0], best_match->Mean[1], 00572 Prototype->Mean[0], Prototype->Mean[1]); 00573 best_match->NumSamples = MergeClusters(Clusterer->SampleSize, 00574 Clusterer->ParamDesc, 00575 best_match->NumSamples, 00576 Prototype->NumSamples, 00577 best_match->Mean, 00578 best_match->Mean, Prototype->Mean); 00579 Prototype->NumSamples = 0; 00580 Prototype->Merged = 1; 00581 } else if (best_match != NULL) { 00582 if (debug) 00583 tprintf("Red proto at %g,%g matched a green one at %g,%g\n", 00584 Prototype->Mean[0], Prototype->Mean[1], 00585 best_match->Mean[0], best_match->Mean[1]); 00586 Prototype->Merged = 1; 00587 } 00588 } 00589 // Mark significant those that now have enough samples. 00590 int min_samples = (inT32) (Config->MinSamples * Clusterer->NumChar); 00591 pProtoList = ProtoList; 00592 iterate(pProtoList) { 00593 Prototype = (PROTOTYPE *) first_node (pProtoList); 00594 // Process insignificant protos that do not match a green one 00595 if (!Prototype->Significant && Prototype->NumSamples >= min_samples && 00596 !Prototype->Merged) { 00597 if (debug) 00598 tprintf("Red proto at %g,%g becoming green\n", 00599 Prototype->Mean[0], Prototype->Mean[1]); 00600 Prototype->Significant = true; 00601 } 00602 } 00603 } /* MergeInsignificantProtos */ 00604 00605 /*-----------------------------------------------------------------------------*/ 00606 void CleanUpUnusedData( 00607 LIST ProtoList) 00608 { 00609 PROTOTYPE* Prototype; 00610 00611 iterate(ProtoList) 00612 { 00613 Prototype = (PROTOTYPE *) first_node (ProtoList); 00614 if(Prototype->Variance.Elliptical != NULL) 00615 { 00616 memfree(Prototype->Variance.Elliptical); 00617 Prototype->Variance.Elliptical = NULL; 00618 } 00619 if(Prototype->Magnitude.Elliptical != NULL) 00620 { 00621 memfree(Prototype->Magnitude.Elliptical); 00622 Prototype->Magnitude.Elliptical = NULL; 00623 } 00624 if(Prototype->Weight.Elliptical != NULL) 00625 { 00626 memfree(Prototype->Weight.Elliptical); 00627 Prototype->Weight.Elliptical = NULL; 00628 } 00629 } 00630 } 00631 00632 /*------------------------------------------------------------------------*/ 00633 LIST RemoveInsignificantProtos( 00634 LIST ProtoList, 00635 BOOL8 KeepSigProtos, 00636 BOOL8 KeepInsigProtos, 00637 int N) 00638 00639 { 00640 LIST NewProtoList = NIL_LIST; 00641 LIST pProtoList; 00642 PROTOTYPE* Proto; 00643 PROTOTYPE* NewProto; 00644 int i; 00645 00646 pProtoList = ProtoList; 00647 iterate(pProtoList) 00648 { 00649 Proto = (PROTOTYPE *) first_node (pProtoList); 00650 if ((Proto->Significant && KeepSigProtos) || 00651 (!Proto->Significant && KeepInsigProtos)) 00652 { 00653 NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE)); 00654 00655 NewProto->Mean = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); 00656 NewProto->Significant = Proto->Significant; 00657 NewProto->Style = Proto->Style; 00658 NewProto->NumSamples = Proto->NumSamples; 00659 NewProto->Cluster = NULL; 00660 NewProto->Distrib = NULL; 00661 00662 for (i=0; i < N; i++) 00663 NewProto->Mean[i] = Proto->Mean[i]; 00664 if (Proto->Variance.Elliptical != NULL) 00665 { 00666 NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); 00667 for (i=0; i < N; i++) 00668 NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i]; 00669 } 00670 else 00671 NewProto->Variance.Elliptical = NULL; 00672 //--------------------------------------------- 00673 if (Proto->Magnitude.Elliptical != NULL) 00674 { 00675 NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); 00676 for (i=0; i < N; i++) 00677 NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i]; 00678 } 00679 else 00680 NewProto->Magnitude.Elliptical = NULL; 00681 //------------------------------------------------ 00682 if (Proto->Weight.Elliptical != NULL) 00683 { 00684 NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); 00685 for (i=0; i < N; i++) 00686 NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i]; 00687 } 00688 else 00689 NewProto->Weight.Elliptical = NULL; 00690 00691 NewProto->TotalMagnitude = Proto->TotalMagnitude; 00692 NewProto->LogMagnitude = Proto->LogMagnitude; 00693 NewProtoList = push_last(NewProtoList, NewProto); 00694 } 00695 } 00696 FreeProtoList(&ProtoList); 00697 return (NewProtoList); 00698 } /* RemoveInsignificantProtos */ 00699 00700 /*----------------------------------------------------------------------------*/ 00701 MERGE_CLASS FindClass ( 00702 LIST List, 00703 const char *Label) 00704 { 00705 MERGE_CLASS MergeClass; 00706 00707 iterate (List) 00708 { 00709 MergeClass = (MERGE_CLASS) first_node (List); 00710 if (strcmp (MergeClass->Label, Label) == 0) 00711 return (MergeClass); 00712 } 00713 return (NULL); 00714 00715 } /* FindClass */ 00716 00717 /*---------------------------------------------------------------------------*/ 00718 MERGE_CLASS NewLabeledClass ( 00719 const char *Label) 00720 { 00721 MERGE_CLASS MergeClass; 00722 00723 MergeClass = new MERGE_CLASS_NODE; 00724 MergeClass->Label = (char*)Emalloc (strlen (Label)+1); 00725 strcpy (MergeClass->Label, Label); 00726 MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS); 00727 return (MergeClass); 00728 00729 } /* NewLabeledClass */ 00730 00731 /*-----------------------------------------------------------------------------*/ 00741 void FreeLabeledClassList ( 00742 LIST ClassList) 00743 { 00744 MERGE_CLASS MergeClass; 00745 00746 iterate (ClassList) /* iterate through all of the fonts */ 00747 { 00748 MergeClass = (MERGE_CLASS) first_node (ClassList); 00749 free (MergeClass->Label); 00750 FreeClass(MergeClass->Class); 00751 delete MergeClass; 00752 } 00753 destroy (ClassList); 00754 00755 } /* FreeLabeledClassList */ 00756 00757 /* SetUpForFloat2Int */ 00758 CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset, 00759 LIST LabeledClassList) { 00760 MERGE_CLASS MergeClass; 00761 CLASS_TYPE Class; 00762 int NumProtos; 00763 int NumConfigs; 00764 int NumWords; 00765 int i, j; 00766 float Values[3]; 00767 PROTO NewProto; 00768 PROTO OldProto; 00769 BIT_VECTOR NewConfig; 00770 BIT_VECTOR OldConfig; 00771 00772 // printf("Float2Int ...\n"); 00773 00774 CLASS_STRUCT* float_classes = new CLASS_STRUCT[unicharset.size()]; 00775 iterate(LabeledClassList) 00776 { 00777 UnicityTableEqEq<int> font_set; 00778 MergeClass = (MERGE_CLASS) first_node (LabeledClassList); 00779 Class = &float_classes[unicharset.unichar_to_id(MergeClass->Label)]; 00780 NumProtos = MergeClass->Class->NumProtos; 00781 NumConfigs = MergeClass->Class->NumConfigs; 00782 font_set.move(&MergeClass->Class->font_set); 00783 Class->NumProtos = NumProtos; 00784 Class->MaxNumProtos = NumProtos; 00785 Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos); 00786 for(i=0; i < NumProtos; i++) 00787 { 00788 NewProto = ProtoIn(Class, i); 00789 OldProto = ProtoIn(MergeClass->Class, i); 00790 Values[0] = OldProto->X; 00791 Values[1] = OldProto->Y; 00792 Values[2] = OldProto->Angle; 00793 Normalize(Values); 00794 NewProto->X = OldProto->X; 00795 NewProto->Y = OldProto->Y; 00796 NewProto->Length = OldProto->Length; 00797 NewProto->Angle = OldProto->Angle; 00798 NewProto->A = Values[0]; 00799 NewProto->B = Values[1]; 00800 NewProto->C = Values[2]; 00801 } 00802 00803 Class->NumConfigs = NumConfigs; 00804 Class->MaxNumConfigs = NumConfigs; 00805 Class->font_set.move(&font_set); 00806 Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs); 00807 NumWords = WordsInVectorOfSize(NumProtos); 00808 for(i=0; i < NumConfigs; i++) 00809 { 00810 NewConfig = NewBitVector(NumProtos); 00811 OldConfig = MergeClass->Class->Configurations[i]; 00812 for(j=0; j < NumWords; j++) 00813 NewConfig[j] = OldConfig[j]; 00814 Class->Configurations[i] = NewConfig; 00815 } 00816 } 00817 return float_classes; 00818 } // SetUpForFloat2Int 00819 00820 /*--------------------------------------------------------------------------*/ 00821 void Normalize ( 00822 float *Values) 00823 { 00824 register float Slope; 00825 register float Intercept; 00826 register float Normalizer; 00827 00828 Slope = tan (Values [2] * 2 * PI); 00829 Intercept = Values [1] - Slope * Values [0]; 00830 Normalizer = 1 / sqrt (Slope * Slope + 1.0); 00831 00832 Values [0] = Slope * Normalizer; 00833 Values [1] = - Normalizer; 00834 Values [2] = Intercept * Normalizer; 00835 } // Normalize 00836 00837 /*-------------------------------------------------------------------------*/ 00838 void FreeNormProtoList ( 00839 LIST CharList) 00840 00841 { 00842 LABELEDLIST char_sample; 00843 00844 iterate (CharList) /* iterate through all of the fonts */ 00845 { 00846 char_sample = (LABELEDLIST) first_node (CharList); 00847 FreeLabeledList (char_sample); 00848 } 00849 destroy (CharList); 00850 00851 } // FreeNormProtoList 00852 00853 /*---------------------------------------------------------------------------*/ 00854 void AddToNormProtosList( 00855 LIST* NormProtoList, 00856 LIST ProtoList, 00857 char* CharName) 00858 { 00859 PROTOTYPE* Proto; 00860 LABELEDLIST LabeledProtoList; 00861 00862 LabeledProtoList = NewLabeledList(CharName); 00863 iterate(ProtoList) 00864 { 00865 Proto = (PROTOTYPE *) first_node (ProtoList); 00866 LabeledProtoList->List = push(LabeledProtoList->List, Proto); 00867 } 00868 *NormProtoList = push(*NormProtoList, LabeledProtoList); 00869 } 00870 00871 /*---------------------------------------------------------------------------*/ 00872 int NumberOfProtos( 00873 LIST ProtoList, 00874 BOOL8 CountSigProtos, 00875 BOOL8 CountInsigProtos) 00876 { 00877 int N = 0; 00878 PROTOTYPE *Proto; 00879 00880 iterate(ProtoList) 00881 { 00882 Proto = (PROTOTYPE *) first_node ( ProtoList ); 00883 if (( Proto->Significant && CountSigProtos ) || 00884 ( ! Proto->Significant && CountInsigProtos ) ) 00885 N++; 00886 } 00887 return(N); 00888 }