tesseract 3.04.01

training/commontraining.cpp

Go to the documentation of this file.
00001 // Copyright 2008 Google Inc. All Rights Reserved.
00002 // Author: scharron@google.com (Samuel Charron)
00003 //
00004 // Licensed under the Apache License, Version 2.0 (the "License");
00005 // you may not use this file except in compliance with the License.
00006 // You may obtain a copy of the License at
00007 // http://www.apache.org/licenses/LICENSE-2.0
00008 // Unless required by applicable law or agreed to in writing, software
00009 // distributed under the License is distributed on an "AS IS" BASIS,
00010 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00011 // See the License for the specific language governing permissions and
00012 // limitations under the License.
00013 
00014 #include "commontraining.h"
00015 
00016 #include "allheaders.h"
00017 #include "ccutil.h"
00018 #include "classify.h"
00019 #include "cluster.h"
00020 #include "clusttool.h"
00021 #include "efio.h"
00022 #include "emalloc.h"
00023 #include "featdefs.h"
00024 #include "fontinfo.h"
00025 #include "freelist.h"
00026 #include "globals.h"
00027 #include "intfeaturespace.h"
00028 #include "mastertrainer.h"
00029 #include "mf.h"
00030 #include "ndminx.h"
00031 #include "oldlist.h"
00032 #include "params.h"
00033 #include "shapetable.h"
00034 #include "tessdatamanager.h"
00035 #include "tessopt.h"
00036 #include "tprintf.h"
00037 #include "unicity_table.h"
00038 
00039 #include <math.h>
00040 
00041 using tesseract::CCUtil;
00042 using tesseract::FontInfo;
00043 using tesseract::IntFeatureSpace;
00044 using tesseract::ParamUtils;
00045 using tesseract::ShapeTable;
00046 
00047 // Global Variables (shared by the training tools that link this file).
00048 
00049 // Global clustering configuration. The initializer supplies the defaults; ParseArguments() later overwrites MinSamples, MaxIllegal, Independence and Confidence with the clusterconfig_* flag values clamped to [0, 1].
00050 // The defaults correspond to: -M 0.625   -B 0.05   -I 1.0   -C 1e-6.
00051 CLUSTERCONFIG Config = { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 };
00052 FEATURE_DEFS_STRUCT feature_defs;  // Filled in by InitFeatureDefs() inside LoadTrainingData().
00053 CCUtil ccutil;  // Its params() receive the settings read from --configfile in ParseArguments().
00054 
00055 INT_PARAM_FLAG(debug_level, 0, "Level of Trainer debugging");  // Passed to the MasterTrainer constructor.
00056 INT_PARAM_FLAG(load_images, 0, "Load images with tr files");  // Non-zero: LoadTrainingData() also loads <page>.tif.
00057 STRING_PARAM_FLAG(configfile, "", "File to load more configs from");
00058 STRING_PARAM_FLAG(D, "", "Directory to write output files to");  // Becomes the file prefix ("<D>/") in LoadTrainingData().
00059 STRING_PARAM_FLAG(F, "font_properties", "File listing font properties");
00060 STRING_PARAM_FLAG(X, "", "File listing font xheights");
00061 STRING_PARAM_FLAG(U, "unicharset", "File to load unicharset from");
00062 STRING_PARAM_FLAG(O, "", "File to write unicharset to");
00063 STRING_PARAM_FLAG(T, "", "File to load trainer from");  // When set, the trainer is deserialized instead of reading .tr files.
00064 STRING_PARAM_FLAG(output_trainer, "", "File to write trainer to");
00065 STRING_PARAM_FLAG(test_ch, "", "UTF8 test character string");  // Enables debug output in MergeInsignificantProtos() for the matching label.
00066 DOUBLE_PARAM_FLAG(clusterconfig_min_samples_fraction, Config.MinSamples,  // Default comes from Config above; note ParseArguments() clamps the value to [0, 1].
00067                   "Min number of samples per proto as % of total");
00068 DOUBLE_PARAM_FLAG(clusterconfig_max_illegal, Config.MaxIllegal,  // Default comes from Config above; clamped to [0, 1] in ParseArguments().
00069                   "Max percentage of samples in a cluster which have more"
00070                   " than 1 feature in that cluster");
00071 DOUBLE_PARAM_FLAG(clusterconfig_independence, Config.Independence,  // Default comes from Config above; clamped to [0, 1] in ParseArguments().
00072                   "Desired independence between dimensions");
00073 DOUBLE_PARAM_FLAG(clusterconfig_confidence, Config.Confidence,  // Default comes from Config above; clamped to [0, 1] in ParseArguments().
00074                   "Desired confidence in prototypes created");
00075 
00088 void ParseArguments(int* argc, char ***argv) {
00089   STRING usage;
00090   if (*argc) {
00091     usage += (*argv)[0];
00092   }
00093   usage += " [.tr files ...]";
00094   tesseract::ParseCommandLineFlags(usage.c_str(), argc, argv, true);
00095   // Record the index of the first non-flag argument to 1, since we set
00096   // remove_flags to true when parsing the flags.
00097   tessoptind = 1;
00098   // Set some global values based on the flags.
00099   Config.MinSamples =
00100       MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_min_samples_fraction)));
00101   Config.MaxIllegal =
00102       MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_max_illegal)));
00103   Config.Independence =
00104       MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_independence)));
00105   Config.Confidence =
00106       MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_confidence)));
00107   // Set additional parameters from config file if specified.
00108   if (!FLAGS_configfile.empty()) {
00109     tesseract::ParamUtils::ReadParamsFile(
00110         FLAGS_configfile.c_str(),
00111         tesseract::SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
00112         ccutil.params());
00113   }
00114 }
00115 
00116 namespace tesseract {
00117 // Helper loads shape table from the given file.
00118 ShapeTable* LoadShapeTable(const STRING& file_prefix) {
00119   ShapeTable* shape_table = NULL;
00120   STRING shape_table_file = file_prefix;
00121   shape_table_file += kShapeTableFileSuffix;
00122   FILE* shape_fp = fopen(shape_table_file.string(), "rb");
00123   if (shape_fp != NULL) {
00124     shape_table = new ShapeTable;
00125     if (!shape_table->DeSerialize(false, shape_fp)) {
00126       delete shape_table;
00127       shape_table = NULL;
00128       tprintf("Error: Failed to read shape table %s\n",
00129               shape_table_file.string());
00130     } else {
00131       int num_shapes = shape_table->NumShapes();
00132       tprintf("Read shape table %s of %d shapes\n",
00133               shape_table_file.string(), num_shapes);
00134     }
00135     fclose(shape_fp);
00136   } else {
00137     tprintf("Warning: No shape table file present: %s\n",
00138             shape_table_file.string());
00139   }
00140   return shape_table;
00141 }
00142 
00143 // Helper to write the shape_table.
00144 void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table) {
00145   STRING shape_table_file = file_prefix;
00146   shape_table_file += kShapeTableFileSuffix;
00147   FILE* fp = fopen(shape_table_file.string(), "wb");
00148   if (fp != NULL) {
00149     if (!shape_table.Serialize(fp)) {
00150       fprintf(stderr, "Error writing shape table: %s\n",
00151               shape_table_file.string());
00152     }
00153     fclose(fp);
00154   } else {
00155     fprintf(stderr, "Error creating shape table: %s\n",
00156             shape_table_file.string());
00157   }
00158 }
00159 
00175 MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
00176                                 bool replication,
00177                                 ShapeTable** shape_table,
00178                                 STRING* file_prefix) {
00179   InitFeatureDefs(&feature_defs);
00180   InitIntegerFX();
00181   *file_prefix = "";
00182   if (!FLAGS_D.empty()) {
00183     *file_prefix += FLAGS_D.c_str();
00184     *file_prefix += "/";
00185   }
00186   // If we are shape clustering (NULL shape_table) or we successfully load
00187   // a shape_table written by a previous shape clustering, then
00188   // shape_analysis will be true, meaning that the MasterTrainer will replace
00189   // some members of the unicharset with their fragments.
00190   bool shape_analysis = false;
00191   if (shape_table != NULL) {
00192     *shape_table = LoadShapeTable(*file_prefix);
00193     if (*shape_table != NULL)
00194       shape_analysis = true;
00195   } else {
00196     shape_analysis = true;
00197   }
00198   MasterTrainer* trainer = new MasterTrainer(NM_CHAR_ANISOTROPIC,
00199                                              shape_analysis,
00200                                              replication,
00201                                              FLAGS_debug_level);
00202   IntFeatureSpace fs;
00203   fs.Init(kBoostXYBuckets, kBoostXYBuckets, kBoostDirBuckets);
00204   if (FLAGS_T.empty()) {
00205     trainer->LoadUnicharset(FLAGS_U.c_str());
00206     // Get basic font information from font_properties.
00207     if (!FLAGS_F.empty()) {
00208       if (!trainer->LoadFontInfo(FLAGS_F.c_str())) {
00209         delete trainer;
00210         return NULL;
00211       }
00212     }
00213     if (!FLAGS_X.empty()) {
00214       if (!trainer->LoadXHeights(FLAGS_X.c_str())) {
00215         delete trainer;
00216         return NULL;
00217       }
00218     }
00219     trainer->SetFeatureSpace(fs);
00220     const char* page_name;
00221     // Load training data from .tr files on the command line.
00222     while ((page_name = GetNextFilename(argc, argv)) != NULL) {
00223       tprintf("Reading %s ...\n", page_name);
00224       trainer->ReadTrainingSamples(page_name, feature_defs, false);
00225 
00226       // If there is a file with [lang].[fontname].exp[num].fontinfo present,
00227       // read font spacing information in to fontinfo_table.
00228       int pagename_len = strlen(page_name);
00229       char *fontinfo_file_name = new char[pagename_len + 7];
00230       strncpy(fontinfo_file_name, page_name, pagename_len - 2);  // remove "tr"
00231       strcpy(fontinfo_file_name + pagename_len - 2, "fontinfo");  // +"fontinfo"
00232       trainer->AddSpacingInfo(fontinfo_file_name);
00233       delete[] fontinfo_file_name;
00234 
00235       // Load the images into memory if required by the classifier.
00236       if (FLAGS_load_images) {
00237         STRING image_name = page_name;
00238         // Chop off the tr and replace with tif. Extension must be tif!
00239         image_name.truncate_at(image_name.length() - 2);
00240         image_name += "tif";
00241         trainer->LoadPageImages(image_name.string());
00242       }
00243     }
00244     trainer->PostLoadCleanup();
00245     // Write the master trainer if required.
00246     if (!FLAGS_output_trainer.empty()) {
00247       FILE* fp = fopen(FLAGS_output_trainer.c_str(), "wb");
00248       if (fp == NULL) {
00249         tprintf("Can't create saved trainer data!\n");
00250       } else {
00251         trainer->Serialize(fp);
00252         fclose(fp);
00253       }
00254     }
00255   } else {
00256     bool success = false;
00257     tprintf("Loading master trainer from file:%s\n",
00258             FLAGS_T.c_str());
00259     FILE* fp = fopen(FLAGS_T.c_str(), "rb");
00260     if (fp == NULL) {
00261       tprintf("Can't read file %s to initialize master trainer\n",
00262               FLAGS_T.c_str());
00263     } else {
00264       success = trainer->DeSerialize(false, fp);
00265       fclose(fp);
00266     }
00267     if (!success) {
00268       tprintf("Deserialize of master trainer failed!\n");
00269       delete trainer;
00270       return NULL;
00271     }
00272     trainer->SetFeatureSpace(fs);
00273   }
00274   trainer->PreTrainingSetup();
00275   if (!FLAGS_O.empty() &&
00276       !trainer->unicharset().save_to_file(FLAGS_O.c_str())) {
00277     fprintf(stderr, "Failed to save unicharset to file %s\n", FLAGS_O.c_str());
00278     delete trainer;
00279     return NULL;
00280   }
00281   if (shape_table != NULL) {
00282     // If we previously failed to load a shapetable, then shape clustering
00283     // wasn't run so make a flat one now.
00284     if (*shape_table == NULL) {
00285       *shape_table = new ShapeTable;
00286       trainer->SetupFlatShapeTable(*shape_table);
00287       tprintf("Flat shape table summary: %s\n",
00288               (*shape_table)->SummaryStr().string());
00289     }
00290     (*shape_table)->set_unicharset(trainer->unicharset());
00291   }
00292   return trainer;
00293 }
00294 
00295 }  // namespace tesseract.
00296 
00297 /*---------------------------------------------------------------------------*/
00310 const char *GetNextFilename(int argc, const char* const * argv) {
00311   if (tessoptind < argc)
00312     return argv[tessoptind++];
00313   else
00314     return NULL;
00315 }       /* GetNextFilename */
00316 
00317 
00318 
00319 /*---------------------------------------------------------------------------*/
00331 LABELEDLIST FindList (
00332     LIST        List,
00333     char        *Label)
00334 {
00335   LABELEDLIST   LabeledList;
00336 
00337   iterate (List)
00338   {
00339     LabeledList = (LABELEDLIST) first_node (List);
00340     if (strcmp (LabeledList->Label, Label) == 0)
00341       return (LabeledList);
00342   }
00343   return (NULL);
00344 
00345 }       /* FindList */
00346 
00347 /*---------------------------------------------------------------------------*/
00357 LABELEDLIST NewLabeledList (
00358     const char  *Label)
00359 {
00360   LABELEDLIST   LabeledList;
00361 
00362   LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE));
00363   LabeledList->Label = (char*)Emalloc (strlen (Label)+1);
00364   strcpy (LabeledList->Label, Label);
00365   LabeledList->List = NIL_LIST;
00366   LabeledList->SampleCount = 0;
00367   LabeledList->font_sample_count = 0;
00368   return (LabeledList);
00369 
00370 }       /* NewLabeledList */
00371 
00372 /*---------------------------------------------------------------------------*/
00373 // TODO(rays) This is now used only by cntraining. Convert cntraining to use
00374 // the new method or get rid of it entirely.
00394 void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
00395                          const char *feature_name, int max_samples,
00396                          UNICHARSET* unicharset,
00397                          FILE* file, LIST* training_samples) {
00398   char    buffer[2048];
00399   char    unichar[UNICHAR_LEN + 1];
00400   LABELEDLIST char_sample;
00401   FEATURE_SET feature_samples;
00402   CHAR_DESC char_desc;
00403   int   i;
00404   int feature_type = ShortNameToFeatureType(feature_defs, feature_name);
00405   // Zero out the font_sample_count for all the classes.
00406   LIST it = *training_samples;
00407   iterate(it) {
00408     char_sample = reinterpret_cast<LABELEDLIST>(first_node(it));
00409     char_sample->font_sample_count = 0;
00410   }
00411 
00412   while (fgets(buffer, 2048, file) != NULL) {
00413     if (buffer[0] == '\n')
00414       continue;
00415 
00416     sscanf(buffer, "%*s %s", unichar);
00417     if (unicharset != NULL && !unicharset->contains_unichar(unichar)) {
00418       unicharset->unichar_insert(unichar);
00419       if (unicharset->size() > MAX_NUM_CLASSES) {
00420         tprintf("Error: Size of unicharset in training is "
00421                 "greater than MAX_NUM_CLASSES\n");
00422         exit(1);
00423       }
00424     }
00425     char_sample = FindList(*training_samples, unichar);
00426     if (char_sample == NULL) {
00427       char_sample = NewLabeledList(unichar);
00428       *training_samples = push(*training_samples, char_sample);
00429     }
00430     char_desc = ReadCharDescription(feature_defs, file);
00431     feature_samples = char_desc->FeatureSets[feature_type];
00432     if (char_sample->font_sample_count < max_samples || max_samples <= 0) {
00433       char_sample->List = push(char_sample->List, feature_samples);
00434       char_sample->SampleCount++;
00435       char_sample->font_sample_count++;
00436     } else {
00437       FreeFeatureSet(feature_samples);
00438     }
00439     for (i = 0; i < char_desc->NumFeatureSets; i++) {
00440       if (feature_type != i)
00441         FreeFeatureSet(char_desc->FeatureSets[i]);
00442     }
00443     free(char_desc);
00444   }
00445 }  // ReadTrainingSamples
00446 
00447 
00448 /*---------------------------------------------------------------------------*/
00458 void FreeTrainingSamples(LIST CharList) {
00459   LABELEDLIST char_sample;
00460   FEATURE_SET FeatureSet;
00461   LIST FeatureList;
00462 
00463 
00464   iterate(CharList) {  /* iterate through all of the fonts */
00465     char_sample = (LABELEDLIST) first_node(CharList);
00466     FeatureList = char_sample->List;
00467     iterate(FeatureList) {  /* iterate through all of the classes */
00468       FeatureSet = (FEATURE_SET) first_node(FeatureList);
00469       FreeFeatureSet(FeatureSet);
00470     }
00471     FreeLabeledList(char_sample);
00472   }
00473   destroy(CharList);
00474 }  /* FreeTrainingSamples */
00475 
00476 /*---------------------------------------------------------------------------*/
00487 void FreeLabeledList(LABELEDLIST LabeledList) {
00488   destroy(LabeledList->List);
00489   free(LabeledList->Label);
00490   free(LabeledList);
00491 }  /* FreeLabeledList */
00492 
00493 /*---------------------------------------------------------------------------*/
00507 CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs,
00508                               LABELEDLIST char_sample,
00509                               const char* program_feature_type) {
00510   uinT16 N;
00511   int i, j;
00512   FLOAT32 *Sample = NULL;
00513   CLUSTERER *Clusterer;
00514   inT32 CharID;
00515   LIST FeatureList = NULL;
00516   FEATURE_SET FeatureSet = NULL;
00517 
00518   int desc_index = ShortNameToFeatureType(FeatureDefs, program_feature_type);
00519   N = FeatureDefs.FeatureDesc[desc_index]->NumParams;
00520   Clusterer = MakeClusterer(N, FeatureDefs.FeatureDesc[desc_index]->ParamDesc);
00521 
00522   FeatureList = char_sample->List;
00523   CharID = 0;
00524   iterate(FeatureList) {
00525     FeatureSet = (FEATURE_SET) first_node(FeatureList);
00526     for (i = 0; i < FeatureSet->MaxNumFeatures; i++) {
00527       if (Sample == NULL)
00528         Sample = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
00529       for (j = 0; j < N; j++)
00530         Sample[j] = FeatureSet->Features[i]->Params[j];
00531       MakeSample (Clusterer, Sample, CharID);
00532     }
00533     CharID++;
00534   }
00535   if ( Sample != NULL ) free( Sample );
00536   return( Clusterer );
00537 
00538 }       /* SetUpForClustering */
00539 
00540 /*------------------------------------------------------------------------*/
00541 void MergeInsignificantProtos(LIST ProtoList, const char* label,
00542                               CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
00543   PROTOTYPE     *Prototype;
00544   bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0;
00545 
00546   LIST pProtoList = ProtoList;
00547   iterate(pProtoList) {
00548     Prototype = (PROTOTYPE *) first_node (pProtoList);
00549     if (Prototype->Significant || Prototype->Merged)
00550       continue;
00551     FLOAT32 best_dist = 0.125;
00552     PROTOTYPE* best_match = NULL;
00553     // Find the nearest alive prototype.
00554     LIST list_it = ProtoList;
00555     iterate(list_it) {
00556       PROTOTYPE* test_p = (PROTOTYPE *) first_node (list_it);
00557       if (test_p != Prototype && !test_p->Merged) {
00558         FLOAT32 dist = ComputeDistance(Clusterer->SampleSize,
00559                                        Clusterer->ParamDesc,
00560                                        Prototype->Mean, test_p->Mean);
00561         if (dist < best_dist) {
00562           best_match = test_p;
00563           best_dist = dist;
00564         }
00565       }
00566     }
00567     if (best_match != NULL && !best_match->Significant) {
00568       if (debug)
00569         tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n",
00570                 best_match->NumSamples, Prototype->NumSamples,
00571                 best_match->Mean[0], best_match->Mean[1],
00572                 Prototype->Mean[0], Prototype->Mean[1]);
00573       best_match->NumSamples = MergeClusters(Clusterer->SampleSize,
00574                                              Clusterer->ParamDesc,
00575                                              best_match->NumSamples,
00576                                              Prototype->NumSamples,
00577                                              best_match->Mean,
00578                                              best_match->Mean, Prototype->Mean);
00579       Prototype->NumSamples = 0;
00580       Prototype->Merged = 1;
00581     } else if (best_match != NULL) {
00582       if (debug)
00583         tprintf("Red proto at %g,%g matched a green one at %g,%g\n",
00584                 Prototype->Mean[0], Prototype->Mean[1],
00585                 best_match->Mean[0], best_match->Mean[1]);
00586       Prototype->Merged = 1;
00587     }
00588   }
00589   // Mark significant those that now have enough samples.
00590   int min_samples = (inT32) (Config->MinSamples * Clusterer->NumChar);
00591   pProtoList = ProtoList;
00592   iterate(pProtoList) {
00593     Prototype = (PROTOTYPE *) first_node (pProtoList);
00594     // Process insignificant protos that do not match a green one
00595     if (!Prototype->Significant && Prototype->NumSamples >= min_samples &&
00596         !Prototype->Merged) {
00597       if (debug)
00598         tprintf("Red proto at %g,%g becoming green\n",
00599                 Prototype->Mean[0], Prototype->Mean[1]);
00600       Prototype->Significant = true;
00601     }
00602   }
00603 }       /* MergeInsignificantProtos */
00604 
00605 /*-----------------------------------------------------------------------------*/
00606 void CleanUpUnusedData(
00607     LIST ProtoList)
00608 {
00609   PROTOTYPE* Prototype;
00610 
00611   iterate(ProtoList)
00612   {
00613     Prototype = (PROTOTYPE *) first_node (ProtoList);
00614     if(Prototype->Variance.Elliptical != NULL)
00615     {
00616       memfree(Prototype->Variance.Elliptical);
00617       Prototype->Variance.Elliptical = NULL;
00618     }
00619     if(Prototype->Magnitude.Elliptical != NULL)
00620     {
00621       memfree(Prototype->Magnitude.Elliptical);
00622       Prototype->Magnitude.Elliptical = NULL;
00623     }
00624     if(Prototype->Weight.Elliptical != NULL)
00625     {
00626       memfree(Prototype->Weight.Elliptical);
00627       Prototype->Weight.Elliptical = NULL;
00628     }
00629   }
00630 }
00631 
00632 /*------------------------------------------------------------------------*/
00633 LIST RemoveInsignificantProtos(
00634     LIST ProtoList,
00635     BOOL8 KeepSigProtos,
00636     BOOL8 KeepInsigProtos,
00637     int N)
00638 
00639 {
00640   LIST NewProtoList = NIL_LIST;
00641   LIST pProtoList;
00642   PROTOTYPE* Proto;
00643   PROTOTYPE* NewProto;
00644   int i;
00645 
00646   pProtoList = ProtoList;
00647   iterate(pProtoList)
00648   {
00649     Proto = (PROTOTYPE *) first_node (pProtoList);
00650     if ((Proto->Significant && KeepSigProtos) ||
00651         (!Proto->Significant && KeepInsigProtos))
00652     {
00653       NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));
00654 
00655       NewProto->Mean = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
00656       NewProto->Significant = Proto->Significant;
00657       NewProto->Style = Proto->Style;
00658       NewProto->NumSamples = Proto->NumSamples;
00659       NewProto->Cluster = NULL;
00660       NewProto->Distrib = NULL;
00661 
00662       for (i=0; i < N; i++)
00663         NewProto->Mean[i] = Proto->Mean[i];
00664       if (Proto->Variance.Elliptical != NULL)
00665       {
00666         NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
00667         for (i=0; i < N; i++)
00668           NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
00669       }
00670       else
00671         NewProto->Variance.Elliptical = NULL;
00672       //---------------------------------------------
00673       if (Proto->Magnitude.Elliptical != NULL)
00674       {
00675         NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
00676         for (i=0; i < N; i++)
00677           NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
00678       }
00679       else
00680         NewProto->Magnitude.Elliptical = NULL;
00681       //------------------------------------------------
00682       if (Proto->Weight.Elliptical != NULL)
00683       {
00684         NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
00685         for (i=0; i < N; i++)
00686           NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
00687       }
00688       else
00689         NewProto->Weight.Elliptical = NULL;
00690 
00691       NewProto->TotalMagnitude = Proto->TotalMagnitude;
00692       NewProto->LogMagnitude = Proto->LogMagnitude;
00693       NewProtoList = push_last(NewProtoList, NewProto);
00694     }
00695   }
00696   FreeProtoList(&ProtoList);
00697   return (NewProtoList);
00698 }       /* RemoveInsignificantProtos */
00699 
00700 /*----------------------------------------------------------------------------*/
00701 MERGE_CLASS FindClass (
00702     LIST        List,
00703     const char  *Label)
00704 {
00705   MERGE_CLASS   MergeClass;
00706 
00707   iterate (List)
00708   {
00709     MergeClass = (MERGE_CLASS) first_node (List);
00710     if (strcmp (MergeClass->Label, Label) == 0)
00711       return (MergeClass);
00712   }
00713   return (NULL);
00714 
00715 }       /* FindClass */
00716 
00717 /*---------------------------------------------------------------------------*/
00718 MERGE_CLASS NewLabeledClass (
00719     const char  *Label)
00720 {
00721   MERGE_CLASS   MergeClass;
00722 
00723   MergeClass = new MERGE_CLASS_NODE;
00724   MergeClass->Label = (char*)Emalloc (strlen (Label)+1);
00725   strcpy (MergeClass->Label, Label);
00726   MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS);
00727   return (MergeClass);
00728 
00729 }       /* NewLabeledClass */
00730 
00731 /*-----------------------------------------------------------------------------*/
00741 void FreeLabeledClassList (
00742     LIST        ClassList)
00743 {
00744   MERGE_CLASS   MergeClass;
00745 
00746   iterate (ClassList)           /* iterate through all of the fonts */
00747   {
00748     MergeClass = (MERGE_CLASS) first_node (ClassList);
00749     free (MergeClass->Label);
00750     FreeClass(MergeClass->Class);
00751     delete MergeClass;
00752   }
00753   destroy (ClassList);
00754 
00755 }       /* FreeLabeledClassList */
00756 
00757 /* SetUpForFloat2Int */
00758 CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset,
00759                                 LIST LabeledClassList) {
00760   MERGE_CLASS   MergeClass;
00761   CLASS_TYPE            Class;
00762   int                           NumProtos;
00763   int                           NumConfigs;
00764   int                           NumWords;
00765   int                           i, j;
00766   float                 Values[3];
00767   PROTO                 NewProto;
00768   PROTO                 OldProto;
00769   BIT_VECTOR            NewConfig;
00770   BIT_VECTOR            OldConfig;
00771 
00772   //    printf("Float2Int ...\n");
00773 
00774   CLASS_STRUCT* float_classes = new CLASS_STRUCT[unicharset.size()];
00775   iterate(LabeledClassList)
00776   {
00777     UnicityTableEqEq<int>   font_set;
00778     MergeClass = (MERGE_CLASS) first_node (LabeledClassList);
00779     Class = &float_classes[unicharset.unichar_to_id(MergeClass->Label)];
00780     NumProtos = MergeClass->Class->NumProtos;
00781     NumConfigs = MergeClass->Class->NumConfigs;
00782     font_set.move(&MergeClass->Class->font_set);
00783     Class->NumProtos = NumProtos;
00784     Class->MaxNumProtos = NumProtos;
00785     Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos);
00786     for(i=0; i < NumProtos; i++)
00787     {
00788       NewProto = ProtoIn(Class, i);
00789       OldProto = ProtoIn(MergeClass->Class, i);
00790       Values[0] = OldProto->X;
00791       Values[1] = OldProto->Y;
00792       Values[2] = OldProto->Angle;
00793       Normalize(Values);
00794       NewProto->X = OldProto->X;
00795       NewProto->Y = OldProto->Y;
00796       NewProto->Length = OldProto->Length;
00797       NewProto->Angle = OldProto->Angle;
00798       NewProto->A = Values[0];
00799       NewProto->B = Values[1];
00800       NewProto->C = Values[2];
00801     }
00802 
00803     Class->NumConfigs = NumConfigs;
00804     Class->MaxNumConfigs = NumConfigs;
00805     Class->font_set.move(&font_set);
00806     Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs);
00807     NumWords = WordsInVectorOfSize(NumProtos);
00808     for(i=0; i < NumConfigs; i++)
00809     {
00810       NewConfig = NewBitVector(NumProtos);
00811       OldConfig = MergeClass->Class->Configurations[i];
00812       for(j=0; j < NumWords; j++)
00813         NewConfig[j] = OldConfig[j];
00814       Class->Configurations[i] = NewConfig;
00815     }
00816   }
00817   return float_classes;
00818 } // SetUpForFloat2Int
00819 
00820 /*--------------------------------------------------------------------------*/
00821 void Normalize (
00822     float  *Values)
00823 {
00824   register float Slope;
00825   register float Intercept;
00826   register float Normalizer;
00827 
00828   Slope      = tan (Values [2] * 2 * PI);
00829   Intercept  = Values [1] - Slope * Values [0];
00830   Normalizer = 1 / sqrt (Slope * Slope + 1.0);
00831 
00832   Values [0] = Slope * Normalizer;
00833   Values [1] = - Normalizer;
00834   Values [2] = Intercept * Normalizer;
00835 } // Normalize
00836 
00837 /*-------------------------------------------------------------------------*/
00838 void FreeNormProtoList (
00839     LIST        CharList)
00840 
00841 {
00842   LABELEDLIST   char_sample;
00843 
00844   iterate (CharList)            /* iterate through all of the fonts */
00845   {
00846     char_sample = (LABELEDLIST) first_node (CharList);
00847     FreeLabeledList (char_sample);
00848   }
00849   destroy (CharList);
00850 
00851 }       // FreeNormProtoList
00852 
00853 /*---------------------------------------------------------------------------*/
00854 void AddToNormProtosList(
00855     LIST* NormProtoList,
00856     LIST ProtoList,
00857     char* CharName)
00858 {
00859   PROTOTYPE* Proto;
00860   LABELEDLIST LabeledProtoList;
00861 
00862   LabeledProtoList = NewLabeledList(CharName);
00863   iterate(ProtoList)
00864   {
00865     Proto = (PROTOTYPE *) first_node (ProtoList);
00866     LabeledProtoList->List = push(LabeledProtoList->List, Proto);
00867   }
00868   *NormProtoList = push(*NormProtoList, LabeledProtoList);
00869 }
00870 
00871 /*---------------------------------------------------------------------------*/
00872 int NumberOfProtos(
00873     LIST ProtoList,
00874     BOOL8       CountSigProtos,
00875     BOOL8       CountInsigProtos)
00876 {
00877   int N = 0;
00878   PROTOTYPE     *Proto;
00879 
00880   iterate(ProtoList)
00881   {
00882     Proto = (PROTOTYPE *) first_node ( ProtoList );
00883     if (( Proto->Significant && CountSigProtos )        ||
00884         ( ! Proto->Significant && CountInsigProtos ) )
00885       N++;
00886   }
00887   return(N);
00888 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines