tesseract 3.04.01

classify/adaptive.cpp

Go to the documentation of this file.
00001 /******************************************************************************
00002  ** Filename:    adaptive.cpp
00003  ** Purpose:     Adaptive matcher.
00004  ** Author:      Dan Johnson
00005  ** History:     Fri Mar  8 10:00:21 1991, DSJ, Created.
00006  **
00007  ** (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00018 
00019 /*----------------------------------------------------------------------------
00020           Include Files and Type Defines
00021 ----------------------------------------------------------------------------*/
00022 #include "adaptive.h"
00023 #include "emalloc.h"
00024 #include "freelist.h"
00025 #include "globals.h"
00026 #include "classify.h"
00027 
00028 #ifdef __UNIX__
00029 #include <assert.h>
00030 #endif
00031 #include <stdio.h>
00032 
00033 /*----------------------------------------------------------------------------
00034               Public Code
00035 ----------------------------------------------------------------------------*/
00036 /*---------------------------------------------------------------------------*/
00049 void AddAdaptedClass(ADAPT_TEMPLATES Templates,
00050                      ADAPT_CLASS Class,
00051                      CLASS_ID ClassId) {
00052   INT_CLASS IntClass;
00053 
00054   assert (Templates != NULL);
00055   assert (Class != NULL);
00056   assert (LegalClassId (ClassId));
00057   assert (UnusedClassIdIn (Templates->Templates, ClassId));
00058   assert (Class->NumPermConfigs == 0);
00059 
00060   IntClass = NewIntClass (1, 1);
00061   AddIntClass (Templates->Templates, ClassId, IntClass);
00062 
00063   assert (Templates->Class[ClassId] == NULL);
00064   Templates->Class[ClassId] = Class;
00065 
00066 }                                /* AddAdaptedClass */
00067 
00068 
00069 /*---------------------------------------------------------------------------*/
00080 void FreeTempConfig(TEMP_CONFIG Config) {
00081   assert (Config != NULL);
00082 
00083   destroy_nodes (Config->ContextsSeen, memfree);
00084   FreeBitVector (Config->Protos);
00085   free_struct (Config, sizeof (TEMP_CONFIG_STRUCT), "TEMP_CONFIG_STRUCT");
00086 
00087 }                                /* FreeTempConfig */
00088 
00089 /*---------------------------------------------------------------------------*/
00090 void FreeTempProto(void *arg) {
00091   PROTO proto = (PROTO) arg;
00092 
00093   free_struct (proto, sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT");
00094 }
00095 
00096 void FreePermConfig(PERM_CONFIG Config) {
00097   assert(Config != NULL);
00098   delete [] Config->Ambigs;
00099   free_struct(Config, sizeof(PERM_CONFIG_STRUCT), "PERM_CONFIG_STRUCT");
00100 }
00101 
00102 /*---------------------------------------------------------------------------*/
00113 ADAPT_CLASS NewAdaptedClass() {
00114   ADAPT_CLASS Class;
00115   int i;
00116 
00117   Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
00118   Class->NumPermConfigs = 0;
00119   Class->MaxNumTimesSeen = 0;
00120   Class->TempProtos = NIL_LIST;
00121 
00122   Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
00123   Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
00124   zero_all_bits (Class->PermProtos, WordsInVectorOfSize (MAX_NUM_PROTOS));
00125   zero_all_bits (Class->PermConfigs, WordsInVectorOfSize (MAX_NUM_CONFIGS));
00126 
00127   for (i = 0; i < MAX_NUM_CONFIGS; i++)
00128     TempConfigFor (Class, i) = NULL;
00129 
00130   return (Class);
00131 
00132 }                                /* NewAdaptedClass */
00133 
00134 
00135 /*-------------------------------------------------------------------------*/
00136 void free_adapted_class(ADAPT_CLASS adapt_class) {
00137   int i;
00138 
00139   for (i = 0; i < MAX_NUM_CONFIGS; i++) {
00140     if (ConfigIsPermanent (adapt_class, i)
00141       && PermConfigFor (adapt_class, i) != NULL)
00142       FreePermConfig (PermConfigFor (adapt_class, i));
00143     else if (!ConfigIsPermanent (adapt_class, i)
00144       && TempConfigFor (adapt_class, i) != NULL)
00145       FreeTempConfig (TempConfigFor (adapt_class, i));
00146   }
00147   FreeBitVector (adapt_class->PermProtos);
00148   FreeBitVector (adapt_class->PermConfigs);
00149   destroy_nodes (adapt_class->TempProtos, FreeTempProto);
00150   Efree(adapt_class);
00151 }
00152 
00153 
00154 /*---------------------------------------------------------------------------*/
00155 namespace tesseract {
00167 ADAPT_TEMPLATES Classify::NewAdaptedTemplates(bool InitFromUnicharset) {
00168   ADAPT_TEMPLATES Templates;
00169   int i;
00170 
00171   Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
00172 
00173   Templates->Templates = NewIntTemplates ();
00174   Templates->NumPermClasses = 0;
00175   Templates->NumNonEmptyClasses = 0;
00176 
00177   /* Insert an empty class for each unichar id in unicharset */
00178   for (i = 0; i < MAX_NUM_CLASSES; i++) {
00179     Templates->Class[i] = NULL;
00180     if (InitFromUnicharset && i < unicharset.size()) {
00181       AddAdaptedClass(Templates, NewAdaptedClass(), i);
00182     }
00183   }
00184 
00185   return (Templates);
00186 
00187 }                                /* NewAdaptedTemplates */
00188 
00189 // Returns FontinfoId of the given config of the given adapted class.
00190 int Classify::GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId) {
00191   return (ConfigIsPermanent(Class, ConfigId) ?
00192       PermConfigFor(Class, ConfigId)->FontinfoId :
00193       TempConfigFor(Class, ConfigId)->FontinfoId);
00194 }
00195 
00196 }  // namespace tesseract
00197 
00198 /*----------------------------------------------------------------------------*/
00199 void free_adapted_templates(ADAPT_TEMPLATES templates) {
00200 
00201   if (templates != NULL) {
00202     int i;
00203     for (i = 0; i < (templates->Templates)->NumClasses; i++)
00204       free_adapted_class (templates->Class[i]);
00205     free_int_templates (templates->Templates);
00206     Efree(templates);
00207   }
00208 }
00209 
00210 
00211 /*---------------------------------------------------------------------------*/
00223 TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) {
00224   TEMP_CONFIG Config;
00225   int NumProtos = MaxProtoId + 1;
00226 
00227   Config =
00228     (TEMP_CONFIG) alloc_struct (sizeof (TEMP_CONFIG_STRUCT),
00229     "TEMP_CONFIG_STRUCT");
00230   Config->Protos = NewBitVector (NumProtos);
00231 
00232   Config->NumTimesSeen = 1;
00233   Config->MaxProtoId = MaxProtoId;
00234   Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos);
00235   Config->ContextsSeen = NIL_LIST;
00236   zero_all_bits (Config->Protos, Config->ProtoVectorSize);
00237   Config->FontinfoId = FontinfoId;
00238 
00239   return (Config);
00240 
00241 }                                /* NewTempConfig */
00242 
00243 
00244 /*---------------------------------------------------------------------------*/
00254 TEMP_PROTO NewTempProto() {
00255   return ((TEMP_PROTO)
00256     alloc_struct (sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT"));
00257 }                                /* NewTempProto */
00258 
00259 
00260 /*---------------------------------------------------------------------------*/
00261 namespace tesseract {
00273 void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) {
00274   int i;
00275   INT_CLASS IClass;
00276   ADAPT_CLASS AClass;
00277 
00278   fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
00279   fprintf (File, "Num classes = %d;  Num permanent classes = %d\n\n",
00280            Templates->NumNonEmptyClasses, Templates->NumPermClasses);
00281   fprintf (File, "   Id  NC NPC  NP NPP\n");
00282   fprintf (File, "------------------------\n");
00283 
00284   for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
00285     IClass = Templates->Templates->Class[i];
00286     AClass = Templates->Class[i];
00287     if (!IsEmptyAdaptedClass (AClass)) {
00288       fprintf (File, "%5d  %s %3d %3d %3d %3d\n",
00289         i, unicharset.id_to_unichar(i),
00290       IClass->NumConfigs, AClass->NumPermConfigs,
00291       IClass->NumProtos,
00292       IClass->NumProtos - count (AClass->TempProtos));
00293     }
00294   }
00295   fprintf (File, "\n");
00296 
00297 }                                /* PrintAdaptedTemplates */
00298 }  // namespace tesseract
00299 
00300 
00301 /*---------------------------------------------------------------------------*/
00313 ADAPT_CLASS ReadAdaptedClass(FILE *File) {
00314   int NumTempProtos;
00315   int NumConfigs;
00316   int i;
00317   ADAPT_CLASS Class;
00318   TEMP_PROTO TempProto;
00319 
00320   /* first read high level adapted class structure */
00321   Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
00322   fread ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
00323 
00324   /* then read in the definitions of the permanent protos and configs */
00325   Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
00326   Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
00327   fread ((char *) Class->PermProtos, sizeof (uinT32),
00328     WordsInVectorOfSize (MAX_NUM_PROTOS), File);
00329   fread ((char *) Class->PermConfigs, sizeof (uinT32),
00330     WordsInVectorOfSize (MAX_NUM_CONFIGS), File);
00331 
00332   /* then read in the list of temporary protos */
00333   fread ((char *) &NumTempProtos, sizeof (int), 1, File);
00334   Class->TempProtos = NIL_LIST;
00335   for (i = 0; i < NumTempProtos; i++) {
00336     TempProto =
00337       (TEMP_PROTO) alloc_struct (sizeof (TEMP_PROTO_STRUCT),
00338       "TEMP_PROTO_STRUCT");
00339     fread ((char *) TempProto, sizeof (TEMP_PROTO_STRUCT), 1, File);
00340     Class->TempProtos = push_last (Class->TempProtos, TempProto);
00341   }
00342 
00343   /* then read in the adapted configs */
00344   fread ((char *) &NumConfigs, sizeof (int), 1, File);
00345   for (i = 0; i < NumConfigs; i++)
00346     if (test_bit (Class->PermConfigs, i))
00347       Class->Config[i].Perm = ReadPermConfig (File);
00348     else
00349       Class->Config[i].Temp = ReadTempConfig (File);
00350 
00351   return (Class);
00352 
00353 }                                /* ReadAdaptedClass */
00354 
00355 
00356 /*---------------------------------------------------------------------------*/
00357 namespace tesseract {
00369 ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(FILE *File) {
00370   int i;
00371   ADAPT_TEMPLATES Templates;
00372 
00373   /* first read the high level adaptive template struct */
00374   Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
00375   fread ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
00376 
00377   /* then read in the basic integer templates */
00378   Templates->Templates = ReadIntTemplates (File);
00379 
00380   /* then read in the adaptive info for each class */
00381   for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
00382     Templates->Class[i] = ReadAdaptedClass (File);
00383   }
00384   return (Templates);
00385 
00386 }                                /* ReadAdaptedTemplates */
00387 }  // namespace tesseract
00388 
00389 
00390 /*---------------------------------------------------------------------------*/
00402 PERM_CONFIG ReadPermConfig(FILE *File) {
00403   PERM_CONFIG Config = (PERM_CONFIG) alloc_struct(sizeof(PERM_CONFIG_STRUCT),
00404                                                   "PERM_CONFIG_STRUCT");
00405   uinT8 NumAmbigs;
00406   fread ((char *) &NumAmbigs, sizeof(uinT8), 1, File);
00407   Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
00408   fread(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
00409   Config->Ambigs[NumAmbigs] = -1;
00410   fread(&(Config->FontinfoId), sizeof(int), 1, File);
00411 
00412   return (Config);
00413 
00414 }                                /* ReadPermConfig */
00415 
00416 
00417 /*---------------------------------------------------------------------------*/
00429 TEMP_CONFIG ReadTempConfig(FILE *File) {
00430   TEMP_CONFIG Config;
00431 
00432   Config =
00433     (TEMP_CONFIG) alloc_struct (sizeof (TEMP_CONFIG_STRUCT),
00434     "TEMP_CONFIG_STRUCT");
00435   fread ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
00436 
00437   Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG);
00438   fread ((char *) Config->Protos, sizeof (uinT32),
00439     Config->ProtoVectorSize, File);
00440 
00441   return (Config);
00442 
00443 }                                /* ReadTempConfig */
00444 
00445 
00446 /*---------------------------------------------------------------------------*/
00459 void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) {
00460   int NumTempProtos;
00461   LIST TempProtos;
00462   int i;
00463 
00464   /* first write high level adapted class structure */
00465   fwrite ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
00466 
00467   /* then write out the definitions of the permanent protos and configs */
00468   fwrite ((char *) Class->PermProtos, sizeof (uinT32),
00469     WordsInVectorOfSize (MAX_NUM_PROTOS), File);
00470   fwrite ((char *) Class->PermConfigs, sizeof (uinT32),
00471     WordsInVectorOfSize (MAX_NUM_CONFIGS), File);
00472 
00473   /* then write out the list of temporary protos */
00474   NumTempProtos = count (Class->TempProtos);
00475   fwrite ((char *) &NumTempProtos, sizeof (int), 1, File);
00476   TempProtos = Class->TempProtos;
00477   iterate (TempProtos) {
00478     void* proto = first_node(TempProtos);
00479     fwrite ((char *) proto, sizeof (TEMP_PROTO_STRUCT), 1, File);
00480   }
00481 
00482   /* then write out the adapted configs */
00483   fwrite ((char *) &NumConfigs, sizeof (int), 1, File);
00484   for (i = 0; i < NumConfigs; i++)
00485     if (test_bit (Class->PermConfigs, i))
00486       WritePermConfig (File, Class->Config[i].Perm);
00487     else
00488       WriteTempConfig (File, Class->Config[i].Temp);
00489 
00490 }                                /* WriteAdaptedClass */
00491 
00492 
00493 /*---------------------------------------------------------------------------*/
00494 namespace tesseract {
00505 void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) {
00506   int i;
00507 
00508   /* first write the high level adaptive template struct */
00509   fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
00510 
00511   /* then write out the basic integer templates */
00512   WriteIntTemplates (File, Templates->Templates, unicharset);
00513 
00514   /* then write out the adaptive info for each class */
00515   for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
00516     WriteAdaptedClass (File, Templates->Class[i],
00517       Templates->Templates->Class[i]->NumConfigs);
00518   }
00519 }                                /* WriteAdaptedTemplates */
00520 }  // namespace tesseract
00521 
00522 
00523 /*---------------------------------------------------------------------------*/
00535 void WritePermConfig(FILE *File, PERM_CONFIG Config) {
00536   uinT8 NumAmbigs = 0;
00537 
00538   assert (Config != NULL);
00539   while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs;
00540 
00541   fwrite((char *) &NumAmbigs, sizeof(uinT8), 1, File);
00542   fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
00543   fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
00544 }                                /* WritePermConfig */
00545 
00546 
00547 /*---------------------------------------------------------------------------*/
00559 void WriteTempConfig(FILE *File, TEMP_CONFIG Config) {
00560   assert (Config != NULL);
00561                                  /* contexts not yet implemented */
00562   assert (Config->ContextsSeen == NULL);
00563 
00564   fwrite ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
00565   fwrite ((char *) Config->Protos, sizeof (uinT32),
00566     Config->ProtoVectorSize, File);
00567 
00568 }                                /* WriteTempConfig */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines