tesseract 3.04.01

classify/clusttool.cpp

Go to the documentation of this file.
00001 /******************************************************************************
00002  **     Filename:       clusttool.cpp
00003  **     Purpose:        Misc. tools for use with the clustering routines
00004  **     Author:         Dan Johnson
00005  **     History:        6/6/89, DSJ, Created.
00006  **
00007  **     (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00018 
00019 //--------------------------Include Files----------------------------------
00020 #include "clusttool.h"
00021 #include "const.h"
00022 #include "danerror.h"
00023 #include "emalloc.h"
00024 #include "scanutils.h"
00025 #include <stdio.h>
00026 #include <math.h>
00027 
00028 //---------------Global Data Definitions and Declarations--------------------
00029 #define TOKENSIZE 80             //< max size of tokens read from an input file
00030 #define MAXSAMPLESIZE 65535      //< max num of dimensions in feature space
00031 //#define MAXBLOCKSIZE  65535   //< max num of samples in a character (block size)
00032 
00043 uinT16 ReadSampleSize(FILE *File) {
00044   int SampleSize;
00045 
00046   if ((tfscanf(File, "%d", &SampleSize) != 1) ||
00047     (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE))
00048     DoError (ILLEGALSAMPLESIZE, "Illegal sample size");
00049   return (SampleSize);
00050 }
00051 
00066 PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) {
00067   int i;
00068   PARAM_DESC *ParamDesc;
00069   char Token[TOKENSIZE];
00070 
00071   ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC));
00072   for (i = 0; i < N; i++) {
00073     if (tfscanf(File, "%s", Token) != 1)
00074       DoError (ILLEGALCIRCULARSPEC,
00075         "Illegal circular/linear specification");
00076     if (Token[0] == 'c')
00077       ParamDesc[i].Circular = TRUE;
00078     else
00079       ParamDesc[i].Circular = FALSE;
00080 
00081     if (tfscanf(File, "%s", Token) != 1)
00082       DoError (ILLEGALESSENTIALSPEC,
00083         "Illegal essential/non-essential spec");
00084     if (Token[0] == 'e')
00085       ParamDesc[i].NonEssential = FALSE;
00086     else
00087       ParamDesc[i].NonEssential = TRUE;
00088     if (tfscanf(File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) != 2)
00089       DoError (ILLEGALMINMAXSPEC, "Illegal min or max specification");
00090     ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
00091     ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
00092     ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
00093   }
00094   return (ParamDesc);
00095 }
00096 
00113 PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
00114   char Token[TOKENSIZE];
00115   int Status;
00116   PROTOTYPE *Proto;
00117   int SampleCount;
00118   int i;
00119 
00120   if ((Status = tfscanf(File, "%s", Token)) == 1) {
00121     Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE));
00122     Proto->Cluster = NULL;
00123     if (Token[0] == 's')
00124       Proto->Significant = TRUE;
00125     else
00126       Proto->Significant = FALSE;
00127 
00128     Proto->Style = ReadProtoStyle (File);
00129 
00130     if ((tfscanf(File, "%d", &SampleCount) != 1) || (SampleCount < 0))
00131       DoError (ILLEGALSAMPLECOUNT, "Illegal sample count");
00132     Proto->NumSamples = SampleCount;
00133 
00134     Proto->Mean = ReadNFloats (File, N, NULL);
00135     if (Proto->Mean == NULL)
00136       DoError (ILLEGALMEANSPEC, "Illegal prototype mean");
00137 
00138     switch (Proto->Style) {
00139       case spherical:
00140         if (ReadNFloats (File, 1, &(Proto->Variance.Spherical)) == NULL)
00141           DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
00142         Proto->Magnitude.Spherical =
00143           1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical));
00144         Proto->TotalMagnitude =
00145           pow (Proto->Magnitude.Spherical, (float) N);
00146         Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
00147         Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
00148         Proto->Distrib = NULL;
00149         break;
00150       case elliptical:
00151         Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
00152         if (Proto->Variance.Elliptical == NULL)
00153           DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
00154         Proto->Magnitude.Elliptical =
00155           (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
00156         Proto->Weight.Elliptical =
00157           (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
00158         Proto->TotalMagnitude = 1.0;
00159         for (i = 0; i < N; i++) {
00160           Proto->Magnitude.Elliptical[i] =
00161             1.0 /
00162             sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i]));
00163           Proto->Weight.Elliptical[i] =
00164             1.0 / Proto->Variance.Elliptical[i];
00165           Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
00166         }
00167         Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
00168         Proto->Distrib = NULL;
00169         break;
00170       case mixed:
00171         Proto->Distrib =
00172           (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));
00173         for (i = 0; i < N; i++) {
00174           if (tfscanf(File, "%s", Token) != 1)
00175             DoError (ILLEGALDISTRIBUTION,
00176               "Illegal prototype distribution");
00177           switch (Token[0]) {
00178             case 'n':
00179               Proto->Distrib[i] = normal;
00180               break;
00181             case 'u':
00182               Proto->Distrib[i] = uniform;
00183               break;
00184             case 'r':
00185               Proto->Distrib[i] = D_random;
00186               break;
00187             default:
00188               DoError (ILLEGALDISTRIBUTION,
00189                 "Illegal prototype distribution");
00190           }
00191         }
00192         Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
00193         if (Proto->Variance.Elliptical == NULL)
00194           DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
00195         Proto->Magnitude.Elliptical =
00196           (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
00197         Proto->Weight.Elliptical =
00198           (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
00199         Proto->TotalMagnitude = 1.0;
00200         for (i = 0; i < N; i++) {
00201           switch (Proto->Distrib[i]) {
00202             case normal:
00203               Proto->Magnitude.Elliptical[i] = 1.0 /
00204                 sqrt ((double)
00205                 (2.0 * PI * Proto->Variance.Elliptical[i]));
00206               Proto->Weight.Elliptical[i] =
00207                 1.0 / Proto->Variance.Elliptical[i];
00208               break;
00209             case uniform:
00210             case D_random:
00211               Proto->Magnitude.Elliptical[i] = 1.0 /
00212                 (2.0 * Proto->Variance.Elliptical[i]);
00213               break;
00214             case DISTRIBUTION_COUNT:
00215               ASSERT_HOST(!"Distribution count not allowed!");
00216           }
00217           Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
00218         }
00219         Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
00220         break;
00221     }
00222     return (Proto);
00223   }
00224   else if (Status == EOF)
00225     return (NULL);
00226   else {
00227     DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification");
00228     return (NULL);
00229   }
00230 }
00231 
00241 PROTOSTYLE ReadProtoStyle(FILE *File) {
00242   char Token[TOKENSIZE];
00243   PROTOSTYLE Style;
00244 
00245   if (tfscanf(File, "%s", Token) != 1)
00246     DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
00247   switch (Token[0]) {
00248     case 's':
00249       Style = spherical;
00250       break;
00251     case 'e':
00252       Style = elliptical;
00253       break;
00254     case 'm':
00255       Style = mixed;
00256       break;
00257     case 'a':
00258       Style = automatic;
00259       break;
00260     default:
00261       Style = elliptical;
00262       DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
00263   }
00264   return (Style);
00265 }
00266 
00281 FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) {
00282   bool needs_free = false;
00283   int i;
00284   int NumFloatsRead;
00285 
00286   if (Buffer == NULL) {
00287     Buffer = reinterpret_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32)));
00288     needs_free = true;
00289   }
00290 
00291   for (i = 0; i < N; i++) {
00292     NumFloatsRead = tfscanf(File, "%f", &(Buffer[i]));
00293     if (NumFloatsRead != 1) {
00294       if ((NumFloatsRead == EOF) && (i == 0)) {
00295         if (needs_free) {
00296             Efree(Buffer);
00297         }
00298         return NULL;
00299       } else {
00300         DoError(ILLEGALFLOAT, "Illegal float specification");
00301       }
00302     }
00303   }
00304   return Buffer;
00305 }
00306 
00318 void
00319 WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]) {
00320   int i;
00321 
00322   for (i = 0; i < N; i++) {
00323     if (ParamDesc[i].Circular)
00324       fprintf (File, "circular ");
00325     else
00326       fprintf (File, "linear   ");
00327 
00328     if (ParamDesc[i].NonEssential)
00329       fprintf (File, "non-essential ");
00330     else
00331       fprintf (File, "essential     ");
00332 
00333     fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
00334   }
00335 }
00336 
00348 void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto) {
00349   int i;
00350 
00351   if (Proto->Significant)
00352     fprintf (File, "significant   ");
00353   else
00354     fprintf (File, "insignificant ");
00355   WriteProtoStyle (File, (PROTOSTYLE) Proto->Style);
00356   fprintf (File, "%6d\n\t", Proto->NumSamples);
00357   WriteNFloats (File, N, Proto->Mean);
00358   fprintf (File, "\t");
00359 
00360   switch (Proto->Style) {
00361     case spherical:
00362       WriteNFloats (File, 1, &(Proto->Variance.Spherical));
00363       break;
00364     case elliptical:
00365       WriteNFloats (File, N, Proto->Variance.Elliptical);
00366       break;
00367     case mixed:
00368       for (i = 0; i < N; i++)
00369       switch (Proto->Distrib[i]) {
00370         case normal:
00371           fprintf (File, " %9s", "normal");
00372           break;
00373         case uniform:
00374           fprintf (File, " %9s", "uniform");
00375           break;
00376         case D_random:
00377           fprintf (File, " %9s", "random");
00378           break;
00379         case DISTRIBUTION_COUNT:
00380           ASSERT_HOST(!"Distribution count not allowed!");
00381       }
00382       fprintf (File, "\n\t");
00383       WriteNFloats (File, N, Proto->Variance.Elliptical);
00384   }
00385 }
00386 
00398 void WriteNFloats(FILE * File, uinT16 N, FLOAT32 Array[]) {
00399   for (int i = 0; i < N; i++)
00400     fprintf(File, " %9.6f", Array[i]);
00401   fprintf(File, "\n");
00402 }
00403 
00415 void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) {
00416   switch (ProtoStyle) {
00417     case spherical:
00418       fprintf (File, "spherical");
00419       break;
00420     case elliptical:
00421       fprintf (File, "elliptical");
00422       break;
00423     case mixed:
00424       fprintf (File, "mixed");
00425       break;
00426     case automatic:
00427       fprintf (File, "automatic");
00428       break;
00429   }
00430 }
00431 
00449 void WriteProtoList(
00450      FILE       *File,
00451      uinT16     N,
00452      PARAM_DESC ParamDesc[],
00453      LIST       ProtoList,
00454      BOOL8      WriteSigProtos,
00455      BOOL8      WriteInsigProtos)
00456 {
00457   PROTOTYPE     *Proto;
00458 
00459   /* write file header */
00460   fprintf(File,"%0d\n",N);
00461   WriteParamDesc(File,N,ParamDesc);
00462 
00463   /* write prototypes */
00464   iterate(ProtoList)
00465     {
00466       Proto = (PROTOTYPE *) first_node ( ProtoList );
00467       if (( Proto->Significant && WriteSigProtos )      ||
00468           ( ! Proto->Significant && WriteInsigProtos ) )
00469         WritePrototype( File, N, Proto );
00470     }
00471 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines