|
tesseract 3.04.01
|
00001 /****************************************************************************** 00002 ** Filename: clustertool.c 00003 ** Purpose: Misc. tools for use with the clustering routines 00004 ** Author: Dan Johnson 00005 ** History: 6/6/89, DSJ, Created. 00006 ** 00007 ** (c) Copyright Hewlett-Packard Company, 1988. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 ******************************************************************************/ 00018 00019 //--------------------------Include Files---------------------------------- 00020 #include "clusttool.h" 00021 #include "const.h" 00022 #include "danerror.h" 00023 #include "emalloc.h" 00024 #include "scanutils.h" 00025 #include <stdio.h> 00026 #include <math.h> 00027 00028 //---------------Global Data Definitions and Declarations-------------------- 00029 #define TOKENSIZE 80 //< max size of tokens read from an input file 00030 #define MAXSAMPLESIZE 65535 //< max num of dimensions in feature space 00031 //#define MAXBLOCKSIZE 65535 //< max num of samples in a character (block size) 00032 00043 uinT16 ReadSampleSize(FILE *File) { 00044 int SampleSize; 00045 00046 if ((tfscanf(File, "%d", &SampleSize) != 1) || 00047 (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE)) 00048 DoError (ILLEGALSAMPLESIZE, "Illegal sample size"); 00049 return (SampleSize); 00050 } 00051 00066 PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) { 00067 int i; 00068 PARAM_DESC *ParamDesc; 00069 char Token[TOKENSIZE]; 00070 00071 ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC)); 00072 for (i = 0; i < N; i++) { 00073 if (tfscanf(File, "%s", Token) != 1) 00074 DoError (ILLEGALCIRCULARSPEC, 00075 "Illegal circular/linear specification"); 00076 if (Token[0] == 'c') 00077 ParamDesc[i].Circular = TRUE; 00078 else 00079 ParamDesc[i].Circular = FALSE; 00080 00081 if (tfscanf(File, "%s", Token) != 1) 00082 DoError (ILLEGALESSENTIALSPEC, 00083 "Illegal essential/non-essential spec"); 00084 if (Token[0] == 'e') 00085 ParamDesc[i].NonEssential = FALSE; 00086 else 00087 ParamDesc[i].NonEssential = TRUE; 00088 if (tfscanf(File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) != 2) 00089 DoError (ILLEGALMINMAXSPEC, "Illegal min or max specification"); 00090 ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min; 00091 ParamDesc[i].HalfRange = ParamDesc[i].Range / 2; 00092 ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2; 00093 } 00094 return (ParamDesc); 00095 } 00096 00113 PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) { 00114 char Token[TOKENSIZE]; 00115 int Status; 00116 PROTOTYPE *Proto; 00117 int SampleCount; 00118 int i; 00119 00120 if ((Status = tfscanf(File, "%s", Token)) == 1) { 00121 Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE)); 00122 Proto->Cluster = NULL; 00123 if (Token[0] == 's') 00124 Proto->Significant = TRUE; 00125 else 00126 Proto->Significant = FALSE; 00127 00128 Proto->Style = ReadProtoStyle (File); 00129 00130 if ((tfscanf(File, "%d", &SampleCount) != 1) || (SampleCount < 0)) 00131 DoError (ILLEGALSAMPLECOUNT, "Illegal sample count"); 00132 Proto->NumSamples = SampleCount; 00133 00134 Proto->Mean = ReadNFloats (File, N, NULL); 00135 if (Proto->Mean == NULL) 00136 DoError (ILLEGALMEANSPEC, "Illegal prototype mean"); 00137 00138 switch (Proto->Style) { 00139 case spherical: 00140 if (ReadNFloats (File, 1, &(Proto->Variance.Spherical)) == NULL) 00141 DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance"); 00142 Proto->Magnitude.Spherical = 00143 1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical)); 00144 Proto->TotalMagnitude = 00145 pow (Proto->Magnitude.Spherical, (float) N); 00146 Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); 00147 Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical; 00148 Proto->Distrib = NULL; 00149 break; 00150 case elliptical: 00151 Proto->Variance.Elliptical = ReadNFloats (File, N, NULL); 00152 if (Proto->Variance.Elliptical == NULL) 00153 DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance"); 00154 Proto->Magnitude.Elliptical = 00155 (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); 00156 Proto->Weight.Elliptical = 00157 (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); 00158 Proto->TotalMagnitude = 1.0; 00159 for (i = 0; i < N; i++) { 00160 Proto->Magnitude.Elliptical[i] = 00161 1.0 / 00162 sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i])); 00163 Proto->Weight.Elliptical[i] = 00164 1.0 / Proto->Variance.Elliptical[i]; 00165 Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; 00166 } 00167 Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); 00168 Proto->Distrib = NULL; 00169 break; 00170 case mixed: 00171 Proto->Distrib = 00172 (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION)); 00173 for (i = 0; i < N; i++) { 00174 if (tfscanf(File, "%s", Token) != 1) 00175 DoError (ILLEGALDISTRIBUTION, 00176 "Illegal prototype distribution"); 00177 switch (Token[0]) { 00178 case 'n': 00179 Proto->Distrib[i] = normal; 00180 break; 00181 case 'u': 00182 Proto->Distrib[i] = uniform; 00183 break; 00184 case 'r': 00185 Proto->Distrib[i] = D_random; 00186 break; 00187 default: 00188 DoError (ILLEGALDISTRIBUTION, 00189 "Illegal prototype distribution"); 00190 } 00191 } 00192 Proto->Variance.Elliptical = ReadNFloats (File, N, NULL); 00193 if (Proto->Variance.Elliptical == NULL) 00194 DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance"); 00195 Proto->Magnitude.Elliptical = 00196 (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); 00197 Proto->Weight.Elliptical = 00198 (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); 00199 Proto->TotalMagnitude = 1.0; 00200 for (i = 0; i < N; i++) { 00201 switch (Proto->Distrib[i]) { 00202 case normal: 00203 Proto->Magnitude.Elliptical[i] = 1.0 / 00204 sqrt ((double) 00205 (2.0 * PI * Proto->Variance.Elliptical[i])); 00206 Proto->Weight.Elliptical[i] = 00207 1.0 / Proto->Variance.Elliptical[i]; 00208 break; 00209 case uniform: 00210 case D_random: 00211 Proto->Magnitude.Elliptical[i] = 1.0 / 00212 (2.0 * Proto->Variance.Elliptical[i]); 00213 break; 00214 case DISTRIBUTION_COUNT: 00215 ASSERT_HOST(!"Distribution count not allowed!"); 00216 } 00217 Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; 00218 } 00219 Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); 00220 break; 00221 } 00222 return (Proto); 00223 } 00224 else if (Status == EOF) 00225 return (NULL); 00226 else { 00227 DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification"); 00228 return (NULL); 00229 } 00230 } 00231 00241 PROTOSTYLE ReadProtoStyle(FILE *File) { 00242 char Token[TOKENSIZE]; 00243 PROTOSTYLE Style; 00244 00245 if (tfscanf(File, "%s", Token) != 1) 00246 DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification"); 00247 switch (Token[0]) { 00248 case 's': 00249 Style = spherical; 00250 break; 00251 case 'e': 00252 Style = elliptical; 00253 break; 00254 case 'm': 00255 Style = mixed; 00256 break; 00257 case 'a': 00258 Style = automatic; 00259 break; 00260 default: 00261 Style = elliptical; 00262 DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification"); 00263 } 00264 return (Style); 00265 } 00266 00281 FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) { 00282 bool needs_free = false; 00283 int i; 00284 int NumFloatsRead; 00285 00286 if (Buffer == NULL) { 00287 Buffer = reinterpret_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32))); 00288 needs_free = true; 00289 } 00290 00291 for (i = 0; i < N; i++) { 00292 NumFloatsRead = tfscanf(File, "%f", &(Buffer[i])); 00293 if (NumFloatsRead != 1) { 00294 if ((NumFloatsRead == EOF) && (i == 0)) { 00295 if (needs_free) { 00296 Efree(Buffer); 00297 } 00298 return NULL; 00299 } else { 00300 DoError(ILLEGALFLOAT, "Illegal float specification"); 00301 } 00302 } 00303 } 00304 return Buffer; 00305 } 00306 00318 void 00319 WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]) { 00320 int i; 00321 00322 for (i = 0; i < N; i++) { 00323 if (ParamDesc[i].Circular) 00324 fprintf (File, "circular "); 00325 else 00326 fprintf (File, "linear "); 00327 00328 if (ParamDesc[i].NonEssential) 00329 fprintf (File, "non-essential "); 00330 else 00331 fprintf (File, "essential "); 00332 00333 fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max); 00334 } 00335 } 00336 00348 void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto) { 00349 int i; 00350 00351 if (Proto->Significant) 00352 fprintf (File, "significant "); 00353 else 00354 fprintf (File, "insignificant "); 00355 WriteProtoStyle (File, (PROTOSTYLE) Proto->Style); 00356 fprintf (File, "%6d\n\t", Proto->NumSamples); 00357 WriteNFloats (File, N, Proto->Mean); 00358 fprintf (File, "\t"); 00359 00360 switch (Proto->Style) { 00361 case spherical: 00362 WriteNFloats (File, 1, &(Proto->Variance.Spherical)); 00363 break; 00364 case elliptical: 00365 WriteNFloats (File, N, Proto->Variance.Elliptical); 00366 break; 00367 case mixed: 00368 for (i = 0; i < N; i++) 00369 switch (Proto->Distrib[i]) { 00370 case normal: 00371 fprintf (File, " %9s", "normal"); 00372 break; 00373 case uniform: 00374 fprintf (File, " %9s", "uniform"); 00375 break; 00376 case D_random: 00377 fprintf (File, " %9s", "random"); 00378 break; 00379 case DISTRIBUTION_COUNT: 00380 ASSERT_HOST(!"Distribution count not allowed!"); 00381 } 00382 fprintf (File, "\n\t"); 00383 WriteNFloats (File, N, Proto->Variance.Elliptical); 00384 } 00385 } 00386 00398 void WriteNFloats(FILE * File, uinT16 N, FLOAT32 Array[]) { 00399 for (int i = 0; i < N; i++) 00400 fprintf(File, " %9.6f", Array[i]); 00401 fprintf(File, "\n"); 00402 } 00403 00415 void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) { 00416 switch (ProtoStyle) { 00417 case spherical: 00418 fprintf (File, "spherical"); 00419 break; 00420 case elliptical: 00421 fprintf (File, "elliptical"); 00422 break; 00423 case mixed: 00424 fprintf (File, "mixed"); 00425 break; 00426 case automatic: 00427 fprintf (File, "automatic"); 00428 break; 00429 } 00430 } 00431 00449 void WriteProtoList( 00450 FILE *File, 00451 uinT16 N, 00452 PARAM_DESC ParamDesc[], 00453 LIST ProtoList, 00454 BOOL8 WriteSigProtos, 00455 BOOL8 WriteInsigProtos) 00456 { 00457 PROTOTYPE *Proto; 00458 00459 /* write file header */ 00460 fprintf(File,"%0d\n",N); 00461 WriteParamDesc(File,N,ParamDesc); 00462 00463 /* write prototypes */ 00464 iterate(ProtoList) 00465 { 00466 Proto = (PROTOTYPE *) first_node ( ProtoList ); 00467 if (( Proto->Significant && WriteSigProtos ) || 00468 ( ! Proto->Significant && WriteInsigProtos ) ) 00469 WritePrototype( File, N, Proto ); 00470 } 00471 }