tesseract  3.04.01
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
clusttool.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: clusttool.cpp
3  ** Purpose: Misc. tools for use with the clustering routines
4  ** Author: Dan Johnson
5  ** History: 6/6/89, DSJ, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 
19 //--------------------------Include Files----------------------------------
20 #include "clusttool.h"
21 #include "const.h"
22 #include "danerror.h"
23 #include "emalloc.h"
24 #include "scanutils.h"
25 #include <stdio.h>
26 #include <math.h>
27 
28 //---------------Global Data Definitions and Declarations--------------------
29 #define TOKENSIZE 80 ///< size, in chars, of the Token buffers that receive words scanned from an input file (at most TOKENSIZE - 1 characters plus the terminating NUL fit)
30 #define MAXSAMPLESIZE 65535 ///< largest sample size (number of feature-space dimensions) that ReadSampleSize accepts
31 //#define MAXBLOCKSIZE 65535 //< max num of samples in a character (block size) -- currently disabled
32 
43 uinT16 ReadSampleSize(FILE *File) {
44  int SampleSize;
45 
46  if ((tfscanf(File, "%d", &SampleSize) != 1) ||
47  (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE))
48  DoError (ILLEGALSAMPLESIZE, "Illegal sample size");
49  return (SampleSize);
50 }
51 
66 PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) {
67  int i;
68  PARAM_DESC *ParamDesc;
69  char Token[TOKENSIZE];
70 
71  ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC));
72  for (i = 0; i < N; i++) {
73  if (tfscanf(File, "%s", Token) != 1)
75  "Illegal circular/linear specification");
76  if (Token[0] == 'c')
77  ParamDesc[i].Circular = TRUE;
78  else
79  ParamDesc[i].Circular = FALSE;
80 
81  if (tfscanf(File, "%s", Token) != 1)
83  "Illegal essential/non-essential spec");
84  if (Token[0] == 'e')
85  ParamDesc[i].NonEssential = FALSE;
86  else
87  ParamDesc[i].NonEssential = TRUE;
88  if (tfscanf(File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) != 2)
89  DoError (ILLEGALMINMAXSPEC, "Illegal min or max specification");
90  ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
91  ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
92  ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
93  }
94  return (ParamDesc);
95 }
96 
113 PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
114  char Token[TOKENSIZE];
115  int Status;
116  PROTOTYPE *Proto;
117  int SampleCount;
118  int i;
119 
120  if ((Status = tfscanf(File, "%s", Token)) == 1) {
121  Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE));
122  Proto->Cluster = NULL;
123  if (Token[0] == 's')
124  Proto->Significant = TRUE;
125  else
126  Proto->Significant = FALSE;
127 
128  Proto->Style = ReadProtoStyle (File);
129 
130  if ((tfscanf(File, "%d", &SampleCount) != 1) || (SampleCount < 0))
131  DoError (ILLEGALSAMPLECOUNT, "Illegal sample count");
132  Proto->NumSamples = SampleCount;
133 
134  Proto->Mean = ReadNFloats (File, N, NULL);
135  if (Proto->Mean == NULL)
136  DoError (ILLEGALMEANSPEC, "Illegal prototype mean");
137 
138  switch (Proto->Style) {
139  case spherical:
140  if (ReadNFloats (File, 1, &(Proto->Variance.Spherical)) == NULL)
141  DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
142  Proto->Magnitude.Spherical =
143  1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical));
144  Proto->TotalMagnitude =
145  pow (Proto->Magnitude.Spherical, (float) N);
146  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
147  Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
148  Proto->Distrib = NULL;
149  break;
150  case elliptical:
151  Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
152  if (Proto->Variance.Elliptical == NULL)
153  DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
154  Proto->Magnitude.Elliptical =
155  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
156  Proto->Weight.Elliptical =
157  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
158  Proto->TotalMagnitude = 1.0;
159  for (i = 0; i < N; i++) {
160  Proto->Magnitude.Elliptical[i] =
161  1.0 /
162  sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i]));
163  Proto->Weight.Elliptical[i] =
164  1.0 / Proto->Variance.Elliptical[i];
165  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
166  }
167  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
168  Proto->Distrib = NULL;
169  break;
170  case mixed:
171  Proto->Distrib =
172  (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));
173  for (i = 0; i < N; i++) {
174  if (tfscanf(File, "%s", Token) != 1)
176  "Illegal prototype distribution");
177  switch (Token[0]) {
178  case 'n':
179  Proto->Distrib[i] = normal;
180  break;
181  case 'u':
182  Proto->Distrib[i] = uniform;
183  break;
184  case 'r':
185  Proto->Distrib[i] = D_random;
186  break;
187  default:
189  "Illegal prototype distribution");
190  }
191  }
192  Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
193  if (Proto->Variance.Elliptical == NULL)
194  DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
195  Proto->Magnitude.Elliptical =
196  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
197  Proto->Weight.Elliptical =
198  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
199  Proto->TotalMagnitude = 1.0;
200  for (i = 0; i < N; i++) {
201  switch (Proto->Distrib[i]) {
202  case normal:
203  Proto->Magnitude.Elliptical[i] = 1.0 /
204  sqrt ((double)
205  (2.0 * PI * Proto->Variance.Elliptical[i]));
206  Proto->Weight.Elliptical[i] =
207  1.0 / Proto->Variance.Elliptical[i];
208  break;
209  case uniform:
210  case D_random:
211  Proto->Magnitude.Elliptical[i] = 1.0 /
212  (2.0 * Proto->Variance.Elliptical[i]);
213  break;
214  case DISTRIBUTION_COUNT:
215  ASSERT_HOST(!"Distribution count not allowed!");
216  }
217  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
218  }
219  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
220  break;
221  }
222  return (Proto);
223  }
224  else if (Status == EOF)
225  return (NULL);
226  else {
227  DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification");
228  return (NULL);
229  }
230 }
231 
242  char Token[TOKENSIZE];
243  PROTOSTYLE Style;
244 
245  if (tfscanf(File, "%s", Token) != 1)
246  DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
247  switch (Token[0]) {
248  case 's':
249  Style = spherical;
250  break;
251  case 'e':
252  Style = elliptical;
253  break;
254  case 'm':
255  Style = mixed;
256  break;
257  case 'a':
258  Style = automatic;
259  break;
260  default:
261  Style = elliptical;
262  DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
263  }
264  return (Style);
265 }
266 
281 FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) {
282  bool needs_free = false;
283  int i;
284  int NumFloatsRead;
285 
286  if (Buffer == NULL) {
287  Buffer = reinterpret_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32)));
288  needs_free = true;
289  }
290 
291  for (i = 0; i < N; i++) {
292  NumFloatsRead = tfscanf(File, "%f", &(Buffer[i]));
293  if (NumFloatsRead != 1) {
294  if ((NumFloatsRead == EOF) && (i == 0)) {
295  if (needs_free) {
296  Efree(Buffer);
297  }
298  return NULL;
299  } else {
300  DoError(ILLEGALFLOAT, "Illegal float specification");
301  }
302  }
303  }
304  return Buffer;
305 }
306 
318 void
319 WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]) {
320  int i;
321 
322  for (i = 0; i < N; i++) {
323  if (ParamDesc[i].Circular)
324  fprintf (File, "circular ");
325  else
326  fprintf (File, "linear ");
327 
328  if (ParamDesc[i].NonEssential)
329  fprintf (File, "non-essential ");
330  else
331  fprintf (File, "essential ");
332 
333  fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
334  }
335 }
336 
348 void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto) {
349  int i;
350 
351  if (Proto->Significant)
352  fprintf (File, "significant ");
353  else
354  fprintf (File, "insignificant ");
355  WriteProtoStyle (File, (PROTOSTYLE) Proto->Style);
356  fprintf (File, "%6d\n\t", Proto->NumSamples);
357  WriteNFloats (File, N, Proto->Mean);
358  fprintf (File, "\t");
359 
360  switch (Proto->Style) {
361  case spherical:
362  WriteNFloats (File, 1, &(Proto->Variance.Spherical));
363  break;
364  case elliptical:
365  WriteNFloats (File, N, Proto->Variance.Elliptical);
366  break;
367  case mixed:
368  for (i = 0; i < N; i++)
369  switch (Proto->Distrib[i]) {
370  case normal:
371  fprintf (File, " %9s", "normal");
372  break;
373  case uniform:
374  fprintf (File, " %9s", "uniform");
375  break;
376  case D_random:
377  fprintf (File, " %9s", "random");
378  break;
379  case DISTRIBUTION_COUNT:
380  ASSERT_HOST(!"Distribution count not allowed!");
381  }
382  fprintf (File, "\n\t");
383  WriteNFloats (File, N, Proto->Variance.Elliptical);
384  }
385 }
386 
398 void WriteNFloats(FILE * File, uinT16 N, FLOAT32 Array[]) {
399  for (int i = 0; i < N; i++)
400  fprintf(File, " %9.6f", Array[i]);
401  fprintf(File, "\n");
402 }
403 
415 void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) {
416  switch (ProtoStyle) {
417  case spherical:
418  fprintf (File, "spherical");
419  break;
420  case elliptical:
421  fprintf (File, "elliptical");
422  break;
423  case mixed:
424  fprintf (File, "mixed");
425  break;
426  case automatic:
427  fprintf (File, "automatic");
428  break;
429  }
430 }
431 
450  FILE *File,
451  uinT16 N,
452  PARAM_DESC ParamDesc[],
453  LIST ProtoList,
454  BOOL8 WriteSigProtos,
455  BOOL8 WriteInsigProtos)
456 {
457  PROTOTYPE *Proto;
458 
459  /* write file header */
460  fprintf(File,"%0d\n",N);
461  WriteParamDesc(File,N,ParamDesc);
462 
463  /* write prototypes */
464  iterate(ProtoList)
465  {
466  Proto = (PROTOTYPE *) first_node ( ProtoList );
467  if (( Proto->Significant && WriteSigProtos ) ||
468  ( ! Proto->Significant && WriteInsigProtos ) )
469  WritePrototype( File, N, Proto );
470  }
471 }
#define PI
Definition: const.h:19
void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto)
Definition: clusttool.cpp:348
FLOAT32 Min
Definition: ocrfeatures.h:49
#define ILLEGALSTYLESPEC
Definition: clusttool.h:61
#define first_node(l)
Definition: oldlist.h:139
DISTRIBUTION * Distrib
Definition: cluster.h:77
#define ILLEGALMEANSPEC
Definition: clusttool.h:63
#define ILLEGALSIGNIFICANCESPEC
Definition: clusttool.h:60
FLOAT32 * ReadNFloats(FILE *File, uinT16 N, FLOAT32 Buffer[])
Definition: clusttool.cpp:281
FLOAT32 Spherical
Definition: cluster.h:63
uinT16 ReadSampleSize(FILE *File)
Definition: clusttool.cpp:43
#define iterate(l)
Definition: oldlist.h:159
DISTRIBUTION
Definition: cluster.h:58
FLOAT32 LogMagnitude
Definition: cluster.h:80
FLOATUNION Variance
Definition: cluster.h:81
FLOAT32 * Mean
Definition: cluster.h:78
Definition: cluster.h:59
#define FALSE
Definition: capi.h:29
FLOAT32 HalfRange
Definition: ocrfeatures.h:52
#define TOKENSIZE
Definition: clusttool.cpp:29
void DoError(int Error, const char *Message)
Definition: danerror.cpp:42
unsigned Significant
Definition: cluster.h:68
FLOATUNION Weight
Definition: cluster.h:83
FLOAT32 TotalMagnitude
Definition: cluster.h:79
#define MAXSAMPLESIZE
Definition: clusttool.cpp:30
#define ILLEGALMINMAXSPEC
Definition: clusttool.h:59
FLOAT32 MidRange
Definition: ocrfeatures.h:53
unsigned NumSamples
Definition: cluster.h:75
#define ILLEGALESSENTIALSPEC
Definition: clusttool.h:67
FLOATUNION Magnitude
Definition: cluster.h:82
FLOAT32 * Elliptical
Definition: cluster.h:64
CLUSTER * Cluster
Definition: cluster.h:76
void WriteNFloats(FILE *File, uinT16 N, FLOAT32 Array[])
Definition: clusttool.cpp:398
#define ILLEGALVARIANCESPEC
Definition: clusttool.h:64
inT8 NonEssential
Definition: ocrfeatures.h:48
inT8 Circular
Definition: ocrfeatures.h:47
unsigned short uinT16
Definition: host.h:101
#define ILLEGALDISTRIBUTION
Definition: clusttool.h:65
#define ILLEGALSAMPLESIZE
Definition: clusttool.h:57
#define ILLEGALSAMPLECOUNT
Definition: clusttool.h:62
void * Emalloc(int Size)
Definition: emalloc.cpp:47
Definition: cluster.h:45
#define ILLEGALFLOAT
Definition: clusttool.h:66
unsigned char BOOL8
Definition: host.h:113
#define TRUE
Definition: capi.h:28
FLOAT32 Range
Definition: ocrfeatures.h:51
int tfscanf(FILE *stream, const char *format,...)
Definition: scanutils.cpp:233
void Efree(void *ptr)
Definition: emalloc.cpp:79
unsigned Style
Definition: cluster.h:74
PROTOSTYLE ReadProtoStyle(FILE *File)
Definition: clusttool.cpp:241
PARAM_DESC * ReadParamDesc(FILE *File, uinT16 N)
Definition: clusttool.cpp:66
PROTOSTYLE
Definition: cluster.h:44
float FLOAT32
Definition: host.h:111
#define ASSERT_HOST(x)
Definition: errcode.h:84
void WriteParamDesc(FILE *File, uinT16 N, PARAM_DESC ParamDesc[])
Definition: clusttool.cpp:319
void WriteProtoList(FILE *File, uinT16 N, PARAM_DESC ParamDesc[], LIST ProtoList, BOOL8 WriteSigProtos, BOOL8 WriteInsigProtos)
Definition: clusttool.cpp:449
FLOAT32 Max
Definition: ocrfeatures.h:50
#define ILLEGALCIRCULARSPEC
Definition: clusttool.h:58
PROTOTYPE * ReadPrototype(FILE *File, uinT16 N)
Definition: clusttool.cpp:113
void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle)
Definition: clusttool.cpp:415