tesseract  4.1.0
clusttool.h File Reference
#include <cstdio>
#include "cluster.h"
#include "serialis.h"

Go to the source code of this file.

Functions

uint16_t ReadSampleSize (tesseract::TFile *fp)
 
PARAM_DESC * ReadParamDesc (tesseract::TFile *fp, uint16_t N)
 
PROTOTYPE * ReadPrototype (tesseract::TFile *fp, uint16_t N)
 
void WriteParamDesc (FILE *File, uint16_t N, const PARAM_DESC ParamDesc[])
 
void WritePrototype (FILE *File, uint16_t N, PROTOTYPE *Proto)
 

Function Documentation

PARAM_DESC* ReadParamDesc ( TFile * fp,
uint16_t  N 
)

This routine reads textual descriptions of sets of parameters which describe the characteristics of feature dimensions.

Parameters
fp: open text file to read N parameter descriptions from
N: number of parameter descriptions to read
Returns
Pointer to an array of parameter descriptors.
Note
Globals: None

Definition at line 140 of file clusttool.cpp.

140  {
141  PARAM_DESC *ParamDesc;
142 
143  ParamDesc = static_cast<PARAM_DESC *>(Emalloc (N * sizeof (PARAM_DESC)));
144  for (int i = 0; i < N; i++) {
145  const int kMaxLineSize = TOKENSIZE * 4;
146  char line[kMaxLineSize];
147  ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
148  std::istringstream stream(line);
149  // Use "C" locale (needed for float values Min, Max).
150  stream.imbue(std::locale::classic());
151  std::string linear_token;
152  stream >> linear_token;
153  std::string essential_token;
154  stream >> essential_token;
155  stream >> ParamDesc[i].Min;
156  stream >> ParamDesc[i].Max;
157  ASSERT_HOST(!stream.fail());
158  ParamDesc[i].Circular = (linear_token[0] == 'c');
159  ParamDesc[i].NonEssential = (essential_token[0] != 'e');
160  ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
161  ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
162  ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
163  }
164  return (ParamDesc);
165 }
float Range
Definition: ocrfeatures.h:47
#define TOKENSIZE
max size of tokens read from an input file
Definition: clusttool.cpp:29
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:248
bool Circular
Definition: ocrfeatures.h:43
bool NonEssential
Definition: ocrfeatures.h:44
float Min
Definition: ocrfeatures.h:45
float MidRange
Definition: ocrfeatures.h:49
float HalfRange
Definition: ocrfeatures.h:48
#define ASSERT_HOST(x)
Definition: errcode.h:88
void * Emalloc(int Size)
Definition: emalloc.cpp:31
float Max
Definition: ocrfeatures.h:46
PROTOTYPE* ReadPrototype ( TFile * fp,
uint16_t  N 
)

This routine reads a textual description of a prototype from the specified file.

Parameters
fp: open text file to read prototype from
N: number of dimensions used in prototype
Returns
Pointer to the newly allocated prototype, or nullptr if the prototype header line cannot be read or parsed
Note
Globals: None

Definition at line 176 of file clusttool.cpp.

176  {
177  char sig_token[TOKENSIZE], shape_token[TOKENSIZE];
178  PROTOTYPE *Proto;
179  int SampleCount;
180  int i;
181 
182  const int kMaxLineSize = TOKENSIZE * 4;
183  char line[kMaxLineSize];
184  if (fp->FGets(line, kMaxLineSize) == nullptr ||
185  sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d",
186  sig_token, shape_token, &SampleCount) != 3) {
187  tprintf("Invalid prototype: %s\n", line);
188  return nullptr;
189  }
190  Proto = static_cast<PROTOTYPE *>(Emalloc(sizeof(PROTOTYPE)));
191  Proto->Cluster = nullptr;
192  Proto->Significant = (sig_token[0] == 's');
193 
194  switch (shape_token[0]) {
195  case 's':
196  Proto->Style = spherical;
197  break;
198  case 'e':
199  Proto->Style = elliptical;
200  break;
201  case 'a':
202  Proto->Style = automatic;
203  break;
204  default:
205  tprintf("Invalid prototype style specification:%s\n", shape_token);
206  Proto->Style = elliptical;
207  }
208 
209  ASSERT_HOST(SampleCount >= 0);
210  Proto->NumSamples = SampleCount;
211 
212  Proto->Mean = ReadNFloats(fp, N, nullptr);
213  ASSERT_HOST(Proto->Mean != nullptr);
214 
215  switch (Proto->Style) {
216  case spherical:
217  ASSERT_HOST(ReadNFloats(fp, 1, &(Proto->Variance.Spherical)) != nullptr);
218  Proto->Magnitude.Spherical =
219  1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical);
220  Proto->TotalMagnitude = pow(Proto->Magnitude.Spherical, static_cast<float>(N));
221  Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
222  Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
223  Proto->Distrib = nullptr;
224  break;
225  case elliptical:
226  Proto->Variance.Elliptical = ReadNFloats(fp, N, nullptr);
227  ASSERT_HOST(Proto->Variance.Elliptical != nullptr);
228  Proto->Magnitude.Elliptical = static_cast<float *>(Emalloc(N * sizeof(float)));
229  Proto->Weight.Elliptical = static_cast<float *>(Emalloc(N * sizeof(float)));
230  Proto->TotalMagnitude = 1.0;
231  for (i = 0; i < N; i++) {
232  Proto->Magnitude.Elliptical[i] =
233  1.0 / sqrt(2.0 * M_PI * Proto->Variance.Elliptical[i]);
234  Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i];
235  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
236  }
237  Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
238  Proto->Distrib = nullptr;
239  break;
240  default:
241  Efree(Proto);
242  tprintf("Invalid prototype style\n");
243  return nullptr;
244  }
245  return Proto;
246 }
FLOATUNION Weight
Definition: cluster.h:79
DISTRIBUTION * Distrib
Definition: cluster.h:73
unsigned NumSamples
Definition: cluster.h:71
#define TOKENSIZE
max size of tokens read from an input file
Definition: clusttool.cpp:29
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:248
unsigned Style
Definition: cluster.h:70
FLOATUNION Magnitude
Definition: cluster.h:78
float * Mean
Definition: cluster.h:74
bool Significant
Definition: cluster.h:64
#define QUOTED_TOKENSIZE
Definition: clusttool.cpp:30
void Efree(void *ptr)
Definition: emalloc.cpp:45
FLOATUNION Variance
Definition: cluster.h:77
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
#define ASSERT_HOST(x)
Definition: errcode.h:88
void * Emalloc(int Size)
Definition: emalloc.cpp:31
float TotalMagnitude
Definition: cluster.h:75
CLUSTER * Cluster
Definition: cluster.h:72
float * Elliptical
Definition: cluster.h:60
float Spherical
Definition: cluster.h:59
float LogMagnitude
Definition: cluster.h:76
uint16_t ReadSampleSize ( TFile * fp)

This routine reads a single integer from the specified file and checks to ensure that it is between 0 and MAXSAMPLESIZE.

Parameters
fp: open text file to read sample size from
Returns
Sample size
Note
Globals: None

Definition at line 120 of file clusttool.cpp.

120  {
121  int SampleSize = 0;
122 
123  const int kMaxLineSize = 100;
124  char line[kMaxLineSize];
125  ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
126  ASSERT_HOST(sscanf(line, "%d", &SampleSize) == 1);
127  ASSERT_HOST(SampleSize >= 0 && SampleSize <= MAXSAMPLESIZE);
128  return SampleSize;
129 }
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:248
#define MAXSAMPLESIZE
max num of dimensions in feature space
Definition: clusttool.cpp:31
#define ASSERT_HOST(x)
Definition: errcode.h:88
void WriteParamDesc ( FILE *  File,
uint16_t  N,
const PARAM_DESC  ParamDesc[] 
)

This routine writes an array of dimension descriptors to the specified text file.

Parameters
File: open text file to write param descriptors to
N: number of param descriptors to write
ParamDesc: array of param descriptors to write

Definition at line 255 of file clusttool.cpp.

255  {
256  int i;
257 
258  for (i = 0; i < N; i++) {
259  if (ParamDesc[i].Circular)
260  fprintf (File, "circular ");
261  else
262  fprintf (File, "linear ");
263 
264  if (ParamDesc[i].NonEssential)
265  fprintf (File, "non-essential ");
266  else
267  fprintf (File, "essential ");
268 
269  fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
270  }
271 }
void WritePrototype ( FILE *  File,
uint16_t  N,
PROTOTYPE *  Proto 
)

This routine writes a textual description of a prototype to the specified text file.

Parameters
File: open text file to write prototype to
N: number of dimensions in feature space
Proto: prototype to write out

Definition at line 280 of file clusttool.cpp.

280  {
281  int i;
282 
283  if (Proto->Significant)
284  fprintf (File, "significant ");
285  else
286  fprintf (File, "insignificant ");
287  WriteProtoStyle (File, static_cast<PROTOSTYLE>(Proto->Style));
288  fprintf (File, "%6d\n\t", Proto->NumSamples);
289  WriteNFloats (File, N, Proto->Mean);
290  fprintf (File, "\t");
291 
292  switch (Proto->Style) {
293  case spherical:
294  WriteNFloats (File, 1, &(Proto->Variance.Spherical));
295  break;
296  case elliptical:
297  WriteNFloats (File, N, Proto->Variance.Elliptical);
298  break;
299  case mixed:
300  for (i = 0; i < N; i++)
301  switch (Proto->Distrib[i]) {
302  case normal:
303  fprintf (File, " %9s", "normal");
304  break;
305  case uniform:
306  fprintf (File, " %9s", "uniform");
307  break;
308  case D_random:
309  fprintf (File, " %9s", "random");
310  break;
311  case DISTRIBUTION_COUNT:
312  ASSERT_HOST(!"Distribution count not allowed!");
313  }
314  fprintf (File, "\n\t");
315  WriteNFloats (File, N, Proto->Variance.Elliptical);
316  }
317 }
DISTRIBUTION * Distrib
Definition: cluster.h:73
unsigned NumSamples
Definition: cluster.h:71
Definition: cluster.h:56
unsigned Style
Definition: cluster.h:70
float * Mean
Definition: cluster.h:74
bool Significant
Definition: cluster.h:64
FLOATUNION Variance
Definition: cluster.h:77
Definition: cluster.h:44
#define ASSERT_HOST(x)
Definition: errcode.h:88
float * Elliptical
Definition: cluster.h:60
float Spherical
Definition: cluster.h:59