Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
cluster.h
Go to the documentation of this file.
1
/******************************************************************************
2
** Filename: cluster.h
3
** Purpose: Definition of feature space clustering routines
4
** Author: Dan Johnson
5
** History: 5/29/89, DSJ, Created.
6
**
7
** (c) Copyright Hewlett-Packard Company, 1988.
8
** Licensed under the Apache License, Version 2.0 (the "License");
9
** you may not use this file except in compliance with the License.
10
** You may obtain a copy of the License at
11
** http://www.apache.org/licenses/LICENSE-2.0
12
** Unless required by applicable law or agreed to in writing, software
13
** distributed under the License is distributed on an "AS IS" BASIS,
14
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
** See the License for the specific language governing permissions and
16
** limitations under the License.
17
******************************************************************************/
18
#ifndef CLUSTER_H
19
#define CLUSTER_H
20
21
#include "
kdtree.h
"
22
#include "
oldlist.h
"
23
24
// Forward declaration only: this header refers to BUCKETS solely through
// pointers (see CLUSTERER::bucket_cache), so the full type is not needed here.
struct BUCKETS;
25
26
// Lower end of the bucket-count range used to size CLUSTERER::bucket_cache
// (its second dimension is MAXBUCKETS + 1 - MINBUCKETS).
#define MINBUCKETS 5
27
// Upper end of the bucket-count range used to size CLUSTERER::bucket_cache
// (its second dimension is MAXBUCKETS + 1 - MINBUCKETS).
#define MAXBUCKETS 39
28
29
/*----------------------------------------------------------------------
30
Types
31
----------------------------------------------------------------------*/
32
typedef
struct
sample
{
33
unsigned
Clustered
:1;
// TRUE if included in a higher cluster
34
unsigned
Prototype
:1;
// TRUE if cluster represented by a proto
35
unsigned
SampleCount
:30;
// number of samples in this cluster
36
struct
sample
*
Left
;
// ptr to left sub-cluster
37
struct
sample
*
Right
;
// ptr to right sub-cluster
38
inT32
CharID
;
// identifier of char sample came from
39
FLOAT32
Mean
[1];
// mean of cluster - SampleSize floats
40
}
CLUSTER
;
41
42
// SAMPLE names the same type as CLUSTER (struct sample), so a node can be
// referred to as either a sample or a cluster.
typedef struct sample SAMPLE;
43
44
// Kinds of prototypes that can be requested (see CLUSTERCONFIG::ProtoStyle).
// Values are spelled out explicitly; they match the implicit numbering.
typedef enum {
  spherical = 0,
  elliptical = 1,
  mixed = 2,
  automatic = 3
} PROTOSTYLE;
47
48
typedef
struct
{
// parameters to control clustering
49
PROTOSTYLE
ProtoStyle
;
// specifies types of protos to be made
50
FLOAT32
MinSamples
;
// min # of samples per proto - % of total
51
FLOAT32
MaxIllegal
;
// max percentage of samples in a cluster which have
52
// more than 1 feature in that cluster
53
FLOAT32
Independence
;
// desired independence between dimensions
54
FLOAT64
Confidence
;
// desired confidence in prototypes created
55
int
MagicSamples
;
// Ideal number of samples in a cluster.
56
}
CLUSTERCONFIG
;
57
58
// Distribution kinds (PROTOTYPE::Distrib holds one per dimension).
// DISTRIBUTION_COUNT is not a distribution: it is the number of entries
// before it and sizes the first dimension of CLUSTERER::bucket_cache.
typedef enum {
  normal = 0,
  uniform = 1,
  D_random = 2,
  DISTRIBUTION_COUNT = 3
} DISTRIBUTION;
61
62
typedef
union
{
63
FLOAT32
Spherical
;
64
FLOAT32
*
Elliptical
;
65
}
FLOATUNION
;
66
67
typedef
struct
{
68
unsigned
Significant:1;
// TRUE if prototype is significant
69
unsigned
Merged:1;
// Merged after clustering so do not output
70
// but kept for display purposes. If it has no
71
// samples then it was actually merged.
72
// Otherwise it matched an already significant
73
// cluster.
74
unsigned
Style:2;
// spherical, elliptical, or mixed
75
unsigned
NumSamples:28;
// number of samples in the cluster
76
CLUSTER
*
Cluster
;
// ptr to cluster which made prototype
77
DISTRIBUTION
*
Distrib
;
// different distribution for each dimension
78
FLOAT32
*
Mean
;
// prototype mean
79
FLOAT32
TotalMagnitude
;
// total magnitude over all dimensions
80
FLOAT32
LogMagnitude
;
// log base e of TotalMagnitude
81
FLOATUNION
Variance
;
// prototype variance
82
FLOATUNION
Magnitude
;
// magnitude of density function
83
FLOATUNION
Weight
;
// weight of density function
84
}
PROTOTYPE
;
85
86
typedef
struct
{
87
inT16
SampleSize
;
// number of parameters per sample
88
PARAM_DESC
*
ParamDesc
;
// description of each parameter
89
inT32
NumberOfSamples
;
// total number of samples being clustered
90
KDTREE
*
KDTree
;
// for optimal nearest neighbor searching
91
CLUSTER
*
Root
;
// ptr to root cluster of cluster tree
92
LIST
ProtoList
;
// list of prototypes
93
inT32
NumChar
;
// # of characters represented by samples
94
// cache of reusable histograms by distribution type and number of buckets.
95
BUCKETS
* bucket_cache[
DISTRIBUTION_COUNT
][
MAXBUCKETS
+ 1 -
MINBUCKETS
];
96
}
CLUSTERER
;
97
98
typedef
struct
{
99
inT32
NumSamples
;
// number of samples in list
100
inT32
MaxNumSamples
;
// maximum size of list
101
SAMPLE
*Sample[1];
// array of ptrs to sample data structures
102
}
SAMPLELIST
;
103
104
// low level cluster tree analysis routines.
105
// Sets search state S from cluster C: S becomes NIL_LIST when C is NULL,
// otherwise the result of push(NIL_LIST, C). The value of the expression is
// the value assigned to S.
// Caution: C is evaluated twice, so pass an expression without side effects.
#define InitSampleSearch(S, C) \
  ((S) = (((C) == NULL) ? NIL_LIST : push(NIL_LIST, (C))))
106
107
/*--------------------------------------------------------------------------
108
Public Function Prototypes
109
--------------------------------------------------------------------------*/
110
CLUSTERER
*
MakeClusterer
(
inT16
SampleSize,
const
PARAM_DESC
ParamDesc[]);
111
112
SAMPLE
*
MakeSample
(
CLUSTERER
* Clusterer,
const
FLOAT32
* Feature,
inT32
CharID);
113
114
LIST
ClusterSamples
(
CLUSTERER
*Clusterer,
CLUSTERCONFIG
*
Config
);
115
116
void
FreeClusterer
(
CLUSTERER
*Clusterer);
117
118
void
FreeProtoList
(
LIST
*ProtoList);
119
120
void
FreePrototype
(
void
*arg);
// PROTOTYPE *Prototype);
121
122
CLUSTER
*
NextSample
(
LIST
*SearchState);
123
124
FLOAT32
Mean
(
PROTOTYPE
*Proto,
uinT16
Dimension);
125
126
FLOAT32
StandardDeviation
(
PROTOTYPE
*Proto,
uinT16
Dimension);
127
128
inT32
MergeClusters
(
inT16
N,
PARAM_DESC
ParamDesc[],
inT32
n1,
inT32
n2,
129
FLOAT32
m[],
FLOAT32
m1[],
FLOAT32
m2[]);
130
131
//--------------Global Data Definitions and Declarations---------------------------
132
// define errors that can be trapped
133
// Error code. NOTE(review): the name suggests it is raised when an operation
// is attempted on data that has already been clustered -- confirm in cluster.cpp.
#define ALREADYCLUSTERED 4000
134
#endif
CLUSTERCONFIG
Definition:
cluster.h:48
PROTOTYPE::TotalMagnitude
FLOAT32 TotalMagnitude
Definition:
cluster.h:79
PROTOTYPE
Definition:
cluster.h:67
CLUSTERCONFIG::Confidence
FLOAT64 Confidence
Definition:
cluster.h:54
sample::Prototype
unsigned Prototype
Definition:
cluster.h:34
CLUSTERER
Definition:
cluster.h:86
PROTOTYPE::Distrib
DISTRIBUTION * Distrib
Definition:
cluster.h:77
FLOATUNION::Spherical
FLOAT32 Spherical
Definition:
cluster.h:63
kdtree.h
KDTREE
Definition:
kdtree.h:49
sample::SampleCount
unsigned SampleCount
Definition:
cluster.h:35
CLUSTERER::ParamDesc
PARAM_DESC * ParamDesc
Definition:
cluster.h:88
CLUSTERCONFIG::Independence
FLOAT32 Independence
Definition:
cluster.h:53
sample
Definition:
cluster.h:32
CLUSTERER::NumChar
inT32 NumChar
Definition:
cluster.h:93
FLOATUNION::Elliptical
FLOAT32 * Elliptical
Definition:
cluster.h:64
PROTOTYPE::Variance
FLOATUNION Variance
Definition:
cluster.h:81
CLUSTER
struct sample CLUSTER
inT32
int inT32
Definition:
host.h:102
MakeSample
SAMPLE * MakeSample(CLUSTERER *Clusterer, const FLOAT32 *Feature, inT32 CharID)
Definition:
cluster.cpp:450
FLOAT32
float FLOAT32
Definition:
host.h:111
SAMPLELIST
Definition:
cluster.h:98
DISTRIBUTION
DISTRIBUTION
Definition:
cluster.h:58
FLOAT64
double FLOAT64
Definition:
host.h:112
Config
CLUSTERCONFIG Config
Definition:
commontraining.cpp:53
SAMPLELIST::NumSamples
inT32 NumSamples
Definition:
cluster.h:99
MAXBUCKETS
#define MAXBUCKETS
Definition:
cluster.h:27
BUCKETS
Definition:
cluster.cpp:174
mixed
Definition:
cluster.h:45
FLOATUNION
Definition:
cluster.h:62
SAMPLELIST::MaxNumSamples
inT32 MaxNumSamples
Definition:
cluster.h:100
CLUSTERER::SampleSize
inT16 SampleSize
Definition:
cluster.h:87
CLUSTERER::Root
CLUSTER * Root
Definition:
cluster.h:91
FreeClusterer
void FreeClusterer(CLUSTERER *Clusterer)
Definition:
cluster.cpp:532
StandardDeviation
FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension)
Definition:
cluster.cpp:653
CLUSTERCONFIG::ProtoStyle
PROTOSTYLE ProtoStyle
Definition:
cluster.h:49
SAMPLE
CLUSTER SAMPLE
Definition:
cluster.h:42
sample::Mean
FLOAT32 Mean[1]
Definition:
cluster.h:39
D_random
Definition:
cluster.h:59
PROTOTYPE::LogMagnitude
FLOAT32 LogMagnitude
Definition:
cluster.h:80
PROTOSTYLE
PROTOSTYLE
Definition:
cluster.h:44
PROTOTYPE::Weight
FLOATUNION Weight
Definition:
cluster.h:83
FreePrototype
void FreePrototype(void *arg)
Definition:
cluster.cpp:575
sample::Right
struct sample * Right
Definition:
cluster.h:37
MINBUCKETS
#define MINBUCKETS
Definition:
cluster.h:26
PARAM_DESC
Definition:
ocrfeatures.h:45
DISTRIBUTION_COUNT
Definition:
cluster.h:59
MergeClusters
inT32 MergeClusters(inT16 N, register PARAM_DESC ParamDesc[], register inT32 n1, register inT32 n2, register FLOAT32 m[], register FLOAT32 m1[], register FLOAT32 m2[])
uinT16
unsigned short uinT16
Definition:
host.h:101
CLUSTERER::ProtoList
LIST ProtoList
Definition:
cluster.h:92
CLUSTERCONFIG::MaxIllegal
FLOAT32 MaxIllegal
Definition:
cluster.h:51
CLUSTERER::NumberOfSamples
inT32 NumberOfSamples
Definition:
cluster.h:89
MakeClusterer
CLUSTERER * MakeClusterer(inT16 SampleSize, const PARAM_DESC ParamDesc[])
Definition:
cluster.cpp:395
inT16
short inT16
Definition:
host.h:100
PROTOTYPE::Cluster
CLUSTER * Cluster
Definition:
cluster.h:76
ClusterSamples
LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
Definition:
cluster.cpp:504
CLUSTERCONFIG::MinSamples
FLOAT32 MinSamples
Definition:
cluster.h:50
FreeProtoList
void FreeProtoList(LIST *ProtoList)
Definition:
cluster.cpp:560
list_rec
Definition:
oldlist.h:127
Mean
FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension)
Definition:
cluster.cpp:639
PROTOTYPE::Magnitude
FLOATUNION Magnitude
Definition:
cluster.h:82
sample::Clustered
unsigned Clustered
Definition:
cluster.h:33
PROTOTYPE::Mean
FLOAT32 * Mean
Definition:
cluster.h:78
spherical
Definition:
cluster.h:45
sample::Left
struct sample * Left
Definition:
cluster.h:36
CLUSTERCONFIG::MagicSamples
int MagicSamples
Definition:
cluster.h:55
NextSample
CLUSTER * NextSample(LIST *SearchState)
Definition:
cluster.cpp:614
uniform
Definition:
cluster.h:59
oldlist.h
elliptical
Definition:
cluster.h:45
CLUSTERER::KDTree
KDTREE * KDTree
Definition:
cluster.h:90
sample::CharID
inT32 CharID
Definition:
cluster.h:38
automatic
Definition:
cluster.h:45
normal
Definition:
cluster.h:59
home
abuild
rpmbuild
BUILD
tesseract-ocr-3.02.02
classify
cluster.h
Generated on Mon Mar 23 2015 22:22:37 for Tesseract by
1.8.5