tesseract
3.05.01
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
cluster.h
Go to the documentation of this file.
1
/******************************************************************************
2
** Filename: cluster.h
3
** Purpose: Definition of feature space clustering routines
4
** Author: Dan Johnson
5
** History: 5/29/89, DSJ, Created.
6
**
7
** (c) Copyright Hewlett-Packard Company, 1988.
8
** Licensed under the Apache License, Version 2.0 (the "License");
9
** you may not use this file except in compliance with the License.
10
** You may obtain a copy of the License at
11
** http://www.apache.org/licenses/LICENSE-2.0
12
** Unless required by applicable law or agreed to in writing, software
13
** distributed under the License is distributed on an "AS IS" BASIS,
14
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
** See the License for the specific language governing permissions and
16
** limitations under the License.
17
******************************************************************************/
18
#ifndef CLUSTER_H
19
#define CLUSTER_H
20
21
#include "
kdtree.h
"
22
#include "
oldlist.h
"
23
24
// Forward declaration only: the full definition of BUCKETS is not part of
// this header, so code that includes it may only handle BUCKETS by pointer.
struct BUCKETS;
25
26
// Lower end of the bucket-count range; together with MAXBUCKETS it sizes the
// second dimension of CLUSTERER::bucket_cache (MAXBUCKETS + 1 - MINBUCKETS).
#define MINBUCKETS 5
27
// Upper end of the bucket-count range; together with MINBUCKETS it sizes the
// second dimension of CLUSTERER::bucket_cache (MAXBUCKETS + 1 - MINBUCKETS).
#define MAXBUCKETS 39
28
29
/*----------------------------------------------------------------------
30
Types
31
----------------------------------------------------------------------*/
32
typedef
struct
sample
{
33
unsigned
Clustered
:1;
// TRUE if included in a higher cluster
34
unsigned
Prototype
:1;
// TRUE if cluster represented by a proto
35
unsigned
SampleCount
:30;
// number of samples in this cluster
36
struct
sample
*
Left
;
// ptr to left sub-cluster
37
struct
sample
*
Right
;
// ptr to right sub-cluster
38
inT32
CharID
;
// identifier of char sample came from
39
FLOAT32
Mean
[1];
// mean of cluster - SampleSize floats
40
}
CLUSTER
;
41
42
// SAMPLE is an alias of CLUSTER: the same structure can be referred to as
// either a sample or a cluster.
typedef CLUSTER SAMPLE;
43
44
// Kinds of prototype. Enumerators take the default values 0..3 in the order
// listed.
typedef enum {
  spherical,
  elliptical,
  mixed,
  automatic
} PROTOSTYLE;
47
48
typedef
struct
{
// parameters to control clustering
49
PROTOSTYLE
ProtoStyle
;
// specifies types of protos to be made
50
FLOAT32
MinSamples
;
// min # of samples per proto - % of total
51
FLOAT32
MaxIllegal
;
// max percentage of samples in a cluster which have
52
// more than 1 feature in that cluster
53
FLOAT32
Independence
;
// desired independence between dimensions
54
FLOAT64
Confidence
;
// desired confidence in prototypes created
55
int
MagicSamples
;
// Ideal number of samples in a cluster.
56
}
CLUSTERCONFIG
;
57
58
// Distribution kinds. DISTRIBUTION_COUNT is the last enumerator, so its value
// (3) equals the number of kinds listed before it; it is used as an array
// dimension by CLUSTERER::bucket_cache.
typedef enum {
  normal,
  uniform,
  D_random,
  DISTRIBUTION_COUNT
} DISTRIBUTION;
61
62
typedef
union
{
63
FLOAT32
Spherical
;
64
FLOAT32
*
Elliptical
;
65
}
FLOATUNION
;
66
67
typedef
struct
{
68
unsigned
Significant:1;
// TRUE if prototype is significant
69
unsigned
Merged:1;
// Merged after clustering so do not output
70
// but kept for display purposes. If it has no
71
// samples then it was actually merged.
72
// Otherwise it matched an already significant
73
// cluster.
74
unsigned
Style:2;
// spherical, elliptical, or mixed
75
unsigned
NumSamples:28;
// number of samples in the cluster
76
CLUSTER
*
Cluster
;
// ptr to cluster which made prototype
77
DISTRIBUTION
*
Distrib
;
// different distribution for each dimension
78
FLOAT32
*
Mean
;
// prototype mean
79
FLOAT32
TotalMagnitude
;
// total magnitude over all dimensions
80
FLOAT32
LogMagnitude
;
// log base e of TotalMagnitude
81
FLOATUNION
Variance
;
// prototype variance
82
FLOATUNION
Magnitude
;
// magnitude of density function
83
FLOATUNION
Weight
;
// weight of density function
84
}
PROTOTYPE
;
85
86
typedef
struct
{
87
inT16
SampleSize
;
// number of parameters per sample
88
PARAM_DESC
*
ParamDesc
;
// description of each parameter
89
inT32
NumberOfSamples
;
// total number of samples being clustered
90
KDTREE
*
KDTree
;
// for optimal nearest neighbor searching
91
CLUSTER
*
Root
;
// ptr to root cluster of cluster tree
92
LIST
ProtoList
;
// list of prototypes
93
inT32
NumChar
;
// # of characters represented by samples
94
// cache of reusable histograms by distribution type and number of buckets.
95
BUCKETS
* bucket_cache[
DISTRIBUTION_COUNT
][
MAXBUCKETS
+ 1 -
MINBUCKETS
];
96
}
CLUSTERER
;
97
98
typedef
struct
{
99
inT32
NumSamples
;
// number of samples in list
100
inT32
MaxNumSamples
;
// maximum size of list
101
SAMPLE
*Sample[1];
// array of ptrs to sample data structures
102
}
SAMPLELIST
;
103
104
// low level cluster tree analysis routines.
105
// Initializes the search state S from the cluster pointer C:
//   - C == NULL : S is set to NIL_LIST;
//   - otherwise : S is set to push(NIL_LIST, C).
// The macro is an expression whose value is the value assigned to S.
// S must be a modifiable lvalue. C is evaluated twice when it is non-NULL,
// so do not pass an argument with side effects.
// Both parameters are parenthesized at every use so that any argument
// expression groups as the caller intended.
#define InitSampleSearch(S,C) \
  (((C) == NULL) ? ((S) = NIL_LIST) : ((S) = push(NIL_LIST, (C))))
106
107
/*--------------------------------------------------------------------------
108
Public Function Prototypes
109
--------------------------------------------------------------------------*/
110
CLUSTERER
TESS_API
*
MakeClusterer
(
inT16
SampleSize,
const
PARAM_DESC
ParamDesc[]);
111
112
SAMPLE
TESS_API
*
MakeSample
(
CLUSTERER
* Clusterer,
const
FLOAT32
* Feature,
inT32
CharID);
113
114
LIST
ClusterSamples
(
CLUSTERER
*Clusterer,
CLUSTERCONFIG
*
Config
);
115
116
void
TESS_API
FreeClusterer
(
CLUSTERER
*Clusterer);
117
118
void
TESS_API
FreeProtoList
(
LIST
*ProtoList);
119
120
void
FreePrototype
(
void
*arg);
// PROTOTYPE *Prototype);
121
122
CLUSTER
*
NextSample
(
LIST
*SearchState);
123
124
FLOAT32
Mean
(
PROTOTYPE
*Proto,
uinT16
Dimension);
125
126
FLOAT32
StandardDeviation
(
PROTOTYPE
*Proto,
uinT16
Dimension);
127
128
inT32
TESS_API
MergeClusters
(
inT16
N,
PARAM_DESC
ParamDesc[],
inT32
n1,
inT32
n2,
129
FLOAT32
m[],
FLOAT32
m1[],
FLOAT32
m2[]);
130
131
//--------------Global Data Definitions and Declarations---------------------------
132
// define errors that can be trapped
133
// Numeric code for one of the trappable errors of this module.
#define ALREADYCLUSTERED 4000
134
#endif
FreePrototype
void FreePrototype(void *arg)
Definition:
cluster.cpp:588
StandardDeviation
FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension)
Definition:
cluster.cpp:663
CLUSTERCONFIG
Definition:
cluster.h:48
mixed
Definition:
cluster.h:45
DISTRIBUTION
DISTRIBUTION
Definition:
cluster.h:58
CLUSTERER::NumChar
inT32 NumChar
Definition:
cluster.h:93
PARAM_DESC
Definition:
ocrfeatures.h:46
CLUSTERCONFIG::Independence
FLOAT32 Independence
Definition:
cluster.h:53
FLOATUNION
Definition:
cluster.h:62
ClusterSamples
LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
Definition:
cluster.cpp:513
sample
Definition:
cluster.h:32
CLUSTERCONFIG::MaxIllegal
FLOAT32 MaxIllegal
Definition:
cluster.h:51
KDTREE
Definition:
kdtree.h:49
CLUSTERCONFIG::MinSamples
FLOAT32 MinSamples
Definition:
cluster.h:50
CLUSTERER::ParamDesc
PARAM_DESC * ParamDesc
Definition:
cluster.h:88
MakeClusterer
CLUSTERER * MakeClusterer(inT16 SampleSize, const PARAM_DESC ParamDesc[])
Definition:
cluster.cpp:400
PROTOTYPE::LogMagnitude
FLOAT32 LogMagnitude
Definition:
cluster.h:80
CLUSTERER
Definition:
cluster.h:86
CLUSTERCONFIG::MagicSamples
int MagicSamples
Definition:
cluster.h:55
CLUSTERCONFIG::Confidence
FLOAT64 Confidence
Definition:
cluster.h:54
inT32
int inT32
Definition:
host.h:102
CLUSTERER::ProtoList
LIST ProtoList
Definition:
cluster.h:92
sample::CharID
inT32 CharID
Definition:
cluster.h:38
MakeSample
SAMPLE * MakeSample(CLUSTERER *Clusterer, const FLOAT32 *Feature, inT32 CharID)
Definition:
cluster.cpp:456
PROTOTYPE::Weight
FLOATUNION Weight
Definition:
cluster.h:83
NextSample
CLUSTER * NextSample(LIST *SearchState)
Definition:
cluster.cpp:626
Config
CLUSTERCONFIG Config
Definition:
commontraining.cpp:50
FLOAT32
float FLOAT32
Definition:
host.h:111
PROTOTYPE::Mean
FLOAT32 * Mean
Definition:
cluster.h:78
spherical
Definition:
cluster.h:45
DISTRIBUTION_COUNT
Definition:
cluster.h:59
PROTOTYPE::Cluster
CLUSTER * Cluster
Definition:
cluster.h:76
BUCKETS
Definition:
cluster.cpp:178
uinT16
unsigned short uinT16
Definition:
host.h:101
sample::Mean
FLOAT32 Mean[1]
Definition:
cluster.h:39
automatic
Definition:
cluster.h:45
FreeProtoList
void FreeProtoList(LIST *ProtoList)
Definition:
cluster.cpp:574
PROTOTYPE::Variance
FLOATUNION Variance
Definition:
cluster.h:81
MINBUCKETS
#define MINBUCKETS
Definition:
cluster.h:26
CLUSTERER::SampleSize
inT16 SampleSize
Definition:
cluster.h:87
MergeClusters
inT32 MergeClusters(inT16 N, register PARAM_DESC ParamDesc[], register inT32 n1, register inT32 n2, register FLOAT32 m[], register FLOAT32 m1[], register FLOAT32 m2[])
inT16
short inT16
Definition:
host.h:100
SAMPLELIST
Definition:
cluster.h:98
PROTOTYPE::TotalMagnitude
FLOAT32 TotalMagnitude
Definition:
cluster.h:79
D_random
Definition:
cluster.h:59
sample::Right
struct sample * Right
Definition:
cluster.h:37
SAMPLELIST::MaxNumSamples
inT32 MaxNumSamples
Definition:
cluster.h:100
SAMPLELIST::NumSamples
inT32 NumSamples
Definition:
cluster.h:99
sample::Prototype
unsigned Prototype
Definition:
cluster.h:34
normal
Definition:
cluster.h:59
FLOATUNION::Elliptical
FLOAT32 * Elliptical
Definition:
cluster.h:64
PROTOTYPE
Definition:
cluster.h:67
elliptical
Definition:
cluster.h:45
FLOAT64
double FLOAT64
Definition:
host.h:112
kdtree.h
FLOATUNION::Spherical
FLOAT32 Spherical
Definition:
cluster.h:63
Mean
FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension)
Definition:
cluster.cpp:650
CLUSTER
struct sample CLUSTER
CLUSTERER::KDTree
KDTREE * KDTree
Definition:
cluster.h:90
list_rec
Definition:
oldlist.h:127
sample::Clustered
unsigned Clustered
Definition:
cluster.h:33
oldlist.h
CLUSTERCONFIG::ProtoStyle
PROTOSTYLE ProtoStyle
Definition:
cluster.h:49
PROTOSTYLE
PROTOSTYLE
Definition:
cluster.h:44
PROTOTYPE::Distrib
DISTRIBUTION * Distrib
Definition:
cluster.h:77
FreeClusterer
void FreeClusterer(CLUSTERER *Clusterer)
Definition:
cluster.cpp:547
sample::SampleCount
unsigned SampleCount
Definition:
cluster.h:35
SAMPLE
CLUSTER SAMPLE
Definition:
cluster.h:42
uniform
Definition:
cluster.h:59
CLUSTERER::Root
CLUSTER * Root
Definition:
cluster.h:91
MAXBUCKETS
#define MAXBUCKETS
Definition:
cluster.h:27
CLUSTERER::NumberOfSamples
inT32 NumberOfSamples
Definition:
cluster.h:89
PROTOTYPE::Magnitude
FLOATUNION Magnitude
Definition:
cluster.h:82
TESS_API
#define TESS_API
Definition:
platform.h:81
sample::Left
struct sample * Left
Definition:
cluster.h:36
classify
cluster.h
Generated by
1.8.5