|
tesseract 3.04.01
|
00001 /****************************************************************************** 00002 ** Filename: picofeat.c 00003 ** Purpose: Definition of pico-features. 00004 ** Author: Dan Johnson 00005 ** History: 9/4/90, DSJ, Created. 00006 ** 00007 ** (c) Copyright Hewlett-Packard Company, 1988. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 ******************************************************************************/ 00018 /*---------------------------------------------------------------------------- 00019 Include Files and Type Defines 00020 ----------------------------------------------------------------------------*/ 00021 #include "picofeat.h" 00022 00023 #include "classify.h" 00024 #include "efio.h" 00025 #include "featdefs.h" 00026 #include "fpoint.h" 00027 #include "mfoutline.h" 00028 #include "ocrfeatures.h" 00029 #include "params.h" 00030 #include "trainingsample.h" 00031 00032 #include <math.h> 00033 #include <stdio.h> 00034 00035 /*--------------------------------------------------------------------------- 00036 Variables 00037 ----------------------------------------------------------------------------*/ 00038 00039 double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length"); 00040 00041 /*--------------------------------------------------------------------------- 00042 Private Function Prototypes 00043 ----------------------------------------------------------------------------*/ 00044 void ConvertSegmentToPicoFeat(FPOINT *Start, 00045 FPOINT *End, 00046 FEATURE_SET FeatureSet); 00047 00048 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet); 00049 00050 void NormalizePicoX(FEATURE_SET FeatureSet); 00051 00052 /*---------------------------------------------------------------------------- 00053 Public Code 00054 ----------------------------------------------------------------------------*/ 00055 /*---------------------------------------------------------------------------*/ 00056 namespace tesseract { 00067 FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) { 00068 LIST Outlines; 00069 LIST RemainingOutlines; 00070 MFOUTLINE Outline; 00071 FEATURE_SET FeatureSet; 00072 FLOAT32 XScale, YScale; 00073 00074 FeatureSet = NewFeatureSet(MAX_PICO_FEATURES); 00075 Outlines = ConvertBlob(Blob); 00076 NormalizeOutlines(Outlines, &XScale, &YScale); 00077 RemainingOutlines = Outlines; 00078 iterate(RemainingOutlines) { 00079 Outline = (MFOUTLINE) first_node (RemainingOutlines); 00080 ConvertToPicoFeatures2(Outline, FeatureSet); 00081 } 00082 if (classify_norm_method == baseline) 00083 NormalizePicoX(FeatureSet); 00084 FreeOutlines(Outlines); 00085 return (FeatureSet); 00086 00087 } /* ExtractPicoFeatures */ 00088 } // namespace tesseract 00089 00090 /*---------------------------------------------------------------------------- 00091 Private Code 00092 ----------------------------------------------------------------------------*/ 00093 /*---------------------------------------------------------------------------*/ 00109 void ConvertSegmentToPicoFeat(FPOINT *Start, 00110 FPOINT *End, 00111 FEATURE_SET FeatureSet) { 00112 FEATURE Feature; 00113 FLOAT32 Angle; 00114 FLOAT32 Length; 00115 int NumFeatures; 00116 FPOINT Center; 00117 FPOINT Delta; 00118 int i; 00119 00120 Angle = NormalizedAngleFrom (Start, End, 1.0); 00121 Length = DistanceBetween (*Start, *End); 00122 NumFeatures = (int) floor (Length / classify_pico_feature_length + 0.5); 00123 if (NumFeatures < 1) 00124 NumFeatures = 1; 00125 00126 /* compute vector for one pico feature */ 00127 Delta.x = XDelta (*Start, *End) / NumFeatures; 00128 Delta.y = YDelta (*Start, *End) / NumFeatures; 00129 00130 /* compute position of first pico feature */ 00131 Center.x = Start->x + Delta.x / 2.0; 00132 Center.y = Start->y + Delta.y / 2.0; 00133 00134 /* compute each pico feature in segment and add to feature set */ 00135 for (i = 0; i < NumFeatures; i++) { 00136 Feature = NewFeature (&PicoFeatDesc); 00137 Feature->Params[PicoFeatDir] = Angle; 00138 Feature->Params[PicoFeatX] = Center.x; 00139 Feature->Params[PicoFeatY] = Center.y; 00140 AddFeature(FeatureSet, Feature); 00141 00142 Center.x += Delta.x; 00143 Center.y += Delta.y; 00144 } 00145 } /* ConvertSegmentToPicoFeat */ 00146 00147 00148 /*---------------------------------------------------------------------------*/ 00163 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) { 00164 MFOUTLINE Next; 00165 MFOUTLINE First; 00166 MFOUTLINE Current; 00167 00168 if (DegenerateOutline(Outline)) 00169 return; 00170 00171 First = Outline; 00172 Current = First; 00173 Next = NextPointAfter(Current); 00174 do { 00175 /* note that an edge is hidden if the ending point of the edge is 00176 marked as hidden. This situation happens because the order of 00177 the outlines is reversed when they are converted from the old 00178 format. In the old format, a hidden edge is marked by the 00179 starting point for that edge. */ 00180 if (!(PointAt(Next)->Hidden)) 00181 ConvertSegmentToPicoFeat (&(PointAt(Current)->Point), 00182 &(PointAt(Next)->Point), FeatureSet); 00183 00184 Current = Next; 00185 Next = NextPointAfter(Current); 00186 } 00187 while (Current != First); 00188 00189 } /* ConvertToPicoFeatures2 */ 00190 00191 00192 /*---------------------------------------------------------------------------*/ 00204 void NormalizePicoX(FEATURE_SET FeatureSet) { 00205 int i; 00206 FEATURE Feature; 00207 FLOAT32 Origin = 0.0; 00208 00209 for (i = 0; i < FeatureSet->NumFeatures; i++) { 00210 Feature = FeatureSet->Features[i]; 00211 Origin += Feature->Params[PicoFeatX]; 00212 } 00213 Origin /= FeatureSet->NumFeatures; 00214 00215 for (i = 0; i < FeatureSet->NumFeatures; i++) { 00216 Feature = FeatureSet->Features[i]; 00217 Feature->Params[PicoFeatX] -= Origin; 00218 } 00219 } /* NormalizePicoX */ 00220 00221 namespace tesseract { 00222 /*---------------------------------------------------------------------------*/ 00230 FEATURE_SET Classify::ExtractIntCNFeatures( 00231 const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) { 00232 INT_FX_RESULT_STRUCT local_fx_info(fx_info); 00233 GenericVector<INT_FEATURE_STRUCT> bl_features; 00234 tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( 00235 blob, false, &local_fx_info, &bl_features); 00236 if (sample == NULL) return NULL; 00237 00238 int num_features = sample->num_features(); 00239 const INT_FEATURE_STRUCT* features = sample->features(); 00240 FEATURE_SET feature_set = NewFeatureSet(num_features); 00241 for (int f = 0; f < num_features; ++f) { 00242 FEATURE feature = NewFeature(&IntFeatDesc); 00243 00244 feature->Params[IntX] = features[f].X; 00245 feature->Params[IntY] = features[f].Y; 00246 feature->Params[IntDir] = features[f].Theta; 00247 AddFeature(feature_set, feature); 00248 } 00249 delete sample; 00250 00251 return feature_set; 00252 } /* ExtractIntCNFeatures */ 00253 00254 /*---------------------------------------------------------------------------*/ 00262 FEATURE_SET Classify::ExtractIntGeoFeatures( 00263 const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) { 00264 INT_FX_RESULT_STRUCT local_fx_info(fx_info); 00265 GenericVector<INT_FEATURE_STRUCT> bl_features; 00266 tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( 00267 blob, false, &local_fx_info, &bl_features); 00268 if (sample == NULL) return NULL; 00269 00270 FEATURE_SET feature_set = NewFeatureSet(1); 00271 FEATURE feature = NewFeature(&IntFeatDesc); 00272 00273 feature->Params[GeoBottom] = sample->geo_feature(GeoBottom); 00274 feature->Params[GeoTop] = sample->geo_feature(GeoTop); 00275 feature->Params[GeoWidth] = sample->geo_feature(GeoWidth); 00276 AddFeature(feature_set, feature); 00277 delete sample; 00278 00279 return feature_set; 00280 } /* ExtractIntGeoFeatures */ 00281 00282 } // namespace tesseract.