tesseract 3.04.01

classify/picofeat.cpp

Go to the documentation of this file.
00001 /******************************************************************************
00002  **     Filename:    picofeat.cpp
00003  **     Purpose:     Definition of pico-features.
00004  **     Author:      Dan Johnson
00005  **     History:     9/4/90, DSJ, Created.
00006  **
00007  **     (c) Copyright Hewlett-Packard Company, 1988.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  ******************************************************************************/
00018 /*----------------------------------------------------------------------------
00019           Include Files and Type Defines
00020 ----------------------------------------------------------------------------*/
00021 #include "picofeat.h"
00022 
00023 #include "classify.h"
00024 #include "efio.h"
00025 #include "featdefs.h"
00026 #include "fpoint.h"
00027 #include "mfoutline.h"
00028 #include "ocrfeatures.h"
00029 #include "params.h"
00030 #include "trainingsample.h"
00031 
00032 #include <math.h>
00033 #include <stdio.h>
00034 
00035 /*---------------------------------------------------------------------------
00036           Variables
00037 ----------------------------------------------------------------------------*/
00038 
/* Pico-feature length parameter, declared with the value 0.05 and the
   description "Pico Feature Length".  ConvertSegmentToPicoFeat divides each
   segment's length by this value (rounding to nearest, minimum 1) to decide
   how many pico-features the segment yields. */
00039 double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length");
00040 
00041 /*---------------------------------------------------------------------------
00042           Private Function Prototypes
00043 ----------------------------------------------------------------------------*/
/* Appends to FeatureSet the pico-features covering the segment Start -> End. */
00044 void ConvertSegmentToPicoFeat(FPOINT *Start,
00045                               FPOINT *End,
00046                               FEATURE_SET FeatureSet);
00047 
/* Walks Outline and converts each edge whose end point is not hidden into
   pico-features appended to FeatureSet. */
00048 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet);
00049 
/* Subtracts the mean PicoFeatX value from every feature in FeatureSet. */
00050 void NormalizePicoX(FEATURE_SET FeatureSet);
00051 
00052 /*----------------------------------------------------------------------------
00053               Public Code
00054 ----------------------------------------------------------------------------*/
00055 /*---------------------------------------------------------------------------*/
00056 namespace tesseract {
00067 FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) {
00068   LIST Outlines;
00069   LIST RemainingOutlines;
00070   MFOUTLINE Outline;
00071   FEATURE_SET FeatureSet;
00072   FLOAT32 XScale, YScale;
00073 
00074   FeatureSet = NewFeatureSet(MAX_PICO_FEATURES);
00075   Outlines = ConvertBlob(Blob);
00076   NormalizeOutlines(Outlines, &XScale, &YScale);
00077   RemainingOutlines = Outlines;
00078   iterate(RemainingOutlines) {
00079     Outline = (MFOUTLINE) first_node (RemainingOutlines);
00080     ConvertToPicoFeatures2(Outline, FeatureSet);
00081   }
00082   if (classify_norm_method == baseline)
00083     NormalizePicoX(FeatureSet);
00084   FreeOutlines(Outlines);
00085   return (FeatureSet);
00086 
00087 }                                /* ExtractPicoFeatures */
00088 }  // namespace tesseract
00089 
00090 /*----------------------------------------------------------------------------
00091               Private Code
00092 ----------------------------------------------------------------------------*/
00093 /*---------------------------------------------------------------------------*/
00109 void ConvertSegmentToPicoFeat(FPOINT *Start,
00110                               FPOINT *End,
00111                               FEATURE_SET FeatureSet) {
00112   FEATURE Feature;
00113   FLOAT32 Angle;
00114   FLOAT32 Length;
00115   int NumFeatures;
00116   FPOINT Center;
00117   FPOINT Delta;
00118   int i;
00119 
00120   Angle = NormalizedAngleFrom (Start, End, 1.0);
00121   Length = DistanceBetween (*Start, *End);
00122   NumFeatures = (int) floor (Length / classify_pico_feature_length + 0.5);
00123   if (NumFeatures < 1)
00124     NumFeatures = 1;
00125 
00126   /* compute vector for one pico feature */
00127   Delta.x = XDelta (*Start, *End) / NumFeatures;
00128   Delta.y = YDelta (*Start, *End) / NumFeatures;
00129 
00130   /* compute position of first pico feature */
00131   Center.x = Start->x + Delta.x / 2.0;
00132   Center.y = Start->y + Delta.y / 2.0;
00133 
00134   /* compute each pico feature in segment and add to feature set */
00135   for (i = 0; i < NumFeatures; i++) {
00136     Feature = NewFeature (&PicoFeatDesc);
00137     Feature->Params[PicoFeatDir] = Angle;
00138     Feature->Params[PicoFeatX] = Center.x;
00139     Feature->Params[PicoFeatY] = Center.y;
00140     AddFeature(FeatureSet, Feature);
00141 
00142     Center.x += Delta.x;
00143     Center.y += Delta.y;
00144   }
00145 }                                /* ConvertSegmentToPicoFeat */
00146 
00147 
00148 /*---------------------------------------------------------------------------*/
00163 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
00164   MFOUTLINE Next;
00165   MFOUTLINE First;
00166   MFOUTLINE Current;
00167 
00168   if (DegenerateOutline(Outline))
00169     return;
00170 
00171   First = Outline;
00172   Current = First;
00173   Next = NextPointAfter(Current);
00174   do {
00175     /* note that an edge is hidden if the ending point of the edge is
00176        marked as hidden.  This situation happens because the order of
00177        the outlines is reversed when they are converted from the old
00178        format.  In the old format, a hidden edge is marked by the
00179        starting point for that edge. */
00180     if (!(PointAt(Next)->Hidden))
00181       ConvertSegmentToPicoFeat (&(PointAt(Current)->Point),
00182         &(PointAt(Next)->Point), FeatureSet);
00183 
00184     Current = Next;
00185     Next = NextPointAfter(Current);
00186   }
00187   while (Current != First);
00188 
00189 }                                /* ConvertToPicoFeatures2 */
00190 
00191 
00192 /*---------------------------------------------------------------------------*/
00204 void NormalizePicoX(FEATURE_SET FeatureSet) {
00205   int i;
00206   FEATURE Feature;
00207   FLOAT32 Origin = 0.0;
00208 
00209   for (i = 0; i < FeatureSet->NumFeatures; i++) {
00210     Feature = FeatureSet->Features[i];
00211     Origin += Feature->Params[PicoFeatX];
00212   }
00213   Origin /= FeatureSet->NumFeatures;
00214 
00215   for (i = 0; i < FeatureSet->NumFeatures; i++) {
00216     Feature = FeatureSet->Features[i];
00217     Feature->Params[PicoFeatX] -= Origin;
00218   }
00219 }                                /* NormalizePicoX */
00220 
00221 namespace tesseract {
00222 /*---------------------------------------------------------------------------*/
00230 FEATURE_SET Classify::ExtractIntCNFeatures(
00231     const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
00232   INT_FX_RESULT_STRUCT local_fx_info(fx_info);
00233   GenericVector<INT_FEATURE_STRUCT> bl_features;
00234   tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
00235       blob, false, &local_fx_info, &bl_features);
00236   if (sample == NULL) return NULL;
00237 
00238   int num_features = sample->num_features();
00239   const INT_FEATURE_STRUCT* features = sample->features();
00240   FEATURE_SET feature_set = NewFeatureSet(num_features);
00241   for (int f = 0; f < num_features; ++f) {
00242     FEATURE feature = NewFeature(&IntFeatDesc);
00243 
00244     feature->Params[IntX] = features[f].X;
00245     feature->Params[IntY] = features[f].Y;
00246     feature->Params[IntDir] = features[f].Theta;
00247     AddFeature(feature_set, feature);
00248   }
00249   delete sample;
00250 
00251   return feature_set;
00252 }                                /* ExtractIntCNFeatures */
00253 
00254 /*---------------------------------------------------------------------------*/
00262 FEATURE_SET Classify::ExtractIntGeoFeatures(
00263     const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
00264   INT_FX_RESULT_STRUCT local_fx_info(fx_info);
00265   GenericVector<INT_FEATURE_STRUCT> bl_features;
00266   tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
00267       blob, false, &local_fx_info, &bl_features);
00268   if (sample == NULL) return NULL;
00269 
00270   FEATURE_SET feature_set = NewFeatureSet(1);
00271   FEATURE feature = NewFeature(&IntFeatDesc);
00272 
00273   feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
00274   feature->Params[GeoTop] = sample->geo_feature(GeoTop);
00275   feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
00276   AddFeature(feature_set, feature);
00277   delete sample;
00278 
00279   return feature_set;
00280 }                                /* ExtractIntGeoFeatures */
00281 
00282 }  // namespace tesseract.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines