|
tesseract 3.04.01
|
00001 /****************************************************************************** 00002 ** Filename: outfeat.c 00003 ** Purpose: Definition of outline-features. 00004 ** Author: Dan Johnson 00005 ** History: 11/13/90, DSJ, Created. 00006 ** 00007 ** (c) Copyright Hewlett-Packard Company, 1988. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 ******************************************************************************/ 00018 /*---------------------------------------------------------------------------- 00019 Include Files and Type Defines 00020 ----------------------------------------------------------------------------*/ 00021 #include "outfeat.h" 00022 00023 #include "classify.h" 00024 #include "efio.h" 00025 #include "featdefs.h" 00026 #include "mfoutline.h" 00027 #include "ocrfeatures.h" 00028 00029 #include <stdio.h> 00030 00031 /*---------------------------------------------------------------------------- 00032 Public Code 00033 ----------------------------------------------------------------------------*/ 00034 /*---------------------------------------------------------------------------*/ 00035 namespace tesseract { 00047 FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) { 00048 LIST Outlines; 00049 LIST RemainingOutlines; 00050 MFOUTLINE Outline; 00051 FEATURE_SET FeatureSet; 00052 FLOAT32 XScale, YScale; 00053 00054 FeatureSet = NewFeatureSet (MAX_OUTLINE_FEATURES); 00055 if (Blob == NULL) 00056 return (FeatureSet); 00057 00058 Outlines = ConvertBlob (Blob); 00059 00060 NormalizeOutlines(Outlines, &XScale, &YScale); 00061 RemainingOutlines = Outlines; 00062 iterate(RemainingOutlines) { 00063 Outline = (MFOUTLINE) first_node (RemainingOutlines); 00064 ConvertToOutlineFeatures(Outline, FeatureSet); 00065 } 00066 if (classify_norm_method == baseline) 00067 NormalizeOutlineX(FeatureSet); 00068 FreeOutlines(Outlines); 00069 return (FeatureSet); 00070 } /* ExtractOutlineFeatures */ 00071 } // namespace tesseract 00072 00073 /*---------------------------------------------------------------------------- 00074 Private Code 00075 ----------------------------------------------------------------------------*/ 00076 /*---------------------------------------------------------------------------*/ 00093 void AddOutlineFeatureToSet(FPOINT *Start, 00094 FPOINT *End, 00095 FEATURE_SET FeatureSet) { 00096 FEATURE Feature; 00097 00098 Feature = NewFeature(&OutlineFeatDesc); 00099 Feature->Params[OutlineFeatDir] = NormalizedAngleFrom(Start, End, 1.0); 00100 Feature->Params[OutlineFeatX] = AverageOf(Start->x, End->x); 00101 Feature->Params[OutlineFeatY] = AverageOf(Start->y, End->y); 00102 Feature->Params[OutlineFeatLength] = DistanceBetween(*Start, *End); 00103 AddFeature(FeatureSet, Feature); 00104 00105 } /* AddOutlineFeatureToSet */ 00106 00107 00108 /*---------------------------------------------------------------------------*/ 00122 void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) { 00123 MFOUTLINE Next; 00124 MFOUTLINE First; 00125 FPOINT FeatureStart; 00126 FPOINT FeatureEnd; 00127 00128 if (DegenerateOutline (Outline)) 00129 return; 00130 00131 First = Outline; 00132 Next = First; 00133 do { 00134 FeatureStart = PointAt(Next)->Point; 00135 Next = NextPointAfter(Next); 00136 00137 /* note that an edge is hidden if the ending point of the edge is 00138 marked as hidden. This situation happens because the order of 00139 the outlines is reversed when they are converted from the old 00140 format. In the old format, a hidden edge is marked by the 00141 starting point for that edge. */ 00142 if (!PointAt(Next)->Hidden) { 00143 FeatureEnd = PointAt(Next)->Point; 00144 AddOutlineFeatureToSet(&FeatureStart, &FeatureEnd, FeatureSet); 00145 } 00146 } 00147 while (Next != First); 00148 } /* ConvertToOutlineFeatures */ 00149 00150 00151 /*---------------------------------------------------------------------------*/ 00163 void NormalizeOutlineX(FEATURE_SET FeatureSet) { 00164 int i; 00165 FEATURE Feature; 00166 FLOAT32 Length; 00167 FLOAT32 TotalX = 0.0; 00168 FLOAT32 TotalWeight = 0.0; 00169 FLOAT32 Origin; 00170 00171 if (FeatureSet->NumFeatures <= 0) 00172 return; 00173 00174 for (i = 0; i < FeatureSet->NumFeatures; i++) { 00175 Feature = FeatureSet->Features[i]; 00176 Length = Feature->Params[OutlineFeatLength]; 00177 TotalX += Feature->Params[OutlineFeatX] * Length; 00178 TotalWeight += Length; 00179 } 00180 Origin = TotalX / TotalWeight; 00181 00182 for (i = 0; i < FeatureSet->NumFeatures; i++) { 00183 Feature = FeatureSet->Features[i]; 00184 Feature->Params[OutlineFeatX] -= Origin; 00185 } 00186 } /* NormalizeOutlineX */