|
tesseract 3.04.01
|
00001 /********************************************************************** 00002 * File: cube_object.cpp 00003 * Description: Implementation of the Cube Object Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2007 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include <math.h> 00021 #include "cube_object.h" 00022 #include "cube_utils.h" 00023 #include "word_list_lang_model.h" 00024 00025 namespace tesseract { 00026 CubeObject::CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp) { 00027 Init(); 00028 char_samp_ = char_samp; 00029 cntxt_ = cntxt; 00030 } 00031 00032 CubeObject::CubeObject(CubeRecoContext *cntxt, Pix *pix, 00033 int left, int top, int wid, int hgt) { 00034 Init(); 00035 char_samp_ = CubeUtils::CharSampleFromPix(pix, left, top, wid, hgt); 00036 own_char_samp_ = true; 00037 cntxt_ = cntxt; 00038 } 00039 00040 // Data member initialization function 00041 void CubeObject::Init() { 00042 char_samp_ = NULL; 00043 own_char_samp_ = false; 00044 alt_list_ = NULL; 00045 srch_obj_ = NULL; 00046 deslanted_alt_list_ = NULL; 00047 deslanted_srch_obj_ = NULL; 00048 deslanted_ = false; 00049 deslanted_char_samp_ = NULL; 00050 beam_obj_ = NULL; 00051 deslanted_beam_obj_ = NULL; 00052 cntxt_ = NULL; 00053 } 00054 00055 // Cleanup function 00056 void CubeObject::Cleanup() { 00057 if (alt_list_ != NULL) { 00058 delete alt_list_; 00059 alt_list_ = NULL; 00060 } 00061 00062 if (deslanted_alt_list_ != NULL) { 00063 delete deslanted_alt_list_; 00064 deslanted_alt_list_ = NULL; 00065 } 00066 } 00067 00068 CubeObject::~CubeObject() { 00069 if (char_samp_ != NULL && own_char_samp_ == true) { 00070 delete char_samp_; 00071 char_samp_ = NULL; 00072 } 00073 00074 if (srch_obj_ != NULL) { 00075 delete srch_obj_; 00076 srch_obj_ = NULL; 00077 } 00078 00079 if (deslanted_srch_obj_ != NULL) { 00080 delete deslanted_srch_obj_; 00081 deslanted_srch_obj_ = NULL; 00082 } 00083 00084 if (beam_obj_ != NULL) { 00085 delete beam_obj_; 00086 beam_obj_ = NULL; 00087 } 00088 00089 if (deslanted_beam_obj_ != NULL) { 00090 delete deslanted_beam_obj_; 00091 deslanted_beam_obj_ = NULL; 00092 } 00093 00094 if (deslanted_char_samp_ != NULL) { 00095 delete deslanted_char_samp_; 00096 deslanted_char_samp_ = NULL; 00097 } 00098 00099 Cleanup(); 00100 } 00101 00108 WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) { 00109 if (char_samp_ == NULL) { 00110 return NULL; 00111 } 00112 00113 // clear alt lists 00114 Cleanup(); 00115 00116 // no specified language model, use the one in the reco context 00117 if (lang_mod == NULL) { 00118 lang_mod = cntxt_->LangMod(); 00119 } 00120 00121 // normalize if necessary 00122 if (cntxt_->SizeNormalization()) { 00123 Normalize(); 00124 } 00125 00126 // assume not de-slanted by default 00127 deslanted_ = false; 00128 00129 // create a beam search object 00130 if (beam_obj_ == NULL) { 00131 beam_obj_ = new BeamSearch(cntxt_, word_mode); 00132 if (beam_obj_ == NULL) { 00133 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct " 00134 "BeamSearch\n"); 00135 return NULL; 00136 } 00137 } 00138 00139 // create a cube search object 00140 if (srch_obj_ == NULL) { 00141 srch_obj_ = new CubeSearchObject(cntxt_, char_samp_); 00142 if (srch_obj_ == NULL) { 00143 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct " 00144 "CubeSearchObject\n"); 00145 return NULL; 00146 } 00147 } 00148 00149 // run a beam search against the tesslang model 00150 alt_list_ = beam_obj_->Search(srch_obj_, lang_mod); 00151 00152 // deslant (if supported by language) and re-reco if probability is low enough 00153 if (cntxt_->HasItalics() == true && 00154 (alt_list_ == NULL || alt_list_->AltCount() < 1 || 00155 alt_list_->AltCost(0) > CubeUtils::Prob2Cost(kMinProbSkipDeslanted))) { 00156 00157 if (deslanted_beam_obj_ == NULL) { 00158 deslanted_beam_obj_ = new BeamSearch(cntxt_); 00159 if (deslanted_beam_obj_ == NULL) { 00160 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not " 00161 "construct deslanted BeamSearch\n"); 00162 return NULL; 00163 } 00164 } 00165 00166 if (deslanted_srch_obj_ == NULL) { 00167 deslanted_char_samp_ = char_samp_->Clone(); 00168 if (deslanted_char_samp_ == NULL) { 00169 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not " 00170 "construct deslanted CharSamp\n"); 00171 return NULL; 00172 } 00173 00174 if (deslanted_char_samp_->Deslant() == false) { 00175 return NULL; 00176 } 00177 00178 deslanted_srch_obj_ = new CubeSearchObject(cntxt_, deslanted_char_samp_); 00179 if (deslanted_srch_obj_ == NULL) { 00180 fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not " 00181 "construct deslanted CubeSearchObject\n"); 00182 return NULL; 00183 } 00184 } 00185 00186 // run a beam search against the tesslang model 00187 deslanted_alt_list_ = deslanted_beam_obj_->Search(deslanted_srch_obj_, 00188 lang_mod); 00189 // should we use de-slanted altlist? 00190 if (deslanted_alt_list_ != NULL && deslanted_alt_list_->AltCount() > 0) { 00191 if (alt_list_ == NULL || alt_list_->AltCount() < 1 || 00192 deslanted_alt_list_->AltCost(0) < alt_list_->AltCost(0)) { 00193 deslanted_ = true; 00194 return deslanted_alt_list_; 00195 } 00196 } 00197 } 00198 00199 return alt_list_; 00200 } 00201 00205 WordAltList *CubeObject::RecognizeWord(LangModel *lang_mod) { 00206 return Recognize(lang_mod, true); 00207 } 00208 00212 WordAltList *CubeObject::RecognizePhrase(LangModel *lang_mod) { 00213 return Recognize(lang_mod, false); 00214 } 00215 00220 int CubeObject::WordCost(const char *str) { 00221 WordListLangModel *lang_mod = new WordListLangModel(cntxt_); 00222 if (lang_mod == NULL) { 00223 return WORST_COST; 00224 } 00225 00226 if (lang_mod->AddString(str) == false) { 00227 delete lang_mod; 00228 return WORST_COST; 00229 } 00230 00231 // run a beam search against the single string wordlist model 00232 WordAltList *alt_list = RecognizeWord(lang_mod); 00233 delete lang_mod; 00234 00235 int cost = WORST_COST; 00236 if (alt_list != NULL) { 00237 if (alt_list->AltCount() > 0) { 00238 cost = alt_list->AltCost(0); 00239 } 00240 } 00241 00242 return cost; 00243 } 00244 00245 // Recognizes a single character and returns the list of results. 00246 CharAltList *CubeObject::RecognizeChar() { 00247 if (char_samp_ == NULL) return NULL; 00248 CharAltList* alt_list = NULL; 00249 CharClassifier *char_classifier = cntxt_->Classifier(); 00250 ASSERT_HOST(char_classifier != NULL); 00251 alt_list = char_classifier->Classify(char_samp_); 00252 return alt_list; 00253 } 00254 00255 // Normalize the input word bitmap to have a minimum aspect ratio 00256 bool CubeObject::Normalize() { 00257 // create a cube search object 00258 CubeSearchObject *srch_obj = new CubeSearchObject(cntxt_, char_samp_); 00259 if (srch_obj == NULL) { 00260 return false; 00261 } 00262 // Perform over-segmentation 00263 int seg_cnt = srch_obj->SegPtCnt(); 00264 // Only perform normalization if segment count is large enough 00265 if (seg_cnt < kMinNormalizationSegmentCnt) { 00266 delete srch_obj; 00267 return true; 00268 } 00269 // compute the mean AR of the segments 00270 double ar_mean = 0.0; 00271 for (int seg_idx = 0; seg_idx <= seg_cnt; seg_idx++) { 00272 CharSamp *seg_samp = srch_obj->CharSample(seg_idx - 1, seg_idx); 00273 if (seg_samp != NULL && seg_samp->Width() > 0) { 00274 ar_mean += (1.0 * seg_samp->Height() / seg_samp->Width()); 00275 } 00276 } 00277 ar_mean /= (seg_cnt + 1); 00278 // perform normalization if segment AR is too high 00279 if (ar_mean > kMinNormalizationAspectRatio) { 00280 // scale down the image in the y-direction to attain AR 00281 CharSamp *new_samp = char_samp_->Scale(char_samp_->Width(), 00282 2.0 * char_samp_->Height() / ar_mean, 00283 false); 00284 if (new_samp != NULL) { 00285 // free existing char samp if owned 00286 if (own_char_samp_) { 00287 delete char_samp_; 00288 } 00289 // update with new scaled charsamp and set ownership flag 00290 char_samp_ = new_samp; 00291 own_char_samp_ = true; 00292 } 00293 } 00294 delete srch_obj; 00295 return true; 00296 } 00297 }