|
tesseract 3.04.01
|
00001 /********************************************************************** 00002 * File: cube_reco_context.cpp 00003 * Description: Implementation of the Cube Recognition Context Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2007 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include <string> 00021 #include <limits.h> 00022 00023 #include "cube_reco_context.h" 00024 00025 #include "classifier_factory.h" 00026 #include "cube_tuning_params.h" 00027 #include "dict.h" 00028 #include "feature_bmp.h" 00029 #include "tessdatamanager.h" 00030 #include "tesseractclass.h" 00031 #include "tess_lang_model.h" 00032 00033 namespace tesseract { 00034 00042 CubeRecoContext::CubeRecoContext(Tesseract *tess_obj) { 00043 tess_obj_ = tess_obj; 00044 lang_ = ""; 00045 loaded_ = false; 00046 lang_mod_ = NULL; 00047 params_ = NULL; 00048 char_classifier_ = NULL; 00049 char_set_ = NULL; 00050 word_size_model_ = NULL; 00051 char_bigrams_ = NULL; 00052 word_unigrams_ = NULL; 00053 noisy_input_ = false; 00054 size_normalization_ = false; 00055 } 00056 00057 CubeRecoContext::~CubeRecoContext() { 00058 if (char_classifier_ != NULL) { 00059 delete char_classifier_; 00060 char_classifier_ = NULL; 00061 } 00062 00063 if (word_size_model_ != NULL) { 00064 delete word_size_model_; 00065 word_size_model_ = NULL; 00066 } 00067 00068 if (char_set_ != NULL) { 00069 delete char_set_; 00070 char_set_ = NULL; 00071 } 00072 00073 if (char_bigrams_ != NULL) { 00074 delete char_bigrams_; 00075 char_bigrams_ = NULL; 00076 } 00077 00078 if (word_unigrams_ != NULL) { 00079 delete word_unigrams_; 00080 word_unigrams_ = NULL; 00081 } 00082 00083 if (lang_mod_ != NULL) { 00084 delete lang_mod_; 00085 lang_mod_ = NULL; 00086 } 00087 00088 if (params_ != NULL) { 00089 delete params_; 00090 params_ = NULL; 00091 } 00092 } 00093 00098 bool CubeRecoContext::GetDataFilePath(string *path) const { 00099 *path = tess_obj_->datadir.string(); 00100 return true; 00101 } 00102 00115 bool CubeRecoContext::Load(TessdataManager *tessdata_manager, 00116 UNICHARSET *tess_unicharset) { 00117 ASSERT_HOST(tess_obj_ != NULL); 00118 tess_unicharset_ = tess_unicharset; 00119 string data_file_path; 00120 00121 // Get the data file path. 00122 if (GetDataFilePath(&data_file_path) == false) { 00123 fprintf(stderr, "Unable to get data file path\n"); 00124 return false; 00125 } 00126 00127 // Get the language from the Tesseract object. 00128 lang_ = tess_obj_->lang.string(); 00129 00130 // Create the char set. 00131 if ((char_set_ = 00132 CharSet::Create(tessdata_manager, tess_unicharset)) == NULL) { 00133 fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load " 00134 "CharSet\n"); 00135 return false; 00136 } 00137 // Create the language model. 00138 string lm_file_name = data_file_path + lang_ + ".cube.lm"; 00139 string lm_params; 00140 if (!CubeUtils::ReadFileToString(lm_file_name, &lm_params)) { 00141 fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read cube " 00142 "language model params from %s\n", lm_file_name.c_str()); 00143 return false; 00144 } 00145 lang_mod_ = new TessLangModel(lm_params, data_file_path, 00146 tess_obj_->getDict().load_system_dawg, 00147 tessdata_manager, this); 00148 if (lang_mod_ == NULL) { 00149 fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to create " 00150 "TessLangModel\n"); 00151 return false; 00152 } 00153 00154 // Create the optional char bigrams object. 00155 char_bigrams_ = CharBigrams::Create(data_file_path, lang_); 00156 00157 // Create the optional word unigrams object. 00158 word_unigrams_ = WordUnigrams::Create(data_file_path, lang_); 00159 00160 // Create the optional size model. 00161 word_size_model_ = WordSizeModel::Create(data_file_path, lang_, 00162 char_set_, Contextual()); 00163 00164 // Load tuning params. 00165 params_ = CubeTuningParams::Create(data_file_path, lang_); 00166 if (params_ == NULL) { 00167 fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read " 00168 "CubeTuningParams from %s\n", data_file_path.c_str()); 00169 return false; 00170 } 00171 00172 // Create the char classifier. 00173 char_classifier_ = CharClassifierFactory::Create(data_file_path, lang_, 00174 lang_mod_, char_set_, 00175 params_); 00176 if (char_classifier_ == NULL) { 00177 fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load " 00178 "CharClassifierFactory object from %s\n", data_file_path.c_str()); 00179 return false; 00180 } 00181 00182 loaded_ = true; 00183 00184 return true; 00185 } 00186 00188 CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj, 00189 TessdataManager *tessdata_manager, 00190 UNICHARSET *tess_unicharset) { 00191 // create the object 00192 CubeRecoContext *cntxt = new CubeRecoContext(tess_obj); 00193 if (cntxt == NULL) { 00194 fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to create " 00195 "CubeRecoContext object\n"); 00196 return NULL; 00197 } 00198 // load the necessary components 00199 if (cntxt->Load(tessdata_manager, tess_unicharset) == false) { 00200 fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init " 00201 "CubeRecoContext object\n"); 00202 delete cntxt; 00203 return NULL; 00204 } 00205 // success 00206 return cntxt; 00207 } 00208 } // tesseract}