tesseract 3.04.01

ccmain/cube_reco_context.cpp

Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        cube_reco_context.cpp
00003  * Description: Implementation of the Cube Recognition Context Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include <string>
00021 #include <limits.h>
00022 
00023 #include "cube_reco_context.h"
00024 
00025 #include "classifier_factory.h"
00026 #include "cube_tuning_params.h"
00027 #include "dict.h"
00028 #include "feature_bmp.h"
00029 #include "tessdatamanager.h"
00030 #include "tesseractclass.h"
00031 #include "tess_lang_model.h"
00032 
00033 namespace tesseract {
00034 
00042 CubeRecoContext::CubeRecoContext(Tesseract *tess_obj) {
00043   tess_obj_ = tess_obj;
00044   lang_ = "";
00045   loaded_ = false;
00046   lang_mod_ = NULL;
00047   params_ = NULL;
00048   char_classifier_ = NULL;
00049   char_set_ = NULL;
00050   word_size_model_ = NULL;
00051   char_bigrams_ = NULL;
00052   word_unigrams_ = NULL;
00053   noisy_input_ = false;
00054   size_normalization_ = false;
00055 }
00056 
00057 CubeRecoContext::~CubeRecoContext() {
00058   if (char_classifier_ != NULL) {
00059     delete char_classifier_;
00060     char_classifier_ = NULL;
00061   }
00062 
00063   if (word_size_model_ != NULL) {
00064     delete word_size_model_;
00065     word_size_model_ = NULL;
00066   }
00067 
00068   if (char_set_ != NULL) {
00069     delete char_set_;
00070     char_set_ = NULL;
00071   }
00072 
00073   if (char_bigrams_ != NULL) {
00074     delete char_bigrams_;
00075     char_bigrams_ = NULL;
00076   }
00077 
00078   if (word_unigrams_ != NULL) {
00079     delete word_unigrams_;
00080     word_unigrams_ = NULL;
00081   }
00082 
00083   if (lang_mod_ != NULL) {
00084     delete lang_mod_;
00085     lang_mod_ = NULL;
00086   }
00087 
00088   if (params_ != NULL) {
00089     delete params_;
00090     params_ = NULL;
00091   }
00092 }
00093 
00098 bool CubeRecoContext::GetDataFilePath(string *path) const {
00099   *path = tess_obj_->datadir.string();
00100   return true;
00101 }
00102 
00115 bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
00116                            UNICHARSET *tess_unicharset) {
00117   ASSERT_HOST(tess_obj_ != NULL);
00118   tess_unicharset_ = tess_unicharset;
00119   string data_file_path;
00120 
00121   // Get the data file path.
00122   if (GetDataFilePath(&data_file_path) == false) {
00123     fprintf(stderr, "Unable to get data file path\n");
00124     return false;
00125   }
00126 
00127   // Get the language from the Tesseract object.
00128   lang_ = tess_obj_->lang.string();
00129 
00130   // Create the char set.
00131   if ((char_set_ =
00132        CharSet::Create(tessdata_manager, tess_unicharset)) == NULL) {
00133     fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
00134             "CharSet\n");
00135     return false;
00136   }
00137   // Create the language model.
00138   string lm_file_name = data_file_path + lang_ + ".cube.lm";
00139   string lm_params;
00140   if (!CubeUtils::ReadFileToString(lm_file_name, &lm_params)) {
00141     fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read cube "
00142             "language model params from %s\n", lm_file_name.c_str());
00143     return false;
00144   }
00145   lang_mod_ = new TessLangModel(lm_params, data_file_path,
00146                                 tess_obj_->getDict().load_system_dawg,
00147                                 tessdata_manager, this);
00148   if (lang_mod_ == NULL) {
00149     fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to create "
00150             "TessLangModel\n");
00151     return false;
00152   }
00153 
00154   // Create the optional char bigrams object.
00155   char_bigrams_ = CharBigrams::Create(data_file_path, lang_);
00156 
00157   // Create the optional word unigrams object.
00158   word_unigrams_ = WordUnigrams::Create(data_file_path, lang_);
00159 
00160   // Create the optional size model.
00161   word_size_model_ = WordSizeModel::Create(data_file_path, lang_,
00162     char_set_, Contextual());
00163 
00164   // Load tuning params.
00165   params_ = CubeTuningParams::Create(data_file_path, lang_);
00166   if (params_ == NULL) {
00167     fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read "
00168             "CubeTuningParams from %s\n", data_file_path.c_str());
00169     return false;
00170   }
00171 
00172   // Create the char classifier.
00173   char_classifier_ = CharClassifierFactory::Create(data_file_path, lang_,
00174                                                    lang_mod_, char_set_,
00175                                                    params_);
00176   if (char_classifier_ == NULL) {
00177     fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
00178             "CharClassifierFactory object from %s\n", data_file_path.c_str());
00179     return false;
00180   }
00181 
00182   loaded_ = true;
00183 
00184   return true;
00185 }
00186 
00188 CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj,
00189                                           TessdataManager *tessdata_manager,
00190                                           UNICHARSET *tess_unicharset) {
00191   // create the object
00192   CubeRecoContext *cntxt = new CubeRecoContext(tess_obj);
00193   if (cntxt == NULL) {
00194     fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to create "
00195             "CubeRecoContext object\n");
00196     return NULL;
00197   }
00198   // load the necessary components
00199   if (cntxt->Load(tessdata_manager, tess_unicharset) == false) {
00200     fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init "
00201             "CubeRecoContext object\n");
00202     delete cntxt;
00203     return NULL;
00204   }
00205   // success
00206   return cntxt;
00207 }
00208 }  // namespace tesseract
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines