|
tesseract 3.04.01
|
00001 00002 // File: lm_state.h 00003 // Description: Structures and functionality for capturing the state of 00004 // segmentation search guided by the language model. 00005 // 00006 // Author: Rika Antonova 00007 // Created: Mon Jun 20 11:26:43 PST 2012 00008 // 00009 // (C) Copyright 2012, Google Inc. 00010 // Licensed under the Apache License, Version 2.0 (the "License"); 00011 // you may not use this file except in compliance with the License. 00012 // You may obtain a copy of the License at 00013 // http://www.apache.org/licenses/LICENSE-2.0 00014 // Unless required by applicable law or agreed to in writing, software 00015 // distributed under the License is distributed on an "AS IS" BASIS, 00016 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00017 // See the License for the specific language governing permissions and 00018 // limitations under the License. 00019 // 00021 00022 #ifndef TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_ 00023 #define TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_ 00024 00025 #include "associate.h" 00026 #include "elst.h" 00027 #include "dawg.h" 00028 #include "lm_consistency.h" 00029 #include "matrix.h" 00030 #include "ratngs.h" 00031 #include "stopper.h" 00032 #include "strngs.h" 00033 00034 namespace tesseract { 00035 00037 typedef unsigned char LanguageModelFlagsType; 00038 00057 00061 struct LanguageModelDawgInfo { 00062 LanguageModelDawgInfo(DawgPositionVector *a, PermuterType pt) : permuter(pt) { 00063 active_dawgs = new DawgPositionVector(*a); 00064 } 00065 ~LanguageModelDawgInfo() { 00066 delete active_dawgs; 00067 } 00068 DawgPositionVector *active_dawgs; 00069 PermuterType permuter; 00070 }; 00071 00074 struct LanguageModelNgramInfo { 00075 LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc) 00076 : context(c), context_unichar_step_len(l), pruned(p), ngram_cost(nc), 00077 ngram_and_classifier_cost(ncc) {} 00078 STRING context; //< context string 00081 int context_unichar_step_len; 00086 bool pruned; 00088 float ngram_cost; 00090 float ngram_and_classifier_cost; 00091 }; 00092 00095 struct ViterbiStateEntry : public ELIST_LINK { 00096 ViterbiStateEntry(ViterbiStateEntry *pe, 00097 BLOB_CHOICE *b, float c, float ol, 00098 const LMConsistencyInfo &ci, 00099 const AssociateStats &as, 00100 LanguageModelFlagsType tcf, 00101 LanguageModelDawgInfo *d, 00102 LanguageModelNgramInfo *n, 00103 const char *debug_uch) 00104 : cost(c), curr_b(b), parent_vse(pe), competing_vse(NULL), 00105 ratings_sum(b->rating()), 00106 min_certainty(b->certainty()), adapted(b->IsAdapted()), length(1), 00107 outline_length(ol), consistency_info(ci), associate_stats(as), 00108 top_choice_flags(tcf), dawg_info(d), ngram_info(n), 00109 updated(true) { 00110 debug_str = (debug_uch == NULL) ? NULL : new STRING(); 00111 if (pe != NULL) { 00112 ratings_sum += pe->ratings_sum; 00113 if (pe->min_certainty < min_certainty) { 00114 min_certainty = pe->min_certainty; 00115 } 00116 adapted += pe->adapted; 00117 length += pe->length; 00118 outline_length += pe->outline_length; 00119 if (debug_uch != NULL) *debug_str += *(pe->debug_str); 00120 } 00121 if (debug_str != NULL && debug_uch != NULL) *debug_str += debug_uch; 00122 } 00123 ~ViterbiStateEntry() { 00124 delete dawg_info; 00125 delete ngram_info; 00126 delete debug_str; 00127 } 00130 static int Compare(const void *e1, const void *e2) { 00131 const ViterbiStateEntry *ve1 = 00132 *reinterpret_cast<const ViterbiStateEntry * const *>(e1); 00133 const ViterbiStateEntry *ve2 = 00134 *reinterpret_cast<const ViterbiStateEntry * const *>(e2); 00135 return (ve1->cost < ve2->cost) ? -1 : 1; 00136 } 00137 inline bool Consistent() const { 00138 if (dawg_info != NULL && consistency_info.NumInconsistentCase() == 0) { 00139 return true; 00140 } 00141 return consistency_info.Consistent(); 00142 } 00145 bool HasAlnumChoice(const UNICHARSET& unicharset) { 00146 if (curr_b == NULL) return false; 00147 UNICHAR_ID unichar_id = curr_b->unichar_id(); 00148 if (unicharset.get_isalpha(unichar_id) || 00149 unicharset.get_isdigit(unichar_id)) 00150 return true; 00151 return false; 00152 } 00153 void Print(const char *msg) const; 00154 00157 float cost; 00158 00160 BLOB_CHOICE *curr_b; 00161 ViterbiStateEntry *parent_vse; 00164 ViterbiStateEntry *competing_vse; 00165 00168 float ratings_sum; //< sum of ratings of character on the path 00169 float min_certainty; //< minimum certainty on the path 00170 int adapted; //< number of BLOB_CHOICES from adapted templates 00171 int length; //< number of characters on the path 00172 float outline_length; //< length of the outline so far 00173 LMConsistencyInfo consistency_info; //< path consistency info 00174 AssociateStats associate_stats; //< character widths/gaps/seams 00175 00178 LanguageModelFlagsType top_choice_flags; 00179 00182 LanguageModelDawgInfo *dawg_info; 00183 00186 LanguageModelNgramInfo *ngram_info; 00187 00188 bool updated; //< set to true if the entry has just been created/updated 00191 STRING *debug_str; 00192 }; 00193 00194 ELISTIZEH(ViterbiStateEntry); 00195 00197 struct LanguageModelState { 00198 LanguageModelState() : 00199 viterbi_state_entries_prunable_length(0), 00200 viterbi_state_entries_prunable_max_cost(MAX_FLOAT32), 00201 viterbi_state_entries_length(0) {} 00202 ~LanguageModelState() {} 00203 00205 void Clear(); 00206 00207 void Print(const char *msg); 00208 00210 ViterbiStateEntry_LIST viterbi_state_entries; 00212 int viterbi_state_entries_prunable_length; 00213 float viterbi_state_entries_prunable_max_cost; 00215 int viterbi_state_entries_length; 00216 }; 00217 00219 struct BestChoiceBundle { 00220 explicit BestChoiceBundle(int matrix_dimension) 00221 : updated(false), best_vse(NULL) { 00222 beam.reserve(matrix_dimension); 00223 for (int i = 0; i < matrix_dimension; ++i) 00224 beam.push_back(new LanguageModelState); 00225 } 00226 ~BestChoiceBundle() {} 00227 00229 bool updated; 00231 DANGERR fixpt; 00235 PointerVector<LanguageModelState> beam; 00237 ViterbiStateEntry *best_vse; 00238 }; 00239 00240 } // namespace tesseract 00241 00242 #endif // TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_