|
tesseract 3.04.01
|
00001 /********************************************************************** 00002 * File: tess_lang_mod_edge.cpp 00003 * Description: Implementation of the Tesseract Language Model Edge Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2008 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "tess_lang_mod_edge.h" 00021 #include "const.h" 00022 #include "unichar.h" 00023 00024 00025 00026 namespace tesseract { 00027 // OOD constructor 00028 TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, int class_id) { 00029 root_ = false; 00030 cntxt_ = cntxt; 00031 dawg_ = NULL; 00032 start_edge_ = 0; 00033 end_edge_ = 0; 00034 edge_mask_ = 0; 00035 class_id_ = class_id; 00036 str_ = cntxt_->CharacterSet()->ClassString(class_id); 00037 path_cost_ = Cost(); 00038 } 00039 00043 TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, 00044 const Dawg *dawg, EDGE_REF edge_idx, int class_id) { 00045 root_ = false; 00046 cntxt_ = cntxt; 00047 dawg_ = dawg; 00048 start_edge_ = edge_idx; 00049 end_edge_ = edge_idx; 00050 edge_mask_ = 0; 00051 class_id_ = class_id; 00052 str_ = cntxt_->CharacterSet()->ClassString(class_id); 00053 path_cost_ = Cost(); 00054 } 00055 00059 TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg, 00060 EDGE_REF start_edge_idx, EDGE_REF end_edge_idx, 00061 int class_id) { 00062 root_ = false; 00063 cntxt_ = cntxt; 00064 dawg_ = dawg; 00065 start_edge_ = start_edge_idx; 00066 end_edge_ = end_edge_idx; 00067 edge_mask_ = 0; 00068 class_id_ = class_id; 00069 str_ = cntxt_->CharacterSet()->ClassString(class_id); 00070 path_cost_ = Cost(); 00071 } 00072 00073 char *TessLangModEdge::Description() const { 00074 char *char_ptr = new char[256]; 00075 if (!char_ptr) { 00076 return NULL; 00077 } 00078 00079 char dawg_str[256]; 00080 char edge_str[32]; 00081 if (dawg_ == (Dawg *)DAWG_OOD) { 00082 strcpy(dawg_str, "OOD"); 00083 } else if (dawg_ == (Dawg *)DAWG_NUMBER) { 00084 strcpy(dawg_str, "NUM"); 00085 } else if (dawg_->permuter() == SYSTEM_DAWG_PERM) { 00086 strcpy(dawg_str, "Main"); 00087 } else if (dawg_->permuter() == USER_DAWG_PERM) { 00088 strcpy(dawg_str, "User"); 00089 } else if (dawg_->permuter() == DOC_DAWG_PERM) { 00090 strcpy(dawg_str, "Doc"); 00091 } else { 00092 strcpy(dawg_str, "N/A"); 00093 } 00094 00095 sprintf(edge_str, "%d", static_cast<int>(start_edge_)); 00096 if (IsLeadingPuncEdge(edge_mask_)) { 00097 strcat(edge_str, "-LP"); 00098 } 00099 if (IsTrailingPuncEdge(edge_mask_)) { 00100 strcat(edge_str, "-TP"); 00101 } 00102 sprintf(char_ptr, "%s(%s)%s, Wtd Dawg Cost=%d", 00103 dawg_str, edge_str, IsEOW() ? "-EOW-" : "", path_cost_); 00104 00105 return char_ptr; 00106 } 00107 00108 int TessLangModEdge::CreateChildren(CubeRecoContext *cntxt, 00109 const Dawg *dawg, 00110 NODE_REF parent_node, 00111 LangModEdge **edge_array) { 00112 int edge_cnt = 0; 00113 NodeChildVector vec; 00114 dawg->unichar_ids_of(parent_node, &vec, false); // find all children 00115 for (int i = 0; i < vec.size(); ++i) { 00116 const NodeChild &child = vec[i]; 00117 if (child.unichar_id == INVALID_UNICHAR_ID) continue; 00118 edge_array[edge_cnt] = 00119 new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id); 00120 if (edge_array[edge_cnt] != NULL) edge_cnt++; 00121 } 00122 return edge_cnt; 00123 } 00124 }