|
tesseract 3.04.01
|
00001 /********************************************************************** 00002 * File: word_altlist.cpp 00003 * Description: Implementation of the Word Alternate List Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2008 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "word_altlist.h" 00021 00022 namespace tesseract { 00023 WordAltList::WordAltList(int max_alt) 00024 : AltList(max_alt) { 00025 word_alt_ = NULL; 00026 } 00027 00028 WordAltList::~WordAltList() { 00029 if (word_alt_ != NULL) { 00030 for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) { 00031 if (word_alt_[alt_idx] != NULL) { 00032 delete []word_alt_[alt_idx]; 00033 } 00034 } 00035 delete []word_alt_; 00036 word_alt_ = NULL; 00037 } 00038 } 00039 00043 bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) { 00044 if (word_alt_ == NULL || alt_cost_ == NULL) { 00045 word_alt_ = new char_32*[max_alt_]; 00046 alt_cost_ = new int[max_alt_]; 00047 alt_tag_ = new void *[max_alt_]; 00048 00049 if (word_alt_ == NULL || alt_cost_ == NULL || alt_tag_ == NULL) { 00050 return false; 00051 } 00052 00053 memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_)); 00054 } else { 00055 // check if alt already exists 00056 for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) { 00057 if (CubeUtils::StrCmp(word_str, word_alt_[alt_idx]) == 0) { 00058 // update the cost if we have a lower one 00059 if (cost < alt_cost_[alt_idx]) { 00060 alt_cost_[alt_idx] = cost; 00061 alt_tag_[alt_idx] = tag; 00062 } 00063 return true; 00064 } 00065 } 00066 } 00067 00068 // determine length of alternate 00069 int len = CubeUtils::StrLen(word_str); 00070 00071 word_alt_[alt_cnt_] = new char_32[len + 1]; 00072 if (word_alt_[alt_cnt_] == NULL) { 00073 return false; 00074 } 00075 00076 if (len > 0) { 00077 memcpy(word_alt_[alt_cnt_], word_str, len * sizeof(*word_str)); 00078 } 00079 00080 word_alt_[alt_cnt_][len] = 0; 00081 alt_cost_[alt_cnt_] = cost; 00082 alt_tag_[alt_cnt_] = tag; 00083 00084 alt_cnt_++; 00085 00086 return true; 00087 } 00088 00092 void WordAltList::Sort() { 00093 for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) { 00094 for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) { 00095 if (alt_cost_[alt_idx] > alt_cost_[alt]) { 00096 char_32 *pchTemp = word_alt_[alt_idx]; 00097 word_alt_[alt_idx] = word_alt_[alt]; 00098 word_alt_[alt] = pchTemp; 00099 00100 int temp = alt_cost_[alt_idx]; 00101 alt_cost_[alt_idx] = alt_cost_[alt]; 00102 alt_cost_[alt] = temp; 00103 00104 void *tag = alt_tag_[alt_idx]; 00105 alt_tag_[alt_idx] = alt_tag_[alt]; 00106 alt_tag_[alt] = tag; 00107 } 00108 } 00109 } 00110 } 00111 00112 void WordAltList::PrintDebug() { 00113 for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) { 00114 char_32 *word_32 = word_alt_[alt_idx]; 00115 string word_str; 00116 CubeUtils::UTF32ToUTF8(word_32, &word_str); 00117 int num_unichars = CubeUtils::StrLen(word_32); 00118 fprintf(stderr, "Alt[%d]=%s (cost=%d, num_unichars=%d); unichars=", alt_idx, 00119 word_str.c_str(), alt_cost_[alt_idx], num_unichars); 00120 for (int i = 0; i < num_unichars; ++i) 00121 fprintf(stderr, "%d ", word_32[i]); 00122 fprintf(stderr, "\n"); 00123 } 00124 } 00125 } // namespace tesseract