tesseract 3.04.01

cube/word_altlist.cpp

Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        word_altlist.cpp
00003  * Description: Implementation of the Word Alternate List Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2008
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include "word_altlist.h"
00021 
00022 namespace tesseract {
00023 WordAltList::WordAltList(int max_alt)
00024     : AltList(max_alt) {
00025   word_alt_ = NULL;
00026 }
00027 
00028 WordAltList::~WordAltList() {
00029   if (word_alt_ != NULL) {
00030     for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
00031       if (word_alt_[alt_idx] != NULL) {
00032         delete []word_alt_[alt_idx];
00033       }
00034     }
00035     delete []word_alt_;
00036     word_alt_ = NULL;
00037   }
00038 }
00039 
00043 bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
00044   if (word_alt_ == NULL || alt_cost_ == NULL) {
00045     word_alt_ = new char_32*[max_alt_];
00046     alt_cost_ = new int[max_alt_];
00047     alt_tag_ = new void *[max_alt_];
00048 
00049     if (word_alt_ == NULL || alt_cost_ == NULL || alt_tag_ == NULL) {
00050       return false;
00051     }
00052 
00053     memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_));
00054   } else {
00055     // check if alt already exists
00056     for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
00057       if (CubeUtils::StrCmp(word_str, word_alt_[alt_idx]) == 0) {
00058         // update the cost if we have a lower one
00059         if (cost < alt_cost_[alt_idx]) {
00060           alt_cost_[alt_idx] = cost;
00061           alt_tag_[alt_idx] = tag;
00062         }
00063         return true;
00064       }
00065     }
00066   }
00067 
00068   // determine length of alternate
00069   int len = CubeUtils::StrLen(word_str);
00070 
00071   word_alt_[alt_cnt_] = new char_32[len + 1];
00072   if (word_alt_[alt_cnt_] == NULL) {
00073     return false;
00074   }
00075 
00076   if (len > 0) {
00077     memcpy(word_alt_[alt_cnt_], word_str, len * sizeof(*word_str));
00078   }
00079 
00080   word_alt_[alt_cnt_][len] = 0;
00081   alt_cost_[alt_cnt_] = cost;
00082   alt_tag_[alt_cnt_] = tag;
00083 
00084   alt_cnt_++;
00085 
00086   return true;
00087 }
00088 
00092 void WordAltList::Sort() {
00093   for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
00094     for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {
00095       if (alt_cost_[alt_idx] > alt_cost_[alt]) {
00096         char_32 *pchTemp = word_alt_[alt_idx];
00097         word_alt_[alt_idx] = word_alt_[alt];
00098         word_alt_[alt] = pchTemp;
00099 
00100         int temp = alt_cost_[alt_idx];
00101         alt_cost_[alt_idx] = alt_cost_[alt];
00102         alt_cost_[alt] = temp;
00103 
00104         void *tag = alt_tag_[alt_idx];
00105         alt_tag_[alt_idx] = alt_tag_[alt];
00106         alt_tag_[alt] = tag;
00107       }
00108     }
00109   }
00110 }
00111 
00112 void WordAltList::PrintDebug() {
00113   for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
00114     char_32 *word_32 = word_alt_[alt_idx];
00115     string word_str;
00116     CubeUtils::UTF32ToUTF8(word_32, &word_str);
00117     int num_unichars = CubeUtils::StrLen(word_32);
00118     fprintf(stderr, "Alt[%d]=%s (cost=%d, num_unichars=%d); unichars=", alt_idx,
00119             word_str.c_str(), alt_cost_[alt_idx], num_unichars);
00120     for (int i = 0; i < num_unichars; ++i)
00121       fprintf(stderr, "%d ", word_32[i]);
00122     fprintf(stderr, "\n");
00123   }
00124 }
00125 }  // namespace tesseract
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines