tesseract 3.04.01

cube/char_samp.cpp

Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        char_samp.cpp
00003  * Description: Implementation of a Character Bitmap Sample Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include <string.h>
00021 #include <string>
00022 #include "char_samp.h"
00023 #include "cube_utils.h"
00024 
00025 namespace tesseract {
00026 
00027 #define MAX_LINE_LEN  1024
00028 
00029 CharSamp::CharSamp()
00030     : Bmp8(0, 0) {
00031   left_ = 0;
00032   top_ = 0;
00033   label32_ = NULL;
00034   page_ = -1;
00035 }
00036 
00037 CharSamp::CharSamp(int wid, int hgt)
00038     : Bmp8(wid, hgt) {
00039   left_ = 0;
00040   top_ = 0;
00041   label32_ = NULL;
00042   page_ = -1;
00043 }
00044 
00045 CharSamp::CharSamp(int left, int top, int wid, int hgt)
00046     : Bmp8(wid, hgt)
00047     , left_(left)
00048     , top_(top) {
00049   label32_ = NULL;
00050   page_ = -1;
00051 }
00052 
00053 CharSamp::~CharSamp() {
00054   if (label32_ != NULL) {
00055     delete []label32_;
00056     label32_ = NULL;
00057   }
00058 }
00059 
00060 // returns a UTF-8 version of the string label
00061 string CharSamp::stringLabel() const {
00062   string str = "";
00063   if (label32_ != NULL) {
00064     string_32 str32(label32_);
00065     CubeUtils::UTF32ToUTF8(str32.c_str(), &str);
00066   }
00067   return str;
00068 }
00069 
00070 // set a the string label using a UTF encoded string
00071 void CharSamp::SetLabel(string str) {
00072   if (label32_ != NULL) {
00073     delete []label32_;
00074     label32_ = NULL;
00075   }
00076   string_32 str32;
00077   CubeUtils::UTF8ToUTF32(str.c_str(), &str32);
00078   SetLabel(reinterpret_cast<const char_32 *>(str32.c_str()));
00079 }
00080 
00081 // creates a CharSamp object from file
00082 CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) {
00083   unsigned short left;
00084   unsigned short top;
00085   unsigned short page;
00086   unsigned short first_char;
00087   unsigned short last_char;
00088   unsigned short norm_top;
00089   unsigned short norm_bottom;
00090   unsigned short norm_aspect_ratio;
00091   unsigned int val32;
00092 
00093   char_32 *label32;
00094 
00095   // read and check 32 bit marker
00096   if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
00097     return NULL;
00098   }
00099   if (val32 != 0xabd0fefe) {
00100     return NULL;
00101   }
00102   // read label length,
00103   if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
00104     return NULL;
00105   }
00106   // the label is not null terminated in the file
00107   if (val32 > 0 && val32 < MAX_UINT32) {
00108     label32 = new char_32[val32 + 1];
00109     if (label32 == NULL) {
00110       return NULL;
00111     }
00112     // read label
00113     if (fp->Read(label32, val32 * sizeof(*label32)) !=
00114         (val32 * sizeof(*label32))) {
00115       return NULL;
00116     }
00117     // null terminate
00118     label32[val32] = 0;
00119   } else {
00120     label32 = NULL;
00121   }
00122   // read coordinates
00123   if (fp->Read(&page, sizeof(page)) != sizeof(page)) {
00124     return NULL;
00125   }
00126   if (fp->Read(&left, sizeof(left)) != sizeof(left)) {
00127     return NULL;
00128   }
00129   if (fp->Read(&top, sizeof(top)) != sizeof(top)) {
00130     return NULL;
00131   }
00132   if (fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char)) {
00133     return NULL;
00134   }
00135   if (fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char)) {
00136     return NULL;
00137   }
00138   if (fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top)) {
00139     return NULL;
00140   }
00141   if (fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom)) {
00142     return NULL;
00143   }
00144   if (fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) !=
00145       sizeof(norm_aspect_ratio)) {
00146     return NULL;
00147   }
00148   // create the object
00149   CharSamp *char_samp = new CharSamp();
00150   if (char_samp == NULL) {
00151     return NULL;
00152   }
00153   // init
00154   char_samp->label32_ = label32;
00155   char_samp->page_ = page;
00156   char_samp->left_ = left;
00157   char_samp->top_ = top;
00158   char_samp->first_char_ = first_char;
00159   char_samp->last_char_ = last_char;
00160   char_samp->norm_top_ = norm_top;
00161   char_samp->norm_bottom_ = norm_bottom;
00162   char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
00163   // load the Bmp8 part
00164   if (char_samp->LoadFromCharDumpFile(fp) == false) {
00165     delete char_samp;
00166     return NULL;
00167   }
00168   return char_samp;
00169 }
00170 
00171 // Load a Char Samp from a dump file
00172 CharSamp *CharSamp::FromCharDumpFile(FILE *fp) {
00173   unsigned short left;
00174   unsigned short top;
00175   unsigned short page;
00176   unsigned short first_char;
00177   unsigned short last_char;
00178   unsigned short norm_top;
00179   unsigned short norm_bottom;
00180   unsigned short norm_aspect_ratio;
00181   unsigned int val32;
00182   char_32 *label32;
00183 
00184   // read and check 32 bit marker
00185   if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
00186     return NULL;
00187   }
00188   if (val32 != 0xabd0fefe) {
00189     return NULL;
00190   }
00191   // read label length,
00192   if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
00193     return NULL;
00194   }
00195   // the label is not null terminated in the file
00196   if (val32 > 0 && val32 < MAX_UINT32) {
00197     label32 = new char_32[val32 + 1];
00198     if (label32 == NULL) {
00199       return NULL;
00200     }
00201     // read label
00202     if (fread(label32, 1, val32 * sizeof(*label32), fp) !=
00203         (val32 * sizeof(*label32))) {
00204       delete [] label32;
00205       return NULL;
00206     }
00207     // null terminate
00208     label32[val32] = 0;
00209   } else {
00210     label32 = NULL;
00211   }
00212   // read coordinates
00213   if (fread(&page, 1, sizeof(page), fp) != sizeof(page) ||
00214       fread(&left, 1, sizeof(left), fp) != sizeof(left) ||
00215       fread(&top, 1, sizeof(top), fp) != sizeof(top) ||
00216       fread(&first_char, 1, sizeof(first_char), fp) != sizeof(first_char) ||
00217       fread(&last_char, 1, sizeof(last_char), fp) != sizeof(last_char) ||
00218       fread(&norm_top, 1, sizeof(norm_top), fp) != sizeof(norm_top) ||
00219       fread(&norm_bottom, 1, sizeof(norm_bottom), fp) != sizeof(norm_bottom) ||
00220       fread(&norm_aspect_ratio, 1, sizeof(norm_aspect_ratio), fp) !=
00221           sizeof(norm_aspect_ratio)) {
00222     delete [] label32;
00223     return NULL;
00224   }
00225   // create the object
00226   CharSamp *char_samp = new CharSamp();
00227   if (char_samp == NULL) {
00228     delete [] label32;
00229     return NULL;
00230   }
00231   // init
00232   char_samp->label32_ = label32;
00233   char_samp->page_ = page;
00234   char_samp->left_ = left;
00235   char_samp->top_ = top;
00236   char_samp->first_char_ = first_char;
00237   char_samp->last_char_ = last_char;
00238   char_samp->norm_top_ = norm_top;
00239   char_samp->norm_bottom_ = norm_bottom;
00240   char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
00241   // load the Bmp8 part
00242   if (char_samp->LoadFromCharDumpFile(fp) == false) {
00243     delete char_samp;  // It owns label32.
00244     return NULL;
00245   }
00246   return char_samp;
00247 }
00248 
00249 // returns a copy of the charsamp that is scaled to the
00250 // specified width and height
00251 CharSamp *CharSamp::Scale(int wid, int hgt, bool isotropic) {
00252   CharSamp *scaled_samp = new CharSamp(wid, hgt);
00253   if (scaled_samp == NULL) {
00254     return NULL;
00255   }
00256   if (scaled_samp->ScaleFrom(this, isotropic) == false) {
00257     delete scaled_samp;
00258     return NULL;
00259   }
00260   scaled_samp->left_ = left_;
00261   scaled_samp->top_ = top_;
00262   scaled_samp->page_ = page_;
00263   scaled_samp->SetLabel(label32_);
00264   scaled_samp->first_char_ = first_char_;
00265   scaled_samp->last_char_ = last_char_;
00266   scaled_samp->norm_top_ = norm_top_;
00267   scaled_samp->norm_bottom_ = norm_bottom_;
00268   scaled_samp->norm_aspect_ratio_ = norm_aspect_ratio_;
00269   return scaled_samp;
00270 }
00271 
00272 // Load a Char Samp from a dump file
00273 CharSamp *CharSamp::FromRawData(int left, int top, int wid, int hgt,
00274                                 unsigned char *data) {
00275   // create the object
00276   CharSamp *char_samp = new CharSamp(left, top, wid, hgt);
00277   if (char_samp == NULL) {
00278     return NULL;
00279   }
00280   if (char_samp->LoadFromRawData(data) == false) {
00281     delete char_samp;
00282     return NULL;
00283   }
00284   return char_samp;
00285 }
00286 
00287 // Saves the charsamp to a dump file
00288 bool CharSamp::Save2CharDumpFile(FILE *fp) const {
00289   unsigned int val32;
00290   // write and check 32 bit marker
00291   val32 = 0xabd0fefe;
00292   if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
00293     return false;
00294   }
00295   // write label length
00296   val32 = (label32_ == NULL) ? 0 : LabelLen(label32_);
00297   if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
00298     return false;
00299   }
00300   // write label
00301   if (label32_ != NULL) {
00302     if (fwrite(label32_, 1, val32 * sizeof(*label32_), fp) !=
00303         (val32 * sizeof(*label32_))) {
00304       return false;
00305     }
00306   }
00307   // write coordinates
00308   if (fwrite(&page_, 1, sizeof(page_), fp) != sizeof(page_)) {
00309     return false;
00310   }
00311   if (fwrite(&left_, 1, sizeof(left_), fp) != sizeof(left_)) {
00312     return false;
00313   }
00314   if (fwrite(&top_, 1, sizeof(top_), fp) != sizeof(top_)) {
00315     return false;
00316   }
00317   if (fwrite(&first_char_, 1, sizeof(first_char_), fp) !=
00318       sizeof(first_char_)) {
00319     return false;
00320   }
00321   if (fwrite(&last_char_, 1, sizeof(last_char_), fp) != sizeof(last_char_)) {
00322     return false;
00323   }
00324   if (fwrite(&norm_top_, 1, sizeof(norm_top_), fp) != sizeof(norm_top_)) {
00325     return false;
00326   }
00327   if (fwrite(&norm_bottom_, 1, sizeof(norm_bottom_), fp) !=
00328       sizeof(norm_bottom_)) {
00329     return false;
00330   }
00331   if (fwrite(&norm_aspect_ratio_, 1, sizeof(norm_aspect_ratio_), fp) !=
00332       sizeof(norm_aspect_ratio_)) {
00333     return false;
00334   }
00335   if (SaveBmp2CharDumpFile(fp) == false) {
00336     return false;
00337   }
00338   return true;
00339 }
00340 
00341 // Crop the char samp such that there are no white spaces on any side.
00342 // The norm_top_ and norm_bottom_ fields are the character top/bottom
00343 // with respect to whatever context the character is being recognized
00344 // in (e.g. word bounding box) normalized to a standard size of
00345 // 255. Here they default to 0 and 255 (word box boundaries), but
00346 // since they are context dependent, they may need to be reset by the
00347 // calling function.
00348 CharSamp *CharSamp::Crop() {
00349   // get the dimesions of the cropped img
00350   int cropped_left = 0;
00351   int cropped_top = 0;
00352   int cropped_wid = wid_;
00353   int cropped_hgt = hgt_;
00354   Bmp8::Crop(&cropped_left, &cropped_top,
00355              &cropped_wid, &cropped_hgt);
00356 
00357   if (cropped_wid == 0 || cropped_hgt == 0) {
00358     return NULL;
00359   }
00360   // create the cropped char samp
00361   CharSamp *cropped_samp = new CharSamp(left_ + cropped_left,
00362                                         top_ + cropped_top,
00363                                         cropped_wid, cropped_hgt);
00364   cropped_samp->SetLabel(label32_);
00365   cropped_samp->SetFirstChar(first_char_);
00366   cropped_samp->SetLastChar(last_char_);
00367   // the following 3 fields may/should be reset by the calling function
00368   // using context information, i.e., location of character box
00369   // w.r.t. the word bounding box
00370   cropped_samp->SetNormAspectRatio(255 *
00371                                    cropped_wid / (cropped_wid + cropped_hgt));
00372   cropped_samp->SetNormTop(0);
00373   cropped_samp->SetNormBottom(255);
00374 
00375   // copy the bitmap to the cropped img
00376   Copy(cropped_left, cropped_top, cropped_wid, cropped_hgt, cropped_samp);
00377   return cropped_samp;
00378 }
00379 
00380 // segment the char samp to connected components
00381 // based on contiguity and vertical pixel density histogram
00382 ConComp **CharSamp::Segment(int *segment_cnt, bool right_2_left,
00383                             int max_hist_wnd, int min_con_comp_size) const {
00384   // init
00385   (*segment_cnt) = 0;
00386   int concomp_cnt = 0;
00387   int seg_cnt = 0;
00388   // find the concomps of the image
00389   ConComp **concomp_array = FindConComps(&concomp_cnt, min_con_comp_size);
00390   if (concomp_cnt <= 0 || !concomp_array) {
00391     if (concomp_array)
00392       delete []concomp_array;
00393     return NULL;
00394   }
00395   ConComp **seg_array = NULL;
00396   // segment each concomp further using vertical histogram
00397   for (int concomp = 0; concomp < concomp_cnt; concomp++) {
00398     int concomp_seg_cnt = 0;
00399     // segment the concomp
00400     ConComp **concomp_seg_array = NULL;
00401     ConComp **concomp_alloc_seg =
00402         concomp_array[concomp]->Segment(max_hist_wnd, &concomp_seg_cnt);
00403     // no segments, add the whole concomp
00404     if (concomp_alloc_seg == NULL) {
00405       concomp_seg_cnt = 1;
00406       concomp_seg_array = concomp_array + concomp;
00407     } else {
00408       // delete the original concomp, we no longer need it
00409       concomp_seg_array = concomp_alloc_seg;
00410       delete concomp_array[concomp];
00411     }
00412     // add the resulting segments
00413     for (int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) {
00414       // too small of a segment: ignore
00415       if (concomp_seg_array[seg_idx]->Width() < 2 &&
00416           concomp_seg_array[seg_idx]->Height() < 2) {
00417         delete concomp_seg_array[seg_idx];
00418       } else {
00419         // add the new segment
00420         // extend the segment array
00421         if ((seg_cnt % kConCompAllocChunk) == 0) {
00422           ConComp **temp_segm_array =
00423               new ConComp *[seg_cnt + kConCompAllocChunk];
00424           if (temp_segm_array == NULL) {
00425             fprintf(stderr, "Cube ERROR (CharSamp::Segment): could not "
00426                     "allocate additional connected components\n");
00427             delete []concomp_seg_array;
00428             delete []concomp_array;
00429             delete []seg_array;
00430             return NULL;
00431           }
00432           if (seg_cnt > 0) {
00433             memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array));
00434             delete []seg_array;
00435           }
00436           seg_array = temp_segm_array;
00437         }
00438         seg_array[seg_cnt++] = concomp_seg_array[seg_idx];
00439       }
00440     }  // segment
00441     if (concomp_alloc_seg != NULL) {
00442       delete []concomp_alloc_seg;
00443     }
00444   }  // concomp
00445   delete []concomp_array;
00446 
00447   // sort the concomps from Left2Right or Right2Left, based on the reading order
00448   if (seg_cnt > 0 && seg_array != NULL) {
00449     qsort(seg_array, seg_cnt, sizeof(*seg_array), right_2_left ?
00450         ConComp::Right2LeftComparer : ConComp::Left2RightComparer);
00451   }
00452   (*segment_cnt) = seg_cnt;
00453   return seg_array;
00454 }
00455 
00456 // builds a char samp from a set of connected components
00457 CharSamp *CharSamp::FromConComps(ConComp **concomp_array, int strt_concomp,
00458                                  int seg_flags_size, int *seg_flags,
00459                                  bool *left_most, bool *right_most,
00460                                  int word_hgt) {
00461   int concomp;
00462   int end_concomp;
00463   int concomp_cnt = 0;
00464   end_concomp = strt_concomp + seg_flags_size;
00465   // determine ID range
00466   bool once = false;
00467   int min_id = -1;
00468   int max_id = -1;
00469   for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
00470     if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
00471       if (!once) {
00472         min_id = concomp_array[concomp]->ID();
00473         max_id = concomp_array[concomp]->ID();
00474         once = true;
00475       } else {
00476         UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id);
00477       }
00478       concomp_cnt++;
00479     }
00480   }
00481   if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) {
00482     return NULL;
00483   }
00484   // alloc memo for computing leftmost and right most attributes
00485   int id_cnt = max_id - min_id + 1;
00486   bool *id_exist = new bool[id_cnt];
00487   bool *left_most_exist = new bool[id_cnt];
00488   bool *right_most_exist = new bool[id_cnt];
00489   if (!id_exist || !left_most_exist || !right_most_exist)
00490     return NULL;
00491   memset(id_exist, 0, id_cnt * sizeof(*id_exist));
00492   memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist));
00493   memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist));
00494   // find the dimensions of the charsamp
00495   once = false;
00496   int left = -1;
00497   int right = -1;
00498   int top = -1;
00499   int bottom = -1;
00500   int unq_ids = 0;
00501   int unq_left_most = 0;
00502   int unq_right_most = 0;
00503   for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
00504     if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
00505       if (!once) {
00506         left = concomp_array[concomp]->Left();
00507         right = concomp_array[concomp]->Right();
00508         top = concomp_array[concomp]->Top();
00509         bottom = concomp_array[concomp]->Bottom();
00510         once = true;
00511       } else {
00512         UpdateRange(concomp_array[concomp]->Left(),
00513                     concomp_array[concomp]->Right(), &left, &right);
00514         UpdateRange(concomp_array[concomp]->Top(),
00515                     concomp_array[concomp]->Bottom(), &top, &bottom);
00516       }
00517       // count unq ids, unq left most and right mosts ids
00518       int concomp_id = concomp_array[concomp]->ID() - min_id;
00519       if (!id_exist[concomp_id]) {
00520         id_exist[concomp_id] = true;
00521         unq_ids++;
00522       }
00523       if (concomp_array[concomp]->LeftMost()) {
00524         if (left_most_exist[concomp_id] == false) {
00525           left_most_exist[concomp_id] = true;
00526           unq_left_most++;
00527         }
00528       }
00529       if (concomp_array[concomp]->RightMost()) {
00530         if (right_most_exist[concomp_id] == false) {
00531           right_most_exist[concomp_id] = true;
00532           unq_right_most++;
00533         }
00534       }
00535     }
00536   }
00537   delete []id_exist;
00538   delete []left_most_exist;
00539   delete []right_most_exist;
00540   if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) {
00541     return NULL;
00542   }
00543   (*left_most) = (unq_left_most >= unq_ids);
00544   (*right_most) = (unq_right_most >= unq_ids);
00545   // create the char sample object
00546   CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1);
00547   if (!samp) {
00548     return NULL;
00549   }
00550 
00551   // set the foreground pixels
00552   for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
00553     if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
00554       ConCompPt *pt_ptr = concomp_array[concomp]->Head();
00555       while (pt_ptr) {
00556         samp->line_buff_[pt_ptr->y() - top][pt_ptr->x() - left] = 0;
00557         pt_ptr = pt_ptr->Next();
00558       }
00559     }
00560   }
00561   return samp;
00562 }
00563 
00564 // clones the object
00565 CharSamp *CharSamp::Clone() const {
00566   // create the cropped char samp
00567   CharSamp *samp = new CharSamp(left_, top_, wid_, hgt_);
00568   samp->SetLabel(label32_);
00569   samp->SetFirstChar(first_char_);
00570   samp->SetLastChar(last_char_);
00571   samp->SetNormTop(norm_top_);
00572   samp->SetNormBottom(norm_bottom_);
00573   samp->SetNormAspectRatio(norm_aspect_ratio_);
00574   // copy the bitmap to the cropped img
00575   Copy(0, 0, wid_, hgt_, samp);
00576   return samp;
00577 }
00578 
00579 // Load a Char Samp from a dump file
00580 CharSamp *CharSamp::FromCharDumpFile(unsigned char **raw_data_ptr) {
00581   unsigned int val32;
00582   char_32 *label32;
00583   unsigned char *raw_data = *raw_data_ptr;
00584 
00585   // read and check 32 bit marker
00586   memcpy(&val32, raw_data, sizeof(val32));
00587   raw_data += sizeof(val32);
00588   if (val32 != 0xabd0fefe) {
00589     return NULL;
00590   }
00591   // read label length,
00592   memcpy(&val32, raw_data, sizeof(val32));
00593   raw_data += sizeof(val32);
00594   // the label is not null terminated in the file
00595   if (val32 > 0 && val32 < MAX_UINT32) {
00596     label32 = new char_32[val32 + 1];
00597     if (label32 == NULL) {
00598       return NULL;
00599     }
00600     // read label
00601     memcpy(label32, raw_data, val32 * sizeof(*label32));
00602     raw_data += (val32 * sizeof(*label32));
00603     // null terminate
00604     label32[val32] = 0;
00605   } else {
00606     label32 = NULL;
00607   }
00608 
00609   // create the object
00610   CharSamp *char_samp = new CharSamp();
00611   if (char_samp == NULL) {
00612     return NULL;
00613   }
00614 
00615   // read coordinates
00616   char_samp->label32_ = label32;
00617   memcpy(&char_samp->page_, raw_data, sizeof(char_samp->page_));
00618   raw_data += sizeof(char_samp->page_);
00619   memcpy(&char_samp->left_, raw_data, sizeof(char_samp->left_));
00620   raw_data += sizeof(char_samp->left_);
00621   memcpy(&char_samp->top_, raw_data, sizeof(char_samp->top_));
00622   raw_data += sizeof(char_samp->top_);
00623   memcpy(&char_samp->first_char_, raw_data, sizeof(char_samp->first_char_));
00624   raw_data += sizeof(char_samp->first_char_);
00625   memcpy(&char_samp->last_char_, raw_data, sizeof(char_samp->last_char_));
00626   raw_data += sizeof(char_samp->last_char_);
00627   memcpy(&char_samp->norm_top_, raw_data, sizeof(char_samp->norm_top_));
00628   raw_data += sizeof(char_samp->norm_top_);
00629   memcpy(&char_samp->norm_bottom_, raw_data, sizeof(char_samp->norm_bottom_));
00630   raw_data += sizeof(char_samp->norm_bottom_);
00631   memcpy(&char_samp->norm_aspect_ratio_, raw_data,
00632          sizeof(char_samp->norm_aspect_ratio_));
00633   raw_data += sizeof(char_samp->norm_aspect_ratio_);
00634 
00635   // load the Bmp8 part
00636   if (char_samp->LoadFromCharDumpFile(&raw_data) == false) {
00637     delete char_samp;
00638     return NULL;
00639   }
00640 
00641   (*raw_data_ptr) = raw_data;
00642   return char_samp;
00643 }
00644 
00645 // computes the features corresponding to the char sample
00646 bool CharSamp::ComputeFeatures(int conv_grid_size, float *features) {
00647   // Create a scaled BMP
00648   CharSamp *scaled_bmp = Scale(conv_grid_size, conv_grid_size);
00649   if (!scaled_bmp) {
00650     return false;
00651   }
00652   // prepare input
00653   unsigned char *buff = scaled_bmp->RawData();
00654   // bitmap features
00655   int input;
00656   int bmp_size = conv_grid_size * conv_grid_size;
00657   for (input = 0; input < bmp_size; input++) {
00658     features[input] = 255.0f - (1.0f * buff[input]);
00659   }
00660   // word context features
00661   features[input++] = FirstChar();
00662   features[input++] = LastChar();
00663   features[input++] = NormTop();
00664   features[input++] = NormBottom();
00665   features[input++] = NormAspectRatio();
00666   delete scaled_bmp;
00667   return true;
00668 }
00669 }  // namespace tesseract
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines