|
tesseract 3.04.01
|
00001 /********************************************************************** 00002 * File: char_samp.cpp 00003 * Description: Implementation of a Character Bitmap Sample Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2007 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include <string.h> 00021 #include <string> 00022 #include "char_samp.h" 00023 #include "cube_utils.h" 00024 00025 namespace tesseract { 00026 00027 #define MAX_LINE_LEN 1024 00028 00029 CharSamp::CharSamp() 00030 : Bmp8(0, 0) { 00031 left_ = 0; 00032 top_ = 0; 00033 label32_ = NULL; 00034 page_ = -1; 00035 } 00036 00037 CharSamp::CharSamp(int wid, int hgt) 00038 : Bmp8(wid, hgt) { 00039 left_ = 0; 00040 top_ = 0; 00041 label32_ = NULL; 00042 page_ = -1; 00043 } 00044 00045 CharSamp::CharSamp(int left, int top, int wid, int hgt) 00046 : Bmp8(wid, hgt) 00047 , left_(left) 00048 , top_(top) { 00049 label32_ = NULL; 00050 page_ = -1; 00051 } 00052 00053 CharSamp::~CharSamp() { 00054 if (label32_ != NULL) { 00055 delete []label32_; 00056 label32_ = NULL; 00057 } 00058 } 00059 00060 // returns a UTF-8 version of the string label 00061 string CharSamp::stringLabel() const { 00062 string str = ""; 00063 if (label32_ != NULL) { 00064 string_32 str32(label32_); 00065 CubeUtils::UTF32ToUTF8(str32.c_str(), &str); 00066 } 00067 return str; 00068 } 00069 00070 // set a the string label using a UTF encoded string 00071 void CharSamp::SetLabel(string str) { 00072 if (label32_ != NULL) { 00073 delete []label32_; 00074 label32_ = NULL; 00075 } 00076 string_32 str32; 00077 CubeUtils::UTF8ToUTF32(str.c_str(), &str32); 00078 SetLabel(reinterpret_cast<const char_32 *>(str32.c_str())); 00079 } 00080 00081 // creates a CharSamp object from file 00082 CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) { 00083 unsigned short left; 00084 unsigned short top; 00085 unsigned short page; 00086 unsigned short first_char; 00087 unsigned short last_char; 00088 unsigned short norm_top; 00089 unsigned short norm_bottom; 00090 unsigned short norm_aspect_ratio; 00091 unsigned int val32; 00092 00093 char_32 *label32; 00094 00095 // read and check 32 bit marker 00096 if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) { 00097 return NULL; 00098 } 00099 if (val32 != 0xabd0fefe) { 00100 return NULL; 00101 } 00102 // read label length, 00103 if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) { 00104 return NULL; 00105 } 00106 // the label is not null terminated in the file 00107 if (val32 > 0 && val32 < MAX_UINT32) { 00108 label32 = new char_32[val32 + 1]; 00109 if (label32 == NULL) { 00110 return NULL; 00111 } 00112 // read label 00113 if (fp->Read(label32, val32 * sizeof(*label32)) != 00114 (val32 * sizeof(*label32))) { 00115 return NULL; 00116 } 00117 // null terminate 00118 label32[val32] = 0; 00119 } else { 00120 label32 = NULL; 00121 } 00122 // read coordinates 00123 if (fp->Read(&page, sizeof(page)) != sizeof(page)) { 00124 return NULL; 00125 } 00126 if (fp->Read(&left, sizeof(left)) != sizeof(left)) { 00127 return NULL; 00128 } 00129 if (fp->Read(&top, sizeof(top)) != sizeof(top)) { 00130 return NULL; 00131 } 00132 if (fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char)) { 00133 return NULL; 00134 } 00135 if (fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char)) { 00136 return NULL; 00137 } 00138 if (fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top)) { 00139 return NULL; 00140 } 00141 if (fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom)) { 00142 return NULL; 00143 } 00144 if (fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) != 00145 sizeof(norm_aspect_ratio)) { 00146 return NULL; 00147 } 00148 // create the object 00149 CharSamp *char_samp = new CharSamp(); 00150 if (char_samp == NULL) { 00151 return NULL; 00152 } 00153 // init 00154 char_samp->label32_ = label32; 00155 char_samp->page_ = page; 00156 char_samp->left_ = left; 00157 char_samp->top_ = top; 00158 char_samp->first_char_ = first_char; 00159 char_samp->last_char_ = last_char; 00160 char_samp->norm_top_ = norm_top; 00161 char_samp->norm_bottom_ = norm_bottom; 00162 char_samp->norm_aspect_ratio_ = norm_aspect_ratio; 00163 // load the Bmp8 part 00164 if (char_samp->LoadFromCharDumpFile(fp) == false) { 00165 delete char_samp; 00166 return NULL; 00167 } 00168 return char_samp; 00169 } 00170 00171 // Load a Char Samp from a dump file 00172 CharSamp *CharSamp::FromCharDumpFile(FILE *fp) { 00173 unsigned short left; 00174 unsigned short top; 00175 unsigned short page; 00176 unsigned short first_char; 00177 unsigned short last_char; 00178 unsigned short norm_top; 00179 unsigned short norm_bottom; 00180 unsigned short norm_aspect_ratio; 00181 unsigned int val32; 00182 char_32 *label32; 00183 00184 // read and check 32 bit marker 00185 if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { 00186 return NULL; 00187 } 00188 if (val32 != 0xabd0fefe) { 00189 return NULL; 00190 } 00191 // read label length, 00192 if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { 00193 return NULL; 00194 } 00195 // the label is not null terminated in the file 00196 if (val32 > 0 && val32 < MAX_UINT32) { 00197 label32 = new char_32[val32 + 1]; 00198 if (label32 == NULL) { 00199 return NULL; 00200 } 00201 // read label 00202 if (fread(label32, 1, val32 * sizeof(*label32), fp) != 00203 (val32 * sizeof(*label32))) { 00204 delete [] label32; 00205 return NULL; 00206 } 00207 // null terminate 00208 label32[val32] = 0; 00209 } else { 00210 label32 = NULL; 00211 } 00212 // read coordinates 00213 if (fread(&page, 1, sizeof(page), fp) != sizeof(page) || 00214 fread(&left, 1, sizeof(left), fp) != sizeof(left) || 00215 fread(&top, 1, sizeof(top), fp) != sizeof(top) || 00216 fread(&first_char, 1, sizeof(first_char), fp) != sizeof(first_char) || 00217 fread(&last_char, 1, sizeof(last_char), fp) != sizeof(last_char) || 00218 fread(&norm_top, 1, sizeof(norm_top), fp) != sizeof(norm_top) || 00219 fread(&norm_bottom, 1, sizeof(norm_bottom), fp) != sizeof(norm_bottom) || 00220 fread(&norm_aspect_ratio, 1, sizeof(norm_aspect_ratio), fp) != 00221 sizeof(norm_aspect_ratio)) { 00222 delete [] label32; 00223 return NULL; 00224 } 00225 // create the object 00226 CharSamp *char_samp = new CharSamp(); 00227 if (char_samp == NULL) { 00228 delete [] label32; 00229 return NULL; 00230 } 00231 // init 00232 char_samp->label32_ = label32; 00233 char_samp->page_ = page; 00234 char_samp->left_ = left; 00235 char_samp->top_ = top; 00236 char_samp->first_char_ = first_char; 00237 char_samp->last_char_ = last_char; 00238 char_samp->norm_top_ = norm_top; 00239 char_samp->norm_bottom_ = norm_bottom; 00240 char_samp->norm_aspect_ratio_ = norm_aspect_ratio; 00241 // load the Bmp8 part 00242 if (char_samp->LoadFromCharDumpFile(fp) == false) { 00243 delete char_samp; // It owns label32. 00244 return NULL; 00245 } 00246 return char_samp; 00247 } 00248 00249 // returns a copy of the charsamp that is scaled to the 00250 // specified width and height 00251 CharSamp *CharSamp::Scale(int wid, int hgt, bool isotropic) { 00252 CharSamp *scaled_samp = new CharSamp(wid, hgt); 00253 if (scaled_samp == NULL) { 00254 return NULL; 00255 } 00256 if (scaled_samp->ScaleFrom(this, isotropic) == false) { 00257 delete scaled_samp; 00258 return NULL; 00259 } 00260 scaled_samp->left_ = left_; 00261 scaled_samp->top_ = top_; 00262 scaled_samp->page_ = page_; 00263 scaled_samp->SetLabel(label32_); 00264 scaled_samp->first_char_ = first_char_; 00265 scaled_samp->last_char_ = last_char_; 00266 scaled_samp->norm_top_ = norm_top_; 00267 scaled_samp->norm_bottom_ = norm_bottom_; 00268 scaled_samp->norm_aspect_ratio_ = norm_aspect_ratio_; 00269 return scaled_samp; 00270 } 00271 00272 // Load a Char Samp from a dump file 00273 CharSamp *CharSamp::FromRawData(int left, int top, int wid, int hgt, 00274 unsigned char *data) { 00275 // create the object 00276 CharSamp *char_samp = new CharSamp(left, top, wid, hgt); 00277 if (char_samp == NULL) { 00278 return NULL; 00279 } 00280 if (char_samp->LoadFromRawData(data) == false) { 00281 delete char_samp; 00282 return NULL; 00283 } 00284 return char_samp; 00285 } 00286 00287 // Saves the charsamp to a dump file 00288 bool CharSamp::Save2CharDumpFile(FILE *fp) const { 00289 unsigned int val32; 00290 // write and check 32 bit marker 00291 val32 = 0xabd0fefe; 00292 if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { 00293 return false; 00294 } 00295 // write label length 00296 val32 = (label32_ == NULL) ? 0 : LabelLen(label32_); 00297 if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { 00298 return false; 00299 } 00300 // write label 00301 if (label32_ != NULL) { 00302 if (fwrite(label32_, 1, val32 * sizeof(*label32_), fp) != 00303 (val32 * sizeof(*label32_))) { 00304 return false; 00305 } 00306 } 00307 // write coordinates 00308 if (fwrite(&page_, 1, sizeof(page_), fp) != sizeof(page_)) { 00309 return false; 00310 } 00311 if (fwrite(&left_, 1, sizeof(left_), fp) != sizeof(left_)) { 00312 return false; 00313 } 00314 if (fwrite(&top_, 1, sizeof(top_), fp) != sizeof(top_)) { 00315 return false; 00316 } 00317 if (fwrite(&first_char_, 1, sizeof(first_char_), fp) != 00318 sizeof(first_char_)) { 00319 return false; 00320 } 00321 if (fwrite(&last_char_, 1, sizeof(last_char_), fp) != sizeof(last_char_)) { 00322 return false; 00323 } 00324 if (fwrite(&norm_top_, 1, sizeof(norm_top_), fp) != sizeof(norm_top_)) { 00325 return false; 00326 } 00327 if (fwrite(&norm_bottom_, 1, sizeof(norm_bottom_), fp) != 00328 sizeof(norm_bottom_)) { 00329 return false; 00330 } 00331 if (fwrite(&norm_aspect_ratio_, 1, sizeof(norm_aspect_ratio_), fp) != 00332 sizeof(norm_aspect_ratio_)) { 00333 return false; 00334 } 00335 if (SaveBmp2CharDumpFile(fp) == false) { 00336 return false; 00337 } 00338 return true; 00339 } 00340 00341 // Crop the char samp such that there are no white spaces on any side. 00342 // The norm_top_ and norm_bottom_ fields are the character top/bottom 00343 // with respect to whatever context the character is being recognized 00344 // in (e.g. word bounding box) normalized to a standard size of 00345 // 255. Here they default to 0 and 255 (word box boundaries), but 00346 // since they are context dependent, they may need to be reset by the 00347 // calling function. 00348 CharSamp *CharSamp::Crop() { 00349 // get the dimesions of the cropped img 00350 int cropped_left = 0; 00351 int cropped_top = 0; 00352 int cropped_wid = wid_; 00353 int cropped_hgt = hgt_; 00354 Bmp8::Crop(&cropped_left, &cropped_top, 00355 &cropped_wid, &cropped_hgt); 00356 00357 if (cropped_wid == 0 || cropped_hgt == 0) { 00358 return NULL; 00359 } 00360 // create the cropped char samp 00361 CharSamp *cropped_samp = new CharSamp(left_ + cropped_left, 00362 top_ + cropped_top, 00363 cropped_wid, cropped_hgt); 00364 cropped_samp->SetLabel(label32_); 00365 cropped_samp->SetFirstChar(first_char_); 00366 cropped_samp->SetLastChar(last_char_); 00367 // the following 3 fields may/should be reset by the calling function 00368 // using context information, i.e., location of character box 00369 // w.r.t. the word bounding box 00370 cropped_samp->SetNormAspectRatio(255 * 00371 cropped_wid / (cropped_wid + cropped_hgt)); 00372 cropped_samp->SetNormTop(0); 00373 cropped_samp->SetNormBottom(255); 00374 00375 // copy the bitmap to the cropped img 00376 Copy(cropped_left, cropped_top, cropped_wid, cropped_hgt, cropped_samp); 00377 return cropped_samp; 00378 } 00379 00380 // segment the char samp to connected components 00381 // based on contiguity and vertical pixel density histogram 00382 ConComp **CharSamp::Segment(int *segment_cnt, bool right_2_left, 00383 int max_hist_wnd, int min_con_comp_size) const { 00384 // init 00385 (*segment_cnt) = 0; 00386 int concomp_cnt = 0; 00387 int seg_cnt = 0; 00388 // find the concomps of the image 00389 ConComp **concomp_array = FindConComps(&concomp_cnt, min_con_comp_size); 00390 if (concomp_cnt <= 0 || !concomp_array) { 00391 if (concomp_array) 00392 delete []concomp_array; 00393 return NULL; 00394 } 00395 ConComp **seg_array = NULL; 00396 // segment each concomp further using vertical histogram 00397 for (int concomp = 0; concomp < concomp_cnt; concomp++) { 00398 int concomp_seg_cnt = 0; 00399 // segment the concomp 00400 ConComp **concomp_seg_array = NULL; 00401 ConComp **concomp_alloc_seg = 00402 concomp_array[concomp]->Segment(max_hist_wnd, &concomp_seg_cnt); 00403 // no segments, add the whole concomp 00404 if (concomp_alloc_seg == NULL) { 00405 concomp_seg_cnt = 1; 00406 concomp_seg_array = concomp_array + concomp; 00407 } else { 00408 // delete the original concomp, we no longer need it 00409 concomp_seg_array = concomp_alloc_seg; 00410 delete concomp_array[concomp]; 00411 } 00412 // add the resulting segments 00413 for (int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) { 00414 // too small of a segment: ignore 00415 if (concomp_seg_array[seg_idx]->Width() < 2 && 00416 concomp_seg_array[seg_idx]->Height() < 2) { 00417 delete concomp_seg_array[seg_idx]; 00418 } else { 00419 // add the new segment 00420 // extend the segment array 00421 if ((seg_cnt % kConCompAllocChunk) == 0) { 00422 ConComp **temp_segm_array = 00423 new ConComp *[seg_cnt + kConCompAllocChunk]; 00424 if (temp_segm_array == NULL) { 00425 fprintf(stderr, "Cube ERROR (CharSamp::Segment): could not " 00426 "allocate additional connected components\n"); 00427 delete []concomp_seg_array; 00428 delete []concomp_array; 00429 delete []seg_array; 00430 return NULL; 00431 } 00432 if (seg_cnt > 0) { 00433 memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array)); 00434 delete []seg_array; 00435 } 00436 seg_array = temp_segm_array; 00437 } 00438 seg_array[seg_cnt++] = concomp_seg_array[seg_idx]; 00439 } 00440 } // segment 00441 if (concomp_alloc_seg != NULL) { 00442 delete []concomp_alloc_seg; 00443 } 00444 } // concomp 00445 delete []concomp_array; 00446 00447 // sort the concomps from Left2Right or Right2Left, based on the reading order 00448 if (seg_cnt > 0 && seg_array != NULL) { 00449 qsort(seg_array, seg_cnt, sizeof(*seg_array), right_2_left ? 00450 ConComp::Right2LeftComparer : ConComp::Left2RightComparer); 00451 } 00452 (*segment_cnt) = seg_cnt; 00453 return seg_array; 00454 } 00455 00456 // builds a char samp from a set of connected components 00457 CharSamp *CharSamp::FromConComps(ConComp **concomp_array, int strt_concomp, 00458 int seg_flags_size, int *seg_flags, 00459 bool *left_most, bool *right_most, 00460 int word_hgt) { 00461 int concomp; 00462 int end_concomp; 00463 int concomp_cnt = 0; 00464 end_concomp = strt_concomp + seg_flags_size; 00465 // determine ID range 00466 bool once = false; 00467 int min_id = -1; 00468 int max_id = -1; 00469 for (concomp = strt_concomp; concomp < end_concomp; concomp++) { 00470 if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) { 00471 if (!once) { 00472 min_id = concomp_array[concomp]->ID(); 00473 max_id = concomp_array[concomp]->ID(); 00474 once = true; 00475 } else { 00476 UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id); 00477 } 00478 concomp_cnt++; 00479 } 00480 } 00481 if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) { 00482 return NULL; 00483 } 00484 // alloc memo for computing leftmost and right most attributes 00485 int id_cnt = max_id - min_id + 1; 00486 bool *id_exist = new bool[id_cnt]; 00487 bool *left_most_exist = new bool[id_cnt]; 00488 bool *right_most_exist = new bool[id_cnt]; 00489 if (!id_exist || !left_most_exist || !right_most_exist) 00490 return NULL; 00491 memset(id_exist, 0, id_cnt * sizeof(*id_exist)); 00492 memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist)); 00493 memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist)); 00494 // find the dimensions of the charsamp 00495 once = false; 00496 int left = -1; 00497 int right = -1; 00498 int top = -1; 00499 int bottom = -1; 00500 int unq_ids = 0; 00501 int unq_left_most = 0; 00502 int unq_right_most = 0; 00503 for (concomp = strt_concomp; concomp < end_concomp; concomp++) { 00504 if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) { 00505 if (!once) { 00506 left = concomp_array[concomp]->Left(); 00507 right = concomp_array[concomp]->Right(); 00508 top = concomp_array[concomp]->Top(); 00509 bottom = concomp_array[concomp]->Bottom(); 00510 once = true; 00511 } else { 00512 UpdateRange(concomp_array[concomp]->Left(), 00513 concomp_array[concomp]->Right(), &left, &right); 00514 UpdateRange(concomp_array[concomp]->Top(), 00515 concomp_array[concomp]->Bottom(), &top, &bottom); 00516 } 00517 // count unq ids, unq left most and right mosts ids 00518 int concomp_id = concomp_array[concomp]->ID() - min_id; 00519 if (!id_exist[concomp_id]) { 00520 id_exist[concomp_id] = true; 00521 unq_ids++; 00522 } 00523 if (concomp_array[concomp]->LeftMost()) { 00524 if (left_most_exist[concomp_id] == false) { 00525 left_most_exist[concomp_id] = true; 00526 unq_left_most++; 00527 } 00528 } 00529 if (concomp_array[concomp]->RightMost()) { 00530 if (right_most_exist[concomp_id] == false) { 00531 right_most_exist[concomp_id] = true; 00532 unq_right_most++; 00533 } 00534 } 00535 } 00536 } 00537 delete []id_exist; 00538 delete []left_most_exist; 00539 delete []right_most_exist; 00540 if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) { 00541 return NULL; 00542 } 00543 (*left_most) = (unq_left_most >= unq_ids); 00544 (*right_most) = (unq_right_most >= unq_ids); 00545 // create the char sample object 00546 CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1); 00547 if (!samp) { 00548 return NULL; 00549 } 00550 00551 // set the foreground pixels 00552 for (concomp = strt_concomp; concomp < end_concomp; concomp++) { 00553 if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) { 00554 ConCompPt *pt_ptr = concomp_array[concomp]->Head(); 00555 while (pt_ptr) { 00556 samp->line_buff_[pt_ptr->y() - top][pt_ptr->x() - left] = 0; 00557 pt_ptr = pt_ptr->Next(); 00558 } 00559 } 00560 } 00561 return samp; 00562 } 00563 00564 // clones the object 00565 CharSamp *CharSamp::Clone() const { 00566 // create the cropped char samp 00567 CharSamp *samp = new CharSamp(left_, top_, wid_, hgt_); 00568 samp->SetLabel(label32_); 00569 samp->SetFirstChar(first_char_); 00570 samp->SetLastChar(last_char_); 00571 samp->SetNormTop(norm_top_); 00572 samp->SetNormBottom(norm_bottom_); 00573 samp->SetNormAspectRatio(norm_aspect_ratio_); 00574 // copy the bitmap to the cropped img 00575 Copy(0, 0, wid_, hgt_, samp); 00576 return samp; 00577 } 00578 00579 // Load a Char Samp from a dump file 00580 CharSamp *CharSamp::FromCharDumpFile(unsigned char **raw_data_ptr) { 00581 unsigned int val32; 00582 char_32 *label32; 00583 unsigned char *raw_data = *raw_data_ptr; 00584 00585 // read and check 32 bit marker 00586 memcpy(&val32, raw_data, sizeof(val32)); 00587 raw_data += sizeof(val32); 00588 if (val32 != 0xabd0fefe) { 00589 return NULL; 00590 } 00591 // read label length, 00592 memcpy(&val32, raw_data, sizeof(val32)); 00593 raw_data += sizeof(val32); 00594 // the label is not null terminated in the file 00595 if (val32 > 0 && val32 < MAX_UINT32) { 00596 label32 = new char_32[val32 + 1]; 00597 if (label32 == NULL) { 00598 return NULL; 00599 } 00600 // read label 00601 memcpy(label32, raw_data, val32 * sizeof(*label32)); 00602 raw_data += (val32 * sizeof(*label32)); 00603 // null terminate 00604 label32[val32] = 0; 00605 } else { 00606 label32 = NULL; 00607 } 00608 00609 // create the object 00610 CharSamp *char_samp = new CharSamp(); 00611 if (char_samp == NULL) { 00612 return NULL; 00613 } 00614 00615 // read coordinates 00616 char_samp->label32_ = label32; 00617 memcpy(&char_samp->page_, raw_data, sizeof(char_samp->page_)); 00618 raw_data += sizeof(char_samp->page_); 00619 memcpy(&char_samp->left_, raw_data, sizeof(char_samp->left_)); 00620 raw_data += sizeof(char_samp->left_); 00621 memcpy(&char_samp->top_, raw_data, sizeof(char_samp->top_)); 00622 raw_data += sizeof(char_samp->top_); 00623 memcpy(&char_samp->first_char_, raw_data, sizeof(char_samp->first_char_)); 00624 raw_data += sizeof(char_samp->first_char_); 00625 memcpy(&char_samp->last_char_, raw_data, sizeof(char_samp->last_char_)); 00626 raw_data += sizeof(char_samp->last_char_); 00627 memcpy(&char_samp->norm_top_, raw_data, sizeof(char_samp->norm_top_)); 00628 raw_data += sizeof(char_samp->norm_top_); 00629 memcpy(&char_samp->norm_bottom_, raw_data, sizeof(char_samp->norm_bottom_)); 00630 raw_data += sizeof(char_samp->norm_bottom_); 00631 memcpy(&char_samp->norm_aspect_ratio_, raw_data, 00632 sizeof(char_samp->norm_aspect_ratio_)); 00633 raw_data += sizeof(char_samp->norm_aspect_ratio_); 00634 00635 // load the Bmp8 part 00636 if (char_samp->LoadFromCharDumpFile(&raw_data) == false) { 00637 delete char_samp; 00638 return NULL; 00639 } 00640 00641 (*raw_data_ptr) = raw_data; 00642 return char_samp; 00643 } 00644 00645 // computes the features corresponding to the char sample 00646 bool CharSamp::ComputeFeatures(int conv_grid_size, float *features) { 00647 // Create a scaled BMP 00648 CharSamp *scaled_bmp = Scale(conv_grid_size, conv_grid_size); 00649 if (!scaled_bmp) { 00650 return false; 00651 } 00652 // prepare input 00653 unsigned char *buff = scaled_bmp->RawData(); 00654 // bitmap features 00655 int input; 00656 int bmp_size = conv_grid_size * conv_grid_size; 00657 for (input = 0; input < bmp_size; input++) { 00658 features[input] = 255.0f - (1.0f * buff[input]); 00659 } 00660 // word context features 00661 features[input++] = FirstChar(); 00662 features[input++] = LastChar(); 00663 features[input++] = NormTop(); 00664 features[input++] = NormBottom(); 00665 features[input++] = NormAspectRatio(); 00666 delete scaled_bmp; 00667 return true; 00668 } 00669 } // namespace tesseract