|
tesseract 3.04.01
|
00001 /********************************************************************** 00002 * File: pageres.h (Formerly page_res.h) 00003 * Description: Results classes used by control.c 00004 * Author: Phil Cheatle 00005 * Created: Tue Sep 22 08:42:49 BST 1992 00006 * 00007 * (C) Copyright 1992, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 #ifndef PAGERES_H 00020 #define PAGERES_H 00021 00022 #include "blamer.h" 00023 #include "blobs.h" 00024 #include "boxword.h" 00025 #include "elst.h" 00026 #include "genericvector.h" 00027 #include "normalis.h" 00028 #include "ocrblock.h" 00029 #include "ocrrow.h" 00030 #include "params_training_featdef.h" 00031 #include "ratngs.h" 00032 #include "rejctmap.h" 00033 #include "seam.h" 00034 #include "werd.h" 00035 00036 namespace tesseract { 00037 struct FontInfo; 00038 class Tesseract; 00039 } 00040 using tesseract::FontInfo; 00041 00042 /* Forward declarations */ 00043 00044 class BLOCK_RES; 00045 00046 ELISTIZEH (BLOCK_RES) CLISTIZEH (BLOCK_RES) 00047 class 00048 ROW_RES; 00049 00050 ELISTIZEH (ROW_RES) 00051 class WERD_RES; 00052 00053 ELISTIZEH (WERD_RES) 00054 00055 /************************************************************************* 00056 * PAGE_RES - Page results 00057 *************************************************************************/ 00058 class PAGE_RES { // page result 00059 public: 00060 inT32 char_count; 00061 inT32 rej_count; 00062 BLOCK_RES_LIST block_res_list; 00063 BOOL8 rejected; 00064 // Updated every time PAGE_RES_IT iterating on this PAGE_RES moves to 00065 // the next word. This pointer is not owned by PAGE_RES class. 00066 WERD_CHOICE **prev_word_best_choice; 00067 // Sums of blame reasons computed by the blamer. 00068 GenericVector<int> blame_reasons; 00069 // Debug information about all the misadaptions on this page. 00070 // Each BlamerBundle contains an index into this vector, so that words that 00071 // caused misadaption could be marked. However, since words could be 00072 // deleted/split/merged, the log is stored on the PAGE_RES level. 00073 GenericVector<STRING> misadaption_log; 00074 00075 inline void Init() { 00076 char_count = 0; 00077 rej_count = 0; 00078 rejected = FALSE; 00079 prev_word_best_choice = NULL; 00080 blame_reasons.init_to_size(IRR_NUM_REASONS, 0); 00081 } 00082 00083 PAGE_RES() { Init(); } // empty constructor 00084 00085 PAGE_RES(bool merge_similar_words, 00086 BLOCK_LIST *block_list, // real blocks 00087 WERD_CHOICE **prev_word_best_choice_ptr); 00088 00089 ~PAGE_RES () { // destructor 00090 } 00091 }; 00092 00093 /************************************************************************* 00094 * BLOCK_RES - Block results 00095 *************************************************************************/ 00096 00097 class BLOCK_RES:public ELIST_LINK { 00098 public: 00099 BLOCK * block; // real block 00100 inT32 char_count; // chars in block 00101 inT32 rej_count; // rejected chars 00102 inT16 font_class; // 00103 inT16 row_count; 00104 float x_height; 00105 BOOL8 font_assigned; // block already 00106 // processed 00107 BOOL8 bold; // all bold 00108 BOOL8 italic; // all italic 00109 00110 ROW_RES_LIST row_res_list; 00111 00112 BLOCK_RES() { 00113 } // empty constructor 00114 00115 BLOCK_RES(bool merge_similar_words, BLOCK *the_block); // real block 00116 00117 ~BLOCK_RES () { // destructor 00118 } 00119 }; 00120 00121 /************************************************************************* 00122 * ROW_RES - Row results 00123 *************************************************************************/ 00124 00125 class ROW_RES:public ELIST_LINK { 00126 public: 00127 ROW * row; // real row 00128 inT32 char_count; // chars in block 00129 inT32 rej_count; // rejected chars 00130 inT32 whole_word_rej_count; // rejs in total rej wds 00131 WERD_RES_LIST word_res_list; 00132 00133 ROW_RES() { 00134 } // empty constructor 00135 00136 ROW_RES(bool merge_similar_words, ROW *the_row); // real row 00137 00138 ~ROW_RES() { // destructor 00139 } 00140 }; 00141 00142 /************************************************************************* 00143 * WERD_RES - Word results 00144 *************************************************************************/ 00145 enum CRUNCH_MODE 00146 { 00147 CR_NONE, 00148 CR_KEEP_SPACE, 00149 CR_LOOSE_SPACE, 00150 CR_DELETE 00151 }; 00152 00153 // WERD_RES is a collection of publicly accessible members that gathers 00154 // information about a word result. 00155 class WERD_RES : public ELIST_LINK { 00156 public: 00157 // Which word is which? 00158 // There are 3 coordinate spaces in use here: a possibly rotated pixel space, 00159 // the original image coordinate space, and the BLN space in which the 00160 // baseline of a word is at kBlnBaselineOffset, the xheight is kBlnXHeight, 00161 // and the x-middle of the word is at 0. 00162 // In the rotated pixel space, coordinates correspond to the input image, 00163 // but may be rotated about the origin by a multiple of 90 degrees, 00164 // and may therefore be negative. 00165 // In any case a rotation by denorm.block()->re_rotation() will take them 00166 // back to the original image. 00167 // The other differences between words all represent different stages of 00168 // processing during recognition. 00169 00170 // ---------------------------INPUT------------------------------------- 00171 00172 // The word is the input C_BLOBs in the rotated pixel space. 00173 // word is NOT owned by the WERD_RES unless combination is true. 00174 // All the other word pointers ARE owned by the WERD_RES. 00175 WERD* word; // Input C_BLOB word. 00176 00177 // -------------SETUP BY SetupFor*Recognition---READONLY-INPUT------------ 00178 00179 // The bln_boxes contains the bounding boxes (only) of the input word, in the 00180 // BLN space. The lengths of word and bln_boxes 00181 // match as they are both before any chopping. 00182 // TODO(rays) determine if docqual does anything useful and delete bln_boxes 00183 // if it doesn't. 00184 tesseract::BoxWord* bln_boxes; // BLN input bounding boxes. 00185 // The ROW that this word sits in. NOT owned by the WERD_RES. 00186 ROW* blob_row; 00187 // The denorm provides the transformation to get back to the rotated image 00188 // coords from the chopped_word/rebuild_word BLN coords, but each blob also 00189 // has its own denorm. 00190 DENORM denorm; // For use on chopped_word. 00191 // Unicharset used by the classifier output in best_choice and raw_choice. 00192 const UNICHARSET* uch_set; // For converting back to utf8. 00193 00194 // ----Initialized by SetupFor*Recognition---BUT OUTPUT FROM RECOGNITION---- 00195 // ----Setup to a (different!) state expected by the various classifiers---- 00196 // TODO(rays) Tidy and make more consistent. 00197 00198 // The chopped_word is also in BLN space, and represents the fully chopped 00199 // character fragments that make up the word. 00200 // The length of chopped_word matches length of seam_array + 1 (if set). 00201 TWERD* chopped_word; // BLN chopped fragments output. 00202 // Vector of SEAM* holding chopping points matching chopped_word. 00203 GenericVector<SEAM*> seam_array; 00204 // Widths of blobs in chopped_word. 00205 GenericVector<int> blob_widths; 00206 // Gaps between blobs in chopped_word. blob_gaps[i] is the gap between 00207 // blob i and blob i+1. 00208 GenericVector<int> blob_gaps; 00209 // Ratings matrix contains classifier choices for each classified combination 00210 // of blobs. The dimension is the same as the number of blobs in chopped_word 00211 // and the leading diagonal corresponds to classifier results of the blobs 00212 // in chopped_word. The state_ members of best_choice, raw_choice and 00213 // best_choices all correspond to this ratings matrix and allow extraction 00214 // of the blob choices for any given WERD_CHOICE. 00215 MATRIX* ratings; // Owned pointer. 00216 // Pointer to the first WERD_CHOICE in best_choices. This is the result that 00217 // will be output from Tesseract. Note that this is now a borrowed pointer 00218 // and should NOT be deleted. 00219 WERD_CHOICE* best_choice; // Borrowed pointer. 00220 // The best raw_choice found during segmentation search. Differs from the 00221 // best_choice by being the best result according to just the character 00222 // classifier, not taking any language model information into account. 00223 // Unlike best_choice, the pointer IS owned by this WERD_RES. 00224 WERD_CHOICE* raw_choice; // Owned pointer. 00225 // Alternative results found during chopping/segmentation search stages. 00226 // Note that being an ELIST, best_choices owns the WERD_CHOICEs. 00227 WERD_CHOICE_LIST best_choices; 00228 00229 // Truth bounding boxes, text and incorrect choice reason. 00230 BlamerBundle *blamer_bundle; 00231 00232 // --------------OUTPUT FROM RECOGNITION------------------------------- 00233 // --------------Not all fields are necessarily set.------------------- 00234 // ---best_choice, raw_choice *must* end up set, with a box_word------- 00235 // ---In complete output, the number of blobs in rebuild_word matches--- 00236 // ---the number of boxes in box_word, the number of unichar_ids in--- 00237 // ---best_choice, the number of ints in best_state, and the number--- 00238 // ---of strings in correct_text-------------------------------------- 00239 // ---SetupFake Sets everything to appropriate values if the word is--- 00240 // ---known to be bad before recognition.------------------------------ 00241 00242 // The rebuild_word is also in BLN space, but represents the final best 00243 // segmentation of the word. Its length is therefore the same as box_word. 00244 TWERD* rebuild_word; // BLN best segmented word. 00245 // The box_word is in the original image coordinate space. It is the 00246 // bounding boxes of the rebuild_word, after denormalization. 00247 // The length of box_word matches rebuild_word, best_state (if set) and 00248 // correct_text (if set), as well as best_choice and represents the 00249 // number of classified units in the output. 00250 tesseract::BoxWord* box_word; // Denormalized output boxes. 00251 // The best_state stores the relationship between chopped_word and 00252 // rebuild_word. Each blob[i] in rebuild_word is composed of best_state[i] 00253 // adjacent blobs in chopped_word. The seams in seam_array are hidden 00254 // within a rebuild_word blob and revealed between them. 00255 GenericVector<int> best_state; // Number of blobs in each best blob. 00256 // The correct_text is used during training and adaption to carry the 00257 // text to the training system without the need for a unicharset. There 00258 // is one entry in the vector for each blob in rebuild_word and box_word. 00259 GenericVector<STRING> correct_text; 00260 // The Tesseract that was used to recognize this word. Just a borrowed 00261 // pointer. Note: Tesseract's class definition is in a higher-level library. 00262 // We avoid introducing a cyclic dependency by not using the Tesseract 00263 // within WERD_RES. We are just storing it to provide access to it 00264 // for the top-level multi-language controller, and maybe for output of 00265 // the recognized language. 00266 tesseract::Tesseract* tesseract; 00267 00268 // Less-well documented members. 00269 // TODO(rays) Add more documentation here. 00270 WERD_CHOICE *ep_choice; // ep text TODO(rays) delete this. 00271 REJMAP reject_map; // best_choice rejects 00272 BOOL8 tess_failed; 00273 /* 00274 If tess_failed is TRUE, one of the following tests failed when Tess 00275 returned: 00276 - The outword blob list was not the same length as the best_choice string; 00277 - The best_choice string contained ALL blanks; 00278 - The best_choice string was zero length 00279 */ 00280 BOOL8 tess_accepted; // Tess thinks its ok? 00281 BOOL8 tess_would_adapt; // Tess would adapt? 00282 BOOL8 done; // ready for output? 00283 bool small_caps; // word appears to be small caps 00284 bool odd_size; // word is bigger than line or leader dots. 00285 inT8 italic; 00286 inT8 bold; 00287 // The fontinfos are pointers to data owned by the classifier. 00288 const FontInfo* fontinfo; 00289 const FontInfo* fontinfo2; 00290 inT8 fontinfo_id_count; // number of votes 00291 inT8 fontinfo_id2_count; // number of votes 00292 BOOL8 guessed_x_ht; 00293 BOOL8 guessed_caps_ht; 00294 CRUNCH_MODE unlv_crunch_mode; 00295 float x_height; // post match estimate 00296 float caps_height; // post match estimate 00297 float baseline_shift; // post match estimate. 00298 00299 /* 00300 To deal with fuzzy spaces we need to be able to combine "words" to form 00301 combinations when we suspect that the gap is a non-space. The (new) text 00302 ord code generates separate words for EVERY fuzzy gap - flags in the word 00303 indicate whether the gap is below the threshold (fuzzy kern) and is thus 00304 NOT a real word break by default, or above the threshold (fuzzy space) and 00305 this is a real word break by default. 00306 00307 The WERD_RES list contains all these words PLUS "combination" words built 00308 out of (copies of) the words split by fuzzy kerns. The separate parts have 00309 their "part_of_combo" flag set true and should be IGNORED on a default 00310 reading of the list. 00311 00312 Combination words are FOLLOWED by the sequence of part_of_combo words 00313 which they combine. 00314 */ 00315 BOOL8 combination; //of two fuzzy gap wds 00316 BOOL8 part_of_combo; //part of a combo 00317 BOOL8 reject_spaces; //Reject spacing? 00318 00319 WERD_RES() { 00320 InitNonPointers(); 00321 InitPointers(); 00322 } 00323 WERD_RES(WERD *the_word) { 00324 InitNonPointers(); 00325 InitPointers(); 00326 word = the_word; 00327 } 00328 // Deep copies everything except the ratings MATRIX. 00329 // To get that use deep_copy below. 00330 WERD_RES(const WERD_RES &source) : ELIST_LINK(source) { 00331 InitPointers(); 00332 *this = source; // see operator= 00333 } 00334 00335 ~WERD_RES(); 00336 00337 // Returns the UTF-8 string for the given blob index in the best_choice word, 00338 // given that we know whether we are in a right-to-left reading context. 00339 // This matters for mirrorable characters such as parentheses. We recognize 00340 // characters purely based on their shape on the page, and by default produce 00341 // the corresponding unicode for a left-to-right context. 00342 const char* BestUTF8(int blob_index, bool in_rtl_context) const { 00343 if (blob_index < 0 || best_choice == NULL || 00344 blob_index >= best_choice->length()) 00345 return NULL; 00346 UNICHAR_ID id = best_choice->unichar_id(blob_index); 00347 if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID) 00348 return NULL; 00349 UNICHAR_ID mirrored = uch_set->get_mirror(id); 00350 if (in_rtl_context && mirrored > 0 && mirrored != INVALID_UNICHAR_ID) 00351 id = mirrored; 00352 return uch_set->id_to_unichar_ext(id); 00353 } 00354 // Returns the UTF-8 string for the given blob index in the raw_choice word. 00355 const char* RawUTF8(int blob_index) const { 00356 if (blob_index < 0 || blob_index >= raw_choice->length()) 00357 return NULL; 00358 UNICHAR_ID id = raw_choice->unichar_id(blob_index); 00359 if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID) 00360 return NULL; 00361 return uch_set->id_to_unichar(id); 00362 } 00363 00364 UNICHARSET::Direction SymbolDirection(int blob_index) const { 00365 if (best_choice == NULL || 00366 blob_index >= best_choice->length() || 00367 blob_index < 0) 00368 return UNICHARSET::U_OTHER_NEUTRAL; 00369 return uch_set->get_direction(best_choice->unichar_id(blob_index)); 00370 } 00371 00372 bool AnyRtlCharsInWord() const { 00373 if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1) 00374 return false; 00375 for (int id = 0; id < best_choice->length(); id++) { 00376 int unichar_id = best_choice->unichar_id(id); 00377 if (unichar_id < 0 || unichar_id >= uch_set->size()) 00378 continue; // Ignore illegal chars. 00379 UNICHARSET::Direction dir = 00380 uch_set->get_direction(unichar_id); 00381 if (dir == UNICHARSET::U_RIGHT_TO_LEFT || 00382 dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC || 00383 dir == UNICHARSET::U_ARABIC_NUMBER) 00384 return true; 00385 } 00386 return false; 00387 } 00388 00389 bool AnyLtrCharsInWord() const { 00390 if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1) 00391 return false; 00392 for (int id = 0; id < best_choice->length(); id++) { 00393 int unichar_id = best_choice->unichar_id(id); 00394 if (unichar_id < 0 || unichar_id >= uch_set->size()) 00395 continue; // Ignore illegal chars. 00396 UNICHARSET::Direction dir = uch_set->get_direction(unichar_id); 00397 if (dir == UNICHARSET::U_LEFT_TO_RIGHT) 00398 return true; 00399 } 00400 return false; 00401 } 00402 00403 // Return whether the blobs in this WERD_RES 0, 1,... come from an engine 00404 // that gave us the unichars in reading order (as opposed to strict left 00405 // to right). 00406 bool UnicharsInReadingOrder() const { 00407 return best_choice->unichars_in_script_order(); 00408 } 00409 00410 void InitNonPointers(); 00411 void InitPointers(); 00412 void Clear(); 00413 void ClearResults(); 00414 void ClearWordChoices(); 00415 void ClearRatings(); 00416 00417 // Deep copies everything except the ratings MATRIX. 00418 // To get that use deep_copy below. 00419 WERD_RES& operator=(const WERD_RES& source); //from this 00420 00421 void CopySimpleFields(const WERD_RES& source); 00422 00423 // Initializes a blank (default constructed) WERD_RES from one that has 00424 // already been recognized. 00425 // Use SetupFor*Recognition afterwards to complete the setup and make 00426 // it ready for a retry recognition. 00427 void InitForRetryRecognition(const WERD_RES& source); 00428 00429 // Sets up the members used in recognition: bln_boxes, chopped_word, 00430 // seam_array, denorm. Returns false if 00431 // the word is empty and sets up fake results. If use_body_size is 00432 // true and row->body_size is set, then body_size will be used for 00433 // blob normalization instead of xheight + ascrise. This flag is for 00434 // those languages that are using CJK pitch model and thus it has to 00435 // be true if and only if tesseract->textord_use_cjk_fp_model is 00436 // true. 00437 // If allow_detailed_fx is true, the feature extractor will receive fine 00438 // precision outline information, allowing smoother features and better 00439 // features on low resolution images. 00440 // The norm_mode sets the default mode for normalization in absence 00441 // of any of the above flags. It should really be a tesseract::OcrEngineMode 00442 // but is declared as int for ease of use with tessedit_ocr_engine_mode. 00443 // Returns false if the word is empty and sets up fake results. 00444 bool SetupForRecognition(const UNICHARSET& unicharset_in, 00445 tesseract::Tesseract* tesseract, Pix* pix, 00446 int norm_mode, 00447 const TBOX* norm_box, bool numeric_mode, 00448 bool use_body_size, bool allow_detailed_fx, 00449 ROW *row, const BLOCK* block); 00450 00451 // Set up the seam array, bln_boxes, best_choice, and raw_choice to empty 00452 // accumulators from a made chopped word. We presume the fields are already 00453 // empty. 00454 void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in); 00455 00456 // Sets up the members used in recognition for an empty recognition result: 00457 // bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice. 00458 void SetupFake(const UNICHARSET& uch); 00459 00460 // Set the word as having the script of the input unicharset. 00461 void SetupWordScript(const UNICHARSET& unicharset_in); 00462 00463 // Sets up the blamer_bundle if it is not null, using the initialized denorm. 00464 void SetupBlamerBundle(); 00465 00466 // Computes the blob_widths and blob_gaps from the chopped_word. 00467 void SetupBlobWidthsAndGaps(); 00468 00469 // Updates internal data to account for a new SEAM (chop) at the given 00470 // blob_number. Fixes the ratings matrix and states in the choices, as well 00471 // as the blob widths and gaps. 00472 void InsertSeam(int blob_number, SEAM* seam); 00473 00474 // Returns true if all the word choices except the first have adjust_factors 00475 // worse than the given threshold. 00476 bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const; 00477 00478 // Returns true if the current word is ambiguous (by number of answers or 00479 // by dangerous ambigs.) 00480 bool IsAmbiguous(); 00481 00482 // Returns true if the ratings matrix size matches the sum of each of the 00483 // segmentation states. 00484 bool StatesAllValid(); 00485 00486 // Prints a list of words found if debug is true or the word result matches 00487 // the word_to_debug. 00488 void DebugWordChoices(bool debug, const char* word_to_debug); 00489 00490 // Prints the top choice along with the accepted/done flags. 00491 void DebugTopChoice(const char* msg) const; 00492 00493 // Removes from best_choices all choices which are not within a reasonable 00494 // range of the best choice. 00495 void FilterWordChoices(int debug_level); 00496 00497 // Computes a set of distance thresholds used to control adaption. 00498 // Compares the best choice for the current word to the best raw choice 00499 // to determine which characters were classified incorrectly by the 00500 // classifier. Then places a separate threshold into thresholds for each 00501 // character in the word. If the classifier was correct, max_rating is placed 00502 // into thresholds. If the classifier was incorrect, the mean match rating 00503 // (error percentage) of the classifier's incorrect choice minus some margin 00504 // is placed into thresholds. This can then be used by the caller to try to 00505 // create a new template for the desired class that will classify the 00506 // character with a rating better than the threshold value. The match rating 00507 // placed into thresholds is never allowed to be below min_rating in order to 00508 // prevent trying to make overly tight templates. 00509 // min_rating limits how tight to make a template. 00510 // max_rating limits how loose to make a template. 00511 // rating_margin denotes the amount of margin to put in template. 00512 void ComputeAdaptionThresholds(float certainty_scale, 00513 float min_rating, 00514 float max_rating, 00515 float rating_margin, 00516 float* thresholds); 00517 00518 // Saves a copy of the word_choice if it has the best unadjusted rating. 00519 // Returns true if the word_choice was the new best. 00520 bool LogNewRawChoice(WERD_CHOICE* word_choice); 00521 // Consumes word_choice by adding it to best_choices, (taking ownership) if 00522 // the certainty for word_choice is some distance of the best choice in 00523 // best_choices, or by deleting the word_choice and returning false. 00524 // The best_choices list is kept in sorted order by rating. Duplicates are 00525 // removed, and the list is kept no longer than max_num_choices in length. 00526 // Returns true if the word_choice is still a valid pointer. 00527 bool LogNewCookedChoice(int max_num_choices, bool debug, 00528 WERD_CHOICE* word_choice); 00529 00530 // Prints a brief list of all the best choices. 00531 void PrintBestChoices() const; 00532 00533 // Returns the sum of the widths of the blob between start_blob and last_blob 00534 // inclusive. 00535 int GetBlobsWidth(int start_blob, int last_blob); 00536 // Returns the width of a gap between the specified blob and the next one. 00537 int GetBlobsGap(int blob_index); 00538 00539 // Returns the BLOB_CHOICE corresponding to the given index in the 00540 // best choice word taken from the appropriate cell in the ratings MATRIX. 00541 // Borrowed pointer, so do not delete. May return NULL if there is no 00542 // BLOB_CHOICE matching the unichar_id at the given index. 00543 BLOB_CHOICE* GetBlobChoice(int index) const; 00544 00545 // Returns the BLOB_CHOICE_LIST corresponding to the given index in the 00546 // best choice word taken from the appropriate cell in the ratings MATRIX. 00547 // Borrowed pointer, so do not delete. 00548 BLOB_CHOICE_LIST* GetBlobChoices(int index) const; 00549 00550 // Moves the results fields from word to this. This takes ownership of all 00551 // the data, so src can be destructed. 00552 // word1.ConsumeWordResult(word); 00553 // delete word; 00554 // is simpler and faster than: 00555 // word1 = *word; 00556 // delete word; 00557 // as it doesn't need to copy and reallocate anything. 00558 void ConsumeWordResults(WERD_RES* word); 00559 00560 // Replace the best choice and rebuild box word. 00561 // choice must be from the current best_choices list. 00562 void ReplaceBestChoice(WERD_CHOICE* choice); 00563 00564 // Builds the rebuild_word and sets the best_state from the chopped_word and 00565 // the best_choice->state. 00566 void RebuildBestState(); 00567 00568 // Copies the chopped_word to the rebuild_word, faking a best_state as well. 00569 // Also sets up the output box_word. 00570 void CloneChoppedToRebuild(); 00571 00572 // Sets/replaces the box_word with one made from the rebuild_word. 00573 void SetupBoxWord(); 00574 00575 // Sets up the script positions in the best_choice using the best_choice 00576 // to get the unichars, and the unicharset to get the target positions. 00577 void SetScriptPositions(); 00578 // Sets all the blobs in all the words (best choice and alternates) to be 00579 // the given position. (When a sub/superscript is recognized as a separate 00580 // word, it falls victim to the rule that a whole word cannot be sub or 00581 // superscript, so this function overrides that problem.) 00582 void SetAllScriptPositions(tesseract::ScriptPos position); 00583 00584 // Classifies the word with some already-calculated BLOB_CHOICEs. 00585 // The choices are an array of blob_count pointers to BLOB_CHOICE, 00586 // providing a single classifier result for each blob. 00587 // The BLOB_CHOICEs are consumed and the word takes ownership. 00588 // The number of blobs in the box_word must match blob_count. 00589 void FakeClassifyWord(int blob_count, BLOB_CHOICE** choices); 00590 00591 // Creates a WERD_CHOICE for the word using the top choices from the leading 00592 // diagonal of the ratings matrix. 00593 void FakeWordFromRatings(); 00594 00595 // Copies the best_choice strings to the correct_text for adaption/training. 00596 void BestChoiceToCorrectText(); 00597 00598 // Merges 2 adjacent blobs in the result if the permanent callback 00599 // class_cb returns other than INVALID_UNICHAR_ID, AND the permanent 00600 // callback box_cb is NULL or returns true, setting the merged blob 00601 // result to the class returned from class_cb. 00602 // Returns true if anything was merged. 00603 bool ConditionalBlobMerge( 00604 TessResultCallback2<UNICHAR_ID, UNICHAR_ID, UNICHAR_ID>* class_cb, 00605 TessResultCallback2<bool, const TBOX&, const TBOX&>* box_cb); 00606 00607 // Merges 2 adjacent blobs in the result (index and index+1) and corrects 00608 // all the data to account for the change. 00609 void MergeAdjacentBlobs(int index); 00610 00611 // Callback helper for fix_quotes returns a double quote if both 00612 // arguments are quote, otherwise INVALID_UNICHAR_ID. 00613 UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2); 00614 void fix_quotes(); 00615 00616 // Callback helper for fix_hyphens returns UNICHAR_ID of - if both 00617 // arguments are hyphen, otherwise INVALID_UNICHAR_ID. 00618 UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2); 00619 // Callback helper for fix_hyphens returns true if box1 and box2 overlap 00620 // (assuming both on the same textline, are in order and a chopped em dash.) 00621 bool HyphenBoxesOverlap(const TBOX& box1, const TBOX& box2); 00622 void fix_hyphens(); 00623 00624 // Callback helper for merge_tess_fails returns a space if both 00625 // arguments are space, otherwise INVALID_UNICHAR_ID. 00626 UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2); 00627 void merge_tess_fails(); 00628 00629 // Returns a really deep copy of *src, including the ratings MATRIX. 00630 static WERD_RES* deep_copy(const WERD_RES* src) { 00631 WERD_RES* result = new WERD_RES(*src); 00632 // That didn't copy the ratings, but we want a copy if there is one to 00633 // begin width. 00634 if (src->ratings != NULL) 00635 result->ratings = src->ratings->DeepCopy(); 00636 return result; 00637 } 00638 00639 // Copy blobs from word_res onto this word (eliminating spaces between). 00640 // Since this may be called bidirectionally OR both the BOL and EOL flags. 00641 void copy_on(WERD_RES *word_res) { //from this word 00642 word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL)); 00643 word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL)); 00644 word->copy_on(word_res->word); 00645 } 00646 00647 // Returns true if the collection of count pieces, starting at start, are all 00648 // natural connected components, ie there are no real chops involved. 00649 bool PiecesAllNatural(int start, int count) const; 00650 }; 00651 00652 /************************************************************************* 00653 * PAGE_RES_IT - Page results iterator 00654 *************************************************************************/ 00655 00656 class PAGE_RES_IT { 00657 public: 00658 PAGE_RES * page_res; // page being iterated 00659 00660 PAGE_RES_IT() { 00661 } // empty contructor 00662 00663 PAGE_RES_IT(PAGE_RES *the_page_res) { // page result 00664 page_res = the_page_res; 00665 restart_page(); // ready to scan 00666 } 00667 00668 // Do two PAGE_RES_ITs point at the same word? 00669 // This is much cheaper than cmp(). 00670 bool operator ==(const PAGE_RES_IT &other) const; 00671 00672 bool operator !=(const PAGE_RES_IT &other) const {return !(*this == other); } 00673 00674 // Given another PAGE_RES_IT to the same page, 00675 // this before other: -1 00676 // this equal to other: 0 00677 // this later than other: 1 00678 int cmp(const PAGE_RES_IT &other) const; 00679 00680 WERD_RES *restart_page() { 00681 return start_page(false); // Skip empty blocks. 00682 } 00683 WERD_RES *restart_page_with_empties() { 00684 return start_page(true); // Allow empty blocks. 00685 } 00686 WERD_RES *start_page(bool empty_ok); 00687 00688 WERD_RES *restart_row(); 00689 00690 // ============ Methods that mutate the underling structures =========== 00691 // Note that these methods will potentially invalidate other PAGE_RES_ITs 00692 // and are intended to be used only while a single PAGE_RES_IT is active. 00693 // This problem needs to be taken into account if these mutation operators 00694 // are ever provided to PageIterator or its subclasses. 00695 00696 // Inserts the new_word and a corresponding WERD_RES before the current 00697 // position. The simple fields of the WERD_RES are copied from clone_res and 00698 // the resulting WERD_RES is returned for further setup with best_choice etc. 00699 WERD_RES* InsertSimpleCloneWord(const WERD_RES& clone_res, WERD* new_word); 00700 00701 // Replaces the current WERD/WERD_RES with the given words. The given words 00702 // contain fake blobs that indicate the position of the characters. These are 00703 // replaced with real blobs from the current word as much as possible. 00704 void ReplaceCurrentWord(tesseract::PointerVector<WERD_RES>* words); 00705 00706 // Deletes the current WERD_RES and its underlying WERD. 00707 void DeleteCurrentWord(); 00708 00709 // Makes the current word a fuzzy space if not already fuzzy. Updates 00710 // corresponding part of combo if required. 00711 void MakeCurrentWordFuzzy(); 00712 00713 WERD_RES *forward() { // Get next word. 00714 return internal_forward(false, false); 00715 } 00716 // Move forward, but allow empty blocks to show as single NULL words. 00717 WERD_RES *forward_with_empties() { 00718 return internal_forward(false, true); 00719 } 00720 00721 WERD_RES *forward_paragraph(); // get first word in next non-empty paragraph 00722 WERD_RES *forward_block(); // get first word in next non-empty block 00723 00724 WERD_RES *prev_word() const { // previous word 00725 return prev_word_res; 00726 } 00727 ROW_RES *prev_row() const { // row of prev word 00728 return prev_row_res; 00729 } 00730 BLOCK_RES *prev_block() const { // block of prev word 00731 return prev_block_res; 00732 } 00733 WERD_RES *word() const { // current word 00734 return word_res; 00735 } 00736 ROW_RES *row() const { // row of current word 00737 return row_res; 00738 } 00739 BLOCK_RES *block() const { // block of cur. word 00740 return block_res; 00741 } 00742 WERD_RES *next_word() const { // next word 00743 return next_word_res; 00744 } 00745 ROW_RES *next_row() const { // row of next word 00746 return next_row_res; 00747 } 00748 BLOCK_RES *next_block() const { // block of next word 00749 return next_block_res; 00750 } 00751 void rej_stat_word(); // for page/block/row 00752 void ResetWordIterator(); 00753 00754 private: 00755 WERD_RES *internal_forward(bool new_block, bool empty_ok); 00756 00757 WERD_RES * prev_word_res; // previous word 00758 ROW_RES *prev_row_res; // row of prev word 00759 BLOCK_RES *prev_block_res; // block of prev word 00760 00761 WERD_RES *word_res; // current word 00762 ROW_RES *row_res; // row of current word 00763 BLOCK_RES *block_res; // block of cur. word 00764 00765 WERD_RES *next_word_res; // next word 00766 ROW_RES *next_row_res; // row of next word 00767 BLOCK_RES *next_block_res; // block of next word 00768 00769 BLOCK_RES_IT block_res_it; // iterators 00770 ROW_RES_IT row_res_it; 00771 WERD_RES_IT word_res_it; 00772 }; 00773 #endif