// tesseract 3.04.01
00001 00002 // File: textord.h 00003 // Description: The Textord class definition gathers text line and word 00004 // finding functionality. 00005 // Author: Ray Smith 00006 // Created: Fri Mar 13 14:29:01 PDT 2009 00007 // 00008 // (C) Copyright 2009, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #ifndef TESSERACT_TEXTORD_TEXTORD_H__ 00022 #define TESSERACT_TEXTORD_TEXTORD_H__ 00023 00024 #include "ccstruct.h" 00025 #include "bbgrid.h" 00026 #include "blobbox.h" 00027 #include "gap_map.h" 00028 #include "publictypes.h" // For PageSegMode. 00029 00030 class FCOORD; 00031 class BLOCK_LIST; 00032 class PAGE_RES; 00033 class TO_BLOCK; 00034 class TO_BLOCK_LIST; 00035 class ScrollView; 00036 00037 namespace tesseract { 00038 00039 // A simple class that can be used by BBGrid to hold a word and an expanded 00040 // bounding box that makes it easy to find words to put diacritics. 00041 class WordWithBox { 00042 public: 00043 WordWithBox() : word_(NULL) {} 00044 explicit WordWithBox(WERD *word) 00045 : word_(word), bounding_box_(word->bounding_box()) { 00046 int height = bounding_box_.height(); 00047 bounding_box_.pad(height, height); 00048 } 00049 00050 const TBOX &bounding_box() const { return bounding_box_; } 00051 // Returns the bounding box of only the good blobs. 
00052 TBOX true_bounding_box() const { return word_->true_bounding_box(); } 00053 C_BLOB_LIST *RejBlobs() const { return word_->rej_cblob_list(); } 00054 const WERD *word() const { return word_; } 00055 00056 private: 00057 // Borrowed pointer to a real word somewhere that must outlive this class. 00058 WERD *word_; 00059 // Cached expanded bounding box of the word, padded all round by its height. 00060 TBOX bounding_box_; 00061 }; 00062 00063 // Make it usable by BBGrid. 00064 CLISTIZEH(WordWithBox) 00065 typedef BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT> WordGrid; 00066 typedef GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT> WordSearch; 00067 00068 class Textord { 00069 public: 00070 explicit Textord(CCStruct* ccstruct); 00071 ~Textord(); 00072 00073 // Make the textlines and words inside each block. 00074 // binary_pix is mandatory and is the binarized input after line removal. 00075 // grey_pix is optional, but if present must match the binary_pix in size, 00076 // and must be a *real* grey image instead of binary_pix * 255. 00077 // thresholds_pix is expected to be present iff grey_pix is present and 00078 // can be an integer factor reduction of the grey_pix. It represents the 00079 // thresholds that were used to create the binary_pix from the grey_pix. 00080 // diacritic_blobs contain small confusing components that should be added 00081 // to the appropriate word(s) in case they are really diacritics. 00082 void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, 00083 int height, Pix *binary_pix, Pix *thresholds_pix, 00084 Pix *grey_pix, bool use_box_bottoms, 00085 BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, 00086 TO_BLOCK_LIST *to_blocks); 00087 00088 // If we were supposed to return only a single textline, and there is more 00089 // than one, clean up and leave only the best. 
00090 void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES* page_res); 00091 00092 bool use_cjk_fp_model() const { 00093 return use_cjk_fp_model_; 00094 } 00095 void set_use_cjk_fp_model(bool flag) { 00096 use_cjk_fp_model_ = flag; 00097 } 00098 00099 // tospace.cpp /////////////////////////////////////////// 00100 void to_spacing( 00101 ICOORD page_tr, //topright of page 00102 TO_BLOCK_LIST *blocks //blocks on page 00103 ); 00104 ROW *make_prop_words(TO_ROW *row, // row to make 00105 FCOORD rotation // for drawing 00106 ); 00107 ROW *make_blob_words(TO_ROW *row, // row to make 00108 FCOORD rotation // for drawing 00109 ); 00110 // tordmain.cpp /////////////////////////////////////////// 00111 void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); 00112 void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on); 00113 00114 private: 00115 // For underlying memory management and other utilities. 00116 CCStruct* ccstruct_; 00117 00118 // The size of the input image. 00119 ICOORD page_tr_; 00120 00121 bool use_cjk_fp_model_; 00122 00123 // makerow.cpp /////////////////////////////////////////// 00124 // Make the textlines inside each block. 00125 void MakeRows(PageSegMode pageseg_mode, const FCOORD& skew, 00126 int width, int height, TO_BLOCK_LIST* to_blocks); 00127 // Make the textlines inside a single block. 
00128 void MakeBlockRows(int min_spacing, int max_spacing, 00129 const FCOORD& skew, TO_BLOCK* block, 00130 ScrollView* win); 00131 00132 public: 00133 void compute_block_xheight(TO_BLOCK *block, float gradient); 00134 void compute_row_xheight(TO_ROW *row, // row to do 00135 const FCOORD& rotation, 00136 float gradient, // global skew 00137 int block_line_size); 00138 void make_spline_rows(TO_BLOCK *block, // block to do 00139 float gradient, // gradient to fit 00140 BOOL8 testing_on); 00141 private: 00143 void make_old_baselines(TO_BLOCK *block, // block to do 00144 BOOL8 testing_on, // correct orientation 00145 float gradient); 00146 void correlate_lines(TO_BLOCK *block, float gradient); 00147 void correlate_neighbours(TO_BLOCK *block, // block rows are in. 00148 TO_ROW **rows, // rows of block. 00149 int rowcount); // no of rows to do. 00150 int correlate_with_stats(TO_ROW **rows, // rows of block. 00151 int rowcount, // no of rows to do. 00152 TO_BLOCK* block); 00153 void find_textlines(TO_BLOCK *block, // block row is in 00154 TO_ROW *row, // row to do 00155 int degree, // required approximation 00156 QSPLINE *spline); // starting spline 00157 // tospace.cpp /////////////////////////////////////////// 00158 //DEBUG USE ONLY 00159 void block_spacing_stats(TO_BLOCK *block, 00160 GAPMAP *gapmap, 00161 BOOL8 &old_text_ord_proportional, 00162 //resulting estimate 00163 inT16 &block_space_gap_width, 00164 //resulting estimate 00165 inT16 &block_non_space_gap_width 00166 ); 00167 void row_spacing_stats(TO_ROW *row, 00168 GAPMAP *gapmap, 00169 inT16 block_idx, 00170 inT16 row_idx, 00171 //estimate for block 00172 inT16 block_space_gap_width, 00173 //estimate for block 00174 inT16 block_non_space_gap_width 00175 ); 00176 void old_to_method(TO_ROW *row, 00177 STATS *all_gap_stats, 00178 STATS *space_gap_stats, 00179 STATS *small_gap_stats, 00180 inT16 block_space_gap_width, 00181 //estimate for block 00182 inT16 block_non_space_gap_width 00183 ); 00184 BOOL8 
isolated_row_stats(TO_ROW *row, 00185 GAPMAP *gapmap, 00186 STATS *all_gap_stats, 00187 BOOL8 suspected_table, 00188 inT16 block_idx, 00189 inT16 row_idx); 00190 inT16 stats_count_under(STATS *stats, inT16 threshold); 00191 void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats); 00192 BOOL8 make_a_word_break(TO_ROW *row, // row being made 00193 TBOX blob_box, // for next_blob // how many blanks? 00194 inT16 prev_gap, 00195 TBOX prev_blob_box, 00196 inT16 real_current_gap, 00197 inT16 within_xht_current_gap, 00198 TBOX next_blob_box, 00199 inT16 next_gap, 00200 uinT8 &blanks, 00201 BOOL8 &fuzzy_sp, 00202 BOOL8 &fuzzy_non, 00203 BOOL8& prev_gap_was_a_space, 00204 BOOL8& break_at_next_gap); 00205 BOOL8 narrow_blob(TO_ROW *row, TBOX blob_box); 00206 BOOL8 wide_blob(TO_ROW *row, TBOX blob_box); 00207 BOOL8 suspected_punct_blob(TO_ROW *row, TBOX box); 00208 void peek_at_next_gap(TO_ROW *row, 00209 BLOBNBOX_IT box_it, 00210 TBOX &next_blob_box, 00211 inT16 &next_gap, 00212 inT16 &next_within_xht_gap); 00213 void mark_gap(TBOX blob, //blob following gap 00214 inT16 rule, // heuristic id 00215 inT16 prev_gap, 00216 inT16 prev_blob_width, 00217 inT16 current_gap, 00218 inT16 next_blob_width, 00219 inT16 next_gap); 00220 float find_mean_blob_spacing(WERD *word); 00221 BOOL8 ignore_big_gap(TO_ROW *row, 00222 inT32 row_length, 00223 GAPMAP *gapmap, 00224 inT16 left, 00225 inT16 right); 00226 //get bounding box 00227 TBOX reduced_box_next(TO_ROW *row, //current row 00228 BLOBNBOX_IT *it //iterator to blobds 00229 ); 00230 TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, inT16 *left_above_xht); 00231 // tordmain.cpp /////////////////////////////////////////// 00232 float filter_noise_blobs(BLOBNBOX_LIST *src_list, 00233 BLOBNBOX_LIST *noise_list, 00234 BLOBNBOX_LIST *small_list, 00235 BLOBNBOX_LIST *large_list); 00236 // Fixes the block so it obeys all the rules: 00237 // Must have at least one ROW. 00238 // Must have at least one WERD. 
00239 // WERDs contain a fake blob. 00240 void cleanup_nontext_block(BLOCK* block); 00241 void cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks); 00242 BOOL8 clean_noise_from_row(ROW *row); 00243 void clean_noise_from_words(ROW *row); 00244 // Remove outlines that are a tiny fraction in either width or height 00245 // of the word height. 00246 void clean_small_noise_from_words(ROW *row); 00247 // Groups blocks by rotation, then, for each group, makes a WordGrid and calls 00248 // TransferDiacriticsToWords to copy the diacritic blobs to the most 00249 // appropriate words in the group of blocks. Source blobs are not touched. 00250 void TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs, 00251 BLOCK_LIST* blocks); 00252 // Places a copy of blobs that are near a word (after applying rotation to the 00253 // blob) in the most appropriate word, unless there is doubt, in which case a 00254 // blob can end up in two words. Source blobs are not touched. 00255 void TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs, 00256 const FCOORD &rotation, WordGrid *word_grid); 00257 00258 public: 00259 // makerow.cpp /////////////////////////////////////////// 00260 BOOL_VAR_H(textord_single_height_mode, false, 00261 "Script has no xheight, so use a single mode for horizontal text"); 00262 // tospace.cpp /////////////////////////////////////////// 00263 BOOL_VAR_H(tosp_old_to_method, false, "Space stats use prechopping?"); 00264 BOOL_VAR_H(tosp_old_to_constrain_sp_kn, false, 00265 "Constrain relative values of inter and intra-word gaps for " 00266 "old_to_method."); 00267 BOOL_VAR_H(tosp_only_use_prop_rows, true, 00268 "Block stats to use fixed pitch rows?"); 00269 BOOL_VAR_H(tosp_force_wordbreak_on_punct, false, 00270 "Force word breaks on punct to break long lines in non-space " 00271 "delimited langs"); 00272 BOOL_VAR_H(tosp_use_pre_chopping, false, 00273 "Space stats use prechopping?"); 00274 BOOL_VAR_H(tosp_old_to_bug_fix, false, 00275 "Fix suspected bug in 
old code"); 00276 BOOL_VAR_H(tosp_block_use_cert_spaces, true, 00277 "Only stat OBVIOUS spaces"); 00278 BOOL_VAR_H(tosp_row_use_cert_spaces, true, 00279 "Only stat OBVIOUS spaces"); 00280 BOOL_VAR_H(tosp_narrow_blobs_not_cert, true, 00281 "Only stat OBVIOUS spaces"); 00282 BOOL_VAR_H(tosp_row_use_cert_spaces1, true, 00283 "Only stat OBVIOUS spaces"); 00284 BOOL_VAR_H(tosp_recovery_isolated_row_stats, true, 00285 "Use row alone when inadequate cert spaces"); 00286 BOOL_VAR_H(tosp_only_small_gaps_for_kern, false, "Better guess"); 00287 BOOL_VAR_H(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?"); 00288 BOOL_VAR_H(tosp_fuzzy_limit_all, true, 00289 "Don't restrict kn->sp fuzzy limit to tables"); 00290 BOOL_VAR_H(tosp_stats_use_xht_gaps, true, 00291 "Use within xht gap for wd breaks"); 00292 BOOL_VAR_H(tosp_use_xht_gaps, true, 00293 "Use within xht gap for wd breaks"); 00294 BOOL_VAR_H(tosp_only_use_xht_gaps, false, 00295 "Only use within xht gap for wd breaks"); 00296 BOOL_VAR_H(tosp_rule_9_test_punct, false, 00297 "Don't chng kn to space next to punct"); 00298 BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp, true, "Default flip"); 00299 BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn, true, "Default flip"); 00300 BOOL_VAR_H(tosp_improve_thresh, false, 00301 "Enable improvement heuristic"); 00302 INT_VAR_H(tosp_debug_level, 0, "Debug data"); 00303 INT_VAR_H(tosp_enough_space_samples_for_median, 3, 00304 "or should we use mean"); 00305 INT_VAR_H(tosp_redo_kern_limit, 10, 00306 "No.samples reqd to reestimate for row"); 00307 INT_VAR_H(tosp_few_samples, 40, 00308 "No.gaps reqd with 1 large gap to treat as a table"); 00309 INT_VAR_H(tosp_short_row, 20, 00310 "No.gaps reqd with few cert spaces to use certs"); 00311 INT_VAR_H(tosp_sanity_method, 1, "How to avoid being silly"); 00312 double_VAR_H(tosp_old_sp_kn_th_factor, 2.0, 00313 "Factor for defining space threshold in terms of space and " 00314 "kern sizes"); 00315 double_VAR_H(tosp_threshold_bias1, 0, 00316 "how far between kern and 
space?"); 00317 double_VAR_H(tosp_threshold_bias2, 0, 00318 "how far between kern and space?"); 00319 double_VAR_H(tosp_narrow_fraction, 0.3, 00320 "Fract of xheight for narrow"); 00321 double_VAR_H(tosp_narrow_aspect_ratio, 0.48, 00322 "narrow if w/h less than this"); 00323 double_VAR_H(tosp_wide_fraction, 0.52, "Fract of xheight for wide"); 00324 double_VAR_H(tosp_wide_aspect_ratio, 0.0, 00325 "wide if w/h less than this"); 00326 double_VAR_H(tosp_fuzzy_space_factor, 0.6, 00327 "Fract of xheight for fuzz sp"); 00328 double_VAR_H(tosp_fuzzy_space_factor1, 0.5, 00329 "Fract of xheight for fuzz sp"); 00330 double_VAR_H(tosp_fuzzy_space_factor2, 0.72, 00331 "Fract of xheight for fuzz sp"); 00332 double_VAR_H(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern"); 00333 double_VAR_H(tosp_kern_gap_factor1, 2.0, 00334 "gap ratio to flip kern->sp"); 00335 double_VAR_H(tosp_kern_gap_factor2, 1.3, 00336 "gap ratio to flip kern->sp"); 00337 double_VAR_H(tosp_kern_gap_factor3, 2.5, 00338 "gap ratio to flip kern->sp"); 00339 double_VAR_H(tosp_ignore_big_gaps, -1, "xht multiplier"); 00340 double_VAR_H(tosp_ignore_very_big_gaps, 3.5, "xht multiplier"); 00341 double_VAR_H(tosp_rep_space, 1.6, "rep gap multiplier for space"); 00342 double_VAR_H(tosp_enough_small_gaps, 0.65, 00343 "Fract of kerns reqd for isolated row stats"); 00344 double_VAR_H(tosp_table_kn_sp_ratio, 2.25, 00345 "Min difference of kn & sp in table"); 00346 double_VAR_H(tosp_table_xht_sp_ratio, 0.33, 00347 "Expect spaces bigger than this"); 00348 double_VAR_H(tosp_table_fuzzy_kn_sp_ratio, 3.0, 00349 "Fuzzy if less than this"); 00350 double_VAR_H(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg"); 00351 double_VAR_H(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg"); 00352 double_VAR_H(tosp_min_sane_kn_sp, 1.5, 00353 "Don't trust spaces less than this time kn"); 00354 double_VAR_H(tosp_init_guess_kn_mult, 2.2, 00355 "Thresh guess - mult kn by this"); 00356 double_VAR_H(tosp_init_guess_xht_mult, 0.28, 00357 "Thresh guess 
- mult xht by this"); 00358 double_VAR_H(tosp_max_sane_kn_thresh, 5.0, 00359 "Multiplier on kn to limit thresh"); 00360 double_VAR_H(tosp_flip_caution, 0.0, 00361 "Don't autoflip kn to sp when large separation"); 00362 double_VAR_H(tosp_large_kerning, 0.19, 00363 "Limit use of xht gap with large kns"); 00364 double_VAR_H(tosp_dont_fool_with_small_kerns, -1, 00365 "Limit use of xht gap with odd small kns"); 00366 double_VAR_H(tosp_near_lh_edge, 0, 00367 "Don't reduce box if the top left is non blank"); 00368 double_VAR_H(tosp_silly_kn_sp_gap, 0.2, 00369 "Don't let sp minus kn get too small"); 00370 double_VAR_H(tosp_pass_wide_fuzz_sp_to_context, 0.75, 00371 "How wide fuzzies need context"); 00372 // tordmain.cpp /////////////////////////////////////////// 00373 BOOL_VAR_H(textord_no_rejects, false, "Don't remove noise blobs"); 00374 BOOL_VAR_H(textord_show_blobs, false, "Display unsorted blobs"); 00375 BOOL_VAR_H(textord_show_boxes, false, "Display boxes"); 00376 INT_VAR_H(textord_max_noise_size, 7, "Pixel size of noise"); 00377 INT_VAR_H(textord_baseline_debug, 0, "Baseline debug level"); 00378 double_VAR_H(textord_blob_size_bigile, 95, "Percentile for large blobs"); 00379 double_VAR_H(textord_noise_area_ratio, 0.7, 00380 "Fraction of bounding box for noise"); 00381 double_VAR_H(textord_blob_size_smallile, 20, "Percentile for small blobs"); 00382 double_VAR_H(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess"); 00383 double_VAR_H(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess"); 00384 INT_VAR_H(textord_noise_sizefraction, 10, "Fraction of size for maxima"); 00385 double_VAR_H(textord_noise_sizelimit, 0.5, "Fraction of x for big t count"); 00386 INT_VAR_H(textord_noise_translimit, 16, "Transitions for normal blob"); 00387 double_VAR_H(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion"); 00388 BOOL_VAR_H(textord_noise_rejwords, true, "Reject noise-like words"); 00389 BOOL_VAR_H(textord_noise_rejrows, true, "Reject noise-like 
rows"); 00390 double_VAR_H(textord_noise_syfract, 0.2, "xh fract error for norm blobs"); 00391 double_VAR_H(textord_noise_sxfract, 0.4, 00392 "xh fract width error for norm blobs"); 00393 double_VAR_H(textord_noise_hfract, 1.0/64, 00394 "Height fraction to discard outlines as speckle noise"); 00395 INT_VAR_H(textord_noise_sncount, 1, "super norm blobs to save row"); 00396 double_VAR_H(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion"); 00397 BOOL_VAR_H(textord_noise_debug, FALSE, "Debug row garbage detector"); 00398 double_VAR_H(textord_blshift_maxshift, 0.00, "Max baseline shift"); 00399 double_VAR_H(textord_blshift_xfraction, 9.99, "Min size of baseline shift"); 00400 }; 00401 } // namespace tesseract. 00402 00403 #endif // TESSERACT_TEXTORD_TEXTORD_H__