|
tesseract 3.04.01
|
00001 00002 // File: colfind.cpp 00003 // Description: Class to hold BLOBNBOXs in a grid for fast access 00004 // to neighbours. 00005 // Author: Ray Smith 00006 // Created: Wed Jun 06 17:22:01 PDT 2007 00007 // 00008 // (C) Copyright 2007, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #ifdef _MSC_VER 00022 #pragma warning(disable:4244) // Conversion warnings 00023 #endif 00024 00025 // Include automatically generated configuration file if running autoconf. 00026 #ifdef HAVE_CONFIG_H 00027 #include "config_auto.h" 00028 #endif 00029 00030 #include "colfind.h" 00031 00032 #include "ccnontextdetect.h" 00033 #include "colpartition.h" 00034 #include "colpartitionset.h" 00035 #include "equationdetectbase.h" 00036 #include "linefind.h" 00037 #include "normalis.h" 00038 #include "strokewidth.h" 00039 #include "blobbox.h" 00040 #include "scrollview.h" 00041 #include "tablefind.h" 00042 #include "params.h" 00043 #include "workingpartset.h" 00044 00045 namespace tesseract { 00046 00047 // Minimum width (in pixels) to be considered when making columns. 00048 // TODO(rays) convert to inches, dependent on resolution. 00049 const int kMinColumnWidth = 100; 00050 // When assigning columns, the max number of misfit grid rows/ColPartitionSets 00051 // that can be ignored. 00052 const int kMaxIncompatibleColumnCount = 2; 00053 // Min fraction of ColPartition height to be overlapping for margin purposes. 00054 const double kMarginOverlapFraction = 0.25; 00055 // Max fraction of mean_column_gap_ for the gap between two partitions within a 00056 // column to allow them to merge. 00057 const double kHorizontalGapMergeFraction = 0.5; 00058 // Min fraction of grid size to not be considered likely noise. 00059 const double kMinNonNoiseFraction = 0.5; 00060 // Minimum gutter width as a fraction of gridsize 00061 const double kMinGutterWidthGrid = 0.5; 00062 // Max multiple of a partition's median size as a distance threshold for 00063 // adding noise blobs. 00064 const double kMaxDistToPartSizeRatio = 1.5; 00065 00066 BOOL_VAR(textord_tabfind_show_initial_partitions, 00067 false, "Show partition bounds"); 00068 BOOL_VAR(textord_tabfind_show_reject_blobs, 00069 false, "Show blobs rejected as noise"); 00070 INT_VAR(textord_tabfind_show_partitions, 0, 00071 "Show partition bounds, waiting if >1"); 00072 BOOL_VAR(textord_tabfind_show_columns, false, "Show column bounds"); 00073 BOOL_VAR(textord_tabfind_show_blocks, false, "Show final block bounds"); 00074 BOOL_VAR(textord_tabfind_find_tables, true, "run table detection"); 00075 00076 ScrollView* ColumnFinder::blocks_win_ = NULL; 00077 00078 // Gridsize is an estimate of the text size in the image. A suitable value 00079 // is in TO_BLOCK::line_size after find_components has been used to make 00080 // the blobs. 00081 // bleft and tright are the bounds of the image (or rectangle) being processed. 00082 // vlines is a (possibly empty) list of TabVector and vertical_x and y are 00083 // the sum logical vertical vector produced by LineFinder::FindVerticalLines. 00084 ColumnFinder::ColumnFinder(int gridsize, 00085 const ICOORD& bleft, const ICOORD& tright, 00086 int resolution, bool cjk_script, 00087 double aligned_gap_fraction, 00088 TabVector_LIST* vlines, TabVector_LIST* hlines, 00089 int vertical_x, int vertical_y) 00090 : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y, 00091 resolution), 00092 cjk_script_(cjk_script), 00093 min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize)), 00094 mean_column_gap_(tright.x() - bleft.x()), 00095 tabfind_aligned_gap_fraction_(aligned_gap_fraction), 00096 reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f), 00097 best_columns_(NULL), stroke_width_(NULL), 00098 part_grid_(gridsize, bleft, tright), nontext_map_(NULL), 00099 projection_(resolution), 00100 denorm_(NULL), input_blobs_win_(NULL), equation_detect_(NULL) { 00101 TabVector_IT h_it(&horizontal_lines_); 00102 h_it.add_list_after(hlines); 00103 } 00104 00105 ColumnFinder::~ColumnFinder() { 00106 column_sets_.delete_data_pointers(); 00107 if (best_columns_ != NULL) { 00108 delete [] best_columns_; 00109 } 00110 if (stroke_width_ != NULL) 00111 delete stroke_width_; 00112 delete input_blobs_win_; 00113 pixDestroy(&nontext_map_); 00114 while (denorm_ != NULL) { 00115 DENORM* dead_denorm = denorm_; 00116 denorm_ = const_cast<DENORM*>(denorm_->predecessor()); 00117 delete dead_denorm; 00118 } 00119 00120 // The ColPartitions are destroyed automatically, but any boxes in 00121 // the noise_parts_ list are owned and need to be deleted explicitly. 00122 ColPartition_IT part_it(&noise_parts_); 00123 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { 00124 ColPartition* part = part_it.data(); 00125 part->DeleteBoxes(); 00126 } 00127 // Likewise any boxes in the good_parts_ list need to be deleted. 00128 // These are just the image parts. Text parts have already given their 00129 // boxes on to the TO_BLOCK, and have empty lists. 00130 part_it.set_to_list(&good_parts_); 00131 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { 00132 ColPartition* part = part_it.data(); 00133 part->DeleteBoxes(); 00134 } 00135 // Also, any blobs on the image_bblobs_ list need to have their cblobs 00136 // deleted. This only happens if there has been an early return from 00137 // FindColumns, as in a normal return, the blobs go into the grid and 00138 // end up in noise_parts_, good_parts_ or the output blocks. 00139 BLOBNBOX_IT bb_it(&image_bblobs_); 00140 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { 00141 BLOBNBOX* bblob = bb_it.data(); 00142 delete bblob->cblob(); 00143 } 00144 } 00145 00146 // Performs initial processing on the blobs in the input_block: 00147 // Setup the part_grid, stroke_width_, nontext_map. 00148 // Obvious noise blobs are filtered out and used to mark the nontext_map_. 00149 // Initial stroke-width analysis is used to get local text alignment 00150 // direction, so the textline projection_ map can be setup. 00151 // On return, IsVerticallyAlignedText may be called (now optionally) to 00152 // determine the gross textline alignment of the page. 00153 void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, 00154 Pix* photo_mask_pix, 00155 TO_BLOCK* input_block) { 00156 part_grid_.Init(gridsize(), bleft(), tright()); 00157 if (stroke_width_ != NULL) 00158 delete stroke_width_; 00159 stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright()); 00160 min_gutter_width_ = static_cast<int>(kMinGutterWidthGrid * gridsize()); 00161 input_block->ReSetAndReFilterBlobs(); 00162 #ifndef GRAPHICS_DISABLED 00163 if (textord_tabfind_show_blocks) { 00164 input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs"); 00165 input_block->plot_graded_blobs(input_blobs_win_); 00166 } 00167 #endif // GRAPHICS_DISABLED 00168 SetBlockRuleEdges(input_block); 00169 pixDestroy(&nontext_map_); 00170 // Run a preliminary strokewidth neighbour detection on the medium blobs. 00171 stroke_width_->SetNeighboursOnMediumBlobs(input_block); 00172 CCNonTextDetect nontext_detect(gridsize(), bleft(), tright()); 00173 // Remove obvious noise and make the initial non-text map. 00174 nontext_map_ = nontext_detect.ComputeNonTextMask(textord_debug_tabfind, 00175 photo_mask_pix, input_block); 00176 stroke_width_->FindTextlineDirectionAndFixBrokenCJK(pageseg_mode, cjk_script_, 00177 input_block); 00178 // Clear the strokewidth grid ready for rotation or leader finding. 00179 stroke_width_->Clear(); 00180 } 00181 00182 // Tests for vertical alignment of text (returning true if so), and generates 00183 // a list of blobs of moderate aspect ratio, in the most frequent writing 00184 // direction (in osd_blobs) for orientation and script detection to test 00185 // the character orientation. 00186 // block is the single block for the whole page or rectangle to be OCRed. 00187 // Note that the vertical alignment may be due to text whose writing direction 00188 // is vertical, like say Japanese, or due to text whose writing direction is 00189 // horizontal but whose text appears vertically aligned because the image is 00190 // not the right way up. 00191 bool ColumnFinder::IsVerticallyAlignedText(double find_vertical_text_ratio, 00192 TO_BLOCK* block, 00193 BLOBNBOX_CLIST* osd_blobs) { 00194 return stroke_width_->TestVerticalTextDirection(find_vertical_text_ratio, 00195 block, osd_blobs); 00196 } 00197 00198 // Rotates the blobs and the TabVectors so that the gross writing direction 00199 // (text lines) are horizontal and lines are read down the page. 00200 // Applied rotation stored in rotation_. 00201 // A second rotation is calculated for application during recognition to 00202 // make the rotated blobs upright for recognition. 00203 // Subsequent rotation stored in text_rotation_. 00204 // 00205 // Arguments: 00206 // vertical_text_lines true if the text lines are vertical. 00207 // recognition_rotation [0..3] is the number of anti-clockwise 90 degree 00208 // rotations from osd required for the text to be upright and readable. 00209 void ColumnFinder::CorrectOrientation(TO_BLOCK* block, 00210 bool vertical_text_lines, 00211 int recognition_rotation) { 00212 const FCOORD anticlockwise90(0.0f, 1.0f); 00213 const FCOORD clockwise90(0.0f, -1.0f); 00214 const FCOORD rotation180(-1.0f, 0.0f); 00215 const FCOORD norotation(1.0f, 0.0f); 00216 00217 text_rotation_ = norotation; 00218 // Rotate the page to make the text upright, as implied by 00219 // recognition_rotation. 00220 rotation_ = norotation; 00221 if (recognition_rotation == 1) { 00222 rotation_ = anticlockwise90; 00223 } else if (recognition_rotation == 2) { 00224 rotation_ = rotation180; 00225 } else if (recognition_rotation == 3) { 00226 rotation_ = clockwise90; 00227 } 00228 // We infer text writing direction to be vertical if there are several 00229 // vertical text lines detected, and horizontal if not. But if the page 00230 // orientation was determined to be 90 or 270 degrees, the true writing 00231 // direction is the opposite of what we inferred. 00232 if (recognition_rotation & 1) { 00233 vertical_text_lines = !vertical_text_lines; 00234 } 00235 // If we still believe the writing direction is vertical, we use the 00236 // convention of rotating the page ccw 90 degrees to make the text lines 00237 // horizontal, and mark the blobs for rotation cw 90 degrees for 00238 // classification so that the text order is correct after recognition. 00239 if (vertical_text_lines) { 00240 rotation_.rotate(anticlockwise90); 00241 text_rotation_.rotate(clockwise90); 00242 } 00243 // Set rerotate_ to the inverse of rotation_. 00244 rerotate_ = FCOORD(rotation_.x(), -rotation_.y()); 00245 if (rotation_.x() != 1.0f || rotation_.y() != 0.0f) { 00246 // Rotate all the blobs and tab vectors. 00247 RotateBlobList(rotation_, &block->large_blobs); 00248 RotateBlobList(rotation_, &block->blobs); 00249 RotateBlobList(rotation_, &block->small_blobs); 00250 RotateBlobList(rotation_, &block->noise_blobs); 00251 TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_, 00252 &min_gutter_width_); 00253 part_grid_.Init(gridsize(), bleft(), tright()); 00254 // Reset all blobs to initial state and filter by size. 00255 // Since they have rotated, the list they belong on could have changed. 00256 block->ReSetAndReFilterBlobs(); 00257 SetBlockRuleEdges(block); 00258 stroke_width_->CorrectForRotation(rerotate_, &part_grid_); 00259 } 00260 if (textord_debug_tabfind) { 00261 tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n", 00262 vertical_text_lines, recognition_rotation, 00263 rotation_.x(), rotation_.y(), 00264 text_rotation_.x(), text_rotation_.y()); 00265 } 00266 // Setup the denormalization. 00267 ASSERT_HOST(denorm_ == NULL); 00268 denorm_ = new DENORM; 00269 denorm_->SetupNormalization(NULL, &rotation_, NULL, 00270 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); 00271 } 00272 00273 // Finds blocks of text, image, rule line, table etc, returning them in the 00274 // blocks and to_blocks 00275 // (Each TO_BLOCK points to the basic BLOCK and adds more information.) 00276 // Image blocks are generated by a combination of photo_mask_pix (which may 00277 // NOT be NULL) and the rejected text found during preliminary textline 00278 // finding. 00279 // The input_block is the result of a call to find_components, and contains 00280 // the blobs found in the image or rectangle to be OCRed. These blobs will be 00281 // removed and placed in the output blocks, while unused ones will be deleted. 00282 // If single_column is true, the input is treated as single column, but 00283 // it is still divided into blocks of equal line spacing/text size. 00284 // scaled_color is scaled down by scaled_factor from the input color image, 00285 // and may be NULL if the input was not color. 00286 // grey_pix is optional, but if present must match the photo_mask_pix in size, 00287 // and must be a *real* grey image instead of binary_pix * 255. 00288 // thresholds_pix is expected to be present iff grey_pix is present and 00289 // can be an integer factor reduction of the grey_pix. It represents the 00290 // thresholds that were used to create the binary_pix from the grey_pix. 00291 // If diacritic_blobs is non-null, then diacritics/noise blobs, that would 00292 // confuse layout anaylsis by causing textline overlap, are placed there, 00293 // with the expectation that they will be reassigned to words later and 00294 // noise/diacriticness determined via classification. 00295 // Returns -1 if the user hits the 'd' key in the blocks window while running 00296 // in debug mode, which requests a retry with more debug info. 00297 int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, 00298 int scaled_factor, TO_BLOCK* input_block, 00299 Pix* photo_mask_pix, Pix* thresholds_pix, 00300 Pix* grey_pix, BLOCK_LIST* blocks, 00301 BLOBNBOX_LIST* diacritic_blobs, 00302 TO_BLOCK_LIST* to_blocks) { 00303 pixOr(photo_mask_pix, photo_mask_pix, nontext_map_); 00304 stroke_width_->FindLeaderPartitions(input_block, &part_grid_); 00305 stroke_width_->RemoveLineResidue(&big_parts_); 00306 FindInitialTabVectors(NULL, min_gutter_width_, tabfind_aligned_gap_fraction_, 00307 input_block); 00308 SetBlockRuleEdges(input_block); 00309 stroke_width_->GradeBlobsIntoPartitions( 00310 pageseg_mode, rerotate_, input_block, nontext_map_, denorm_, cjk_script_, 00311 &projection_, diacritic_blobs, &part_grid_, &big_parts_); 00312 if (!PSM_SPARSE(pageseg_mode)) { 00313 ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, 00314 input_block, this, &part_grid_, &big_parts_); 00315 ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_, 00316 photo_mask_pix); 00317 ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, 00318 input_block, this, &part_grid_, &big_parts_); 00319 } 00320 part_grid_.ReTypeBlobs(&image_bblobs_); 00321 TidyBlobs(input_block); 00322 Reset(); 00323 // TODO(rays) need to properly handle big_parts_. 00324 ColPartition_IT p_it(&big_parts_); 00325 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) 00326 p_it.data()->DisownBoxesNoAssert(); 00327 big_parts_.clear(); 00328 delete stroke_width_; 00329 stroke_width_ = NULL; 00330 // Compute the edge offsets whether or not there is a grey_pix. It is done 00331 // here as the c_blobs haven't been touched by rotation or anything yet, 00332 // so no denorm is required, yet the text has been separated from image, so 00333 // no time is wasted running it on image blobs. 00334 input_block->ComputeEdgeOffsets(thresholds_pix, grey_pix); 00335 00336 // A note about handling right-to-left scripts (Hebrew/Arabic): 00337 // The columns must be reversed and come out in right-to-left instead of 00338 // the normal left-to-right order. Because the left-to-right ordering 00339 // is implicit in many data structures, it is simpler to fool the algorithms 00340 // into thinking they are dealing with left-to-right text. 00341 // To do this, we reflect the needed data in the y-axis and then reflect 00342 // the blocks back after they have been created. This is a temporary 00343 // arrangement that is confined to this function only, so the reflection 00344 // is completely invisible in the output blocks. 00345 // The only objects reflected are: 00346 // The vertical separator lines that have already been found; 00347 // The bounding boxes of all BLOBNBOXES on all lists on the input_block 00348 // plus the image_bblobs. The outlines are not touched, since they are 00349 // not looked at. 00350 bool input_is_rtl = input_block->block->right_to_left(); 00351 if (input_is_rtl) { 00352 // Reflect the vertical separator lines (member of TabFind). 00353 ReflectInYAxis(); 00354 // Reflect the blob boxes. 00355 ReflectForRtl(input_block, &image_bblobs_); 00356 part_grid_.ReflectInYAxis(); 00357 } 00358 00359 if (!PSM_SPARSE(pageseg_mode)) { 00360 if (!PSM_COL_FIND_ENABLED(pageseg_mode)) { 00361 // No tab stops needed. Just the grid that FindTabVectors makes. 00362 DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_); 00363 } else { 00364 SetBlockRuleEdges(input_block); 00365 // Find the tab stops, estimate skew, and deskew the tabs, blobs and 00366 // part_grid_. 00367 FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block, 00368 min_gutter_width_, tabfind_aligned_gap_fraction_, 00369 &part_grid_, &deskew_, &reskew_); 00370 // Add the deskew to the denorm_. 00371 DENORM* new_denorm = new DENORM; 00372 new_denorm->SetupNormalization(NULL, &deskew_, denorm_, 00373 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); 00374 denorm_ = new_denorm; 00375 } 00376 SetBlockRuleEdges(input_block); 00377 part_grid_.SetTabStops(this); 00378 00379 // Make the column_sets_. 00380 if (!MakeColumns(false)) { 00381 tprintf("Empty page!!\n"); 00382 part_grid_.DeleteParts(); 00383 return 0; // This is an empty page. 00384 } 00385 00386 // Refill the grid using rectangular spreading, and get the benefit 00387 // of the completed tab vectors marking the rule edges of each blob. 00388 Clear(); 00389 #ifndef GRAPHICS_DISABLED 00390 if (textord_tabfind_show_reject_blobs) { 00391 ScrollView* rej_win = MakeWindow(500, 300, "Rejected blobs"); 00392 input_block->plot_graded_blobs(rej_win); 00393 } 00394 #endif // GRAPHICS_DISABLED 00395 InsertBlobsToGrid(false, false, &image_bblobs_, this); 00396 InsertBlobsToGrid(true, true, &input_block->blobs, this); 00397 00398 part_grid_.GridFindMargins(best_columns_); 00399 // Split and merge the partitions by looking at local neighbours. 00400 GridSplitPartitions(); 00401 // Resolve unknown partitions by adding to an existing partition, fixing 00402 // the type, or declaring them noise. 00403 part_grid_.GridFindMargins(best_columns_); 00404 GridMergePartitions(); 00405 // Insert any unused noise blobs that are close enough to an appropriate 00406 // partition. 00407 InsertRemainingNoise(input_block); 00408 // Add horizontal line separators as partitions. 00409 GridInsertHLinePartitions(); 00410 GridInsertVLinePartitions(); 00411 // Recompute margins based on a local neighbourhood search. 00412 part_grid_.GridFindMargins(best_columns_); 00413 SetPartitionTypes(); 00414 } 00415 if (textord_tabfind_show_initial_partitions) { 00416 ScrollView* part_win = MakeWindow(100, 300, "InitialPartitions"); 00417 part_grid_.DisplayBoxes(part_win); 00418 DisplayTabVectors(part_win); 00419 } 00420 00421 if (!PSM_SPARSE(pageseg_mode)) { 00422 if (equation_detect_) { 00423 equation_detect_->FindEquationParts(&part_grid_, best_columns_); 00424 } 00425 if (textord_tabfind_find_tables) { 00426 TableFinder table_finder; 00427 table_finder.Init(gridsize(), bleft(), tright()); 00428 table_finder.set_resolution(resolution_); 00429 table_finder.set_left_to_right_language( 00430 !input_block->block->right_to_left()); 00431 // Copy cleaned partitions from part_grid_ to clean_part_grid_ and 00432 // insert dot-like noise into period_grid_ 00433 table_finder.InsertCleanPartitions(&part_grid_, input_block); 00434 // Get Table Regions 00435 table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_); 00436 } 00437 GridRemoveUnderlinePartitions(); 00438 part_grid_.DeleteUnknownParts(input_block); 00439 00440 // Build the partitions into chains that belong in the same block and 00441 // refine into one-to-one links, then smooth the types within each chain. 00442 part_grid_.FindPartitionPartners(); 00443 part_grid_.FindFigureCaptions(); 00444 part_grid_.RefinePartitionPartners(true); 00445 SmoothPartnerRuns(); 00446 00447 #ifndef GRAPHICS_DISABLED 00448 if (textord_tabfind_show_partitions) { 00449 ScrollView* window = MakeWindow(400, 300, "Partitions"); 00450 if (window != NULL) { 00451 if (textord_debug_images) 00452 window->Image(AlignedBlob::textord_debug_pix().string(), 00453 image_origin().x(), image_origin().y()); 00454 part_grid_.DisplayBoxes(window); 00455 if (!textord_debug_printable) 00456 DisplayTabVectors(window); 00457 if (window != NULL && textord_tabfind_show_partitions > 1) { 00458 delete window->AwaitEvent(SVET_DESTROY); 00459 } 00460 } 00461 } 00462 #endif // GRAPHICS_DISABLED 00463 part_grid_.AssertNoDuplicates(); 00464 } 00465 // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here, 00466 // and ownership of the BLOBNBOXes moves to the ColPartitions. 00467 // (They were previously owned by the block or the image_bblobs list.) 00468 ReleaseBlobsAndCleanupUnused(input_block); 00469 // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and 00470 // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves 00471 // from the ColPartitions to the output TO_BLOCK. In non-text, the 00472 // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor. 00473 if (PSM_SPARSE(pageseg_mode)) 00474 part_grid_.ExtractPartitionsAsBlocks(blocks, to_blocks); 00475 else 00476 TransformToBlocks(blocks, to_blocks); 00477 if (textord_debug_tabfind) { 00478 tprintf("Found %d blocks, %d to_blocks\n", 00479 blocks->length(), to_blocks->length()); 00480 } 00481 00482 DisplayBlocks(blocks); 00483 RotateAndReskewBlocks(input_is_rtl, to_blocks); 00484 int result = 0; 00485 #ifndef GRAPHICS_DISABLED 00486 if (blocks_win_ != NULL) { 00487 bool waiting = false; 00488 do { 00489 waiting = false; 00490 SVEvent* event = blocks_win_->AwaitEvent(SVET_ANY); 00491 if (event->type == SVET_INPUT && event->parameter != NULL) { 00492 if (*event->parameter == 'd') 00493 result = -1; 00494 else 00495 blocks->clear(); 00496 } else if (event->type == SVET_DESTROY) { 00497 blocks_win_ = NULL; 00498 } else { 00499 waiting = true; 00500 } 00501 delete event; 00502 } while (waiting); 00503 } 00504 #endif // GRAPHICS_DISABLED 00505 return result; 00506 } 00507 00508 // Get the rotation required to deskew, and its inverse rotation. 00509 void ColumnFinder::GetDeskewVectors(FCOORD* deskew, FCOORD* reskew) { 00510 *reskew = reskew_; 00511 *deskew = reskew_; 00512 deskew->set_y(-deskew->y()); 00513 } 00514 00515 void ColumnFinder::SetEquationDetect(EquationDetectBase* detect) { 00516 equation_detect_ = detect; 00517 } 00518 00520 00521 // Displays the blob and block bounding boxes in a window called Blocks. 00522 void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) { 00523 #ifndef GRAPHICS_DISABLED 00524 if (textord_tabfind_show_blocks) { 00525 if (blocks_win_ == NULL) 00526 blocks_win_ = MakeWindow(700, 300, "Blocks"); 00527 else 00528 blocks_win_->Clear(); 00529 if (textord_debug_images) 00530 blocks_win_->Image(AlignedBlob::textord_debug_pix().string(), 00531 image_origin().x(), image_origin().y()); 00532 else 00533 DisplayBoxes(blocks_win_); 00534 BLOCK_IT block_it(blocks); 00535 int serial = 1; 00536 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); 00537 block_it.forward()) { 00538 BLOCK* block = block_it.data(); 00539 block->plot(blocks_win_, serial++, 00540 textord_debug_printable ? ScrollView::BLUE 00541 : ScrollView::GREEN); 00542 } 00543 blocks_win_->Update(); 00544 } 00545 #endif 00546 } 00547 00548 // Displays the column edges at each grid y coordinate defined by 00549 // best_columns_. 00550 void ColumnFinder::DisplayColumnBounds(PartSetVector* sets) { 00551 #ifndef GRAPHICS_DISABLED 00552 ScrollView* col_win = MakeWindow(50, 300, "Columns"); 00553 if (textord_debug_images) 00554 col_win->Image(AlignedBlob::textord_debug_pix().string(), 00555 image_origin().x(), image_origin().y()); 00556 else 00557 DisplayBoxes(col_win); 00558 col_win->Pen(textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN); 00559 for (int i = 0; i < gridheight_; ++i) { 00560 ColPartitionSet* columns = best_columns_[i]; 00561 if (columns != NULL) 00562 columns->DisplayColumnEdges(i * gridsize_, (i + 1) * gridsize_, col_win); 00563 } 00564 #endif 00565 } 00566 00567 // Sets up column_sets_ (the determined column layout at each horizontal 00568 // slice). Returns false if the page is empty. 00569 bool ColumnFinder::MakeColumns(bool single_column) { 00570 // The part_sets_ are a temporary structure used during column creation, 00571 // and is a vector of ColPartitionSets, representing ColPartitions found 00572 // at horizontal slices through the page. 00573 PartSetVector part_sets; 00574 if (!single_column) { 00575 if (!part_grid_.MakeColPartSets(&part_sets)) 00576 return false; // Empty page. 00577 ASSERT_HOST(part_grid_.gridheight() == gridheight_); 00578 // Try using only the good parts first. 00579 bool good_only = true; 00580 do { 00581 for (int i = 0; i < gridheight_; ++i) { 00582 ColPartitionSet* line_set = part_sets.get(i); 00583 if (line_set != NULL && line_set->LegalColumnCandidate()) { 00584 ColPartitionSet* column_candidate = line_set->Copy(good_only); 00585 if (column_candidate != NULL) 00586 column_candidate->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); 00587 } 00588 } 00589 good_only = !good_only; 00590 } while (column_sets_.empty() && !good_only); 00591 if (textord_debug_tabfind) 00592 PrintColumnCandidates("Column candidates"); 00593 // Improve the column candidates against themselves. 00594 ImproveColumnCandidates(&column_sets_, &column_sets_); 00595 if (textord_debug_tabfind) 00596 PrintColumnCandidates("Improved columns"); 00597 // Improve the column candidates using the part_sets_. 00598 ImproveColumnCandidates(&part_sets, &column_sets_); 00599 } 00600 ColPartitionSet* single_column_set = 00601 part_grid_.MakeSingleColumnSet(WidthCB()); 00602 if (single_column_set != NULL) { 00603 // Always add the single column set as a backup even if not in 00604 // single column mode. 00605 single_column_set->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); 00606 } 00607 if (textord_debug_tabfind) 00608 PrintColumnCandidates("Final Columns"); 00609 bool has_columns = !column_sets_.empty(); 00610 if (has_columns) { 00611 // Divide the page into sections of uniform column layout. 00612 bool any_multi_column = AssignColumns(part_sets); 00613 if (textord_tabfind_show_columns) { 00614 DisplayColumnBounds(&part_sets); 00615 } 00616 ComputeMeanColumnGap(any_multi_column); 00617 } 00618 for (int i = 0; i < part_sets.size(); ++i) { 00619 ColPartitionSet* line_set = part_sets.get(i); 00620 if (line_set != NULL) { 00621 line_set->RelinquishParts(); 00622 delete line_set; 00623 } 00624 } 00625 return has_columns; 00626 } 00627 00628 // Attempt to improve the column_candidates by expanding the columns 00629 // and adding new partitions from the partition sets in src_sets. 00630 // Src_sets may be equal to column_candidates, in which case it will 00631 // use them as a source to improve themselves. 00632 void ColumnFinder::ImproveColumnCandidates(PartSetVector* src_sets, 00633 PartSetVector* column_sets) { 00634 PartSetVector temp_cols; 00635 temp_cols.move(column_sets); 00636 if (src_sets == column_sets) 00637 src_sets = &temp_cols; 00638 int set_size = temp_cols.size(); 00639 // Try using only the good parts first. 00640 bool good_only = true; 00641 do { 00642 for (int i = 0; i < set_size; ++i) { 00643 ColPartitionSet* column_candidate = temp_cols.get(i); 00644 ASSERT_HOST(column_candidate != NULL); 00645 ColPartitionSet* improved = column_candidate->Copy(good_only); 00646 if (improved != NULL) { 00647 improved->ImproveColumnCandidate(WidthCB(), src_sets); 00648 improved->AddToColumnSetsIfUnique(column_sets, WidthCB()); 00649 } 00650 } 00651 good_only = !good_only; 00652 } while (column_sets->empty() && !good_only); 00653 if (column_sets->empty()) 00654 column_sets->move(&temp_cols); 00655 else 00656 temp_cols.delete_data_pointers(); 00657 } 00658 00659 // Prints debug information on the column candidates. 00660 void ColumnFinder::PrintColumnCandidates(const char* title) { 00661 int set_size = column_sets_.size(); 00662 tprintf("Found %d %s:\n", set_size, title); 00663 if (textord_debug_tabfind >= 3) { 00664 for (int i = 0; i < set_size; ++i) { 00665 ColPartitionSet* column_set = column_sets_.get(i); 00666 column_set->Print(); 00667 } 00668 } 00669 } 00670 00671 // Finds the optimal set of columns that cover the entire image with as 00672 // few changes in column partition as possible. 00673 // NOTE: this could be thought of as an optimization problem, but a simple 00674 // greedy algorithm is used instead. The algorithm repeatedly finds the modal 00675 // compatible column in an unassigned region and uses that with the extra 00676 // tweak of extending the modal region over small breaks in compatibility. 00677 // Where modal regions overlap, the boundary is chosen so as to minimize 00678 // the cost in terms of ColPartitions not fitting an approved column. 00679 // Returns true if any part of the page is multi-column. 00680 bool ColumnFinder::AssignColumns(const PartSetVector& part_sets) { 00681 int set_count = part_sets.size(); 00682 ASSERT_HOST(set_count == gridheight()); 00683 // Allocate and init the best_columns_. 00684 best_columns_ = new ColPartitionSet*[set_count]; 00685 for (int y = 0; y < set_count; ++y) 00686 best_columns_[y] = NULL; 00687 int column_count = column_sets_.size(); 00688 // column_set_costs[part_sets_ index][column_sets_ index] is 00689 // < MAX_INT32 if the partition set is compatible with the column set, 00690 // in which case its value is the cost for that set used in deciding 00691 // which competing set to assign. 00692 // any_columns_possible[part_sets_ index] is true if any of 00693 // possible_column_sets[part_sets_ index][*] is < MAX_INT32. 00694 // assigned_costs[part_sets_ index] is set to the column_set_costs 00695 // of the assigned column_sets_ index or MAX_INT32 if none is set. 00696 // On return the best_columns_ member is set. 00697 bool* any_columns_possible = new bool[set_count]; 00698 int* assigned_costs = new int[set_count]; 00699 int** column_set_costs = new int*[set_count]; 00700 // Set possible column_sets to indicate whether each set is compatible 00701 // with each column. 00702 for (int part_i = 0; part_i < set_count; ++part_i) { 00703 ColPartitionSet* line_set = part_sets.get(part_i); 00704 bool debug = line_set != NULL && 00705 WithinTestRegion(2, line_set->bounding_box().left(), 00706 line_set->bounding_box().bottom()); 00707 column_set_costs[part_i] = new int[column_count]; 00708 any_columns_possible[part_i] = false; 00709 assigned_costs[part_i] = MAX_INT32; 00710 for (int col_i = 0; col_i < column_count; ++col_i) { 00711 if (line_set != NULL && 00712 column_sets_.get(col_i)->CompatibleColumns(debug, line_set, 00713 WidthCB())) { 00714 column_set_costs[part_i][col_i] = 00715 column_sets_.get(col_i)->UnmatchedWidth(line_set); 00716 any_columns_possible[part_i] = true; 00717 } else { 00718 column_set_costs[part_i][col_i] = MAX_INT32; 00719 if (debug) 00720 tprintf("Set id %d did not match at y=%d, lineset =%p\n", 00721 col_i, part_i, line_set); 00722 } 00723 } 00724 } 00725 bool any_multi_column = false; 00726 // Assign a column set to each vertical grid position. 00727 // While there is an unassigned range, find its mode. 00728 int start, end; 00729 while (BiggestUnassignedRange(set_count, any_columns_possible, 00730 &start, &end)) { 00731 if (textord_debug_tabfind >= 2) 00732 tprintf("Biggest unassigned range = %d- %d\n", start, end); 00733 // Find the modal column_set_id in the range. 00734 int column_set_id = RangeModalColumnSet(column_set_costs, 00735 assigned_costs, start, end); 00736 if (textord_debug_tabfind >= 2) { 00737 tprintf("Range modal column id = %d\n", column_set_id); 00738 column_sets_.get(column_set_id)->Print(); 00739 } 00740 // Now find the longest run of the column_set_id in the range. 00741 ShrinkRangeToLongestRun(column_set_costs, assigned_costs, 00742 any_columns_possible, 00743 column_set_id, &start, &end); 00744 if (textord_debug_tabfind >= 2) 00745 tprintf("Shrunk range = %d- %d\n", start, end); 00746 // Extend the start and end past the longest run, while there are 00747 // only small gaps in compatibility that can be overcome by larger 00748 // regions of compatibility beyond. 00749 ExtendRangePastSmallGaps(column_set_costs, assigned_costs, 00750 any_columns_possible, 00751 column_set_id, -1, -1, &start); 00752 --end; 00753 ExtendRangePastSmallGaps(column_set_costs, assigned_costs, 00754 any_columns_possible, 00755 column_set_id, 1, set_count, &end); 00756 ++end; 00757 if (textord_debug_tabfind) 00758 tprintf("Column id %d applies to range = %d - %d\n", 00759 column_set_id, start, end); 00760 // Assign the column to the range, which now may overlap with other ranges. 00761 AssignColumnToRange(column_set_id, start, end, column_set_costs, 00762 assigned_costs); 00763 if (column_sets_.get(column_set_id)->GoodColumnCount() > 1) 00764 any_multi_column = true; 00765 } 00766 // If anything remains unassigned, the whole lot is unassigned, so 00767 // arbitrarily assign id 0. 00768 if (best_columns_[0] == NULL) { 00769 AssignColumnToRange(0, 0, gridheight_, column_set_costs, assigned_costs); 00770 } 00771 // Free memory. 00772 for (int i = 0; i < set_count; ++i) { 00773 delete [] column_set_costs[i]; 00774 } 00775 delete [] assigned_costs; 00776 delete [] any_columns_possible; 00777 delete [] column_set_costs; 00778 return any_multi_column; 00779 } 00780 00781 // Finds the biggest range in part_sets_ that has no assigned column, but 00782 // column assignment is possible. 00783 bool ColumnFinder::BiggestUnassignedRange(int set_count, 00784 const bool* any_columns_possible, 00785 int* best_start, int* best_end) { 00786 int best_range_size = 0; 00787 *best_start = set_count; 00788 *best_end = set_count; 00789 int end = set_count; 00790 for (int start = 0; start < gridheight_; start = end) { 00791 // Find the first unassigned index in start. 00792 while (start < set_count) { 00793 if (best_columns_[start] == NULL && any_columns_possible[start]) 00794 break; 00795 ++start; 00796 } 00797 // Find the first past the end and count the good ones in between. 00798 int range_size = 1; // Number of non-null, but unassigned line sets. 00799 end = start + 1; 00800 while (end < set_count) { 00801 if (best_columns_[end] != NULL) 00802 break; 00803 if (any_columns_possible[end]) 00804 ++range_size; 00805 ++end; 00806 } 00807 if (start < set_count && range_size > best_range_size) { 00808 best_range_size = range_size; 00809 *best_start = start; 00810 *best_end = end; 00811 } 00812 } 00813 return *best_start < *best_end; 00814 } 00815 00816 // Finds the modal compatible column_set_ index within the given range. 00817 int ColumnFinder::RangeModalColumnSet(int** column_set_costs, 00818 const int* assigned_costs, 00819 int start, int end) { 00820 int column_count = column_sets_.size(); 00821 STATS column_stats(0, column_count); 00822 for (int part_i = start; part_i < end; ++part_i) { 00823 for (int col_j = 0; col_j < column_count; ++col_j) { 00824 if (column_set_costs[part_i][col_j] < assigned_costs[part_i]) 00825 column_stats.add(col_j, 1); 00826 } 00827 } 00828 ASSERT_HOST(column_stats.get_total() > 0); 00829 return column_stats.mode(); 00830 } 00831 00832 // Given that there are many column_set_id compatible columns in the range, 00833 // shrinks the range to the longest contiguous run of compatibility, allowing 00834 // gaps where no columns are possible, but not where competing columns are 00835 // possible. 00836 void ColumnFinder::ShrinkRangeToLongestRun(int** column_set_costs, 00837 const int* assigned_costs, 00838 const bool* any_columns_possible, 00839 int column_set_id, 00840 int* best_start, int* best_end) { 00841 // orig_start and orig_end are the maximum range we will look at. 00842 int orig_start = *best_start; 00843 int orig_end = *best_end; 00844 int best_range_size = 0; 00845 *best_start = orig_end; 00846 *best_end = orig_end; 00847 int end = orig_end; 00848 for (int start = orig_start; start < orig_end; start = end) { 00849 // Find the first possible 00850 while (start < orig_end) { 00851 if (column_set_costs[start][column_set_id] < assigned_costs[start] || 00852 !any_columns_possible[start]) 00853 break; 00854 ++start; 00855 } 00856 // Find the first past the end. 00857 end = start + 1; 00858 while (end < orig_end) { 00859 if (column_set_costs[end][column_set_id] >= assigned_costs[start] && 00860 any_columns_possible[end]) 00861 break; 00862 ++end; 00863 } 00864 if (start < orig_end && end - start > best_range_size) { 00865 best_range_size = end - start; 00866 *best_start = start; 00867 *best_end = end; 00868 } 00869 } 00870 } 00871 00872 // Moves start in the direction of step, up to, but not including end while 00873 // the only incompatible regions are no more than kMaxIncompatibleColumnCount 00874 // in size, and the compatible regions beyond are bigger. 00875 void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs, 00876 const int* assigned_costs, 00877 const bool* any_columns_possible, 00878 int column_set_id, 00879 int step, int end, int* start) { 00880 if (textord_debug_tabfind > 2) 00881 tprintf("Starting expansion at %d, step=%d, limit=%d\n", 00882 *start, step, end); 00883 if (*start == end) 00884 return; // Cannot be expanded. 00885 00886 int barrier_size = 0; 00887 int good_size = 0; 00888 do { 00889 // Find the size of the incompatible barrier. 00890 barrier_size = 0; 00891 int i; 00892 for (i = *start + step; i != end; i += step) { 00893 if (column_set_costs[i][column_set_id] < assigned_costs[i]) 00894 break; // We are back on. 00895 // Locations where none are possible don't count. 00896 if (any_columns_possible[i]) 00897 ++barrier_size; 00898 } 00899 if (textord_debug_tabfind > 2) 00900 tprintf("At %d, Barrier size=%d\n", i, barrier_size); 00901 if (barrier_size > kMaxIncompatibleColumnCount) 00902 return; // Barrier too big. 00903 if (i == end) { 00904 // We can't go any further, but the barrier was small, so go to the end. 00905 *start = i - step; 00906 return; 00907 } 00908 // Now find the size of the good region on the other side. 00909 good_size = 1; 00910 for (i += step; i != end; i += step) { 00911 if (column_set_costs[i][column_set_id] < assigned_costs[i]) 00912 ++good_size; 00913 else if (any_columns_possible[i]) 00914 break; 00915 } 00916 if (textord_debug_tabfind > 2) 00917 tprintf("At %d, good size = %d\n", i, good_size); 00918 // If we had enough good ones we can extend the start and keep looking. 00919 if (good_size >= barrier_size) 00920 *start = i - step; 00921 } while (good_size >= barrier_size); 00922 } 00923 00924 // Assigns the given column_set_id to the given range. 00925 void ColumnFinder::AssignColumnToRange(int column_set_id, int start, int end, 00926 int** column_set_costs, 00927 int* assigned_costs) { 00928 ColPartitionSet* column_set = column_sets_.get(column_set_id); 00929 for (int i = start; i < end; ++i) { 00930 assigned_costs[i] = column_set_costs[i][column_set_id]; 00931 best_columns_[i] = column_set; 00932 } 00933 } 00934 00935 // Computes the mean_column_gap_. 00936 void ColumnFinder::ComputeMeanColumnGap(bool any_multi_column) { 00937 int total_gap = 0; 00938 int total_width = 0; 00939 int gap_samples = 0; 00940 int width_samples = 0; 00941 for (int i = 0; i < gridheight_; ++i) { 00942 ASSERT_HOST(best_columns_[i] != NULL); 00943 best_columns_[i]->AccumulateColumnWidthsAndGaps(&total_width, 00944 &width_samples, 00945 &total_gap, 00946 &gap_samples); 00947 } 00948 mean_column_gap_ = any_multi_column && gap_samples > 0 00949 ? total_gap / gap_samples : total_width / width_samples; 00950 } 00951 00954 00955 // Helper to delete all the deletable blobs on the list. Owned blobs are 00956 // extracted from the list, but not deleted, leaving them owned by the owner(). 00957 static void ReleaseAllBlobsAndDeleteUnused(BLOBNBOX_LIST* blobs) { 00958 for (BLOBNBOX_IT blob_it(blobs); !blob_it.empty(); blob_it.forward()) { 00959 BLOBNBOX* blob = blob_it.extract(); 00960 if (blob->owner() == NULL) { 00961 delete blob->cblob(); 00962 delete blob; 00963 } 00964 } 00965 } 00966 00967 // Hoovers up all un-owned blobs and deletes them. 00968 // The rest get released from the block so the ColPartitions can pass 00969 // ownership to the output blocks. 00970 void ColumnFinder::ReleaseBlobsAndCleanupUnused(TO_BLOCK* block) { 00971 ReleaseAllBlobsAndDeleteUnused(&block->blobs); 00972 ReleaseAllBlobsAndDeleteUnused(&block->small_blobs); 00973 ReleaseAllBlobsAndDeleteUnused(&block->noise_blobs); 00974 ReleaseAllBlobsAndDeleteUnused(&block->large_blobs); 00975 ReleaseAllBlobsAndDeleteUnused(&image_bblobs_); 00976 } 00977 00978 // Splits partitions that cross columns where they have nothing in the gap. 00979 void ColumnFinder::GridSplitPartitions() { 00980 // Iterate the ColPartitions in the grid. 00981 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> 00982 gsearch(&part_grid_); 00983 gsearch.StartFullSearch(); 00984 ColPartition* dont_repeat = NULL; 00985 ColPartition* part; 00986 while ((part = gsearch.NextFullSearch()) != NULL) { 00987 if (part->blob_type() < BRT_UNKNOWN || part == dont_repeat) 00988 continue; // Only applies to text partitions. 00989 ColPartitionSet* column_set = best_columns_[gsearch.GridY()]; 00990 int first_col = -1; 00991 int last_col = -1; 00992 // Find which columns the partition spans. 00993 part->ColumnRange(resolution_, column_set, &first_col, &last_col); 00994 if (first_col > 0) 00995 --first_col; 00996 // Convert output column indices to physical column indices. 00997 first_col /= 2; 00998 last_col /= 2; 00999 // We will only consider cases where a partition spans two columns, 01000 // since a heading that spans more columns than that is most likely 01001 // genuine. 01002 if (last_col != first_col + 1) 01003 continue; 01004 // Set up a rectangle search x-bounded by the column gap and y by the part. 01005 int y = part->MidY(); 01006 TBOX margin_box = part->bounding_box(); 01007 bool debug = AlignedBlob::WithinTestRegion(2, margin_box.left(), 01008 margin_box.bottom()); 01009 if (debug) { 01010 tprintf("Considering partition for GridSplit:"); 01011 part->Print(); 01012 } 01013 ColPartition* column = column_set->GetColumnByIndex(first_col); 01014 if (column == NULL) 01015 continue; 01016 margin_box.set_left(column->RightAtY(y) + 2); 01017 column = column_set->GetColumnByIndex(last_col); 01018 if (column == NULL) 01019 continue; 01020 margin_box.set_right(column->LeftAtY(y) - 2); 01021 // TODO(rays) Decide whether to keep rectangular filling or not in the 01022 // main grid and therefore whether we need a fancier search here. 01023 // Now run the rect search on the main blob grid. 01024 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> rectsearch(this); 01025 if (debug) { 01026 tprintf("Searching box (%d,%d)->(%d,%d)\n", 01027 margin_box.left(), margin_box.bottom(), 01028 margin_box.right(), margin_box.top()); 01029 part->Print(); 01030 } 01031 rectsearch.StartRectSearch(margin_box); 01032 BLOBNBOX* bbox; 01033 while ((bbox = rectsearch.NextRectSearch()) != NULL) { 01034 if (bbox->bounding_box().overlap(margin_box)) 01035 break; 01036 } 01037 if (bbox == NULL) { 01038 // There seems to be nothing in the hole, so split the partition. 01039 gsearch.RemoveBBox(); 01040 int x_middle = (margin_box.left() + margin_box.right()) / 2; 01041 if (debug) { 01042 tprintf("Splitting part at %d:", x_middle); 01043 part->Print(); 01044 } 01045 ColPartition* split_part = part->SplitAt(x_middle); 01046 if (split_part != NULL) { 01047 if (debug) { 01048 tprintf("Split result:"); 01049 part->Print(); 01050 split_part->Print(); 01051 } 01052 part_grid_.InsertBBox(true, true, split_part); 01053 } else { 01054 // Split had no effect 01055 if (debug) 01056 tprintf("Split had no effect\n"); 01057 dont_repeat = part; 01058 } 01059 part_grid_.InsertBBox(true, true, part); 01060 gsearch.RepositionIterator(); 01061 } else if (debug) { 01062 tprintf("Part cannot be split: blob (%d,%d)->(%d,%d) in column gap\n", 01063 bbox->bounding_box().left(), bbox->bounding_box().bottom(), 01064 bbox->bounding_box().right(), bbox->bounding_box().top()); 01065 } 01066 } 01067 } 01068 01069 // Merges partitions where there is vertical overlap, within a single column, 01070 // and the horizontal gap is small enough. 01071 void ColumnFinder::GridMergePartitions() { 01072 // Iterate the ColPartitions in the grid. 01073 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> 01074 gsearch(&part_grid_); 01075 gsearch.StartFullSearch(); 01076 ColPartition* part; 01077 while ((part = gsearch.NextFullSearch()) != NULL) { 01078 if (part->IsUnMergeableType()) 01079 continue; 01080 // Set up a rectangle search x-bounded by the column and y by the part. 01081 ColPartitionSet* columns = best_columns_[gsearch.GridY()]; 01082 TBOX box = part->bounding_box(); 01083 bool debug = AlignedBlob::WithinTestRegion(1, box.left(), box.bottom()); 01084 if (debug) { 01085 tprintf("Considering part for merge at:"); 01086 part->Print(); 01087 } 01088 int y = part->MidY(); 01089 ColPartition* left_column = columns->ColumnContaining(box.left(), y); 01090 ColPartition* right_column = columns->ColumnContaining(box.right(), y); 01091 if (left_column == NULL || right_column != left_column) { 01092 if (debug) 01093 tprintf("In different columns\n"); 01094 continue; 01095 } 01096 box.set_left(left_column->LeftAtY(y)); 01097 box.set_right(right_column->RightAtY(y)); 01098 // Now run the rect search. 01099 bool modified_box = false; 01100 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> 01101 rsearch(&part_grid_); 01102 rsearch.SetUniqueMode(true); 01103 rsearch.StartRectSearch(box); 01104 ColPartition* neighbour; 01105 01106 while ((neighbour = rsearch.NextRectSearch()) != NULL) { 01107 if (neighbour == part || neighbour->IsUnMergeableType()) 01108 continue; 01109 const TBOX& neighbour_box = neighbour->bounding_box(); 01110 if (debug) { 01111 tprintf("Considering merge with neighbour at:"); 01112 neighbour->Print(); 01113 } 01114 if (neighbour_box.right() < box.left() || 01115 neighbour_box.left() > box.right()) 01116 continue; // Not within the same column. 01117 if (part->VSignificantCoreOverlap(*neighbour) && 01118 part->TypesMatch(*neighbour)) { 01119 // There is vertical overlap and the gross types match, but only 01120 // merge if the horizontal gap is small enough, as one of the 01121 // partitions may be a figure caption within a column. 01122 // If there is only one column, then the mean_column_gap_ is large 01123 // enough to allow almost any merge, by being the mean column width. 01124 const TBOX& part_box = part->bounding_box(); 01125 // Don't merge if there is something else in the way. Use the margin 01126 // to decide, and check both to allow a bit of overlap. 01127 if (neighbour_box.left() > part->right_margin() && 01128 part_box.right() < neighbour->left_margin()) 01129 continue; // Neighbour is too far to the right. 01130 if (neighbour_box.right() < part->left_margin() && 01131 part_box.left() > neighbour->right_margin()) 01132 continue; // Neighbour is too far to the left. 01133 int h_gap = MAX(part_box.left(), neighbour_box.left()) - 01134 MIN(part_box.right(), neighbour_box.right()); 01135 if (h_gap < mean_column_gap_ * kHorizontalGapMergeFraction || 01136 part_box.width() < mean_column_gap_ || 01137 neighbour_box.width() < mean_column_gap_) { 01138 if (debug) { 01139 tprintf("Running grid-based merge between:\n"); 01140 part->Print(); 01141 neighbour->Print(); 01142 } 01143 rsearch.RemoveBBox(); 01144 if (!modified_box) { 01145 // We are going to modify part, so remove it and re-insert it after. 01146 gsearch.RemoveBBox(); 01147 rsearch.RepositionIterator(); 01148 modified_box = true; 01149 } 01150 part->Absorb(neighbour, WidthCB()); 01151 } else if (debug) { 01152 tprintf("Neighbour failed hgap test\n"); 01153 } 01154 } else if (debug) { 01155 tprintf("Neighbour failed overlap or typesmatch test\n"); 01156 } 01157 } 01158 if (modified_box) { 01159 // We modified the box of part, so re-insert it into the grid. 01160 // This does no harm in the current cell, as it already exists there, 01161 // but it needs to exist in all the cells covered by its bounding box, 01162 // or it will never be found by a full search. 01163 // Because the box has changed, it has to be removed first, otherwise 01164 // add_sorted may fail to keep a single copy of the pointer. 01165 part_grid_.InsertBBox(true, true, part); 01166 gsearch.RepositionIterator(); 01167 } 01168 } 01169 } 01170 01171 // Inserts remaining noise blobs into the most applicable partition if any. 01172 // If there is no applicable partition, then the blobs are deleted. 01173 void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) { 01174 BLOBNBOX_IT blob_it(&block->noise_blobs); 01175 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 01176 BLOBNBOX* blob = blob_it.data(); 01177 if (blob->owner() != NULL) continue; 01178 TBOX search_box(blob->bounding_box()); 01179 bool debug = WithinTestRegion(2, search_box.left(), search_box.bottom()); 01180 search_box.pad(gridsize(), gridsize()); 01181 // Setup a rectangle search to find the best partition to merge with. 01182 ColPartitionGridSearch rsearch(&part_grid_); 01183 rsearch.SetUniqueMode(true); 01184 rsearch.StartRectSearch(search_box); 01185 ColPartition* part; 01186 ColPartition* best_part = NULL; 01187 int best_distance = 0; 01188 while ((part = rsearch.NextRectSearch()) != NULL) { 01189 if (part->IsUnMergeableType()) 01190 continue; 01191 int distance = projection_.DistanceOfBoxFromPartition( 01192 blob->bounding_box(), *part, denorm_, debug); 01193 if (best_part == NULL || distance < best_distance) { 01194 best_part = part; 01195 best_distance = distance; 01196 } 01197 } 01198 if (best_part != NULL && 01199 best_distance < kMaxDistToPartSizeRatio * best_part->median_size()) { 01200 // Close enough to merge. 01201 if (debug) { 01202 tprintf("Adding noise blob with distance %d, thr=%g:box:", 01203 best_distance, 01204 kMaxDistToPartSizeRatio * best_part->median_size()); 01205 blob->bounding_box().print(); 01206 tprintf("To partition:"); 01207 best_part->Print(); 01208 } 01209 part_grid_.RemoveBBox(best_part); 01210 best_part->AddBox(blob); 01211 part_grid_.InsertBBox(true, true, best_part); 01212 blob->set_owner(best_part); 01213 blob->set_flow(best_part->flow()); 01214 blob->set_region_type(best_part->blob_type()); 01215 } else { 01216 // Mark the blob for deletion. 01217 blob->set_region_type(BRT_NOISE); 01218 } 01219 } 01220 // Delete the marked blobs, clearing neighbour references. 01221 block->DeleteUnownedNoise(); 01222 } 01223 01224 // Helper makes a box from a horizontal line. 01225 static TBOX BoxFromHLine(const TabVector* hline) { 01226 int top = MAX(hline->startpt().y(), hline->endpt().y()); 01227 int bottom = MIN(hline->startpt().y(), hline->endpt().y()); 01228 top += hline->mean_width(); 01229 if (top == bottom) { 01230 if (bottom > 0) 01231 --bottom; 01232 else 01233 ++top; 01234 } 01235 return TBOX(hline->startpt().x(), bottom, hline->endpt().x(), top); 01236 } 01237 01238 // Remove partitions that come from horizontal lines that look like 01239 // underlines, but are not part of a table. 01240 void ColumnFinder::GridRemoveUnderlinePartitions() { 01241 TabVector_IT hline_it(&horizontal_lines_); 01242 for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { 01243 TabVector* hline = hline_it.data(); 01244 if (hline->intersects_other_lines()) 01245 continue; 01246 TBOX line_box = BoxFromHLine(hline); 01247 TBOX search_box = line_box; 01248 search_box.pad(0, line_box.height()); 01249 ColPartitionGridSearch part_search(&part_grid_); 01250 part_search.SetUniqueMode(true); 01251 part_search.StartRectSearch(search_box); 01252 ColPartition* covered; 01253 bool touched_table = false; 01254 bool touched_text = false; 01255 ColPartition* line_part = NULL; 01256 while ((covered = part_search.NextRectSearch()) != NULL) { 01257 if (covered->type() == PT_TABLE) { 01258 touched_table = true; 01259 break; 01260 } else if (covered->IsTextType()) { 01261 // TODO(rays) Add a list of underline sections to ColPartition. 01262 int text_bottom = covered->median_bottom(); 01263 if (line_box.bottom() <= text_bottom && text_bottom <= search_box.top()) 01264 touched_text = true; 01265 } else if (covered->blob_type() == BRT_HLINE && 01266 line_box.contains(covered->bounding_box())) { 01267 line_part = covered; 01268 } 01269 } 01270 if (line_part != NULL && !touched_table && touched_text) { 01271 part_grid_.RemoveBBox(line_part); 01272 delete line_part; 01273 } 01274 } 01275 } 01276 01277 // Add horizontal line separators as partitions. 01278 void ColumnFinder::GridInsertHLinePartitions() { 01279 TabVector_IT hline_it(&horizontal_lines_); 01280 for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { 01281 TabVector* hline = hline_it.data(); 01282 TBOX line_box = BoxFromHLine(hline); 01283 ColPartition* part = ColPartition::MakeLinePartition( 01284 BRT_HLINE, vertical_skew_, 01285 line_box.left(), line_box.bottom(), line_box.right(), line_box.top()); 01286 part->set_type(PT_HORZ_LINE); 01287 bool any_image = false; 01288 ColPartitionGridSearch part_search(&part_grid_); 01289 part_search.SetUniqueMode(true); 01290 part_search.StartRectSearch(line_box); 01291 ColPartition* covered; 01292 while ((covered = part_search.NextRectSearch()) != NULL) { 01293 if (covered->IsImageType()) { 01294 any_image = true; 01295 break; 01296 } 01297 } 01298 if (!any_image) 01299 part_grid_.InsertBBox(true, true, part); 01300 else 01301 delete part; 01302 } 01303 } 01304 01305 // Add horizontal line separators as partitions. 01306 void ColumnFinder::GridInsertVLinePartitions() { 01307 TabVector_IT vline_it(dead_vectors()); 01308 for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) { 01309 TabVector* vline = vline_it.data(); 01310 if (!vline->IsSeparator()) 01311 continue; 01312 int left = MIN(vline->startpt().x(), vline->endpt().x()); 01313 int right = MAX(vline->startpt().x(), vline->endpt().x()); 01314 right += vline->mean_width(); 01315 if (left == right) { 01316 if (left > 0) 01317 --left; 01318 else 01319 ++right; 01320 } 01321 ColPartition* part = ColPartition::MakeLinePartition( 01322 BRT_VLINE, vertical_skew_, 01323 left, vline->startpt().y(), right, vline->endpt().y()); 01324 part->set_type(PT_VERT_LINE); 01325 bool any_image = false; 01326 ColPartitionGridSearch part_search(&part_grid_); 01327 part_search.SetUniqueMode(true); 01328 part_search.StartRectSearch(part->bounding_box()); 01329 ColPartition* covered; 01330 while ((covered = part_search.NextRectSearch()) != NULL) { 01331 if (covered->IsImageType()) { 01332 any_image = true; 01333 break; 01334 } 01335 } 01336 if (!any_image) 01337 part_grid_.InsertBBox(true, true, part); 01338 else 01339 delete part; 01340 } 01341 } 01342 01343 // For every ColPartition in the grid, sets its type based on position 01344 // in the columns. 01345 void ColumnFinder::SetPartitionTypes() { 01346 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> 01347 gsearch(&part_grid_); 01348 gsearch.StartFullSearch(); 01349 ColPartition* part; 01350 while ((part = gsearch.NextFullSearch()) != NULL) { 01351 part->SetPartitionType(resolution_, best_columns_[gsearch.GridY()]); 01352 } 01353 } 01354 01355 // Only images remain with multiple types in a run of partners. 01356 // Sets the type of all in the group to the maximum of the group. 01357 void ColumnFinder::SmoothPartnerRuns() { 01358 // Iterate the ColPartitions in the grid. 01359 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> 01360 gsearch(&part_grid_); 01361 gsearch.StartFullSearch(); 01362 ColPartition* part; 01363 while ((part = gsearch.NextFullSearch()) != NULL) { 01364 ColPartition* partner = part->SingletonPartner(true); 01365 if (partner != NULL) { 01366 if (partner->SingletonPartner(false) != part) { 01367 tprintf("Ooops! Partition:(%d partners)", 01368 part->upper_partners()->length()); 01369 part->Print(); 01370 tprintf("has singleton partner:(%d partners", 01371 partner->lower_partners()->length()); 01372 partner->Print(); 01373 tprintf("but its singleton partner is:"); 01374 if (partner->SingletonPartner(false) == NULL) 01375 tprintf("NULL\n"); 01376 else 01377 partner->SingletonPartner(false)->Print(); 01378 } 01379 ASSERT_HOST(partner->SingletonPartner(false) == part); 01380 } else if (part->SingletonPartner(false) != NULL) { 01381 ColPartitionSet* column_set = best_columns_[gsearch.GridY()]; 01382 int column_count = column_set->ColumnCount(); 01383 part->SmoothPartnerRun(column_count * 2 + 1); 01384 } 01385 } 01386 } 01387 01388 // Helper functions for TransformToBlocks. 01389 // Add the part to the temp list in the correct order. 01390 void ColumnFinder::AddToTempPartList(ColPartition* part, 01391 ColPartition_CLIST* temp_list) { 01392 int mid_y = part->MidY(); 01393 ColPartition_C_IT it(temp_list); 01394 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 01395 ColPartition* test_part = it.data(); 01396 if (part->type() == PT_NOISE || test_part->type() == PT_NOISE) 01397 continue; // Noise stays in sequence. 01398 if (test_part == part->SingletonPartner(false)) 01399 break; // Insert before its lower partner. 01400 int neighbour_bottom = test_part->median_bottom(); 01401 int neighbour_top = test_part->median_top(); 01402 int neighbour_y = (neighbour_bottom + neighbour_top) / 2; 01403 if (neighbour_y < mid_y) 01404 break; // part is above test_part so insert it. 01405 if (!part->HOverlaps(*test_part) && !part->WithinSameMargins(*test_part)) 01406 continue; // Incompatibles stay in order 01407 } 01408 if (it.cycled_list()) { 01409 it.add_to_end(part); 01410 } else { 01411 it.add_before_stay_put(part); 01412 } 01413 } 01414 01415 // Add everything from the temp list to the work_set assuming correct order. 01416 void ColumnFinder::EmptyTempPartList(ColPartition_CLIST* temp_list, 01417 WorkingPartSet_LIST* work_set) { 01418 ColPartition_C_IT it(temp_list); 01419 while (!it.empty()) { 01420 it.extract()->AddToWorkingSet(bleft_, tright_, resolution_, 01421 &good_parts_, work_set); 01422 it.forward(); 01423 } 01424 } 01425 01426 // Transform the grid of partitions to the output blocks. 01427 void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks, 01428 TO_BLOCK_LIST* to_blocks) { 01429 WorkingPartSet_LIST work_set; 01430 ColPartitionSet* column_set = NULL; 01431 ColPartition_IT noise_it(&noise_parts_); 01432 // The temp_part_list holds a list of parts at the same grid y coord 01433 // so they can be added in the correct order. This prevents thin objects 01434 // like horizontal lines going before the text lines above them. 01435 ColPartition_CLIST temp_part_list; 01436 // Iterate the ColPartitions in the grid. It starts at the top 01437 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> 01438 gsearch(&part_grid_); 01439 gsearch.StartFullSearch(); 01440 int prev_grid_y = -1; 01441 ColPartition* part; 01442 while ((part = gsearch.NextFullSearch()) != NULL) { 01443 int grid_y = gsearch.GridY(); 01444 if (grid_y != prev_grid_y) { 01445 EmptyTempPartList(&temp_part_list, &work_set); 01446 prev_grid_y = grid_y; 01447 } 01448 if (best_columns_[grid_y] != column_set) { 01449 column_set = best_columns_[grid_y]; 01450 // Every line should have a non-null best column. 01451 ASSERT_HOST(column_set != NULL); 01452 column_set->ChangeWorkColumns(bleft_, tright_, resolution_, 01453 &good_parts_, &work_set); 01454 if (textord_debug_tabfind) 01455 tprintf("Changed column groups at grid index %d, y=%d\n", 01456 gsearch.GridY(), gsearch.GridY() * gridsize()); 01457 } 01458 if (part->type() == PT_NOISE) { 01459 noise_it.add_to_end(part); 01460 } else { 01461 AddToTempPartList(part, &temp_part_list); 01462 } 01463 } 01464 EmptyTempPartList(&temp_part_list, &work_set); 01465 // Now finish all working sets and transfer ColPartitionSets to block_sets. 01466 WorkingPartSet_IT work_it(&work_set); 01467 while (!work_it.empty()) { 01468 WorkingPartSet* working_set = work_it.extract(); 01469 working_set->ExtractCompletedBlocks(bleft_, tright_, resolution_, 01470 &good_parts_, blocks, to_blocks); 01471 delete working_set; 01472 work_it.forward(); 01473 } 01474 } 01475 01476 // Helper reflects a list of blobs in the y-axis. 01477 // Only reflects the BLOBNBOX bounding box. Not the blobs or outlines below. 01478 static void ReflectBlobList(BLOBNBOX_LIST* bblobs) { 01479 BLOBNBOX_IT it(bblobs); 01480 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 01481 it.data()->reflect_box_in_y_axis(); 01482 } 01483 } 01484 01485 // Reflect the blob boxes (but not the outlines) in the y-axis so that 01486 // the blocks get created in the correct RTL order. Reflects the blobs 01487 // in the input_block and the bblobs list. 01488 // The reflection is undone in RotateAndReskewBlocks by 01489 // reflecting the blocks themselves, and then recomputing the blob bounding 01490 // boxes. 01491 void ColumnFinder::ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) { 01492 ReflectBlobList(bblobs); 01493 ReflectBlobList(&input_block->blobs); 01494 ReflectBlobList(&input_block->small_blobs); 01495 ReflectBlobList(&input_block->noise_blobs); 01496 ReflectBlobList(&input_block->large_blobs); 01497 // Update the denorm with the reflection. 01498 DENORM* new_denorm = new DENORM; 01499 new_denorm->SetupNormalization(NULL, NULL, denorm_, 01500 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f); 01501 denorm_ = new_denorm; 01502 } 01503 01504 // Helper fixes up blobs and cblobs to match the desired rotation, 01505 // exploding multi-outline blobs back to single blobs and accumulating 01506 // the bounding box widths and heights. 01507 static void RotateAndExplodeBlobList(const FCOORD& blob_rotation, 01508 BLOBNBOX_LIST* bblobs, 01509 STATS* widths, 01510 STATS* heights) { 01511 BLOBNBOX_IT it(bblobs); 01512 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 01513 BLOBNBOX* blob = it.data(); 01514 C_BLOB* cblob = blob->cblob(); 01515 C_OUTLINE_LIST* outlines = cblob->out_list(); 01516 C_OUTLINE_IT ol_it(outlines); 01517 if (!outlines->singleton()) { 01518 // This blob has multiple outlines from CJK repair. 01519 // Explode the blob back into individual outlines. 01520 for (;!ol_it.empty(); ol_it.forward()) { 01521 C_OUTLINE* outline = ol_it.extract(); 01522 BLOBNBOX* new_blob = BLOBNBOX::RealBlob(outline); 01523 // This blob will be revisited later since we add_after_stay_put here. 01524 // This means it will get rotated and have its width/height added to 01525 // the stats below. 01526 it.add_after_stay_put(new_blob); 01527 } 01528 it.extract(); 01529 delete cblob; 01530 delete blob; 01531 } else { 01532 if (blob_rotation.x() != 1.0f || blob_rotation.y() != 0.0f) { 01533 cblob->rotate(blob_rotation); 01534 } 01535 blob->compute_bounding_box(); 01536 widths->add(blob->bounding_box().width(), 1); 01537 heights->add(blob->bounding_box().height(), 1); 01538 } 01539 } 01540 } 01541 01542 // Undo the deskew that was done in FindTabVectors, as recognition is done 01543 // without correcting blobs or blob outlines for skew. 01544 // Reskew the completed blocks to put them back to the original rotated coords 01545 // that were created by CorrectOrientation. 01546 // If the input_is_rtl, then reflect the blocks in the y-axis to undo the 01547 // reflection that was done before FindTabVectors. 01548 // Blocks that were identified as vertical text (relative to the rotated 01549 // coordinates) are further rotated so the text lines are horizontal. 01550 // blob polygonal outlines are rotated to match the position of the blocks 01551 // that they are in, and their bounding boxes are recalculated to be accurate. 01552 // Record appropriate inverse transformations and required 01553 // classifier transformation in the blocks. 01554 void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl, 01555 TO_BLOCK_LIST* blocks) { 01556 if (input_is_rtl) { 01557 // The skew is backwards because of the reflection. 01558 FCOORD tmp = deskew_; 01559 deskew_ = reskew_; 01560 reskew_ = tmp; 01561 } 01562 TO_BLOCK_IT it(blocks); 01563 int block_index = 1; 01564 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 01565 TO_BLOCK* to_block = it.data(); 01566 BLOCK* block = to_block->block; 01567 // Blocks are created on the deskewed blob outlines in TransformToBlocks() 01568 // so we need to reskew them back to page coordinates. 01569 if (input_is_rtl) { 01570 block->reflect_polygon_in_y_axis(); 01571 } 01572 block->rotate(reskew_); 01573 // Copy the right_to_left flag to the created block. 01574 block->set_right_to_left(input_is_rtl); 01575 // Save the skew angle in the block for baseline computations. 01576 block->set_skew(reskew_); 01577 block->set_index(block_index++); 01578 FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block); 01579 // Rotate all the blobs if needed and recompute the bounding boxes. 01580 // Compute the block median blob width and height as we go. 01581 STATS widths(0, block->bounding_box().width()); 01582 STATS heights(0, block->bounding_box().height()); 01583 RotateAndExplodeBlobList(blob_rotation, &to_block->blobs, 01584 &widths, &heights); 01585 TO_ROW_IT row_it(to_block->get_rows()); 01586 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { 01587 TO_ROW* row = row_it.data(); 01588 RotateAndExplodeBlobList(blob_rotation, row->blob_list(), 01589 &widths, &heights); 01590 } 01591 block->set_median_size(static_cast<int>(widths.median() + 0.5), 01592 static_cast<int>(heights.median() + 0.5)); 01593 if (textord_debug_tabfind >= 2) 01594 tprintf("Block median size = (%d, %d)\n", 01595 block->median_size().x(), block->median_size().y()); 01596 } 01597 } 01598 01599 // Computes the rotations for the block (to make textlines horizontal) and 01600 // for the blobs (for classification) and sets the appropriate members 01601 // of the given block. 01602 // Returns the rotation that needs to be applied to the blobs to make 01603 // them sit in the rotated block. 01604 FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK* block) { 01605 // The text_rotation_ tells us the gross page text rotation that needs 01606 // to be applied for classification 01607 // TODO(rays) find block-level classify rotation by orientation detection. 01608 // In the mean time, assume that "up" for text printed in the minority 01609 // direction (PT_VERTICAL_TEXT) is perpendicular to the line of reading. 01610 // Accomplish this by zero-ing out the text rotation. This covers the 01611 // common cases of image credits in documents written in Latin scripts 01612 // and page headings for predominantly vertically written CJK books. 01613 FCOORD classify_rotation(text_rotation_); 01614 FCOORD block_rotation(1.0f, 0.0f); 01615 if (block->poly_block()->isA() == PT_VERTICAL_TEXT) { 01616 // Vertical text needs to be 90 degrees rotated relative to the rest. 01617 // If the rest has a 90 degree rotation already, use the inverse, making 01618 // the vertical text the original way up. Otherwise use 90 degrees 01619 // clockwise. 01620 if (rerotate_.x() == 0.0f) 01621 block_rotation = rerotate_; 01622 else 01623 block_rotation = FCOORD(0.0f, -1.0f); 01624 block->rotate(block_rotation); 01625 classify_rotation = FCOORD(1.0f, 0.0f); 01626 } 01627 block_rotation.rotate(rotation_); 01628 // block_rotation is now what we have done to the blocks. Now do the same 01629 // thing to the blobs, but save the inverse rotation in the block, as that 01630 // is what we need to DENORM back to the image coordinates. 01631 FCOORD blob_rotation(block_rotation); 01632 block_rotation.set_y(-block_rotation.y()); 01633 block->set_re_rotation(block_rotation); 01634 block->set_classify_rotation(classify_rotation); 01635 if (textord_debug_tabfind) { 01636 tprintf("Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:", 01637 block->index(), block->poly_block()->isA(), 01638 block->re_rotation().x(), block->re_rotation().y(), 01639 classify_rotation.x(), classify_rotation.y()); 01640 block->bounding_box().print(); 01641 } 01642 return blob_rotation; 01643 } 01644 01645 } // namespace tesseract.