|
tesseract 3.04.01
|
00001 00002 // File: colpartitionrid.h 00003 // Description: Class collecting code that acts on a BBGrid of ColPartitions. 00004 // Author: Ray Smith 00005 // Created: Mon Oct 05 08:42:01 PDT 2009 00006 // 00007 // (C) Copyright 2009, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifdef HAVE_CONFIG_H 00021 #include "config_auto.h" 00022 #endif 00023 00024 #include "colpartitiongrid.h" 00025 #include "colpartitionset.h" 00026 #include "imagefind.h" 00027 00028 namespace tesseract { 00029 00030 BOOL_VAR(textord_tabfind_show_color_fit, false, "Show stroke widths"); 00031 00032 // Max pad factor used to search the neighbourhood of a partition to smooth 00033 // partition types. 00034 const int kMaxPadFactor = 6; 00035 // Max multiple of size (min(height, width)) for the distance of the nearest 00036 // neighbour for the change of type to be used. 00037 const int kMaxNeighbourDistFactor = 4; 00038 // Maximum number of lines in a credible figure caption. 00039 const int kMaxCaptionLines = 7; 00040 // Min ratio between biggest and smallest gap to bound a caption. 00041 const double kMinCaptionGapRatio = 2.0; 00042 // Min ratio between biggest gap and mean line height to bound a caption. 00043 const double kMinCaptionGapHeightRatio = 0.5; 00044 // Min fraction of ColPartition height to be overlapping for margin purposes. 00045 const double kMarginOverlapFraction = 0.25; 00046 // Size ratio required to consider an unmerged overlapping partition to be big. 00047 const double kBigPartSizeRatio = 1.75; 00048 // Fraction of gridsize to allow arbitrary overlap between partitions. 00049 const double kTinyEnoughTextlineOverlapFraction = 0.25; 00050 // Max vertical distance of neighbouring ColPartition as a multiple of 00051 // partition height for it to be a partner. 00052 // TODO(rays) fix the problem that causes a larger number to not work well. 00053 // The value needs to be larger as sparse text blocks in a page that gets 00054 // marked as single column will not find adjacent lines as partners, and 00055 // will merge horizontally distant, but aligned lines. See rep.4B3 p5. 00056 // The value needs to be small because double-spaced legal docs written 00057 // in a single column, but justified courier have widely spaced lines 00058 // that need to get merged before they partner-up with the lines above 00059 // and below. See legal.3B5 p13/17. Neither of these should depend on 00060 // the value of kMaxPartitionSpacing to be successful, and ColPartition 00061 // merging needs attention to fix this problem. 00062 const double kMaxPartitionSpacing = 1.75; 00063 // Margin by which text has to beat image or vice-versa to make a firm 00064 // decision in GridSmoothNeighbour. 00065 const int kSmoothDecisionMargin = 4; 00066 00067 ColPartitionGrid::ColPartitionGrid() { 00068 } 00069 ColPartitionGrid::ColPartitionGrid(int gridsize, 00070 const ICOORD& bleft, const ICOORD& tright) 00071 : BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(gridsize, 00072 bleft, tright) { 00073 } 00074 00075 ColPartitionGrid::~ColPartitionGrid() { 00076 } 00077 00078 // Handles a click event in a display window. 00079 void ColPartitionGrid::HandleClick(int x, int y) { 00080 BBGrid<ColPartition, 00081 ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x, y); 00082 // Run a radial search for partitions that overlap. 00083 ColPartitionGridSearch radsearch(this); 00084 radsearch.SetUniqueMode(true); 00085 radsearch.StartRadSearch(x, y, 1); 00086 ColPartition* neighbour; 00087 FCOORD click(x, y); 00088 while ((neighbour = radsearch.NextRadSearch()) != NULL) { 00089 TBOX nbox = neighbour->bounding_box(); 00090 if (nbox.contains(click)) { 00091 tprintf("Block box:"); 00092 neighbour->bounding_box().print(); 00093 neighbour->Print(); 00094 } 00095 } 00096 } 00097 00098 // Merges ColPartitions in the grid that look like they belong in the same 00099 // textline. 00100 // For all partitions in the grid, calls the box_cb permanent callback 00101 // to compute the search box, searches the box, and if a candidate is found, 00102 // calls the confirm_cb to check any more rules. If the confirm_cb returns 00103 // true, then the partitions are merged. 00104 // Both callbacks are deleted before returning. 00105 void ColPartitionGrid::Merges( 00106 TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb, 00107 TessResultCallback2<bool, const ColPartition*, 00108 const ColPartition*>* confirm_cb) { 00109 // Iterate the ColPartitions in the grid. 00110 ColPartitionGridSearch gsearch(this); 00111 gsearch.StartFullSearch(); 00112 ColPartition* part; 00113 while ((part = gsearch.NextFullSearch()) != NULL) { 00114 if (MergePart(box_cb, confirm_cb, part)) 00115 gsearch.RepositionIterator(); 00116 } 00117 delete box_cb; 00118 delete confirm_cb; 00119 } 00120 00121 // For the given partition, calls the box_cb permanent callback 00122 // to compute the search box, searches the box, and if a candidate is found, 00123 // calls the confirm_cb to check any more rules. If the confirm_cb returns 00124 // true, then the partitions are merged. 00125 // Returns true if the partition is consumed by one or more merges. 00126 bool ColPartitionGrid::MergePart( 00127 TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb, 00128 TessResultCallback2<bool, const ColPartition*, 00129 const ColPartition*>* confirm_cb, 00130 ColPartition* part) { 00131 if (part->IsUnMergeableType()) 00132 return false; 00133 bool any_done = false; 00134 // Repeatedly merge part while we find a best merge candidate that works. 00135 bool merge_done = false; 00136 do { 00137 merge_done = false; 00138 TBOX box = part->bounding_box(); 00139 bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); 00140 if (debug) { 00141 tprintf("Merge candidate:"); 00142 box.print(); 00143 } 00144 // Set up a rectangle search bounded by the part. 00145 if (!box_cb->Run(part, &box)) 00146 continue; 00147 // Create a list of merge candidates. 00148 ColPartition_CLIST merge_candidates; 00149 FindMergeCandidates(part, box, debug, &merge_candidates); 00150 // Find the best merge candidate based on minimal overlap increase. 00151 int overlap_increase; 00152 ColPartition* neighbour = BestMergeCandidate(part, &merge_candidates, debug, 00153 confirm_cb, 00154 &overlap_increase); 00155 if (neighbour != NULL && overlap_increase <= 0) { 00156 if (debug) { 00157 tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", 00158 part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour), 00159 overlap_increase); 00160 } 00161 // Looks like a good candidate so merge it. 00162 RemoveBBox(neighbour); 00163 // We will modify the box of part, so remove it from the grid, merge 00164 // it and then re-insert it into the grid. 00165 RemoveBBox(part); 00166 part->Absorb(neighbour, NULL); 00167 InsertBBox(true, true, part); 00168 merge_done = true; 00169 any_done = true; 00170 } else if (neighbour != NULL) { 00171 if (debug) { 00172 tprintf("Overlapped when merged with increase %d: ", overlap_increase); 00173 neighbour->bounding_box().print(); 00174 } 00175 } else if (debug) { 00176 tprintf("No candidate neighbour returned\n"); 00177 } 00178 } while (merge_done); 00179 return any_done; 00180 } 00181 00182 // Returns true if the given part and merge candidate might believably 00183 // be part of a single text line according to the default rules. 00184 // In general we only want to merge partitions that look like they 00185 // are on the same text line, ie their median limits overlap, but we have 00186 // to make exceptions for diacritics and stray punctuation. 00187 static bool OKMergeCandidate(const ColPartition* part, 00188 const ColPartition* candidate, 00189 bool debug) { 00190 const TBOX& part_box = part->bounding_box(); 00191 if (candidate == part) 00192 return false; // Ignore itself. 00193 if (!part->TypesMatch(*candidate) || candidate->IsUnMergeableType()) 00194 return false; // Don't mix inappropriate types. 00195 00196 const TBOX& c_box = candidate->bounding_box(); 00197 if (debug) { 00198 tprintf("Examining merge candidate:"); 00199 c_box.print(); 00200 } 00201 // Candidates must be within a reasonable distance. 00202 if (candidate->IsVerticalType() || part->IsVerticalType()) { 00203 int h_dist = -part->HCoreOverlap(*candidate); 00204 if (h_dist >= MAX(part_box.width(), c_box.width()) / 2) { 00205 if (debug) 00206 tprintf("Too far away: h_dist = %d\n", h_dist); 00207 return false; 00208 } 00209 } else { 00210 // Coarse filter by vertical distance between partitions. 00211 int v_dist = -part->VCoreOverlap(*candidate); 00212 if (v_dist >= MAX(part_box.height(), c_box.height()) / 2) { 00213 if (debug) 00214 tprintf("Too far away: v_dist = %d\n", v_dist); 00215 return false; 00216 } 00217 // Candidates must either overlap in median y, 00218 // or part or candidate must be an acceptable diacritic. 00219 if (!part->VSignificantCoreOverlap(*candidate) && 00220 !part->OKDiacriticMerge(*candidate, debug) && 00221 !candidate->OKDiacriticMerge(*part, debug)) { 00222 if (debug) 00223 tprintf("Candidate fails overlap and diacritic tests!\n"); 00224 return false; 00225 } 00226 } 00227 return true; 00228 } 00229 00230 // Helper function to compute the increase in overlap of the parts list of 00231 // Colpartitions with the combination of merge1 and merge2, compared to 00232 // the overlap with them uncombined. 00233 // An overlap is not counted if passes the OKMergeOverlap test with ok_overlap 00234 // as the pixel overlap limit. merge1 and merge2 must both be non-NULL. 00235 static int IncreaseInOverlap(const ColPartition* merge1, 00236 const ColPartition* merge2, 00237 int ok_overlap, 00238 ColPartition_CLIST* parts) { 00239 ASSERT_HOST(merge1 != NULL && merge2 != NULL); 00240 int total_area = 0; 00241 ColPartition_C_IT it(parts); 00242 TBOX merged_box(merge1->bounding_box()); 00243 merged_box += merge2->bounding_box(); 00244 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00245 ColPartition* part = it.data(); 00246 if (part == merge1 || part == merge2) 00247 continue; 00248 TBOX part_box = part->bounding_box(); 00249 // Compute the overlap of the merged box with part. 00250 int overlap_area = part_box.intersection(merged_box).area(); 00251 if (overlap_area > 0 && !part->OKMergeOverlap(*merge1, *merge2, 00252 ok_overlap, false)) { 00253 total_area += overlap_area; 00254 // Subtract the overlap of merge1 and merge2 individually. 00255 overlap_area = part_box.intersection(merge1->bounding_box()).area(); 00256 if (overlap_area > 0) 00257 total_area -= overlap_area; 00258 TBOX intersection_box = part_box.intersection(merge2->bounding_box()); 00259 overlap_area = intersection_box.area(); 00260 if (overlap_area > 0) { 00261 total_area -= overlap_area; 00262 // Add back the 3-way area. 00263 intersection_box &= merge1->bounding_box(); // In-place intersection. 00264 overlap_area = intersection_box.area(); 00265 if (overlap_area > 0) 00266 total_area += overlap_area; 00267 } 00268 } 00269 } 00270 return total_area; 00271 } 00272 00273 // Helper function to test that each partition in candidates is either a 00274 // good diacritic merge with part or an OK merge candidate with all others 00275 // in the candidates list. 00276 // ASCII Art Scenario: 00277 // We sometimes get text such as "join-this" where the - is actually a long 00278 // dash culled from a standard set of extra characters that don't match the 00279 // font of the text. This makes its strokewidth not match and forms a broken 00280 // set of 3 partitions for "join", "-" and "this" and the dash may slightly 00281 // overlap BOTH words. 00282 // ------- ------- 00283 // | ==== | 00284 // ------- ------- 00285 // The standard merge rule: "you can merge 2 partitions as long as there is 00286 // no increase in overlap elsewhere" fails miserably here. Merge any pair 00287 // of partitions and the combined box overlaps more with the third than 00288 // before. To allow the merge, we need to consider whether it is safe to 00289 // merge everything, without merging separate text lines. For that we need 00290 // everything to be an OKMergeCandidate (which is supposed to prevent 00291 // separate text lines merging), but this is hard for diacritics to satisfy, 00292 // so an alternative to being OKMergeCandidate with everything is to be an 00293 // OKDiacriticMerge with part as the base character. 00294 static bool TestCompatibleCandidates(const ColPartition& part, bool debug, 00295 ColPartition_CLIST* candidates) { 00296 ColPartition_C_IT it(candidates); 00297 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00298 ColPartition* candidate = it.data(); 00299 if (!candidate->OKDiacriticMerge(part, false)) { 00300 ColPartition_C_IT it2(it); 00301 for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) { 00302 ColPartition* candidate2 = it2.data(); 00303 if (candidate2 != candidate && 00304 !OKMergeCandidate(candidate, candidate2, false)) { 00305 if (debug) { 00306 tprintf("NC overlap failed:Candidate:"); 00307 candidate2->bounding_box().print(); 00308 tprintf("fails to be a good merge with:"); 00309 candidate->bounding_box().print(); 00310 } 00311 return false; 00312 } 00313 } 00314 } 00315 } 00316 return true; 00317 } 00318 00319 // Computes and returns the total overlap of all partitions in the grid. 00320 // If overlap_grid is non-null, it is filled with a grid that holds empty 00321 // partitions representing the union of all overlapped partitions. 00322 int ColPartitionGrid::ComputeTotalOverlap(ColPartitionGrid** overlap_grid) { 00323 int total_overlap = 0; 00324 // Iterate the ColPartitions in the grid. 00325 ColPartitionGridSearch gsearch(this); 00326 gsearch.StartFullSearch(); 00327 ColPartition* part; 00328 while ((part = gsearch.NextFullSearch()) != NULL) { 00329 ColPartition_CLIST neighbors; 00330 const TBOX& part_box = part->bounding_box(); 00331 FindOverlappingPartitions(part_box, part, &neighbors); 00332 ColPartition_C_IT n_it(&neighbors); 00333 bool any_part_overlap = false; 00334 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) { 00335 const TBOX& n_box = n_it.data()->bounding_box(); 00336 int overlap = n_box.intersection(part_box).area(); 00337 if (overlap > 0 && overlap_grid != NULL) { 00338 if (*overlap_grid == NULL) { 00339 *overlap_grid = new ColPartitionGrid(gridsize(), bleft(), tright()); 00340 } 00341 (*overlap_grid)->InsertBBox(true, true, n_it.data()->ShallowCopy()); 00342 if (!any_part_overlap) { 00343 (*overlap_grid)->InsertBBox(true, true, part->ShallowCopy()); 00344 } 00345 } 00346 any_part_overlap = true; 00347 total_overlap += overlap; 00348 } 00349 } 00350 return total_overlap; 00351 } 00352 00353 // Finds all the ColPartitions in the grid that overlap with the given 00354 // box and returns them SortByBoxLeft(ed) and uniqued in the given list. 00355 // Any partition equal to not_this (may be NULL) is excluded. 00356 void ColPartitionGrid::FindOverlappingPartitions(const TBOX& box, 00357 const ColPartition* not_this, 00358 ColPartition_CLIST* parts) { 00359 ColPartitionGridSearch rsearch(this); 00360 rsearch.StartRectSearch(box); 00361 ColPartition* part; 00362 while ((part = rsearch.NextRectSearch()) != NULL) { 00363 if (part != not_this) 00364 parts->add_sorted(SortByBoxLeft<ColPartition>, true, part); 00365 } 00366 } 00367 00368 // Finds and returns the best candidate ColPartition to merge with part, 00369 // selected from the candidates list, based on the minimum increase in 00370 // pairwise overlap among all the partitions overlapped by the combined box. 00371 // If overlap_increase is not NULL then it returns the increase in overlap 00372 // that would result from the merge. 00373 // confirm_cb is a permanent callback that (if non-null) will be used to 00374 // confirm the validity of a proposed merge candidate before selecting it. 00375 // 00376 // ======HOW MERGING WORKS====== 00377 // The problem: 00378 // We want to merge all the parts of a textline together, but avoid merging 00379 // separate textlines. Diacritics, i dots, punctuation, and broken characters 00380 // are examples of small bits that need merging with the main textline. 00381 // Drop-caps and descenders in one line that touch ascenders in the one below 00382 // are examples of cases where we don't want to merge. 00383 // 00384 // The solution: 00385 // Merges that increase overlap among other partitions are generally bad. 00386 // Those that don't increase overlap (much) and minimize the total area 00387 // seem to be good. 00388 // 00389 // Ascii art example: 00390 // The text: 00391 // groggy descenders 00392 // minimum ascenders 00393 // The boxes: The === represents a small box near or overlapping the lower box. 00394 // ----------------- 00395 // | | 00396 // ----------------- 00397 // -===------------- 00398 // | | 00399 // ----------------- 00400 // In considering what to do with the small === box, we find the 2 larger 00401 // boxes as neighbours and possible merge candidates, but merging with the 00402 // upper box increases overlap with the lower box, whereas merging with the 00403 // lower box does not increase overlap. 00404 // If the small === box didn't overlap either to start with, total area 00405 // would be minimized by merging with the nearer (lower) box. 00406 // 00407 // This is a simple example. In reality, we have to allow some increase 00408 // in overlap, or tightly spaced text would end up in bits. 00409 ColPartition* ColPartitionGrid::BestMergeCandidate( 00410 const ColPartition* part, ColPartition_CLIST* candidates, bool debug, 00411 TessResultCallback2<bool, const ColPartition*, const ColPartition*>* confirm_cb, 00412 int* overlap_increase) { 00413 if (overlap_increase != NULL) 00414 *overlap_increase = 0; 00415 if (candidates->empty()) 00416 return NULL; 00417 int ok_overlap = 00418 static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); 00419 // The best neighbour to merge with is the one that causes least 00420 // total pairwise overlap among all the neighbours. 00421 // If more than one offers the same total overlap, choose the one 00422 // with the least total area. 00423 const TBOX& part_box = part->bounding_box(); 00424 ColPartition_C_IT it(candidates); 00425 ColPartition* best_candidate = NULL; 00426 // Find the total combined box of all candidates and the original. 00427 TBOX full_box(part_box); 00428 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00429 ColPartition* candidate = it.data(); 00430 full_box += candidate->bounding_box(); 00431 } 00432 // Keep valid neighbours in a list. 00433 ColPartition_CLIST neighbours; 00434 // Now run a rect search of the merged box for overlapping neighbours, as 00435 // we need anything that might be overlapped by the merged box. 00436 FindOverlappingPartitions(full_box, part, &neighbours); 00437 if (debug) { 00438 tprintf("Finding best merge candidate from %d, %d neighbours for box:", 00439 candidates->length(), neighbours.length()); 00440 part_box.print(); 00441 } 00442 // If the best increase in overlap is positive, then we also check the 00443 // worst non-candidate overlap. This catches the case of multiple good 00444 // candidates that overlap each other when merged. If the worst 00445 // non-candidate overlap is better than the best overlap, then return 00446 // the worst non-candidate overlap instead. 00447 ColPartition_CLIST non_candidate_neighbours; 00448 non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true, 00449 &neighbours, candidates); 00450 int worst_nc_increase = 0; 00451 int best_increase = MAX_INT32; 00452 int best_area = 0; 00453 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00454 ColPartition* candidate = it.data(); 00455 if (confirm_cb != NULL && !confirm_cb->Run(part, candidate)) { 00456 if (debug) { 00457 tprintf("Candidate not confirmed:"); 00458 candidate->bounding_box().print(); 00459 } 00460 continue; 00461 } 00462 int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours); 00463 const TBOX& cand_box = candidate->bounding_box(); 00464 if (best_candidate == NULL || increase < best_increase) { 00465 best_candidate = candidate; 00466 best_increase = increase; 00467 best_area = cand_box.bounding_union(part_box).area() - cand_box.area(); 00468 if (debug) { 00469 tprintf("New best merge candidate has increase %d, area %d, over box:", 00470 increase, best_area); 00471 full_box.print(); 00472 candidate->Print(); 00473 } 00474 } else if (increase == best_increase) { 00475 int area = cand_box.bounding_union(part_box).area() - cand_box.area(); 00476 if (area < best_area) { 00477 best_area = area; 00478 best_candidate = candidate; 00479 } 00480 } 00481 increase = IncreaseInOverlap(part, candidate, ok_overlap, 00482 &non_candidate_neighbours); 00483 if (increase > worst_nc_increase) 00484 worst_nc_increase = increase; 00485 } 00486 if (best_increase > 0) { 00487 // If the worst non-candidate increase is less than the best increase 00488 // including the candidates, then all the candidates can merge together 00489 // and the increase in outside overlap would be less, so use that result, 00490 // but only if each candidate is either a good diacritic merge with part, 00491 // or an ok merge candidate with all the others. 00492 // See TestCompatibleCandidates for more explanation and a picture. 00493 if (worst_nc_increase < best_increase && 00494 TestCompatibleCandidates(*part, debug, candidates)) { 00495 best_increase = worst_nc_increase; 00496 } 00497 } 00498 if (overlap_increase != NULL) 00499 *overlap_increase = best_increase; 00500 return best_candidate; 00501 } 00502 00503 // Helper to remove the given box from the given partition, put it in its 00504 // own partition, and add to the partition list. 00505 static void RemoveBadBox(BLOBNBOX* box, ColPartition* part, 00506 ColPartition_LIST* part_list) { 00507 part->RemoveBox(box); 00508 ColPartition::MakeBigPartition(box, part_list); 00509 } 00510 00511 00512 // Split partitions where it reduces overlap between their bounding boxes. 00513 // ColPartitions are after all supposed to be a partitioning of the blobs 00514 // AND of the space on the page! 00515 // Blobs that cause overlaps get removed, put in individual partitions 00516 // and added to the big_parts list. They are most likely characters on 00517 // 2 textlines that touch, or something big like a dropcap. 00518 void ColPartitionGrid::SplitOverlappingPartitions( 00519 ColPartition_LIST* big_parts) { 00520 int ok_overlap = 00521 static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); 00522 // Iterate the ColPartitions in the grid. 00523 ColPartitionGridSearch gsearch(this); 00524 gsearch.StartFullSearch(); 00525 ColPartition* part; 00526 while ((part = gsearch.NextFullSearch()) != NULL) { 00527 // Set up a rectangle search bounded by the part. 00528 const TBOX& box = part->bounding_box(); 00529 ColPartitionGridSearch rsearch(this); 00530 rsearch.SetUniqueMode(true); 00531 rsearch.StartRectSearch(box); 00532 int unresolved_overlaps = 0; 00533 00534 ColPartition* neighbour; 00535 while ((neighbour = rsearch.NextRectSearch()) != NULL) { 00536 if (neighbour == part) 00537 continue; 00538 const TBOX& neighbour_box = neighbour->bounding_box(); 00539 if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) && 00540 part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false)) 00541 continue; // The overlap is OK both ways. 00542 00543 // If removal of the biggest box from either partition eliminates the 00544 // overlap, and it is much bigger than the box left behind, then 00545 // it is either a drop-cap, an inter-line join, or some junk that 00546 // we don't want anyway, so put it in the big_parts list. 00547 if (!part->IsSingleton()) { 00548 BLOBNBOX* excluded = part->BiggestBox(); 00549 TBOX shrunken = part->BoundsWithoutBox(excluded); 00550 if (!shrunken.overlap(neighbour_box) && 00551 excluded->bounding_box().height() > 00552 kBigPartSizeRatio * shrunken.height()) { 00553 // Removing the biggest box fixes the overlap, so do it! 00554 gsearch.RemoveBBox(); 00555 RemoveBadBox(excluded, part, big_parts); 00556 InsertBBox(true, true, part); 00557 gsearch.RepositionIterator(); 00558 break; 00559 } 00560 } else if (box.contains(neighbour_box)) { 00561 ++unresolved_overlaps; 00562 continue; // No amount of splitting will fix it. 00563 } 00564 if (!neighbour->IsSingleton()) { 00565 BLOBNBOX* excluded = neighbour->BiggestBox(); 00566 TBOX shrunken = neighbour->BoundsWithoutBox(excluded); 00567 if (!shrunken.overlap(box) && 00568 excluded->bounding_box().height() > 00569 kBigPartSizeRatio * shrunken.height()) { 00570 // Removing the biggest box fixes the overlap, so do it! 00571 rsearch.RemoveBBox(); 00572 RemoveBadBox(excluded, neighbour, big_parts); 00573 InsertBBox(true, true, neighbour); 00574 gsearch.RepositionIterator(); 00575 break; 00576 } 00577 } 00578 int part_overlap_count = part->CountOverlappingBoxes(neighbour_box); 00579 int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box); 00580 ColPartition* right_part = NULL; 00581 if (neighbour_overlap_count <= part_overlap_count || 00582 part->IsSingleton()) { 00583 // Try to split the neighbour to reduce overlap. 00584 BLOBNBOX* split_blob = neighbour->OverlapSplitBlob(box); 00585 if (split_blob != NULL) { 00586 rsearch.RemoveBBox(); 00587 right_part = neighbour->SplitAtBlob(split_blob); 00588 InsertBBox(true, true, neighbour); 00589 ASSERT_HOST(right_part != NULL); 00590 } 00591 } else { 00592 // Try to split part to reduce overlap. 00593 BLOBNBOX* split_blob = part->OverlapSplitBlob(neighbour_box); 00594 if (split_blob != NULL) { 00595 gsearch.RemoveBBox(); 00596 right_part = part->SplitAtBlob(split_blob); 00597 InsertBBox(true, true, part); 00598 ASSERT_HOST(right_part != NULL); 00599 } 00600 } 00601 if (right_part != NULL) { 00602 InsertBBox(true, true, right_part); 00603 gsearch.RepositionIterator(); 00604 rsearch.RepositionIterator(); 00605 break; 00606 } 00607 } 00608 if (unresolved_overlaps > 2 && part->IsSingleton()) { 00609 // This part is no good so just add to big_parts. 00610 RemoveBBox(part); 00611 ColPartition_IT big_it(big_parts); 00612 part->set_block_owned(true); 00613 big_it.add_to_end(part); 00614 gsearch.RepositionIterator(); 00615 } 00616 } 00617 } 00618 00619 // Filters partitions of source_type by looking at local neighbours. 00620 // Where a majority of neighbours have a text type, the partitions are 00621 // changed to text, where the neighbours have image type, they are changed 00622 // to image, and partitions that have no definite neighbourhood type are 00623 // left unchanged. 00624 // im_box and rerotation are used to map blob coordinates onto the 00625 // nontext_map, which is used to prevent the spread of text neighbourhoods 00626 // into images. 00627 // Returns true if anything was changed. 00628 bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, 00629 Pix* nontext_map, 00630 const TBOX& im_box, 00631 const FCOORD& rotation) { 00632 // Iterate the ColPartitions in the grid. 00633 ColPartitionGridSearch gsearch(this); 00634 gsearch.StartFullSearch(); 00635 ColPartition* part; 00636 bool any_changed = false; 00637 while ((part = gsearch.NextFullSearch()) != NULL) { 00638 if (part->flow() != source_type || BLOBNBOX::IsLineType(part->blob_type())) 00639 continue; 00640 const TBOX& box = part->bounding_box(); 00641 bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); 00642 if (SmoothRegionType(nontext_map, im_box, rotation, debug, part)) 00643 any_changed = true; 00644 } 00645 return any_changed; 00646 } 00647 00648 // Compute the mean RGB of the light and dark pixels in each ColPartition 00649 // and also the rms error in the linearity of color. 00650 void ColPartitionGrid::ComputePartitionColors(Pix* scaled_color, 00651 int scaled_factor, 00652 const FCOORD& rerotation) { 00653 if (scaled_color == NULL) 00654 return; 00655 Pix* color_map1 = NULL; 00656 Pix* color_map2 = NULL; 00657 Pix* rms_map = NULL; 00658 if (textord_tabfind_show_color_fit) { 00659 int width = pixGetWidth(scaled_color); 00660 int height = pixGetHeight(scaled_color); 00661 color_map1 = pixCreate(width, height, 32); 00662 color_map2 = pixCreate(width, height, 32); 00663 rms_map = pixCreate(width, height, 8); 00664 } 00665 // Iterate the ColPartitions in the grid. 00666 ColPartitionGridSearch gsearch(this); 00667 gsearch.StartFullSearch(); 00668 ColPartition* part; 00669 while ((part = gsearch.NextFullSearch()) != NULL) { 00670 TBOX part_box = part->bounding_box(); 00671 part_box.rotate_large(rerotation); 00672 ImageFind::ComputeRectangleColors(part_box, scaled_color, 00673 scaled_factor, 00674 color_map1, color_map2, rms_map, 00675 part->color1(), part->color2()); 00676 } 00677 if (color_map1 != NULL) { 00678 pixWrite("swcolorinput.png", scaled_color, IFF_PNG); 00679 pixWrite("swcolor1.png", color_map1, IFF_PNG); 00680 pixWrite("swcolor2.png", color_map2, IFF_PNG); 00681 pixWrite("swrms.png", rms_map, IFF_PNG); 00682 pixDestroy(&color_map1); 00683 pixDestroy(&color_map2); 00684 pixDestroy(&rms_map); 00685 } 00686 } 00687 00688 // Reflects the grid and its colpartitions in the y-axis, assuming that 00689 // all blob boxes have already been done. 00690 void ColPartitionGrid::ReflectInYAxis() { 00691 ColPartition_LIST parts; 00692 ColPartition_IT part_it(&parts); 00693 // Iterate the ColPartitions in the grid to extract them. 00694 ColPartitionGridSearch gsearch(this); 00695 gsearch.StartFullSearch(); 00696 ColPartition* part; 00697 while ((part = gsearch.NextFullSearch()) != NULL) { 00698 part_it.add_after_then_move(part); 00699 } 00700 ICOORD bot_left(-tright().x(), bleft().y()); 00701 ICOORD top_right(-bleft().x(), tright().y()); 00702 // Reinitializing the grid with reflected coords also clears all the 00703 // pointers, so parts will now own the ColPartitions. (Briefly). 00704 Init(gridsize(), bot_left, top_right); 00705 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { 00706 part = part_it.extract(); 00707 part->ReflectInYAxis(); 00708 InsertBBox(true, true, part); 00709 } 00710 } 00711 00712 // Transforms the grid of partitions to the output blocks, putting each 00713 // partition into a separate block. We don't really care about the order, 00714 // as we just want to get as much text as possible without trying to organize 00715 // it into proper blocks or columns. 00716 // TODO(rays) some kind of sort function would be useful and probably better 00717 // than the default here, which is to sort by order of the grid search. 00718 void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST* blocks, 00719 TO_BLOCK_LIST* to_blocks) { 00720 TO_BLOCK_IT to_block_it(to_blocks); 00721 BLOCK_IT block_it(blocks); 00722 // All partitions will be put on this list and deleted on return. 00723 ColPartition_LIST parts; 00724 ColPartition_IT part_it(&parts); 00725 // Iterate the ColPartitions in the grid to extract them. 00726 ColPartitionGridSearch gsearch(this); 00727 gsearch.StartFullSearch(); 00728 ColPartition* part; 00729 while ((part = gsearch.NextFullSearch()) != NULL) { 00730 part_it.add_after_then_move(part); 00731 // The partition has to be at least vaguely like text. 00732 BlobRegionType blob_type = part->blob_type(); 00733 if (BLOBNBOX::IsTextType(blob_type) || 00734 (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) { 00735 PolyBlockType type = blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT 00736 : PT_FLOWING_TEXT; 00737 // Get metrics from the row that will be used for the block. 00738 TBOX box = part->bounding_box(); 00739 int median_width = part->median_width(); 00740 int median_height = part->median_size(); 00741 // Turn the partition into a TO_ROW. 00742 TO_ROW* row = part->MakeToRow(); 00743 if (row == NULL) { 00744 // This partition is dead. 00745 part->DeleteBoxes(); 00746 continue; 00747 } 00748 BLOCK* block = new BLOCK("", true, 0, 0, box.left(), box.bottom(), 00749 box.right(), box.top()); 00750 block->set_poly_block(new POLY_BLOCK(box, type)); 00751 TO_BLOCK* to_block = new TO_BLOCK(block); 00752 TO_ROW_IT row_it(to_block->get_rows()); 00753 row_it.add_after_then_move(row); 00754 // We haven't differentially rotated vertical and horizontal text at 00755 // this point, so use width or height as appropriate. 00756 if (blob_type == BRT_VERT_TEXT) { 00757 to_block->line_size = static_cast<float>(median_width); 00758 to_block->line_spacing = static_cast<float>(box.width()); 00759 to_block->max_blob_size = static_cast<float>(box.width() + 1); 00760 } else { 00761 to_block->line_size = static_cast<float>(median_height); 00762 to_block->line_spacing = static_cast<float>(box.height()); 00763 to_block->max_blob_size = static_cast<float>(box.height() + 1); 00764 } 00765 block_it.add_to_end(block); 00766 to_block_it.add_to_end(to_block); 00767 } else { 00768 // This partition is dead. 00769 part->DeleteBoxes(); 00770 } 00771 } 00772 Clear(); 00773 // Now it is safe to delete the ColPartitions as parts goes out of scope. 00774 } 00775 00776 // Rotates the grid and its colpartitions by the given angle, assuming that 00777 // all blob boxes have already been done. 00778 void ColPartitionGrid::Deskew(const FCOORD& deskew) { 00779 ColPartition_LIST parts; 00780 ColPartition_IT part_it(&parts); 00781 // Iterate the ColPartitions in the grid to extract them. 00782 ColPartitionGridSearch gsearch(this); 00783 gsearch.StartFullSearch(); 00784 ColPartition* part; 00785 while ((part = gsearch.NextFullSearch()) != NULL) { 00786 part_it.add_after_then_move(part); 00787 } 00788 // Rebuild the grid to the new size. 00789 TBOX grid_box(bleft_, tright_); 00790 grid_box.rotate_large(deskew); 00791 Init(gridsize(), grid_box.botleft(), grid_box.topright()); 00792 // Reinitializing the grid with rotated coords also clears all the 00793 // pointers, so parts will now own the ColPartitions. (Briefly). 00794 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { 00795 part = part_it.extract(); 00796 part->ComputeLimits(); 00797 InsertBBox(true, true, part); 00798 } 00799 } 00800 00801 // Sets the left and right tabs of the partitions in the grid. 00802 void ColPartitionGrid::SetTabStops(TabFind* tabgrid) { 00803 // Iterate the ColPartitions in the grid. 00804 ColPartitionGridSearch gsearch(this); 00805 gsearch.StartFullSearch(); 00806 ColPartition* part; 00807 while ((part = gsearch.NextFullSearch()) != NULL) { 00808 const TBOX& part_box = part->bounding_box(); 00809 TabVector* left_line = tabgrid->LeftTabForBox(part_box, true, false); 00810 // If the overlapping line is not a left tab, try for non-overlapping. 00811 if (left_line != NULL && !left_line->IsLeftTab()) 00812 left_line = tabgrid->LeftTabForBox(part_box, false, false); 00813 if (left_line != NULL && left_line->IsLeftTab()) 00814 part->SetLeftTab(left_line); 00815 TabVector* right_line = tabgrid->RightTabForBox(part_box, true, false); 00816 if (right_line != NULL && !right_line->IsRightTab()) 00817 right_line = tabgrid->RightTabForBox(part_box, false, false); 00818 if (right_line != NULL && right_line->IsRightTab()) 00819 part->SetRightTab(right_line); 00820 part->SetColumnGoodness(tabgrid->WidthCB()); 00821 } 00822 } 00823 00824 // Makes the ColPartSets and puts them in the PartSetVector ready 00825 // for finding column bounds. Returns false if no partitions were found. 00826 bool ColPartitionGrid::MakeColPartSets(PartSetVector* part_sets) { 00827 ColPartition_LIST* part_lists = new ColPartition_LIST[gridheight()]; 00828 part_sets->reserve(gridheight()); 00829 // Iterate the ColPartitions in the grid to get parts onto lists for the 00830 // y bottom of each. 00831 ColPartitionGridSearch gsearch(this); 00832 gsearch.StartFullSearch(); 00833 ColPartition* part; 00834 bool any_parts_found = false; 00835 while ((part = gsearch.NextFullSearch()) != NULL) { 00836 BlobRegionType blob_type = part->blob_type(); 00837 if (blob_type != BRT_NOISE && 00838 (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) { 00839 int grid_x, grid_y; 00840 const TBOX& part_box = part->bounding_box(); 00841 GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); 00842 ColPartition_IT part_it(&part_lists[grid_y]); 00843 part_it.add_to_end(part); 00844 any_parts_found = true; 00845 } 00846 } 00847 if (any_parts_found) { 00848 for (int grid_y = 0; grid_y < gridheight(); ++grid_y) { 00849 ColPartitionSet* line_set = NULL; 00850 if (!part_lists[grid_y].empty()) { 00851 line_set = new ColPartitionSet(&part_lists[grid_y]); 00852 } 00853 part_sets->push_back(line_set); 00854 } 00855 } 00856 delete [] part_lists; 00857 return any_parts_found; 00858 } 00859 00860 // Makes a single ColPartitionSet consisting of a single ColPartition that 00861 // represents the total horizontal extent of the significant content on the 00862 // page. Used for the single column setting in place of automatic detection. 00863 // Returns NULL if the page is empty of significant content. 00864 ColPartitionSet* ColPartitionGrid::MakeSingleColumnSet(WidthCallback* cb) { 00865 ColPartition* single_column_part = NULL; 00866 // Iterate the ColPartitions in the grid to get parts onto lists for the 00867 // y bottom of each. 00868 ColPartitionGridSearch gsearch(this); 00869 gsearch.StartFullSearch(); 00870 ColPartition* part; 00871 while ((part = gsearch.NextFullSearch()) != NULL) { 00872 BlobRegionType blob_type = part->blob_type(); 00873 if (blob_type != BRT_NOISE && 00874 (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) { 00875 // Consider for single column. 00876 BlobTextFlowType flow = part->flow(); 00877 if ((blob_type == BRT_TEXT && 00878 (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN || 00879 flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) || 00880 blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) { 00881 if (single_column_part == NULL) { 00882 single_column_part = part->ShallowCopy(); 00883 single_column_part->set_blob_type(BRT_TEXT); 00884 // Copy the tabs from itself to properly setup the margins. 00885 single_column_part->CopyLeftTab(*single_column_part, false); 00886 single_column_part->CopyRightTab(*single_column_part, false); 00887 } else { 00888 if (part->left_key() < single_column_part->left_key()) 00889 single_column_part->CopyLeftTab(*part, false); 00890 if (part->right_key() > single_column_part->right_key()) 00891 single_column_part->CopyRightTab(*part, false); 00892 } 00893 } 00894 } 00895 } 00896 if (single_column_part != NULL) { 00897 // Make a ColPartitionSet out of the single_column_part as a candidate 00898 // for the single column case. 00899 single_column_part->SetColumnGoodness(cb); 00900 return new ColPartitionSet(single_column_part); 00901 } 00902 return NULL; 00903 } 00904 00905 // Mark the BLOBNBOXes in each partition as being owned by that partition. 00906 void ColPartitionGrid::ClaimBoxes() { 00907 // Iterate the ColPartitions in the grid. 00908 ColPartitionGridSearch gsearch(this); 00909 gsearch.StartFullSearch(); 00910 ColPartition* part; 00911 while ((part = gsearch.NextFullSearch()) != NULL) { 00912 part->ClaimBoxes(); 00913 } 00914 } 00915 00916 // Retypes all the blobs referenced by the partitions in the grid. 00917 // Image blobs are found and returned in the im_blobs list, as they are not 00918 // owned by the block. 00919 void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST* im_blobs) { 00920 BLOBNBOX_IT im_blob_it(im_blobs); 00921 ColPartition_LIST dead_parts; 00922 ColPartition_IT dead_part_it(&dead_parts); 00923 // Iterate the ColPartitions in the grid. 00924 ColPartitionGridSearch gsearch(this); 00925 gsearch.StartFullSearch(); 00926 ColPartition* part; 00927 while ((part = gsearch.NextFullSearch()) != NULL) { 00928 BlobRegionType blob_type = part->blob_type(); 00929 BlobTextFlowType flow = part->flow(); 00930 bool any_blobs_moved = false; 00931 if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) { 00932 BLOBNBOX_C_IT blob_it(part->boxes()); 00933 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00934 BLOBNBOX* blob = blob_it.data(); 00935 im_blob_it.add_after_then_move(blob); 00936 } 00937 } else if (blob_type != BRT_NOISE) { 00938 // Make sure the blobs are marked with the correct type and flow. 00939 BLOBNBOX_C_IT blob_it(part->boxes()); 00940 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00941 BLOBNBOX* blob = blob_it.data(); 00942 if (blob->region_type() == BRT_NOISE) { 00943 // TODO(rays) Deprecated. Change this section to an assert to verify 00944 // and then delete. 00945 ASSERT_HOST(blob->cblob()->area() != 0); 00946 blob->set_owner(NULL); 00947 blob_it.extract(); 00948 any_blobs_moved = true; 00949 } else { 00950 blob->set_region_type(blob_type); 00951 if (blob->flow() != BTFT_LEADER) 00952 blob->set_flow(flow); 00953 } 00954 } 00955 } 00956 if (blob_type == BRT_NOISE || part->boxes()->empty()) { 00957 BLOBNBOX_C_IT blob_it(part->boxes()); 00958 part->DisownBoxes(); 00959 dead_part_it.add_to_end(part); 00960 gsearch.RemoveBBox(); 00961 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00962 BLOBNBOX* blob = blob_it.data(); 00963 if (blob->cblob()->area() == 0) { 00964 // Any blob with zero area is a fake image blob and should be deleted. 00965 delete blob->cblob(); 00966 delete blob; 00967 } 00968 } 00969 } else if (any_blobs_moved) { 00970 gsearch.RemoveBBox(); 00971 part->ComputeLimits(); 00972 InsertBBox(true, true, part); 00973 gsearch.RepositionIterator(); 00974 } 00975 } 00976 } 00977 00978 // The boxes within the partitions have changed (by deskew) so recompute 00979 // the bounds of all the partitions and reinsert them into the grid. 00980 void ColPartitionGrid::RecomputeBounds(int gridsize, 00981 const ICOORD& bleft, 00982 const ICOORD& tright, 00983 const ICOORD& vertical) { 00984 ColPartition_LIST saved_parts; 00985 ColPartition_IT part_it(&saved_parts); 00986 // Iterate the ColPartitions in the grid to get parts onto a list. 00987 ColPartitionGridSearch gsearch(this); 00988 gsearch.StartFullSearch(); 00989 ColPartition* part; 00990 while ((part = gsearch.NextFullSearch()) != NULL) { 00991 part_it.add_to_end(part); 00992 } 00993 // Reinitialize grid to the new size. 00994 Init(gridsize, bleft, tright); 00995 // Recompute the bounds of the parts and put them back in the new grid. 00996 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { 00997 part = part_it.extract(); 00998 part->set_vertical(vertical); 00999 part->ComputeLimits(); 01000 InsertBBox(true, true, part); 01001 } 01002 } 01003 01004 // Improves the margins of the ColPartitions in the grid by calling 01005 // FindPartitionMargins on each. 01006 // best_columns, which may be NULL, is an array of pointers indicating the 01007 // column set at each y-coordinate in the grid. 01008 // best_columns is usually the best_columns_ member of ColumnFinder. 01009 void ColPartitionGrid::GridFindMargins(ColPartitionSet** best_columns) { 01010 // Iterate the ColPartitions in the grid. 01011 ColPartitionGridSearch gsearch(this); 01012 gsearch.StartFullSearch(); 01013 ColPartition* part; 01014 while ((part = gsearch.NextFullSearch()) != NULL) { 01015 // Set up a rectangle search x-bounded by the column and y by the part. 01016 ColPartitionSet* columns = best_columns != NULL 01017 ? best_columns[gsearch.GridY()] 01018 : NULL; 01019 FindPartitionMargins(columns, part); 01020 const TBOX& box = part->bounding_box(); 01021 if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) { 01022 tprintf("Computed margins for part:"); 01023 part->Print(); 01024 } 01025 } 01026 } 01027 01028 // Improves the margins of the ColPartitions in the list by calling 01029 // FindPartitionMargins on each. 01030 // best_columns, which may be NULL, is an array of pointers indicating the 01031 // column set at each y-coordinate in the grid. 01032 // best_columns is usually the best_columns_ member of ColumnFinder. 01033 void ColPartitionGrid::ListFindMargins(ColPartitionSet** best_columns, 01034 ColPartition_LIST* parts) { 01035 ColPartition_IT part_it(parts); 01036 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { 01037 ColPartition* part = part_it.data(); 01038 ColPartitionSet* columns = NULL; 01039 if (best_columns != NULL) { 01040 TBOX part_box = part->bounding_box(); 01041 // Get the columns from the y grid coord. 01042 int grid_x, grid_y; 01043 GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); 01044 columns = best_columns[grid_y]; 01045 } 01046 FindPartitionMargins(columns, part); 01047 } 01048 } 01049 01050 // Deletes all the partitions in the grid after disowning all the blobs. 01051 void ColPartitionGrid::DeleteParts() { 01052 ColPartition_LIST dead_parts; 01053 ColPartition_IT dead_it(&dead_parts); 01054 ColPartitionGridSearch gsearch(this); 01055 gsearch.StartFullSearch(); 01056 ColPartition* part; 01057 while ((part = gsearch.NextFullSearch()) != NULL) { 01058 part->DisownBoxes(); 01059 dead_it.add_to_end(part); // Parts will be deleted on return. 01060 } 01061 Clear(); 01062 } 01063 01064 // Deletes all the partitions in the grid that are of type BRT_UNKNOWN and 01065 // all the blobs in them. 01066 void ColPartitionGrid::DeleteUnknownParts(TO_BLOCK* block) { 01067 ColPartitionGridSearch gsearch(this); 01068 gsearch.StartFullSearch(); 01069 ColPartition* part; 01070 while ((part = gsearch.NextFullSearch()) != NULL) { 01071 if (part->blob_type() == BRT_UNKNOWN) { 01072 gsearch.RemoveBBox(); 01073 // Once marked, the blobs will be swept up by DeleteUnownedNoise. 01074 part->set_flow(BTFT_NONTEXT); 01075 part->set_blob_type(BRT_NOISE); 01076 part->SetBlobTypes(); 01077 part->DisownBoxes(); 01078 delete part; 01079 } 01080 } 01081 block->DeleteUnownedNoise(); 01082 } 01083 01084 // Deletes all the partitions in the grid that are NOT of flow type BTFT_LEADER. 01085 void ColPartitionGrid::DeleteNonLeaderParts() { 01086 ColPartitionGridSearch gsearch(this); 01087 gsearch.StartFullSearch(); 01088 ColPartition* part; 01089 while ((part = gsearch.NextFullSearch()) != NULL) { 01090 if (part->flow() != BTFT_LEADER) { 01091 gsearch.RemoveBBox(); 01092 if (part->ReleaseNonLeaderBoxes()) { 01093 InsertBBox(true, true, part); 01094 gsearch.RepositionIterator(); 01095 } else { 01096 delete part; 01097 } 01098 } 01099 } 01100 } 01101 01102 // Finds and marks text partitions that represent figure captions. 01103 void ColPartitionGrid::FindFigureCaptions() { 01104 // For each image region find its best candidate text caption region, 01105 // if any and mark it as such. 01106 ColPartitionGridSearch gsearch(this); 01107 gsearch.StartFullSearch(); 01108 ColPartition* part; 01109 while ((part = gsearch.NextFullSearch()) != NULL) { 01110 if (part->IsImageType()) { 01111 const TBOX& part_box = part->bounding_box(); 01112 bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(), 01113 part_box.bottom()); 01114 ColPartition* best_caption = NULL; 01115 int best_dist = 0; // Distance to best_caption. 01116 int best_upper = 0; // Direction of best_caption. 01117 // Handle both lower and upper directions. 01118 for (int upper = 0; upper < 2; ++upper) { 01119 ColPartition_C_IT partner_it(upper ? part->upper_partners() 01120 : part->lower_partners()); 01121 // If there are no image partners, then this direction is ok. 01122 for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); 01123 partner_it.forward()) { 01124 ColPartition* partner = partner_it.data(); 01125 if (partner->IsImageType()) { 01126 break; 01127 } 01128 } 01129 if (!partner_it.cycled_list()) continue; 01130 // Find the nearest totally overlapping text partner. 01131 for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); 01132 partner_it.forward()) { 01133 ColPartition* partner = partner_it.data(); 01134 if (!partner->IsTextType() || partner->type() == PT_TABLE) continue; 01135 const TBOX& partner_box = partner->bounding_box(); 01136 if (debug) { 01137 tprintf("Finding figure captions for image part:"); 01138 part_box.print(); 01139 tprintf("Considering partner:"); 01140 partner_box.print(); 01141 } 01142 if (partner_box.left() >= part_box.left() && 01143 partner_box.right() <= part_box.right()) { 01144 int dist = partner_box.y_gap(part_box); 01145 if (best_caption == NULL || dist < best_dist) { 01146 best_dist = dist; 01147 best_caption = partner; 01148 best_upper = upper; 01149 } 01150 } 01151 } 01152 } 01153 if (best_caption != NULL) { 01154 if (debug) { 01155 tprintf("Best caption candidate:"); 01156 best_caption->bounding_box().print(); 01157 } 01158 // We have a candidate caption. Qualify it as being separable from 01159 // any body text. We are looking for either a small number of lines 01160 // or a big gap that indicates a separation from the body text. 01161 int line_count = 0; 01162 int biggest_gap = 0; 01163 int smallest_gap = MAX_INT16; 01164 int total_height = 0; 01165 int mean_height = 0; 01166 ColPartition* end_partner = NULL; 01167 ColPartition* next_partner = NULL; 01168 for (ColPartition* partner = best_caption; partner != NULL && 01169 line_count <= kMaxCaptionLines; 01170 partner = next_partner) { 01171 if (!partner->IsTextType()) { 01172 end_partner = partner; 01173 break; 01174 } 01175 ++line_count; 01176 total_height += partner->bounding_box().height(); 01177 next_partner = partner->SingletonPartner(best_upper); 01178 if (next_partner != NULL) { 01179 int gap = partner->bounding_box().y_gap( 01180 next_partner->bounding_box()); 01181 if (gap > biggest_gap) { 01182 biggest_gap = gap; 01183 end_partner = next_partner; 01184 mean_height = total_height / line_count; 01185 } else if (gap < smallest_gap) { 01186 smallest_gap = gap; 01187 } 01188 // If the gap looks big compared to the text size and the smallest 01189 // gap seen so far, then we can stop. 01190 if (biggest_gap > mean_height * kMinCaptionGapHeightRatio && 01191 biggest_gap > smallest_gap * kMinCaptionGapRatio) 01192 break; 01193 } 01194 } 01195 if (debug) { 01196 tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n", 01197 line_count, biggest_gap, smallest_gap, mean_height); 01198 if (end_partner != NULL) { 01199 tprintf("End partner:"); 01200 end_partner->bounding_box().print(); 01201 } 01202 } 01203 if (next_partner == NULL && line_count <= kMaxCaptionLines) 01204 end_partner = NULL; // No gap, but line count is small. 01205 if (line_count <= kMaxCaptionLines) { 01206 // This is a qualified caption. Mark the text as caption. 01207 for (ColPartition* partner = best_caption; partner != NULL && 01208 partner != end_partner; 01209 partner = next_partner) { 01210 partner->set_type(PT_CAPTION_TEXT); 01211 partner->SetBlobTypes(); 01212 if (debug) { 01213 tprintf("Set caption type for partition:"); 01214 partner->bounding_box().print(); 01215 } 01216 next_partner = partner->SingletonPartner(best_upper); 01217 } 01218 } 01219 } 01220 } 01221 } 01222 } 01223 01226 01227 // For every ColPartition in the grid, finds its upper and lower neighbours. 01228 void ColPartitionGrid::FindPartitionPartners() { 01229 ColPartitionGridSearch gsearch(this); 01230 gsearch.StartFullSearch(); 01231 ColPartition* part; 01232 while ((part = gsearch.NextFullSearch()) != NULL) { 01233 if (part->IsVerticalType()) { 01234 FindVPartitionPartners(true, part); 01235 FindVPartitionPartners(false, part); 01236 } else { 01237 FindPartitionPartners(true, part); 01238 FindPartitionPartners(false, part); 01239 } 01240 } 01241 } 01242 01243 // Finds the best partner in the given direction for the given partition. 01244 // Stores the result with AddPartner. 01245 void ColPartitionGrid::FindPartitionPartners(bool upper, ColPartition* part) { 01246 if (part->type() == PT_NOISE) 01247 return; // Noise is not allowed to partner anything. 01248 const TBOX& box = part->bounding_box(); 01249 int top = part->median_top(); 01250 int bottom = part->median_bottom(); 01251 int height = top - bottom; 01252 int mid_y = (bottom + top) / 2; 01253 ColPartitionGridSearch vsearch(this); 01254 // Search down for neighbour below 01255 vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY()); 01256 ColPartition* neighbour; 01257 ColPartition* best_neighbour = NULL; 01258 int best_dist = MAX_INT32; 01259 while ((neighbour = vsearch.NextVerticalSearch(!upper)) != NULL) { 01260 if (neighbour == part || neighbour->type() == PT_NOISE) 01261 continue; // Noise is not allowed to partner anything. 01262 int neighbour_bottom = neighbour->median_bottom(); 01263 int neighbour_top = neighbour->median_top(); 01264 int neighbour_y = (neighbour_bottom + neighbour_top) / 2; 01265 if (upper != (neighbour_y > mid_y)) 01266 continue; 01267 if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour)) 01268 continue; 01269 if (!part->TypesMatch(*neighbour)) { 01270 if (best_neighbour == NULL) 01271 best_neighbour = neighbour; 01272 continue; 01273 } 01274 int dist = upper ? neighbour_bottom - top : bottom - neighbour_top; 01275 if (dist <= kMaxPartitionSpacing * height) { 01276 if (dist < best_dist) { 01277 best_dist = dist; 01278 best_neighbour = neighbour; 01279 } 01280 } else { 01281 break; 01282 } 01283 } 01284 if (best_neighbour != NULL) 01285 part->AddPartner(upper, best_neighbour); 01286 } 01287 01288 // Finds the best partner in the given direction for the given partition. 01289 // Stores the result with AddPartner. 01290 void ColPartitionGrid::FindVPartitionPartners(bool to_the_left, 01291 ColPartition* part) { 01292 if (part->type() == PT_NOISE) 01293 return; // Noise is not allowed to partner anything. 01294 const TBOX& box = part->bounding_box(); 01295 int left = part->median_left(); 01296 int right = part->median_right(); 01297 int width = right - left; 01298 int mid_x = (left + right) / 2; 01299 ColPartitionGridSearch hsearch(this); 01300 // Search left for neighbour to_the_left 01301 hsearch.StartSideSearch(mid_x, box.bottom(), box.top()); 01302 ColPartition* neighbour; 01303 ColPartition* best_neighbour = NULL; 01304 int best_dist = MAX_INT32; 01305 while ((neighbour = hsearch.NextSideSearch(to_the_left)) != NULL) { 01306 if (neighbour == part || neighbour->type() == PT_NOISE) 01307 continue; // Noise is not allowed to partner anything. 01308 int neighbour_left = neighbour->median_left(); 01309 int neighbour_right = neighbour->median_right(); 01310 int neighbour_x = (neighbour_left + neighbour_right) / 2; 01311 if (to_the_left != (neighbour_x < mid_x)) 01312 continue; 01313 if (!part->VOverlaps(*neighbour)) 01314 continue; 01315 if (!part->TypesMatch(*neighbour)) 01316 continue; // Only match to other vertical text. 01317 int dist = to_the_left ? left - neighbour_right : neighbour_left - right; 01318 if (dist <= kMaxPartitionSpacing * width) { 01319 if (dist < best_dist || best_neighbour == NULL) { 01320 best_dist = dist; 01321 best_neighbour = neighbour; 01322 } 01323 } else { 01324 break; 01325 } 01326 } 01327 // For vertical partitions, the upper partner is to the left, and lower is 01328 // to the right. 01329 if (best_neighbour != NULL) 01330 part->AddPartner(to_the_left, best_neighbour); 01331 } 01332 01333 // For every ColPartition with multiple partners in the grid, reduces the 01334 // number of partners to 0 or 1. If get_desperate is true, goes to more 01335 // desperate merge methods to merge flowing text before breaking partnerships. 01336 void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) { 01337 ColPartitionGridSearch gsearch(this); 01338 // Refine in type order so that chasing multiple partners can be done 01339 // before eliminating type mis-matching partners. 01340 for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) { 01341 // Iterate the ColPartitions in the grid. 01342 gsearch.StartFullSearch(); 01343 ColPartition* part; 01344 while ((part = gsearch.NextFullSearch()) != NULL) { 01345 part->RefinePartners(static_cast<PolyBlockType>(type), 01346 get_desperate, this); 01347 // Iterator may have been messed up by a merge. 01348 gsearch.RepositionIterator(); 01349 } 01350 } 01351 } 01352 01353 01354 // ========================== PRIVATE CODE ======================== 01355 01356 // Finds and returns a list of candidate ColPartitions to merge with part. 01357 // The candidates must overlap search_box, and when merged must not 01358 // overlap any other partitions that are not overlapped by each individually. 01359 void ColPartitionGrid::FindMergeCandidates(const ColPartition* part, 01360 const TBOX& search_box, bool debug, 01361 ColPartition_CLIST* candidates) { 01362 int ok_overlap = 01363 static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); 01364 const TBOX& part_box = part->bounding_box(); 01365 // Now run the rect search. 01366 ColPartitionGridSearch rsearch(this); 01367 rsearch.SetUniqueMode(true); 01368 rsearch.StartRectSearch(search_box); 01369 ColPartition* candidate; 01370 while ((candidate = rsearch.NextRectSearch()) != NULL) { 01371 if (!OKMergeCandidate(part, candidate, debug)) 01372 continue; 01373 const TBOX& c_box = candidate->bounding_box(); 01374 // Candidate seems to be a potential merge with part. If one contains 01375 // the other, then the merge is a no-brainer. Otherwise, search the 01376 // combined box to see if anything else is inappropriately overlapped. 01377 if (!part_box.contains(c_box) && !c_box.contains(part_box)) { 01378 // Search the combined rectangle to see if anything new is overlapped. 01379 // This is a preliminary test designed to quickly weed-out stupid 01380 // merge candidates that would create a big list of overlapped objects 01381 // for the squared-order overlap analysis. Eg. vertical and horizontal 01382 // line-like objects that overlap real text when merged: 01383 // || ========================== 01384 // || 01385 // || r e a l t e x t 01386 // || 01387 // || 01388 TBOX merged_box(part_box); 01389 merged_box += c_box; 01390 ColPartitionGridSearch msearch(this); 01391 msearch.SetUniqueMode(true); 01392 msearch.StartRectSearch(merged_box); 01393 ColPartition* neighbour; 01394 while ((neighbour = msearch.NextRectSearch()) != NULL) { 01395 if (neighbour == part || neighbour == candidate) 01396 continue; // Ignore itself. 01397 if (neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, false)) 01398 continue; // This kind of merge overlap is OK. 01399 TBOX n_box = neighbour->bounding_box(); 01400 // The overlap is OK if: 01401 // * the n_box already overlapped the part or the candidate OR 01402 // * the n_box is a suitable merge with either part or candidate 01403 if (!n_box.overlap(part_box) && !n_box.overlap(c_box) && 01404 !OKMergeCandidate(part, neighbour, false) && 01405 !OKMergeCandidate(candidate, neighbour, false)) 01406 break; 01407 } 01408 if (neighbour != NULL) { 01409 if (debug) { 01410 tprintf("Combined box overlaps another that is not OK despite" 01411 " allowance of %d:", ok_overlap); 01412 neighbour->bounding_box().print(); 01413 tprintf("Reason:"); 01414 OKMergeCandidate(part, neighbour, true); 01415 tprintf("...and:"); 01416 OKMergeCandidate(candidate, neighbour, true); 01417 tprintf("Overlap:"); 01418 neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, true); 01419 } 01420 continue; 01421 } 01422 } 01423 if (debug) { 01424 tprintf("Adding candidate:"); 01425 candidate->bounding_box().print(); 01426 } 01427 // Unique elements as they arrive. 01428 candidates->add_sorted(SortByBoxLeft<ColPartition>, true, candidate); 01429 } 01430 } 01431 01432 // Smoothes the region type/flow type of the given part by looking at local 01433 // neighbours and the given image mask. Searches a padded rectangle with the 01434 // padding truncated on one size of the part's box in turn for each side, 01435 // using the result (if any) that has the least distance to all neighbours 01436 // that contribute to the decision. This biases in favor of rectangular 01437 // regions without completely enforcing them. 01438 // If a good decision cannot be reached, the part is left unchanged. 01439 // im_box and rerotation are used to map blob coordinates onto the 01440 // nontext_map, which is used to prevent the spread of text neighbourhoods 01441 // into images. 01442 // Returns true if the partition was changed. 01443 bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map, 01444 const TBOX& im_box, 01445 const FCOORD& rerotation, 01446 bool debug, 01447 ColPartition* part) { 01448 const TBOX& part_box = part->bounding_box(); 01449 if (debug) { 01450 tprintf("Smooothing part at:"); 01451 part_box.print(); 01452 } 01453 BlobRegionType best_type = BRT_UNKNOWN; 01454 int best_dist = MAX_INT32; 01455 int max_dist = MIN(part_box.width(), part_box.height()); 01456 max_dist = MAX(max_dist * kMaxNeighbourDistFactor, gridsize() * 2); 01457 // Search with the pad truncated on each side of the box in turn. 01458 bool any_image = false; 01459 bool all_image = true; 01460 for (int d = 0; d < BND_COUNT; ++d) { 01461 int dist; 01462 BlobNeighbourDir dir = static_cast<BlobNeighbourDir>(d); 01463 BlobRegionType type = SmoothInOneDirection(dir, nontext_map, im_box, 01464 rerotation, debug, *part, 01465 &dist); 01466 if (debug) { 01467 tprintf("Result in dir %d = %d at dist %d\n", dir, type, dist); 01468 } 01469 if (type != BRT_UNKNOWN && dist < best_dist) { 01470 best_dist = dist; 01471 best_type = type; 01472 } 01473 if (type == BRT_POLYIMAGE) 01474 any_image = true; 01475 else 01476 all_image = false; 01477 } 01478 if (best_dist > max_dist) 01479 return false; // Too far away to set the type with it. 01480 if (part->flow() == BTFT_STRONG_CHAIN && !all_image) { 01481 return false; // We are not modifying it. 01482 } 01483 BlobRegionType new_type = part->blob_type(); 01484 BlobTextFlowType new_flow = part->flow(); 01485 if (best_type == BRT_TEXT && !any_image) { 01486 new_flow = BTFT_STRONG_CHAIN; 01487 new_type = BRT_TEXT; 01488 } else if (best_type == BRT_VERT_TEXT && !any_image) { 01489 new_flow = BTFT_STRONG_CHAIN; 01490 new_type = BRT_VERT_TEXT; 01491 } else if (best_type == BRT_POLYIMAGE) { 01492 new_flow = BTFT_NONTEXT; 01493 new_type = BRT_UNKNOWN; 01494 } 01495 if (new_type != part->blob_type() || new_flow != part->flow()) { 01496 part->set_flow(new_flow); 01497 part->set_blob_type(new_type); 01498 part->SetBlobTypes(); 01499 if (debug) { 01500 tprintf("Modified part:"); 01501 part->Print(); 01502 } 01503 return true; 01504 } else { 01505 return false; 01506 } 01507 } 01508 01509 // Sets up a search box based on the part_box, padded in all directions 01510 // except direction. Also setup dist_scaling to weight x,y distances according 01511 // to the given direction. 01512 static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction, 01513 const TBOX& part_box, 01514 int min_padding, 01515 TBOX* search_box, 01516 ICOORD* dist_scaling) { 01517 *search_box = part_box; 01518 // Generate a pad value based on the min dimension of part_box, but at least 01519 // min_padding and then scaled by kMaxPadFactor. 01520 int padding = MIN(part_box.height(), part_box.width()); 01521 padding = MAX(padding, min_padding); 01522 padding *= kMaxPadFactor; 01523 search_box->pad(padding, padding); 01524 // Truncate the box in the appropriate direction and make the distance 01525 // metric slightly biased in the truncated direction. 01526 switch (direction) { 01527 case BND_LEFT: 01528 search_box->set_left(part_box.left()); 01529 *dist_scaling = ICOORD(2, 1); 01530 break; 01531 case BND_BELOW: 01532 search_box->set_bottom(part_box.bottom()); 01533 *dist_scaling = ICOORD(1, 2); 01534 break; 01535 case BND_RIGHT: 01536 search_box->set_right(part_box.right()); 01537 *dist_scaling = ICOORD(2, 1); 01538 break; 01539 case BND_ABOVE: 01540 search_box->set_top(part_box.top()); 01541 *dist_scaling = ICOORD(1, 2); 01542 break; 01543 default: 01544 ASSERT_HOST(false); 01545 } 01546 } 01547 01548 // Local enum used by SmoothInOneDirection and AccumulatePartDistances 01549 // for the different types of partition neighbour. 01550 enum NeighbourPartitionType { 01551 NPT_HTEXT, // Definite horizontal text. 01552 NPT_VTEXT, // Definite vertical text. 01553 NPT_WEAK_HTEXT, // Weakly horizontal text. Counts as HTEXT for HTEXT, but 01554 // image for image and VTEXT. 01555 NPT_WEAK_VTEXT, // Weakly vertical text. Counts as VTEXT for VTEXT, but 01556 // image for image and HTEXT. 01557 NPT_IMAGE, // Defininte non-text. 01558 NPT_COUNT // Number of array elements. 01559 }; 01560 01561 // Executes the search for SmoothRegionType in a single direction. 01562 // Creates a bounding box that is padded in all directions except direction, 01563 // and searches it for other partitions. Finds the nearest collection of 01564 // partitions that makes a decisive result (if any) and returns the type 01565 // and the distance of the collection. If there are any pixels in the 01566 // nontext_map, then the decision is biased towards image. 01567 BlobRegionType ColPartitionGrid::SmoothInOneDirection( 01568 BlobNeighbourDir direction, Pix* nontext_map, 01569 const TBOX& im_box, const FCOORD& rerotation, 01570 bool debug, const ColPartition& part, int* best_distance) { 01571 // Set up a rectangle search bounded by the part. 01572 TBOX part_box = part.bounding_box(); 01573 TBOX search_box; 01574 ICOORD dist_scaling; 01575 ComputeSearchBoxAndScaling(direction, part_box, gridsize(), 01576 &search_box, &dist_scaling); 01577 bool image_region = ImageFind::CountPixelsInRotatedBox(search_box, im_box, 01578 rerotation, 01579 nontext_map) > 0; 01580 GenericVector<int> dists[NPT_COUNT]; 01581 AccumulatePartDistances(part, dist_scaling, search_box, 01582 nontext_map, im_box, rerotation, debug, dists); 01583 // By iteratively including the next smallest distance across the vectors, 01584 // (as in a merge sort) we can use the vector indices as counts of each type 01585 // and find the nearest set of objects that give us a definite decision. 01586 int counts[NPT_COUNT]; 01587 memset(counts, 0, sizeof(counts[0]) * NPT_COUNT); 01588 // If there is image in the search box, tip the balance in image's favor. 01589 int image_bias = image_region ? kSmoothDecisionMargin / 2 : 0; 01590 BlobRegionType text_dir = part.blob_type(); 01591 BlobTextFlowType flow_type = part.flow(); 01592 int min_dist = 0; 01593 do { 01594 // Find the minimum new entry across the vectors 01595 min_dist = MAX_INT32; 01596 for (int i = 0; i < NPT_COUNT; ++i) { 01597 if (counts[i] < dists[i].size() && dists[i][counts[i]] < min_dist) 01598 min_dist = dists[i][counts[i]]; 01599 } 01600 // Step all the indices/counts forward to include min_dist. 01601 for (int i = 0; i < NPT_COUNT; ++i) { 01602 while (counts[i] < dists[i].size() && dists[i][counts[i]] <= min_dist) 01603 ++counts[i]; 01604 } 01605 *best_distance = min_dist; 01606 if (debug) { 01607 tprintf("Totals: htext=%d+%d, vtext=%d+%d, image=%d+%d, at dist=%d\n", 01608 counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT], 01609 counts[NPT_VTEXT], counts[NPT_WEAK_VTEXT], 01610 counts[NPT_IMAGE], image_bias, min_dist); 01611 } 01612 // See if we have a decision yet. 01613 int image_count = counts[NPT_IMAGE]; 01614 int htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] - 01615 (image_count + counts[NPT_WEAK_VTEXT]); 01616 int vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] - 01617 (image_count + counts[NPT_WEAK_HTEXT]); 01618 if (image_count > 0 && 01619 image_bias - htext_score >= kSmoothDecisionMargin && 01620 image_bias - vtext_score >= kSmoothDecisionMargin) { 01621 *best_distance = dists[NPT_IMAGE][0]; 01622 if (dists[NPT_WEAK_VTEXT].size() > 0 && 01623 *best_distance > dists[NPT_WEAK_VTEXT][0]) 01624 *best_distance = dists[NPT_WEAK_VTEXT][0]; 01625 if (dists[NPT_WEAK_HTEXT].size() > 0 && 01626 *best_distance > dists[NPT_WEAK_HTEXT][0]) 01627 *best_distance = dists[NPT_WEAK_HTEXT][0]; 01628 return BRT_POLYIMAGE; 01629 } 01630 if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) && 01631 counts[NPT_HTEXT] > 0 && htext_score >= kSmoothDecisionMargin) { 01632 *best_distance = dists[NPT_HTEXT][0]; 01633 return BRT_TEXT; 01634 } else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) && 01635 counts[NPT_VTEXT] > 0 && vtext_score >= kSmoothDecisionMargin) { 01636 *best_distance = dists[NPT_VTEXT][0]; 01637 return BRT_VERT_TEXT; 01638 } 01639 } while (min_dist < MAX_INT32); 01640 return BRT_UNKNOWN; 01641 } 01642 01643 // Counts the partitions in the given search_box by appending the gap 01644 // distance (scaled by dist_scaling) of the part from the base_part to the 01645 // vector of the appropriate type for the partition. Prior to return, the 01646 // vectors in the dists array are sorted in increasing order. 01647 // The nontext_map (+im_box, rerotation) is used to make text invisible if 01648 // there is non-text in between. 01649 // dists must be an array of GenericVectors of size NPT_COUNT. 01650 void ColPartitionGrid::AccumulatePartDistances(const ColPartition& base_part, 01651 const ICOORD& dist_scaling, 01652 const TBOX& search_box, 01653 Pix* nontext_map, 01654 const TBOX& im_box, 01655 const FCOORD& rerotation, 01656 bool debug, 01657 GenericVector<int>* dists) { 01658 const TBOX& part_box = base_part.bounding_box(); 01659 ColPartitionGridSearch rsearch(this); 01660 rsearch.SetUniqueMode(true); 01661 rsearch.StartRectSearch(search_box); 01662 ColPartition* neighbour; 01663 // Search for compatible neighbours with a similar strokewidth, but not 01664 // on the other side of a tab vector. 01665 while ((neighbour = rsearch.NextRectSearch()) != NULL) { 01666 if (neighbour->IsUnMergeableType() || 01667 !base_part.ConfirmNoTabViolation(*neighbour) || 01668 neighbour == &base_part) 01669 continue; 01670 TBOX nbox = neighbour->bounding_box(); 01671 BlobRegionType n_type = neighbour->blob_type(); 01672 if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) && 01673 !ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation, 01674 nontext_map)) 01675 continue; // Text not visible the other side of image. 01676 if (BLOBNBOX::IsLineType(n_type)) 01677 continue; // Don't use horizontal lines as neighbours. 01678 int x_gap = MAX(part_box.x_gap(nbox), 0); 01679 int y_gap = MAX(part_box.y_gap(nbox), 0); 01680 int n_dist = x_gap * dist_scaling.x() + y_gap* dist_scaling.y(); 01681 if (debug) { 01682 tprintf("Part has x-gap=%d, y=%d, dist=%d at:", 01683 x_gap, y_gap, n_dist); 01684 nbox.print(); 01685 } 01686 // Truncate the number of boxes, so text doesn't get too much advantage. 01687 int n_boxes = MIN(neighbour->boxes_count(), kSmoothDecisionMargin); 01688 BlobTextFlowType n_flow = neighbour->flow(); 01689 GenericVector<int>* count_vector = NULL; 01690 if (n_flow == BTFT_STRONG_CHAIN) { 01691 if (n_type == BRT_TEXT) 01692 count_vector = &dists[NPT_HTEXT]; 01693 else 01694 count_vector = &dists[NPT_VTEXT]; 01695 if (debug) { 01696 tprintf("%s %d\n", n_type == BRT_TEXT ? "Htext" : "Vtext", n_boxes); 01697 } 01698 } else if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) && 01699 (n_flow == BTFT_CHAIN || n_flow == BTFT_NEIGHBOURS)) { 01700 // Medium text counts as weak, and all else counts as image. 01701 if (n_type == BRT_TEXT) 01702 count_vector = &dists[NPT_WEAK_HTEXT]; 01703 else 01704 count_vector = &dists[NPT_WEAK_VTEXT]; 01705 if (debug) tprintf("Weak %d\n", n_boxes); 01706 } else { 01707 count_vector = &dists[NPT_IMAGE]; 01708 if (debug) tprintf("Image %d\n", n_boxes); 01709 } 01710 if (count_vector != NULL) { 01711 for (int i = 0; i < n_boxes; ++i) 01712 count_vector->push_back(n_dist); 01713 } 01714 if (debug) { 01715 neighbour->Print(); 01716 } 01717 } 01718 for (int i = 0; i < NPT_COUNT; ++i) 01719 dists[i].sort(); 01720 } 01721 01722 // Improves the margins of the part ColPartition by searching for 01723 // neighbours that vertically overlap significantly. 01724 // columns may be NULL, and indicates the assigned column structure this 01725 // is applicable to part. 01726 void ColPartitionGrid::FindPartitionMargins(ColPartitionSet* columns, 01727 ColPartition* part) { 01728 // Set up a rectangle search x-bounded by the column and y by the part. 01729 TBOX box = part->bounding_box(); 01730 int y = part->MidY(); 01731 // Initial left margin is based on the column, if there is one. 01732 int left_margin = bleft().x(); 01733 int right_margin = tright().x(); 01734 if (columns != NULL) { 01735 ColPartition* column = columns->ColumnContaining(box.left(), y); 01736 if (column != NULL) 01737 left_margin = column->LeftAtY(y); 01738 column = columns->ColumnContaining(box.right(), y); 01739 if (column != NULL) 01740 right_margin = column->RightAtY(y); 01741 } 01742 left_margin -= kColumnWidthFactor; 01743 right_margin += kColumnWidthFactor; 01744 // Search for ColPartitions that reduce the margin. 01745 left_margin = FindMargin(box.left() + box.height(), true, left_margin, 01746 box.bottom(), box.top(), part); 01747 part->set_left_margin(left_margin); 01748 // Search for ColPartitions that reduce the margin. 01749 right_margin = FindMargin(box.right() - box.height(), false, right_margin, 01750 box.bottom(), box.top(), part); 01751 part->set_right_margin(right_margin); 01752 } 01753 01754 // Starting at x, and going in the specified direction, up to x_limit, finds 01755 // the margin for the given y range by searching sideways, 01756 // and ignoring not_this. 01757 int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit, 01758 int y_bottom, int y_top, 01759 const ColPartition* not_this) { 01760 int height = y_top - y_bottom; 01761 // Iterate the ColPartitions in the grid. 01762 ColPartitionGridSearch side_search(this); 01763 side_search.SetUniqueMode(true); 01764 side_search.StartSideSearch(x, y_bottom, y_top); 01765 ColPartition* part; 01766 while ((part = side_search.NextSideSearch(right_to_left)) != NULL) { 01767 // Ignore itself. 01768 if (part == not_this) // || part->IsLineType()) 01769 continue; 01770 // Must overlap by enough, based on the min of the heights, so 01771 // large partitions can't smash through small ones. 01772 TBOX box = part->bounding_box(); 01773 int min_overlap = MIN(height, box.height()); 01774 min_overlap = static_cast<int>(min_overlap * kMarginOverlapFraction + 0.5); 01775 int y_overlap = MIN(y_top, box.top()) - MAX(y_bottom, box.bottom()); 01776 if (y_overlap < min_overlap) 01777 continue; 01778 // Must be going the right way. 01779 int x_edge = right_to_left ? box.right() : box.left(); 01780 if ((x_edge < x) != right_to_left) 01781 continue; 01782 // If we have gone past x_limit, then x_limit will do. 01783 if ((x_edge < x_limit) == right_to_left) 01784 break; 01785 // It reduces x limit, so save the new one. 01786 x_limit = x_edge; 01787 } 01788 return x_limit; 01789 } 01790 01791 01792 } // namespace tesseract.