|
tesseract 3.04.01
|
00001 00002 // File: colpartitionset.cpp 00003 // Description: Class to hold a list of ColPartitions of the page that 00004 // correspond roughly to columns. 00005 // Author: Ray Smith 00006 // Created: Thu Aug 14 10:54:01 PDT 2008 00007 // 00008 // (C) Copyright 2008, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #ifdef HAVE_CONFIG_H 00022 #include "config_auto.h" 00023 #endif 00024 00025 #include "colpartitionset.h" 00026 #include "ndminx.h" 00027 #include "workingpartset.h" 00028 #include "tablefind.h" 00029 00030 namespace tesseract { 00031 00032 // Minimum width of a column to be interesting as a multiple of resolution. 00033 const double kMinColumnWidth = 2.0 / 3; 00034 00035 ELISTIZE(ColPartitionSet) 00036 00037 ColPartitionSet::ColPartitionSet(ColPartition_LIST* partitions) { 00038 ColPartition_IT it(&parts_); 00039 it.add_list_after(partitions); 00040 ComputeCoverage(); 00041 } 00042 00043 ColPartitionSet::ColPartitionSet(ColPartition* part) { 00044 ColPartition_IT it(&parts_); 00045 it.add_after_then_move(part); 00046 ComputeCoverage(); 00047 } 00048 00049 ColPartitionSet::~ColPartitionSet() { 00050 } 00051 00052 // Returns the number of columns of good width. 00053 int ColPartitionSet::GoodColumnCount() const { 00054 int num_good_cols = 0; 00055 // This is a read-only iteration of the list. 00056 ColPartition_IT it(const_cast<ColPartition_LIST*>(&parts_)); 00057 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00058 if (it.data()->good_width()) ++num_good_cols; 00059 } 00060 return num_good_cols; 00061 } 00062 00063 // Return an element of the parts_ list from its index. 00064 ColPartition* ColPartitionSet::GetColumnByIndex(int index) { 00065 ColPartition_IT it(&parts_); 00066 it.mark_cycle_pt(); 00067 for (int i = 0; i < index && !it.cycled_list(); ++i, it.forward()); 00068 if (it.cycled_list()) 00069 return NULL; 00070 return it.data(); 00071 } 00072 00073 // Return the ColPartition that contains the given coords, if any, else NULL. 00074 ColPartition* ColPartitionSet::ColumnContaining(int x, int y) { 00075 ColPartition_IT it(&parts_); 00076 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00077 ColPartition* part = it.data(); 00078 if (part->ColumnContains(x, y)) 00079 return part; 00080 } 00081 return NULL; 00082 } 00083 00084 // Extract all the parts from the list, relinquishing ownership. 00085 void ColPartitionSet::RelinquishParts() { 00086 ColPartition_IT it(&parts_); 00087 while (!it.empty()) { 00088 it.extract(); 00089 it.forward(); 00090 } 00091 } 00092 00093 // Attempt to improve this by adding partitions or expanding partitions. 00094 void ColPartitionSet::ImproveColumnCandidate(WidthCallback* cb, 00095 PartSetVector* src_sets) { 00096 int set_size = src_sets->size(); 00097 // Iterate over the provided column sets, as each one may have something 00098 // to improve this. 00099 for (int i = 0; i < set_size; ++i) { 00100 ColPartitionSet* column_set = src_sets->get(i); 00101 if (column_set == NULL) 00102 continue; 00103 // Iterate over the parts in this and column_set, adding bigger or 00104 // new parts in column_set to this. 00105 ColPartition_IT part_it(&parts_); 00106 ASSERT_HOST(!part_it.empty()); 00107 int prev_right = MIN_INT32; 00108 part_it.mark_cycle_pt(); 00109 ColPartition_IT col_it(&column_set->parts_); 00110 for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) { 00111 ColPartition* col_part = col_it.data(); 00112 if (col_part->blob_type() < BRT_UNKNOWN) 00113 continue; // Ignore image partitions. 00114 int col_left = col_part->left_key(); 00115 int col_right = col_part->right_key(); 00116 // Sync-up part_it (in this) so it matches the col_part in column_set. 00117 ColPartition* part = part_it.data(); 00118 while (!part_it.at_last() && part->right_key() < col_left) { 00119 prev_right = part->right_key(); 00120 part_it.forward(); 00121 part = part_it.data(); 00122 } 00123 int part_left = part->left_key(); 00124 int part_right = part->right_key(); 00125 if (part_right < col_left || col_right < part_left) { 00126 // There is no overlap so this is a new partition. 00127 AddPartition(col_part->ShallowCopy(), &part_it); 00128 continue; 00129 } 00130 // Check the edges of col_part to see if they can improve part. 00131 bool part_width_ok = cb->Run(part->KeyWidth(part_left, part_right)); 00132 if (col_left < part_left && col_left > prev_right) { 00133 // The left edge of the column is better and it doesn't overlap, 00134 // so we can potentially expand it. 00135 int col_box_left = col_part->BoxLeftKey(); 00136 bool tab_width_ok = cb->Run(part->KeyWidth(col_left, part_right)); 00137 bool box_width_ok = cb->Run(part->KeyWidth(col_box_left, part_right)); 00138 if (tab_width_ok || (!part_width_ok )) { 00139 // The tab is leaving the good column metric at least as good as 00140 // it was before, so use the tab. 00141 part->CopyLeftTab(*col_part, false); 00142 part->SetColumnGoodness(cb); 00143 } else if (col_box_left < part_left && 00144 (box_width_ok || !part_width_ok)) { 00145 // The box is leaving the good column metric at least as good as 00146 // it was before, so use the box. 00147 part->CopyLeftTab(*col_part, true); 00148 part->SetColumnGoodness(cb); 00149 } 00150 part_left = part->left_key(); 00151 } 00152 if (col_right > part_right && 00153 (part_it.at_last() || 00154 part_it.data_relative(1)->left_key() > col_right)) { 00155 // The right edge is better, so we can possibly expand it. 00156 int col_box_right = col_part->BoxRightKey(); 00157 bool tab_width_ok = cb->Run(part->KeyWidth(part_left, col_right)); 00158 bool box_width_ok = cb->Run(part->KeyWidth(part_left, col_box_right)); 00159 if (tab_width_ok || (!part_width_ok )) { 00160 // The tab is leaving the good column metric at least as good as 00161 // it was before, so use the tab. 00162 part->CopyRightTab(*col_part, false); 00163 part->SetColumnGoodness(cb); 00164 } else if (col_box_right > part_right && 00165 (box_width_ok || !part_width_ok)) { 00166 // The box is leaving the good column metric at least as good as 00167 // it was before, so use the box. 00168 part->CopyRightTab(*col_part, true); 00169 part->SetColumnGoodness(cb); 00170 } 00171 } 00172 } 00173 } 00174 ComputeCoverage(); 00175 } 00176 00177 // If this set is good enough to represent a new partitioning into columns, 00178 // add it to the vector of sets, otherwise delete it. 00179 void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector* column_sets, 00180 WidthCallback* cb) { 00181 bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), 00182 bounding_box_.bottom()); 00183 if (debug) { 00184 tprintf("Considering new column candidate:\n"); 00185 Print(); 00186 } 00187 if (!LegalColumnCandidate()) { 00188 if (debug) { 00189 tprintf("Not a legal column candidate:\n"); 00190 Print(); 00191 } 00192 delete this; 00193 return; 00194 } 00195 for (int i = 0; i < column_sets->size(); ++i) { 00196 ColPartitionSet* columns = column_sets->get(i); 00197 // In ordering the column set candidates, good_coverage_ is king, 00198 // followed by good_column_count_ and then bad_coverage_. 00199 bool better = good_coverage_ > columns->good_coverage_; 00200 if (good_coverage_ == columns->good_coverage_) { 00201 better = good_column_count_ > columns->good_column_count_; 00202 if (good_column_count_ == columns->good_column_count_) { 00203 better = bad_coverage_ > columns->bad_coverage_; 00204 } 00205 } 00206 if (better) { 00207 // The new one is better so add it. 00208 if (debug) 00209 tprintf("Good one\n"); 00210 column_sets->insert(this, i); 00211 return; 00212 } 00213 if (columns->CompatibleColumns(false, this, cb)) { 00214 if (debug) 00215 tprintf("Duplicate\n"); 00216 delete this; 00217 return; // It is not unique. 00218 } 00219 } 00220 if (debug) 00221 tprintf("Added to end\n"); 00222 column_sets->push_back(this); 00223 } 00224 00225 // Return true if the partitions in other are all compatible with the columns 00226 // in this. 00227 bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other, 00228 WidthCallback* cb) { 00229 if (debug) { 00230 tprintf("CompatibleColumns testing compatibility\n"); 00231 Print(); 00232 other->Print(); 00233 } 00234 if (other->parts_.empty()) { 00235 if (debug) 00236 tprintf("CompatibleColumns true due to empty other\n"); 00237 return true; 00238 } 00239 ColPartition_IT it(&other->parts_); 00240 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00241 ColPartition* part = it.data(); 00242 if (part->blob_type() < BRT_UNKNOWN) { 00243 if (debug) { 00244 tprintf("CompatibleColumns ignoring image partition\n"); 00245 part->Print(); 00246 } 00247 continue; // Image partitions are irrelevant to column compatibility. 00248 } 00249 int y = part->MidY(); 00250 int left = part->bounding_box().left(); 00251 int right = part->bounding_box().right(); 00252 ColPartition* left_col = ColumnContaining(left, y); 00253 ColPartition* right_col = ColumnContaining(right, y); 00254 if (right_col == NULL || left_col == NULL) { 00255 if (debug) { 00256 tprintf("CompatibleColumns false due to partition edge outside\n"); 00257 part->Print(); 00258 } 00259 return false; // A partition edge lies outside of all columns 00260 } 00261 if (right_col != left_col && cb->Run(right - left)) { 00262 if (debug) { 00263 tprintf("CompatibleColumns false due to good width in multiple cols\n"); 00264 part->Print(); 00265 } 00266 return false; // Partition with a good width must be in a single column. 00267 } 00268 00269 ColPartition_IT it2= it; 00270 while (!it2.at_last()) { 00271 it2.forward(); 00272 ColPartition* next_part = it2.data(); 00273 if (!BLOBNBOX::IsTextType(next_part->blob_type())) 00274 continue; // Non-text partitions are irrelevant. 00275 int next_left = next_part->bounding_box().left(); 00276 if (next_left == right) { 00277 break; // They share the same edge, so one must be a pull-out. 00278 } 00279 // Search to see if right and next_left fall within a single column. 00280 ColPartition* next_left_col = ColumnContaining(next_left, y); 00281 if (right_col == next_left_col) { 00282 // There is a column break in this column. 00283 // This can be due to a figure caption within a column, a pull-out 00284 // block, or a simple broken textline that remains to be merged: 00285 // all allowed, or a change in column layout: not allowed. 00286 // If both partitions are of good width, then it is likely 00287 // a change in column layout, otherwise probably an allowed situation. 00288 if (part->good_width() && next_part->good_width()) { 00289 if (debug) { 00290 int next_right = next_part->bounding_box().right(); 00291 tprintf("CompatibleColumns false due to 2 parts of good width\n"); 00292 tprintf("part1 %d-%d, part2 %d-%d\n", 00293 left, right, next_left, next_right); 00294 right_col->Print(); 00295 } 00296 return false; 00297 } 00298 } 00299 break; 00300 } 00301 } 00302 if (debug) 00303 tprintf("CompatibleColumns true!\n"); 00304 return true; 00305 } 00306 00307 // Returns the total width of all blobs in the part_set that do not lie 00308 // within an approved column. Used as a cost measure for using this 00309 // column set over another that might be compatible. 00310 int ColPartitionSet::UnmatchedWidth(ColPartitionSet* part_set) { 00311 int total_width = 0; 00312 ColPartition_IT it(&part_set->parts_); 00313 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00314 ColPartition* part = it.data(); 00315 if (!BLOBNBOX::IsTextType(part->blob_type())) { 00316 continue; // Non-text partitions are irrelevant to column compatibility. 00317 } 00318 int y = part->MidY(); 00319 BLOBNBOX_C_IT box_it(part->boxes()); 00320 for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { 00321 const TBOX& box = it.data()->bounding_box(); 00322 // Assume that the whole blob is outside any column iff its x-middle 00323 // is outside. 00324 int x = (box.left() + box.right()) / 2; 00325 ColPartition* col = ColumnContaining(x, y); 00326 if (col == NULL) 00327 total_width += box.width(); 00328 } 00329 } 00330 return total_width; 00331 } 00332 00333 // Return true if this ColPartitionSet makes a legal column candidate by 00334 // having legal individual partitions and non-overlapping adjacent pairs. 00335 bool ColPartitionSet::LegalColumnCandidate() { 00336 ColPartition_IT it(&parts_); 00337 if (it.empty()) 00338 return false; 00339 bool any_text_parts = false; 00340 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00341 ColPartition* part = it.data(); 00342 if (BLOBNBOX::IsTextType(part->blob_type())) { 00343 if (!part->IsLegal()) 00344 return false; // Individual partition is illegal. 00345 any_text_parts = true; 00346 } 00347 if (!it.at_last()) { 00348 ColPartition* next_part = it.data_relative(1); 00349 if (next_part->left_key() < part->right_key()) { 00350 return false; 00351 } 00352 } 00353 } 00354 return any_text_parts; 00355 } 00356 00357 // Return a copy of this. If good_only will only copy the Good ColPartitions. 00358 ColPartitionSet* ColPartitionSet::Copy(bool good_only) { 00359 ColPartition_LIST copy_parts; 00360 ColPartition_IT src_it(&parts_); 00361 ColPartition_IT dest_it(©_parts); 00362 for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { 00363 ColPartition* part = src_it.data(); 00364 if (BLOBNBOX::IsTextType(part->blob_type()) && 00365 (!good_only || part->good_width() || part->good_column())) 00366 dest_it.add_after_then_move(part->ShallowCopy()); 00367 } 00368 if (dest_it.empty()) 00369 return NULL; 00370 return new ColPartitionSet(©_parts); 00371 } 00372 00373 // Return the bounding boxes of columns at the given y-range 00374 void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top, 00375 ColSegment_LIST *segments) { 00376 ColPartition_IT it(&parts_); 00377 ColSegment_IT col_it(segments); 00378 col_it.move_to_last(); 00379 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00380 ColPartition* part = it.data(); 00381 ICOORD bot_left(part->LeftAtY(y_top), y_bottom); 00382 ICOORD top_right(part->RightAtY(y_bottom), y_top); 00383 ColSegment *col_seg = new ColSegment(); 00384 col_seg->InsertBox(TBOX(bot_left, top_right)); 00385 col_it.add_after_then_move(col_seg); 00386 } 00387 } 00388 00389 // Display the edges of the columns at the given y coords. 00390 void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top, 00391 ScrollView* win) { 00392 #ifndef GRAPHICS_DISABLED 00393 ColPartition_IT it(&parts_); 00394 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00395 ColPartition* part = it.data(); 00396 win->Line(part->LeftAtY(y_top), y_top, part->LeftAtY(y_bottom), y_bottom); 00397 win->Line(part->RightAtY(y_top), y_top, part->RightAtY(y_bottom), y_bottom); 00398 } 00399 #endif // GRAPHICS_DISABLED 00400 } 00401 00402 // Return the ColumnSpanningType that best explains the columns overlapped 00403 // by the given coords(left,right,y), with the given margins. 00404 // Also return the first and last column index touched by the coords and 00405 // the leftmost spanned column. 00406 // Column indices are 2n + 1 for real columns (0 based) and even values 00407 // represent the gaps in between columns, with 0 being left of the leftmost. 00408 // resolution refers to the ppi resolution of the image. 00409 ColumnSpanningType ColPartitionSet::SpanningType(int resolution, 00410 int left, int right, 00411 int height, int y, 00412 int left_margin, 00413 int right_margin, 00414 int* first_col, 00415 int* last_col, 00416 int* first_spanned_col) { 00417 *first_col = -1; 00418 *last_col = -1; 00419 *first_spanned_col = -1; 00420 int margin_columns = 0; 00421 ColPartition_IT it(&parts_); 00422 int col_index = 1; 00423 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) { 00424 ColPartition* part = it.data(); 00425 if (part->ColumnContains(left, y) || 00426 (it.at_first() && part->ColumnContains(left + height, y))) { 00427 // In the default case, first_col is set, but columns_spanned remains 00428 // zero, so first_col will get reset in the first column genuinely 00429 // spanned, but we can tell the difference from a noise partition 00430 // that touches no column. 00431 *first_col = col_index; 00432 if (part->ColumnContains(right, y) || 00433 (it.at_last() && part->ColumnContains(right - height, y))) { 00434 // Both within a single column. 00435 *last_col = col_index; 00436 return CST_FLOWING; 00437 } 00438 if (left_margin <= part->LeftAtY(y)) { 00439 // It completely spans this column. 00440 *first_spanned_col = col_index; 00441 margin_columns = 1; 00442 } 00443 } else if (part->ColumnContains(right, y) || 00444 (it.at_last() && part->ColumnContains(right - height, y))) { 00445 if (*first_col < 0) { 00446 // It started in-between. 00447 *first_col = col_index - 1; 00448 } 00449 if (right_margin >= part->RightAtY(y)) { 00450 // It completely spans this column. 00451 if (margin_columns == 0) 00452 *first_spanned_col = col_index; 00453 ++margin_columns; 00454 } 00455 *last_col = col_index; 00456 break; 00457 } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) { 00458 // Neither left nor right are contained within, so it spans this 00459 // column. 00460 if (*first_col < 0) { 00461 // It started in between the previous column and the current column. 00462 *first_col = col_index - 1; 00463 } 00464 if (margin_columns == 0) 00465 *first_spanned_col = col_index; 00466 *last_col = col_index; 00467 } else if (right < part->LeftAtY(y)) { 00468 // We have gone past the end. 00469 *last_col = col_index - 1; 00470 if (*first_col < 0) { 00471 // It must lie completely between columns =>noise. 00472 *first_col = col_index - 1; 00473 } 00474 break; 00475 } 00476 } 00477 if (*first_col < 0) 00478 *first_col = col_index - 1; // The last in-between. 00479 if (*last_col < 0) 00480 *last_col = col_index - 1; // The last in-between. 00481 ASSERT_HOST(*first_col >= 0 && *last_col >= 0); 00482 ASSERT_HOST(*first_col <= *last_col); 00483 if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) { 00484 // Neither end was in a column, and it didn't span any, so it lies 00485 // entirely between columns, therefore noise. 00486 return CST_NOISE; 00487 } else if (margin_columns <= 1) { 00488 // An exception for headings that stick outside of single-column text. 00489 if (margin_columns == 1 && parts_.singleton()) { 00490 return CST_HEADING; 00491 } 00492 // It is a pullout, as left and right were not in the same column, but 00493 // it doesn't go to the edge of its start and end. 00494 return CST_PULLOUT; 00495 } 00496 // Its margins went to the edges of first and last columns => heading. 00497 return CST_HEADING; 00498 } 00499 00500 // The column_set has changed. Close down all in-progress WorkingPartSets in 00501 // columns that do not match and start new ones for the new columns in this. 00502 // As ColPartitions are turned into BLOCKs, the used ones are put in 00503 // used_parts, as they still need to be referenced in the grid. 00504 void ColPartitionSet::ChangeWorkColumns(const ICOORD& bleft, 00505 const ICOORD& tright, 00506 int resolution, 00507 ColPartition_LIST* used_parts, 00508 WorkingPartSet_LIST* working_set_list) { 00509 // Move the input list to a temporary location so we can delete its elements 00510 // as we add them to the output working_set. 00511 WorkingPartSet_LIST work_src; 00512 WorkingPartSet_IT src_it(&work_src); 00513 src_it.add_list_after(working_set_list); 00514 src_it.move_to_first(); 00515 WorkingPartSet_IT dest_it(working_set_list); 00516 // Completed blocks and to_blocks are accumulated and given to the first new 00517 // one whenever we keep a column, or at the end. 00518 BLOCK_LIST completed_blocks; 00519 TO_BLOCK_LIST to_blocks; 00520 WorkingPartSet* first_new_set = NULL; 00521 WorkingPartSet* working_set = NULL; 00522 ColPartition_IT col_it(&parts_); 00523 for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) { 00524 ColPartition* column = col_it.data(); 00525 // Any existing column to the left of column is completed. 00526 while (!src_it.empty() && 00527 ((working_set = src_it.data())->column() == NULL || 00528 working_set->column()->right_key() <= column->left_key())) { 00529 src_it.extract(); 00530 working_set->ExtractCompletedBlocks(bleft, tright, resolution, 00531 used_parts, &completed_blocks, 00532 &to_blocks); 00533 delete working_set; 00534 src_it.forward(); 00535 } 00536 // Make a new between-column WorkingSet for before the current column. 00537 working_set = new WorkingPartSet(NULL); 00538 dest_it.add_after_then_move(working_set); 00539 if (first_new_set == NULL) 00540 first_new_set = working_set; 00541 // A matching column gets to stay, and first_new_set gets all the 00542 // completed_sets. 00543 working_set = src_it.empty() ? NULL : src_it.data(); 00544 if (working_set != NULL && 00545 working_set->column()->MatchingColumns(*column)) { 00546 working_set->set_column(column); 00547 dest_it.add_after_then_move(src_it.extract()); 00548 src_it.forward(); 00549 first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks); 00550 first_new_set = NULL; 00551 } else { 00552 // Just make a new working set for the current column. 00553 working_set = new WorkingPartSet(column); 00554 dest_it.add_after_then_move(working_set); 00555 } 00556 } 00557 // Complete any remaining src working sets. 00558 while (!src_it.empty()) { 00559 working_set = src_it.extract(); 00560 working_set->ExtractCompletedBlocks(bleft, tright, resolution, 00561 used_parts, &completed_blocks, 00562 &to_blocks); 00563 delete working_set; 00564 src_it.forward(); 00565 } 00566 // Make a new between-column WorkingSet for after the last column. 00567 working_set = new WorkingPartSet(NULL); 00568 dest_it.add_after_then_move(working_set); 00569 if (first_new_set == NULL) 00570 first_new_set = working_set; 00571 // The first_new_set now gets any accumulated completed_parts/blocks. 00572 first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks); 00573 } 00574 00575 // Accumulate the widths and gaps into the given variables. 00576 void ColPartitionSet::AccumulateColumnWidthsAndGaps(int* total_width, 00577 int* width_samples, 00578 int* total_gap, 00579 int* gap_samples) { 00580 ColPartition_IT it(&parts_); 00581 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00582 ColPartition* part = it.data(); 00583 *total_width += part->ColumnWidth(); 00584 ++*width_samples; 00585 if (!it.at_last()) { 00586 ColPartition* next_part = it.data_relative(1); 00587 int gap = part->KeyWidth(part->right_key(), next_part->left_key()); 00588 *total_gap += gap; 00589 ++*gap_samples; 00590 } 00591 } 00592 } 00593 00594 // Provide debug output for this ColPartitionSet and all the ColPartitions. 00595 void ColPartitionSet::Print() { 00596 ColPartition_IT it(&parts_); 00597 tprintf("Partition set of %d parts, %d good, coverage=%d+%d" 00598 " (%d,%d)->(%d,%d)\n", 00599 it.length(), good_column_count_, good_coverage_, bad_coverage_, 00600 bounding_box_.left(), bounding_box_.bottom(), 00601 bounding_box_.right(), bounding_box_.top()); 00602 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00603 ColPartition* part = it.data(); 00604 part->Print(); 00605 } 00606 } 00607 00608 // PRIVATE CODE. 00609 00610 // Add the given partition to the list in the appropriate place. 00611 void ColPartitionSet::AddPartition(ColPartition* new_part, 00612 ColPartition_IT* it) { 00613 AddPartitionCoverageAndBox(*new_part); 00614 int new_right = new_part->right_key(); 00615 if (it->data()->left_key() >= new_right) 00616 it->add_before_stay_put(new_part); 00617 else 00618 it->add_after_stay_put(new_part); 00619 } 00620 00621 // Compute the coverage and good column count. Coverage is the amount of the 00622 // width of the page (in pixels) that is covered by ColPartitions, which are 00623 // used to provide candidate column layouts. 00624 // Coverage is split into good and bad. Good coverage is provided by 00625 // ColPartitions of a frequent width (according to the callback function 00626 // provided by TabFinder::WidthCB, which accesses stored statistics on the 00627 // widths of ColParititions) and bad coverage is provided by all other 00628 // ColPartitions, even if they have tab vectors at both sides. Thus: 00629 // |-----------------------------------------------------------------| 00630 // | Double width heading | 00631 // |-----------------------------------------------------------------| 00632 // |-------------------------------| |-------------------------------| 00633 // | Common width ColParition | | Common width ColPartition | 00634 // |-------------------------------| |-------------------------------| 00635 // the layout with two common-width columns has better coverage than the 00636 // double width heading, because the coverage is "good," even though less in 00637 // total coverage than the heading, because the heading coverage is "bad." 00638 void ColPartitionSet::ComputeCoverage() { 00639 // Count the number of good columns and sum their width. 00640 ColPartition_IT it(&parts_); 00641 good_column_count_ = 0; 00642 good_coverage_ = 0; 00643 bad_coverage_ = 0; 00644 bounding_box_ = TBOX(); 00645 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00646 ColPartition* part = it.data(); 00647 AddPartitionCoverageAndBox(*part); 00648 } 00649 } 00650 00651 // Adds the coverage, column count and box for a single partition, 00652 // without adding it to the list. (Helper factored from ComputeCoverage.) 00653 void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) { 00654 bounding_box_ += part.bounding_box(); 00655 int coverage = part.ColumnWidth(); 00656 if (part.good_width()) { 00657 good_coverage_ += coverage; 00658 good_column_count_ += 2; 00659 } else { 00660 if (part.blob_type() < BRT_UNKNOWN) 00661 coverage /= 2; 00662 if (part.good_column()) 00663 ++good_column_count_; 00664 bad_coverage_ += coverage; 00665 } 00666 } 00667 00668 } // namespace tesseract.