21 #include "config_auto.h" 82 : left_margin_(-INT32_MAX), right_margin_(INT32_MAX),
83 median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_height_(0),
84 median_left_(INT32_MAX), median_right_(-INT32_MAX), median_width_(0),
85 blob_type_(blob_type), flow_(
BTFT_NONE), good_blob_score_(0),
86 good_width_(false), good_column_(false),
87 left_key_tab_(false), right_key_tab_(false),
88 left_key_(0), right_key_(0), type_(
PT_UNKNOWN), vertical_(vertical),
89 working_set_(
nullptr), last_add_was_vertical_(false), block_owned_(false),
90 desperately_merged_(false),
91 first_column_(-1), last_column_(-1), column_set_(
nullptr),
92 side_step_(0), top_spacing_(0), bottom_spacing_(0),
93 type_before_table_(PT_UNKNOWN), inside_table_column_(false),
94 nearest_neighbor_above_(
nullptr), nearest_neighbor_below_(
nullptr),
95 space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0),
97 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
127 ColPartition_LIST* big_part_list) {
136 if (big_part_list !=
nullptr) {
137 ColPartition_IT part_it(big_part_list);
138 part_it.add_to_end(single);
146 ColPartition_C_IT it(&upper_partners_);
147 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
148 it.data()->RemovePartner(
false,
this);
150 it.set_to_list(&lower_partners_);
151 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
152 it.data()->RemovePartner(
true,
this);
160 int left,
int bottom,
161 int right,
int top) {
163 part->bounding_box_ =
TBOX(left, bottom, right, top);
164 part->median_bottom_ = bottom;
165 part->median_top_ = top;
166 part->median_height_ = top - bottom;
167 part->median_left_ = left;
168 part->median_right_ = right;
169 part->median_width_ = right - left;
170 part->left_key_ = part->BoxLeftKey();
171 part->right_key_ = part->BoxRightKey();
182 if (boxes_.length() == 0) {
185 bounding_box_ += box;
189 if (!last_add_was_vertical_) {
190 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
191 last_add_was_vertical_ =
true;
193 boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>,
true, bbox);
195 if (last_add_was_vertical_) {
196 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
197 last_add_was_vertical_ =
false;
199 boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>,
true, bbox);
206 tprintf(
"Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
208 bounding_box_.
left(), bounding_box_.
right());
213 BLOBNBOX_C_IT bb_it(&boxes_);
214 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
215 if (box == bb_it.data()) {
227 BLOBNBOX_C_IT bb_it(&boxes_);
228 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
231 if (biggest ==
nullptr ||
235 if (biggest ==
nullptr ||
246 BLOBNBOX_C_IT bb_it(&boxes_);
247 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
248 if (box != bb_it.data()) {
249 result += bb_it.data()->bounding_box();
258 BLOBNBOX_C_IT bb_it(&boxes_);
259 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
262 if (other ==
nullptr) {
274 BLOBNBOX_C_IT bb_it(&boxes_);
275 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
287 BLOBNBOX_C_IT bb_it(&boxes_);
288 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
290 if (bblob->
owner() ==
this)
300 BLOBNBOX_C_IT bb_it(&boxes_);
301 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
308 if (bb_it.empty())
return false;
319 for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
321 delete bblob->
cblob();
331 BLOBNBOX_CLIST reversed_boxes;
332 BLOBNBOX_C_IT reversed_it(&reversed_boxes);
334 BLOBNBOX_C_IT bb_it(&boxes_);
335 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
336 reversed_it.add_before_then_move(bb_it.extract());
338 bb_it.add_list_after(&reversed_boxes);
340 int tmp = left_margin_;
341 left_margin_ = -right_margin_;
342 right_margin_ = -tmp;
353 if (bounding_box_.
left() > bounding_box_.
right()) {
355 tprintf(
"Bounding box invalid\n");
360 if (left_margin_ > bounding_box_.
left() ||
361 right_margin_ < bounding_box_.
right()) {
370 tprintf(
"Key inside box: %d v %d or %d v %d\n",
381 int y = (
MidY() + other.
MidY()) / 2;
424 if (bounding_box_.
right() < other.bounding_box_.
left() &&
427 if (other.bounding_box_.
right() < bounding_box_.
left() &&
430 if (bounding_box_.
left() > other.bounding_box_.
right() &&
433 if (other.bounding_box_.
left() > bounding_box_.
right() &&
441 double fractional_tolerance,
442 double constant_tolerance)
const {
444 int nonmatch_count = 0;
445 BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
446 BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
447 box_it.mark_cycle_pt();
448 other_it.mark_cycle_pt();
449 while (!box_it.cycled_list() && !other_it.cycled_list()) {
450 if (box_it.data()->MatchingStrokeWidth(*other_it.data(),
451 fractional_tolerance,
459 return match_count > nonmatch_count;
470 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
471 int min_top = INT32_MAX;
472 int max_bottom = -INT32_MAX;
473 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
477 tprintf(
"Blob is not a diacritic:");
489 bool result = min_top > candidate.median_bottom_ &&
490 max_bottom < candidate.median_top_;
495 tprintf(
"y ranges don\'t overlap: %d-%d / %d-%d\n",
496 max_bottom, min_top, median_bottom_, median_top_);
505 if (tab_vector !=
nullptr) {
509 left_key_tab_ =
false;
517 if (tab_vector !=
nullptr) {
518 right_key_ = tab_vector->
sort_key();
521 right_key_tab_ =
false;
530 left_key_tab_ = take_box ?
false : src.left_key_tab_;
532 left_key_ = src.left_key_;
537 if (left_margin_ > bounding_box_.
left())
538 left_margin_ = src.left_margin_;
543 right_key_tab_ = take_box ?
false : src.right_key_tab_;
544 if (right_key_tab_) {
545 right_key_ = src.right_key_;
550 if (right_margin_ < bounding_box_.
right())
551 right_margin_ = src.right_margin_;
556 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
557 return it.data()->left_rule();
561 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
563 return it.data()->right_rule();
568 return special_blobs_densities_[
type];
573 BLOBNBOX_C_IT blob_it(&boxes_);
575 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
578 if (blob_type == type) {
589 special_blobs_densities_[
type] = density;
593 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
594 if (boxes_.empty()) {
598 BLOBNBOX_C_IT blob_it(&boxes_);
599 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
602 special_blobs_densities_[
type]++;
605 for (
float& special_blobs_density : special_blobs_densities_) {
606 special_blobs_density /= boxes_.length();
615 partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
617 upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true, partner);
619 partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
621 lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true, partner);
629 ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
630 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
631 if (it.data() == partner) {
640 ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
641 if (!partners->singleton())
643 ColPartition_C_IT it(partners);
655 bounding_box_.
bottom()) ||
657 other->bounding_box_.
bottom())) {
664 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
666 unsigned w1 = boxes_.length();
667 unsigned w2 = other->boxes_.length();
668 float new_val = special_blobs_densities_[
type] * w1 +
669 other->special_blobs_densities_[
type] * w2;
672 special_blobs_densities_[
type] = new_val / (w1 + w2);
677 BLOBNBOX_C_IT it(&boxes_);
678 BLOBNBOX_C_IT it2(&other->boxes_);
679 for (; !it2.empty(); it2.forward()) {
682 if (prev_owner != other && prev_owner !=
nullptr) {
686 ASSERT_HOST(prev_owner == other || prev_owner ==
nullptr);
687 if (prev_owner == other)
689 it.add_to_end(bbox2);
691 left_margin_ = std::min(left_margin_, other->left_margin_);
692 right_margin_ = std::max(right_margin_, other->right_margin_);
693 if (other->left_key_ < left_key_) {
694 left_key_ = other->left_key_;
695 left_key_tab_ = other->left_key_tab_;
697 if (other->right_key_ > right_key_) {
698 right_key_ = other->right_key_;
699 right_key_tab_ = other->right_key_tab_;
704 flow_ = other->flow_;
705 blob_type_ = other->blob_type_;
709 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
710 last_add_was_vertical_ =
true;
712 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
713 last_add_was_vertical_ =
false;
718 for (
int upper = 0; upper < 2; ++upper) {
719 ColPartition_CLIST partners;
720 ColPartition_C_IT part_it(&partners);
721 part_it.add_list_after(upper ? &other->upper_partners_
722 : &other->lower_partners_);
723 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
748 int ok_box_overlap,
bool debug) {
752 tprintf(
"Vertical partition\n");
766 if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
767 merged_box.bottom() < bounding_box_.
top() - ok_box_overlap &&
768 merged_box.top() > bounding_box_.
bottom() + ok_box_overlap) {
770 tprintf(
"Excessive box overlap\n");
780 if (boxes_.empty() || boxes_.singleton())
782 BLOBNBOX_C_IT it(&boxes_);
783 TBOX left_box(it.data()->bounding_box());
784 for (it.forward(); !it.at_first(); it.forward()) {
787 if (left_box.overlap(box))
800 BLOBNBOX_C_IT it(&boxes_);
801 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
805 if (bbox == split_blob || !split_part->boxes_.empty()) {
806 split_part->
AddBox(it.extract());
818 right_key_tab_ =
false;
819 split_part->left_key_tab_ =
false;
834 if (split_x <= bounding_box_.
left() || split_x >= bounding_box_.
right())
838 BLOBNBOX_C_IT it(&boxes_);
839 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
844 if (box.
left() >= split_x) {
845 split_part->
AddBox(it.extract());
852 it.add_list_after(&split_part->boxes_);
861 right_key_tab_ =
false;
862 split_part->left_key_tab_ =
false;
863 right_margin_ = split_x;
864 split_part->left_margin_ = split_x;
872 bounding_box_ =
TBOX();
873 BLOBNBOX_C_IT it(&boxes_);
875 int non_leader_count = 0;
877 bounding_box_.
set_left(left_margin_);
882 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
894 tprintf(
"Computed left-illegal partition\n");
900 tprintf(
"Computed right-illegal partition\n");
907 median_top_ = bounding_box_.
top();
908 median_bottom_ = bounding_box_.
bottom();
909 median_height_ = bounding_box_.
height();
910 median_left_ = bounding_box_.
left();
911 median_right_ = bounding_box_.
right();
912 median_width_ = bounding_box_.
width();
915 STATS bottom_stats(bounding_box_.
bottom(), bounding_box_.
top() + 1);
917 STATS left_stats(bounding_box_.
left(), bounding_box_.
right() + 1);
918 STATS right_stats(bounding_box_.
left(), bounding_box_.
right() + 1);
919 STATS width_stats(0, bounding_box_.
width() + 1);
920 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
924 int area = box.
area();
925 top_stats.add(box.
top(), area);
926 bottom_stats.add(box.
bottom(), area);
927 height_stats.add(box.
height(), area);
928 left_stats.add(box.
left(), area);
929 right_stats.add(box.
right(), area);
930 width_stats.add(box.
width(), area);
933 median_top_ =
static_cast<int>(top_stats.median() + 0.5);
934 median_bottom_ =
static_cast<int>(bottom_stats.median() + 0.5);
935 median_height_ =
static_cast<int>(height_stats.median() + 0.5);
936 median_left_ =
static_cast<int>(left_stats.median() + 0.5);
937 median_right_ =
static_cast<int>(right_stats.median() + 0.5);
938 median_width_ =
static_cast<int>(width_stats.median() + 0.5);
942 tprintf(
"Made partition with bad right coords");
946 tprintf(
"Made partition with bad left coords");
952 for (
int upper = 0; upper < 2; ++upper) {
953 ColPartition_CLIST partners;
954 ColPartition_C_IT part_it(&partners);
955 part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
956 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
963 bounding_box_.
bottom())) {
964 tprintf(
"Recomputed box for partition %p\n",
this);
971 BLOBNBOX_C_IT it(&boxes_);
972 int overlap_count = 0;
973 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
978 return overlap_count;
984 int first_spanned_col = -1;
987 bounding_box_.
left(), bounding_box_.
right(),
988 std::min(bounding_box_.
height(), bounding_box_.
width()),
989 MidY(), left_margin_, right_margin_,
990 &first_column_, &last_column_,
992 column_set_ = columns;
993 if (first_column_ < last_column_ && span_type ==
CST_PULLOUT &&
997 if (first_spanned_col >= 0) {
998 first_column_ = first_spanned_col;
999 last_column_ = first_spanned_col;
1001 if ((first_column_ & 1) == 0)
1002 last_column_ = first_column_;
1003 else if ((last_column_ & 1) == 0)
1004 first_column_ = last_column_;
1006 first_column_ = last_column_ = (first_column_ + last_column_) / 2;
1024 switch (blob_type_) {
1067 int* first_col,
int* last_col) {
1068 int first_spanned_col = -1;
1071 bounding_box_.
left(), bounding_box_.
right(),
1072 std::min(bounding_box_.
height(), bounding_box_.
width()),
1073 MidY(), left_margin_, right_margin_,
1074 first_col, last_col,
1075 &first_spanned_col);
1083 good_width_ = cb->
Run(width);
1084 good_column_ = blob_type_ ==
BRT_TEXT && left_key_tab_ && right_key_tab_;
1094 bool result =
false;
1096 int part_width = bounding_box_.
width();
1097 STATS gap_stats(0, part_width);
1098 STATS width_stats(0, part_width);
1099 BLOBNBOX_C_IT it(&boxes_);
1104 for (it.forward(); !it.at_first(); it.forward()) {
1109 width_stats.
add(right - left, 1);
1114 double median_gap = gap_stats.
median();
1116 double max_width = std::max(median_gap, median_width);
1117 double min_width = std::min(median_gap, median_width);
1118 double gap_iqr = gap_stats.
ile(0.75f) - gap_stats.
ile(0.25f);
1120 tprintf(
"gap iqr = %g, blob_count=%d, limits=%g,%g\n",
1130 int offset =
static_cast<int>(ceil(gap_iqr * 2));
1131 int min_step =
static_cast<int>(median_gap + median_width + 0.5);
1132 int max_step = min_step + offset;
1135 int part_left = bounding_box_.
left() - min_step / 2;
1136 part_width += min_step;
1137 auto* projection =
new DPPoint[part_width];
1138 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1143 for (
int x = left; x < right; ++x) {
1144 projection[left - part_left].AddLocalCost(height);
1149 part_width, projection);
1150 if (best_end !=
nullptr && best_end->
total_cost() < blob_count) {
1153 bool modified_blob_list =
false;
1154 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1157 if (it.at_first()) {
1162 modified_blob_list =
true;
1168 it.data_relative(-1)->bounding_box().right();
1171 modified_blob_list =
true;
1182 if (best_end ==
nullptr) {
1189 delete [] projection;
1203 int good_blob_score_ = 0;
1204 int noisy_count = 0;
1205 int hline_count = 0;
1206 int vline_count = 0;
1207 BLOBNBOX_C_IT it(&boxes_);
1208 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1218 if (hline_count > vline_count) {
1221 }
else if (vline_count > hline_count) {
1224 }
else if (value < -1 || 1 < value) {
1228 long_side = bounding_box_.
width();
1229 short_side = bounding_box_.
height();
1232 long_side = bounding_box_.
height();
1233 short_side = bounding_box_.
width();
1249 if (flow_ ==
BTFT_CHAIN && strong_score == 3)
1257 if (noisy_count >= blob_count) {
1263 bounding_box_.
bottom())) {
1264 tprintf(
"RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1265 blob_count, noisy_count, good_blob_score_);
1266 tprintf(
" Projection value=%d, flow=%d, blob_type=%d\n",
1267 value, flow_, blob_type_);
1278 BLOBNBOX_C_IT it(&boxes_);
1279 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1296 int total_height = 0;
1298 int height_count = 0;
1300 BLOBNBOX_C_IT it(&boxes_);
1301 TBOX box(it.data()->bounding_box());
1307 ICOORD first_pt(box.right(), box.bottom());
1310 linepoints.
Add(first_pt);
1311 for (it.forward(); !it.at_last(); it.forward()) {
1314 ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1315 linepoints.
Add(box_pt);
1316 total_height += box.width();
1317 coverage += box.height();
1320 box = it.data()->bounding_box();
1321 ICOORD last_pt(box.right(), box.top());
1322 linepoints.
Add(last_pt);
1323 width = last_pt.y() - first_pt.y();
1327 TBOX box(it.data()->bounding_box());
1330 ICOORD first_pt(box.left(), box.bottom());
1331 linepoints.
Add(first_pt);
1332 for (it.forward(); !it.at_last(); it.forward()) {
1335 ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1336 linepoints.
Add(box_pt);
1337 total_height += box.height();
1338 coverage += box.width();
1341 box = it.data()->bounding_box();
1342 ICOORD last_pt(box.right(), box.bottom());
1343 linepoints.
Add(last_pt);
1344 width = last_pt.x() - first_pt.x();
1347 if (height_count == 0)
1351 double error = linepoints.
Fit(&start_pt, &end_pt);
1359 ColPartition_LIST* used_parts,
1360 WorkingPartSet_LIST* working_sets) {
1363 block_owned_ =
true;
1364 WorkingPartSet_IT it(working_sets);
1367 if (partner !=
nullptr && partner->working_set_ !=
nullptr) {
1368 working_set_ = partner->working_set_;
1373 tprintf(
"Partition with partner has no working set!:");
1381 for (it.mark_cycle_pt(); !it.cycled_list() &&
1382 col_index != first_column_;
1383 it.forward(), ++col_index);
1385 tprintf(
"Match is %s for:", (col_index & 1) ?
"Real" :
"Between");
1389 tprintf(
"Target column=%d, only had %d\n", first_column_, col_index);
1392 work_set = it.data();
1395 if (!it.cycled_list() && last_column_ != first_column_ && !
IsPulloutType()) {
1397 BLOCK_LIST completed_blocks;
1398 TO_BLOCK_LIST to_blocks;
1399 for (; !it.cycled_list() && col_index <= last_column_;
1400 it.forward(), ++col_index) {
1403 &completed_blocks, &to_blocks);
1405 work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1407 working_set_ = work_set;
1419 ColPartition_LIST* block_parts,
1420 ColPartition_LIST* used_parts,
1421 BLOCK_LIST* completed_blocks,
1422 TO_BLOCK_LIST* to_blocks) {
1423 int page_height = tright.
y() - bleft.
y();
1425 ColPartition_IT it(block_parts);
1427 int max_line_height = 0;
1433 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1439 BLOBNBOX_C_IT blob_it(part->
boxes());
1440 int prev_bottom = blob_it.data()->bounding_box().bottom();
1441 for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1444 int step = bottom - prev_bottom;
1447 side_steps.add(step, 1);
1448 prev_bottom = bottom;
1450 part->
set_side_step(static_cast<int>(side_steps.median() + 0.5));
1451 if (!it.at_last()) {
1462 tprintf(
"side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1467 if (part_count == 0)
1470 SmoothSpacings(resolution, page_height, block_parts);
1473 BLOCK_IT block_it(completed_blocks);
1474 TO_BLOCK_IT to_block_it(to_blocks);
1475 ColPartition_LIST spacing_parts;
1476 ColPartition_IT sp_block_it(&spacing_parts);
1478 for (it.mark_cycle_pt(); !it.empty();) {
1480 sp_block_it.add_to_end(part);
1482 if (it.empty() || part->
bottom_spacing() > same_block_threshold ||
1483 !part->SpacingsEqual(*it.data(), resolution)) {
1486 if (!it.empty() && part->
bottom_spacing() <= same_block_threshold) {
1490 ColPartition* third_part = it.at_last() ?
nullptr : it.data_relative(1);
1492 tprintf(
"Spacings unequal: upper:%d/%d, lower:%d/%d," 1493 " sizes %d %d %d\n",
1501 if (part->SizesSimilar(*next_part) &&
1508 if (third_part ==
nullptr ||
1509 !next_part->SizesSimilar(*third_part) ||
1516 sp_block_it.add_to_end(it.extract());
1519 tprintf(
"Added line to current block.\n");
1525 if (to_block !=
nullptr) {
1526 to_block_it.add_to_end(to_block);
1527 block_it.add_to_end(to_block->
block);
1529 sp_block_it.set_to_list(&spacing_parts);
1533 tprintf(
"Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
1544 if (pos->
x() < bleft.
x())
1546 if (pos->
x() > tright.
x())
1548 if (pos->
y() < bleft.
y())
1550 if (pos->
y() > tright.
y())
1558 static TO_BLOCK* MoveBlobsToBlock(
bool vertical_text,
int line_spacing,
1560 ColPartition_LIST* block_parts,
1561 ColPartition_LIST* used_parts) {
1567 STATS sizes(0, std::max(block_box.width(), block_box.height()));
1569 ColPartition_IT it(block_parts);
1570 auto* to_block =
new TO_BLOCK(block);
1571 BLOBNBOX_IT blob_it(&to_block->blobs);
1572 ColPartition_IT used_it(used_parts);
1573 for (it.move_to_first(); !it.empty(); it.forward()) {
1578 for (BLOBNBOX_C_IT bb_it(part->
boxes()); !bb_it.empty();
1581 if (bblob->
owner() != part) {
1582 tprintf(
"Ownership incorrect for blob:");
1586 if (bblob->
owner() ==
nullptr) {
1599 C_OUTLINE_IT ol_it(outlines);
1600 ASSERT_HOST(!text_type || ol_it.data()->pathlength() > 0);
1605 blob_it.add_after_then_move(bblob);
1607 used_it.add_to_end(part);
1609 if (text_type && blob_it.empty()) {
1614 to_block->line_size = sizes.median();
1615 if (vertical_text) {
1617 if (block_width < line_spacing)
1618 line_spacing = block_width;
1619 to_block->line_spacing =
static_cast<float>(line_spacing);
1620 to_block->max_blob_size =
static_cast<float>(block_width + 1);
1623 if (block_height < line_spacing)
1624 line_spacing = block_height;
1625 to_block->line_spacing =
static_cast<float>(line_spacing);
1626 to_block->max_blob_size =
static_cast<float>(block_height + 1);
1634 ColPartition_LIST* block_parts,
1635 ColPartition_LIST* used_parts) {
1636 if (block_parts->empty())
1642 ColPartition_IT it(block_parts);
1653 ICOORDELT_LIST vertices;
1654 ICOORDELT_IT vert_it(&vertices);
1656 int min_x = INT32_MAX;
1657 int max_x = -INT32_MAX;
1658 int min_y = INT32_MAX;
1659 int max_y = -INT32_MAX;
1663 ColPartition::LeftEdgeRun(&it, &start, &end);
1665 ColPartition::RightEdgeRun(&it, &start, &end);
1666 ClipCoord(bleft, tright, &start);
1667 ClipCoord(bleft, tright, &end);
1668 vert_it.add_after_then_move(
new ICOORDELT(start));
1669 vert_it.add_after_then_move(
new ICOORDELT(end));
1674 if ((iteration == 0 && it.at_first()) ||
1675 (iteration == 1 && it.at_last())) {
1679 }
while (iteration < 2);
1681 tprintf(
"Making block at (%d,%d)->(%d,%d)\n",
1682 min_x, min_y, max_x, max_y);
1683 auto* block =
new BLOCK(
"",
true, 0, 0, min_x, min_y, max_x, max_y);
1685 return MoveBlobsToBlock(
false, line_spacing, block, block_parts, used_parts);
1692 ColPartition_LIST* block_parts,
1693 ColPartition_LIST* used_parts) {
1694 if (block_parts->empty())
1696 ColPartition_IT it(block_parts);
1699 int line_spacing = block_box.
width();
1701 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1702 block_box += it.data()->bounding_box();
1708 auto* block =
new BLOCK(
"",
true, 0, 0, block_box.
left(), block_box.
bottom(),
1709 block_box.
right(), block_box.
top());
1711 return MoveBlobsToBlock(
true, line_spacing, block, block_parts, used_parts);
1717 BLOBNBOX_C_IT blob_it(&boxes_);
1719 int line_size =
IsVerticalType() ? median_width_ : median_height_;
1721 for (; !blob_it.empty(); blob_it.forward()) {
1722 BLOBNBOX* blob = blob_it.extract();
1726 if (row ==
nullptr) {
1727 row =
new TO_ROW(blob, static_cast<float>(top),
1728 static_cast<float>(bottom),
1729 static_cast<float>(line_size));
1731 row->
add_blob(blob, static_cast<float>(top),
1732 static_cast<float>(bottom),
1733 static_cast<float>(line_size));
1743 part->left_margin_ = left_margin_;
1744 part->right_margin_ = right_margin_;
1745 part->bounding_box_ = bounding_box_;
1746 memcpy(part->special_blobs_densities_, special_blobs_densities_,
1747 sizeof(special_blobs_densities_));
1748 part->median_bottom_ = median_bottom_;
1749 part->median_top_ = median_top_;
1750 part->median_height_ = median_height_;
1751 part->median_left_ = median_left_;
1752 part->median_right_ = median_right_;
1753 part->median_width_ = median_width_;
1754 part->good_width_ = good_width_;
1755 part->good_column_ = good_column_;
1756 part->left_key_tab_ = left_key_tab_;
1757 part->right_key_tab_ = right_key_tab_;
1758 part->type_ = type_;
1759 part->flow_ = flow_;
1760 part->left_key_ = left_key_;
1761 part->right_key_ = right_key_;
1762 part->first_column_ = first_column_;
1763 part->last_column_ = last_column_;
1764 part->owns_blobs_ =
false;
1771 BLOBNBOX_C_IT inserter(copy->
boxes());
1772 BLOBNBOX_C_IT traverser(
boxes());
1773 for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward())
1774 inserter.add_after_then_move(traverser.data());
1778 #ifndef GRAPHICS_DISABLED 1786 #endif // GRAPHICS_DISABLED 1789 static char kBlobTypes[
BRT_COUNT + 1] =
"NHSRIUVT";
1794 tprintf(
"ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)" 1795 " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d" 1796 " ts=%d bs=%d ls=%d rs=%d\n",
1797 boxes_.empty() ?
'E' :
' ',
1798 left_margin_, left_key_tab_ ?
'T' :
'B',
LeftAtY(y),
1799 bounding_box_.
left(), median_left_,
1800 bounding_box_.
bottom(), median_bottom_,
1801 bounding_box_.
right(),
RightAtY(y), right_key_tab_ ?
'T' :
'B',
1802 right_margin_, median_right_, bounding_box_.
top(), median_top_,
1803 good_width_, good_column_, type_,
1804 kBlobTypes[blob_type_], flow_,
1805 first_column_, last_column_, boxes_.length(),
1806 space_above_, space_below_, space_to_left_, space_to_right_);
1811 tprintf(
"Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
1812 color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
1813 color1_[L_ALPHA_CHANNEL],
1814 color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1819 STATS left_stats(0, working_set_count);
1820 STATS right_stats(0, working_set_count);
1825 if (partner->type_ > max_type)
1826 max_type = partner->type_;
1827 if (column_set_ == partner->column_set_) {
1828 left_stats.
add(partner->first_column_, 1);
1829 right_stats.
add(partner->last_column_, 1);
1837 first_column_ = left_stats.
mode();
1838 last_column_ = right_stats.
mode();
1839 if (last_column_ < first_column_)
1840 last_column_ = first_column_;
1845 partner->type_ = max_type;
1846 #if 0 // See TODO above 1847 if (column_set_ == partner->column_set_) {
1848 partner->first_column_ = first_column_;
1849 partner->last_column_ = last_column_;
1890 RefinePartnersInternal(
true, get_desperate, grid);
1891 RefinePartnersInternal(
false, get_desperate, grid);
1895 RefinePartnersByType(
true, &upper_partners_);
1896 RefinePartnersByType(
false, &lower_partners_);
1900 if (!upper_partners_.empty() && !upper_partners_.singleton())
1901 RefinePartnersByOverlap(
true, &upper_partners_);
1902 if (!lower_partners_.empty() && !lower_partners_.singleton())
1903 RefinePartnersByOverlap(
false, &lower_partners_);
1912 void ColPartition::RefinePartnersInternal(
bool upper,
bool get_desperate,
1914 ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
1915 if (!partners->empty() && !partners->singleton()) {
1916 RefinePartnersByType(upper, partners);
1917 if (!partners->empty() && !partners->singleton()) {
1919 RefinePartnerShortcuts(upper, partners);
1920 if (!partners->empty() && !partners->singleton()) {
1924 RefineTextPartnersByMerge(upper,
false, partners, grid);
1925 if (!partners->empty() && !partners->singleton())
1926 RefineTextPartnersByMerge(upper,
true, partners, grid);
1929 if (!partners->empty() && !partners->singleton())
1930 RefinePartnersByOverlap(upper, partners);
1939 void ColPartition::RefinePartnersByType(
bool upper,
1940 ColPartition_CLIST* partners) {
1944 tprintf(
"Refining %d %s partners by type for:\n",
1945 partners->length(), upper ?
"Upper" :
"Lower");
1948 ColPartition_C_IT it(partners);
1954 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1970 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1993 void ColPartition::RefinePartnerShortcuts(
bool upper,
1994 ColPartition_CLIST* partners) {
1995 bool done_any =
false;
1998 ColPartition_C_IT it(partners);
1999 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2003 ColPartition_C_IT it1(upper ? &a->upper_partners_ : &a->lower_partners_);
2004 for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
2012 ColPartition_C_IT it2(partners);
2013 for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
2031 }
while (done_any && !partners->empty() && !partners->singleton());
2042 void ColPartition::RefineTextPartnersByMerge(
bool upper,
bool desperate,
2043 ColPartition_CLIST* partners,
2048 tprintf(
"Refining %d %s partners by merge for:\n",
2049 partners->length(), upper ?
"Upper" :
"Lower");
2052 while (!partners->empty() && !partners->singleton()) {
2055 ColPartition_C_IT it(partners);
2059 ColPartition_CLIST candidates;
2060 ColPartition_C_IT cand_it(&candidates);
2061 for (it.forward(); !it.at_first(); it.forward()) {
2063 if (part->first_column_ == candidate->last_column_ &&
2064 part->last_column_ == candidate->first_column_)
2065 cand_it.add_after_then_move(it.data());
2067 int overlap_increase;
2069 nullptr, &overlap_increase);
2070 if (candidate !=
nullptr && (overlap_increase <= 0 || desperate)) {
2072 tprintf(
"Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
2079 part->
Absorb(candidate,
nullptr);
2082 if (overlap_increase > 0)
2083 part->desperately_merged_ =
true;
2092 void ColPartition::RefinePartnersByOverlap(
bool upper,
2093 ColPartition_CLIST* partners) {
2097 tprintf(
"Refining %d %s partners by overlap for:\n",
2098 partners->length(), upper ?
"Upper" :
"Lower");
2101 ColPartition_C_IT it(partners);
2104 int best_overlap = 0;
2105 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2107 int overlap = std::min(bounding_box_.
right(), partner->bounding_box_.
right())
2108 - std::max(bounding_box_.
left(), partner->bounding_box_.
left());
2109 if (overlap > best_overlap) {
2110 best_overlap = overlap;
2111 best_partner = partner;
2115 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2117 if (partner != best_partner) {
2129 bool ColPartition::ThisPartitionBetter(
BLOBNBOX* bbox,
2133 int left = box.
left();
2134 int right = box.
right();
2135 if (left < left_margin_ || right > right_margin_)
2137 if (left < other.left_margin_ || right > other.right_margin_)
2139 int top = box.
top();
2140 int bottom = box.
bottom();
2141 int this_overlap = std::min(top, median_top_) - std::max(bottom, median_bottom_);
2142 int other_overlap = std::min(top, other.median_top_) -
2143 std::max(bottom, other.median_bottom_);
2144 int this_miss = median_top_ - median_bottom_ - this_overlap;
2145 int other_miss = other.median_top_ - other.median_bottom_ - other_overlap;
2147 tprintf(
"Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n",
2149 this_overlap, other_overlap, this_miss, other_miss,
2150 median_top_, other.median_top_);
2152 if (this_miss < other_miss)
2154 if (this_miss > other_miss)
2156 if (this_overlap > other_overlap)
2158 if (this_overlap < other_overlap)
2160 return median_top_ >= other.median_top_;
2167 static int MedianSpacing(
int page_height, ColPartition_IT it) {
2168 STATS stats(0, page_height);
2169 while (!it.cycled_list()) {
2175 return static_cast<int>(stats.
median() + 0.5);
2189 return (last_column_ >= part.first_column_) &&
2190 (first_column_ <= part.last_column_);
// Walks the partitions in `parts` with a sliding window (`neighbourhood`,
// PN_COUNT entries) looking for the end of a run of consistently spaced
// partitions. When a run ends, the top and bottom spacings accumulated over
// the run are averaged (rounded to nearest int).
//   resolution  - forwarded to the spacing comparison helpers.
//   page_height - forwarded to MedianSpacing().
//   parts       - list of partitions to process.
// NOTE(review): many rows of this function are missing from this listing
// (including the declaration of `neighbourhood`, `upper`, `last_part`, and
// the statements that store the averaged spacings) - confirm details against
// the full source before relying on these notes.
2196 void ColPartition::SmoothSpacings(
int resolution,
int page_height,
2197 ColPartition_LIST* parts) {
2205 ColPartition_IT it(parts);
// Initial reference spacing; MedianSpacing works on its own copy of `it`.
2212 int median_space = MedianSpacing(page_height, it);
// start_it / end_it are copies of `it` taken at this point; start_it later
// seeds sum_it and end_it is used to recompute the median.
2213 ColPartition_IT start_it(it);
2214 ColPartition_IT end_it(it);
// Prime the window: slots before PN_UPPER, and any slot reached after the
// iterator has cycled, are set to nullptr; the others take it.data().
2215 for (
int i = 0; i < PN_COUNT; ++i) {
2216 if (i < PN_UPPER || it.cycled_list()) {
2217 neighbourhood[i] =
nullptr;
2221 neighbourhood[i] = it.data();
// Main loop: runs as long as the PN_UPPER slot of the window is occupied.
2225 while (neighbourhood[PN_UPPER] !=
nullptr) {
// The run ends when there is no PN_LOWER partition, or when the upper and
// lower spacings differ and none of the three "blip" exceptions applies
// (tested on the window as-is, shifted by -1, and shifted by +1).
// NOTE(review): passing neighbourhood - 1 / + 1 relies on the PN_* index
// values keeping OKSpacingBlip's accesses inside the array - enum values
// are not visible here, confirm.
2247 if (neighbourhood[PN_LOWER] ==
nullptr ||
2248 (!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER],
2250 !OKSpacingBlip(resolution, median_space, neighbourhood) &&
2251 (!OKSpacingBlip(resolution, median_space, neighbourhood - 1) ||
2252 !neighbourhood[PN_LOWER]->SpacingEqual(median_space, resolution)) &&
2253 (!OKSpacingBlip(resolution, median_space, neighbourhood + 1) ||
2254 !neighbourhood[PN_UPPER]->SpacingEqual(median_space, resolution)))) {
// Accumulators for the run, iterated with sum_it starting from start_it.
2257 ColPartition_IT sum_it(start_it);
2259 double total_bottom = 0.0;
2260 double total_top = 0.0;
2261 int total_count = 0;
2264 while (upper != last_part) {
2269 upper = sum_it.data();
// Guard against dividing by zero when nothing was accumulated.
2271 if (total_count > 0) {
// Mean spacings, rounded to nearest via + 0.5 and a truncating cast.
2273 int top_spacing =
static_cast<int>(total_top / total_count + 0.5);
2274 int bottom_spacing =
static_cast<int>(total_bottom / total_count + 0.5);
// Diagnostic output describing why the run ended and what the window held.
// NOTE(review): the condition enabling this output is not in the listing.
2276 tprintf(
"Spacing run ended. Cause:");
2277 if (neighbourhood[PN_LOWER] ==
nullptr) {
2280 tprintf(
"Spacing change. Spacings:\n");
2281 for (
int i = 0; i < PN_COUNT; ++i) {
2282 if (neighbourhood[i] ==
nullptr) {
2284 if (i > 0 && neighbourhood[i - 1] !=
nullptr) {
2289 tprintf(
" nullptr lower partner:\n");
2295 tprintf(
"Top = %d, bottom = %d\n",
2301 tprintf(
"Mean spacing = %d/%d\n", top_spacing, bottom_spacing);
// Second pass over the same run with sum_it.
// NOTE(review): presumably this is where the means are written back to each
// partition; the loop body is not in the listing - confirm.
2304 upper = sum_it.data();
2305 while (upper != last_part) {
2313 upper = sum_it.data();
// Refresh the reference median using end_it for the following run.
2320 median_space = MedianSpacing(page_height, end_it);
// Slide the window by one slot: drop entry 0, shift the rest down, and fill
// the last slot from the iterator (nullptr once the list has cycled).
2323 for (
int j = 1; j < PN_COUNT; ++j) {
2324 neighbourhood[j - 1] = neighbourhood[j];
2326 if (it.cycled_list()) {
2327 neighbourhood[PN_COUNT - 1] =
nullptr;
2329 neighbourhood[PN_COUNT - 1] = it.data();
// Decides whether a spacing irregularity between parts[PN_UPPER] and
// parts[PN_LOWER] can be tolerated.
// Returns true when the pair passes SummedSpacingOK() against
// median_spacing AND at least one of parts[PN_ABOVE1] / parts[PN_BELOW1] is
// non-null and has spacing equal to median_spacing (per SpacingEqual()).
// NOTE(review): the row declaring the `parts` parameter and the statement
// executed by the null guard are missing from this listing - confirm.
2339 bool ColPartition::OKSpacingBlip(
int resolution,
int median_spacing,
// Both central partitions must exist before either is dereferenced below.
2341 if (parts[PN_UPPER] ==
nullptr || parts[PN_LOWER] ==
nullptr)
2345 return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER],
2346 median_spacing, resolution) &&
// Each outer neighbour is null-checked before use, so either may be absent.
2347 ((parts[PN_ABOVE1] !=
nullptr &&
2348 parts[PN_ABOVE1]->SpacingEqual(median_spacing, resolution)) ||
2349 (parts[PN_BELOW1] !=
nullptr &&
2350 parts[PN_BELOW1]->SpacingEqual(median_spacing, resolution)));
// Tests this partition's spacing against a single reference `spacing`.
// The tolerances come from BottomSpacingMargin() / TopSpacingMargin() for
// the given resolution.
// NOTE(review): only the bottom_spacing_ comparison is visible; the second
// operand of the && (presumably the top_spacing_ check using top_error) is
// missing from this listing - confirm.
2355 bool ColPartition::SpacingEqual(
int spacing,
int resolution)
const {
2356 int bottom_error = BottomSpacingMargin(resolution);
2357 int top_error = TopSpacingMargin(resolution);
2358 return NearlyEqual(bottom_spacing_, spacing, bottom_error) &&
// Tests whether this partition and `other` have matching spacings.
// The bottom spacings must agree within bottom_error, and either the top
// spacings agree within top_error or the sum of the two top spacings is
// close to twice this partition's bottom_spacing_.
// NOTE(review): the tolerance argument of the last NearlyEqual call is cut
// off in this listing - confirm.
2364 bool ColPartition::SpacingsEqual(
const ColPartition& other,
2365 int resolution)
const {
// Use the more permissive (larger) of the two partitions' margins.
2366 int bottom_error = std::max(BottomSpacingMargin(resolution),
2367 other.BottomSpacingMargin(resolution));
2368 int top_error = std::max(TopSpacingMargin(resolution),
2369 other.TopSpacingMargin(resolution));
2370 return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_error) &&
2371 (
NearlyEqual(top_spacing_, other.top_spacing_, top_error) ||
2372 NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2,
// Tests the combined spacing of this partition and `other` against a
// reference `spacing`. Two alternatives are visible: the summed bottom
// spacing is close to `spacing`, or it is close to `spacing * 2`.
// NOTE(review): the second operand of each && (presumably the matching
// top_total comparisons using top_error) is missing from this listing -
// confirm.
2379 bool ColPartition::SummedSpacingOK(
const ColPartition& other,
2380 int spacing,
int resolution)
const {
// Tolerances: the larger of the two partitions' margins.
2381 int bottom_error = std::max(BottomSpacingMargin(resolution),
2382 other.BottomSpacingMargin(resolution));
2383 int top_error = std::max(TopSpacingMargin(resolution),
2384 other.TopSpacingMargin(resolution));
// Sums over the pair of partitions.
2385 int bottom_total = bottom_spacing_ + other.bottom_spacing_;
2386 int top_total = top_spacing_ + other.top_spacing_;
2387 return (
NearlyEqual(spacing, bottom_total, bottom_error) &&
2389 (
NearlyEqual(spacing * 2, bottom_total, bottom_error) &&
// Returns an integer margin for the given resolution. Elsewhere in this
// file the result is passed to NearlyEqual() as the tolerance when comparing
// bottom spacings.
// NOTE(review): the body is not present in this listing.
2395 int ColPartition::BottomSpacingMargin(
int resolution)
const {
// Returns an integer margin for the given resolution. Elsewhere in this
// file the result is passed to NearlyEqual() as the tolerance when comparing
// top spacings.
// NOTE(review): only the tail of the body is visible; it references
// BottomSpacingMargin(resolution), so the top margin appears to be derived
// from the bottom margin - confirm the full expression.
2401 int ColPartition::TopSpacingMargin(
int resolution)
const {
2403 BottomSpacingMargin(resolution);
// Compares the median heights of this partition and `other`.
// The visible condition requires this partition's median_height_ to be no
// more than other's median_height_ scaled by kMaxSizeRatio.
// NOTE(review): the second operand of the && is missing from this listing
// (presumably the symmetric check in the other direction) - confirm.
2408 bool ColPartition::SizesSimilar(
const ColPartition& other)
const {
2409 return median_height_ <= other.median_height_ *
kMaxSizeRatio &&
// Narrows the running range [*margin_left, *margin_right] using a key range
// [left_key, right_key] computed for `part`. If the two ranges overlap, the
// running range is replaced by their intersection.
// NOTE(review): the first signature row, the declaration of part_box, the
// tl/bl/tr key computations and the return statements are missing from this
// listing. Presumably this is UpdateLeftMargin(const ColPartition& part, ...)
// returning whether the ranges overlapped, as LeftEdgeRun uses the result as
// a loop condition - confirm.
2417 int* margin_left,
int* margin_right) {
2419 int top = part_box.
top();
2420 int bottom = part_box.
bottom();
// The bottom-right key is taken at the box's left() x and bottom y.
2424 int br_key = part.
SortKey(part_box.
left(), bottom);
// Tightest key range over the top and bottom edges: the larger of the left
// keys and the smaller of the right keys.
2425 int left_key = std::max(tl_key, bl_key);
2426 int right_key = std::min(tr_key, br_key);
// Overlap test between the part's key range and the running range.
2427 if (left_key <= *margin_right && right_key >= *margin_left) {
// Shrink the in/out range to the intersection.
2429 *margin_right = std::min(*margin_right, right_key);
2430 *margin_left = std::max(*margin_left, left_key);
// Finds a run of partitions, starting at part_it, whose left-margin key
// ranges share a common overlap (tracked via UpdateLeftMargin), and writes
// the run's start and end points through `start` and `end`.
// The x coordinates are obtained from XAtY(margin_right, y).
// NOTE(review): the remaining parameter rows, the declarations of `part` and
// `start_part`, the `do {` openers and several closing braces are missing
// from this listing - confirm details against the full source.
2441 void ColPartition::LeftEdgeRun(ColPartition_IT* part_it,
// start_y begins at the top of the current partition and is adjusted using
// the bottom of the previous list entry when one exists: taken directly if
// it is lower, or averaged with start_y if it is higher.
2445 int start_y = part->bounding_box_.
top();
2446 if (!part_it->at_first()) {
2447 int prev_bottom = part_it->data_relative(-1)->bounding_box_.bottom();
2448 if (prev_bottom < start_y)
2449 start_y = prev_bottom;
2450 else if (prev_bottom > start_y)
2451 start_y = (start_y + prev_bottom) / 2;
2453 int end_y = part->bounding_box_.
bottom();
// Start with an unbounded range and narrow it with each partition.
2454 int margin_right = INT32_MAX;
2455 int margin_left = -INT32_MAX;
2456 UpdateLeftMargin(*part, &margin_left, &margin_right);
// Extend the run until the iterator wraps to the first element or
// UpdateLeftMargin reports failure.
2459 part = part_it->data();
2460 }
while (!part_it->at_first() &&
2461 UpdateLeftMargin(*part, &margin_left, &margin_right));
// Evaluate the partition that stopped the run with a fresh unbounded range.
2465 int next_margin_right = INT32_MAX;
2466 int next_margin_left = -INT32_MAX;
2467 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right);
// If the next partition lies entirely to the right of the current run's
// range, look ahead along the next run using a copy of the iterator...
2468 if (next_margin_left > margin_right) {
2469 ColPartition_IT next_it(*part_it);
2472 part = next_it.data();
2473 }
while (!next_it.at_first() &&
2474 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
// ...then step the real iterator backward while partitions still fit the
// next run's range, stopping at start_part.
2478 part_it->backward();
2479 part = part_it->data();
2480 }
while (part != start_part &&
2481 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
// The run's last partition is the entry just before the iterator position.
2485 part = part_it->data_relative(-1);
2486 end_y = part->bounding_box_.
bottom();
// If the following partition's top is below end_y, end the run halfway
// between the two.
2487 if (!part_it->at_first() && part_it->data()->bounding_box_.top() < end_y)
2488 end_y = (end_y + part_it->data()->bounding_box_.top()) / 2;
2489 start->
set_y(start_y);
2490 start->
set_x(part->
XAtY(margin_right, start_y));
2492 end->
set_x(part->
XAtY(margin_right, end_y));
// Diagnostic output for the finished run.
// NOTE(review): the condition enabling this output is not in the listing.
2494 tprintf(
"Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
2495 start_y, end_y, part->
XAtY(margin_left, end_y),
2496 end->
x(), part->left_margin_, part->bounding_box_.
left());
// File-local helper: narrows the running range [*margin_left, *margin_right]
// using a key range [left_key, right_key] computed for `part`. If the two
// ranges overlap, the running range is replaced by their intersection.
// NOTE(review): the declaration of part_box, the four corner key
// computations and the return statements are missing from this listing.
// Presumably the bool result reports whether the ranges overlapped, as
// RightEdgeRun uses it as a loop condition - confirm.
2502 static bool UpdateRightMargin(
const ColPartition& part,
2503 int* margin_left,
int* margin_right) {
2505 int top = part_box.
top();
2506 int bottom = part_box.
bottom();
// Tightest key range over the top and bottom edges: the larger of the left
// keys and the smaller of the right keys.
2511 int left_key = std::max(tl_key, bl_key);
2512 int right_key = std::min(tr_key, br_key);
// Overlap test between the part's key range and the running range.
2513 if (left_key <= *margin_right && right_key >= *margin_left) {
// Shrink the in/out range to the intersection.
2515 *margin_right = std::min(*margin_right, right_key);
2516 *margin_left = std::max(*margin_left, left_key);
// Finds a run of partitions, starting at part_it, whose right-margin key
// ranges share a common overlap (tracked via UpdateRightMargin), and writes
// the run's start and end points through `start` and `end`.
// The visible steps move the caller's iterator with backward(), and the x
// coordinates are obtained from XAtY(margin_left, y).
// NOTE(review): the remaining parameter rows, the declarations of `part` and
// `start_part`, the `do {` openers and several other rows are missing from
// this listing - confirm details against the full source.
2528 void ColPartition::RightEdgeRun(ColPartition_IT* part_it,
// start_y begins at the bottom of the current partition and is adjusted
// using the top of the following list entry when one exists; if that top is
// lower, the two values are averaged.
// NOTE(review): the statement for the next_y > start_y branch is missing.
2532 int start_y = part->bounding_box_.
bottom();
2533 if (!part_it->at_last()) {
2534 int next_y = part_it->data_relative(1)->bounding_box_.top();
2535 if (next_y > start_y)
2537 else if (next_y < start_y)
2538 start_y = (start_y + next_y) / 2;
2540 int end_y = part->bounding_box_.
top();
// Start with an unbounded range and narrow it with each partition.
2541 int margin_right = INT32_MAX;
2542 int margin_left = -INT32_MAX;
2543 UpdateRightMargin(*part, &margin_left, &margin_right);
// Step backward through the list, extending the run until the iterator
// wraps to the last element or UpdateRightMargin reports failure.
2545 part_it->backward();
2546 part = part_it->data();
2547 }
while (!part_it->at_last() &&
2548 UpdateRightMargin(*part, &margin_left, &margin_right));
// Evaluate the partition that stopped the run with a fresh unbounded range.
2551 int next_margin_right = INT32_MAX;
2552 int next_margin_left = -INT32_MAX;
2553 UpdateRightMargin(*part, &next_margin_left, &next_margin_right);
// If the next partition lies entirely to the left of the current run's
// range, look ahead along the next run using a copy of the iterator...
2554 if (next_margin_right < margin_left) {
2555 ColPartition_IT next_it(*part_it);
2558 part = next_it.data();
2559 }
while (!next_it.at_last() &&
2560 UpdateRightMargin(*part, &next_margin_left,
2561 &next_margin_right));
// ...then re-walk with the real iterator while partitions still fit the
// next run's range, stopping at start_part.
// NOTE(review): the iterator step inside this loop is not in the listing.
2566 part = part_it->data();
2567 }
while (part != start_part &&
2568 UpdateRightMargin(*part, &next_margin_left,
2569 &next_margin_right));
2570 part_it->backward();
// The run's last partition is the entry just after the iterator position.
2573 part = part_it->data_relative(1);
// If the following partition's bottom is above end_y, end the run halfway
// between the two.
2575 if (!part_it->at_last() &&
2576 part_it->data()->bounding_box_.bottom() > end_y)
2577 end_y = (end_y + part_it->data()->bounding_box_.bottom()) / 2;
2578 start->
set_y(start_y);
2579 start->
set_x(part->
XAtY(margin_left, start_y));
2581 end->
set_x(part->
XAtY(margin_left, end_y));
// Diagnostic output for the finished run.
// NOTE(review): the condition enabling this output is not in the listing.
2583 tprintf(
"Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
2584 start_y, end_y, end->
x(), part->
XAtY(margin_right, end_y),
2585 part->bounding_box_.
right(), part->right_margin_);
void CopyRightTab(const ColPartition &src, bool take_box)
bool IsVerticalType() const
ColPartition * SplitAt(int split_x)
const int kMaxColorDistance
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
int median_height() const
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
void SetPartitionType(int resolution, ColPartitionSet *columns)
void SetRegionAndFlowTypesFromProjectionValue(int value)
C_OUTLINE_LIST * out_list()
const int kHorzStrongTextlineHeight
const int kMinStrongTextValue
void set_owns_blobs(bool owns_blobs)
void set_type(PolyBlockType t)
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
const double kMaxBaselineError
double ile(double frac) const
void set_x(int16_t xin)
rewrite function
bool IsPulloutType() const
bool ReleaseNonLeaderBoxes()
float SpecialBlobsDensity(const BlobSpecialTextType type) const
void ExtractCompletedBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
ScrollView::Color BoxColor() const
ColPartition * SplitAtBlob(BLOBNBOX *split_blob)
POLY_BLOCK * poly_block() const
tesseract::ColPartition * owner() const
bool ConfirmNoTabViolation(const ColPartition &other) const
void SetLeftTab(const TabVector *tab_vector)
void AddBox(BLOBNBOX *box)
double Fit(ICOORD *pt1, ICOORD *pt2)
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
const int kColumnWidthFactor
bool NearlyEqual(T x, T y, T tolerance)
void set_block_owned(bool owned)
int HCoreOverlap(const ColPartition &other) const
void set_bottom_spacing(int spacing)
void SmoothPartnerRun(int working_set_count)
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
const TBOX & bounding_box() const
const double kMaxSameBlockLineSpacing
int16_t y() const
access function
void set_y(int16_t yin)
rewrite function
int base_char_bottom() const
BlobRegionType blob_type() const
BlobTextFlowType flow() const
bool IsInSameColumnAs(const ColPartition &part) const
void add(int32_t value, int32_t count)
void SetSpecialBlobsDensity(const BlobSpecialTextType type, const float density)
void AddPartner(bool upper, ColPartition *partner)
int bottom_spacing() const
void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
void set_right_margin(int margin)
ColPartition * CopyButDontOwnBlobs()
int NoisyNeighbours() const
const double kMinBaselineCoverage
int median_bottom() const
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
const double kMaxLeaderGapFractionOfMax
void Add(const ICOORD &pt)
bool OKDiacriticMerge(const ColPartition &candidate, bool debug) const
PolyBlockType type() const
bool MatchingTextColor(const ColPartition &other) const
const int kMinLeaderCount
TBOX BoundsWithoutBox(BLOBNBOX *box)
const TBOX & bounding_box() const
void ComputeSpecialBlobsDensity()
bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
void AddPartition(ColPartition *part)
void set_owner(tesseract::ColPartition *new_owner)
ColPartition * ShallowCopy() const
void SetRightTab(const TabVector *tab_vector)
int16_t x() const
access function
DLLSYM void tprintf(const char *format,...)
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
static double ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
bool overlap(const TBOX &box) const
const int kHorzStrongTextlineCount
void DisownBoxesNoAssert()
int textord_debug_tabfind
int VCoreOverlap(const ColPartition &other) const
static bool DifferentSizes(int size1, int size2)
ColPartition * BestMergeCandidate(const ColPartition *part, ColPartition_CLIST *candidates, bool debug, TessResultCallback2< bool, const ColPartition *, const ColPartition * > *confirm_cb, int *overlap_increase)
PDBLK pdblk
Page Description Block.
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
const double kMaxSizeRatio
int64_t CostWithVariance(const DPPoint *prev)
const int kMinChainTextValue
bool VSignificantCoreOverlap(const ColPartition &other) const
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
bool MarkAsLeaderIfMonospaced()
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
CLISTIZE(BLOCK_RES) ELISTIZE(ROW_RES) ELISTIZE(WERD_RES) static const double kStopperAmbiguityThresholdGain
bool MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
void set_flow(BlobTextFlowType f)
void RefinePartners(PolyBlockType type, bool get_desperate, ColPartitionGrid *grid)
BlobRegionType region_type() const
int SpecialBlobsCount(const BlobSpecialTextType type)
bool MatchingColumns(const ColPartition &other) const
PolyBlockType PartitionType(ColumnSpanningType flow) const
static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
const int kHorzStrongTextlineAspect
BlobTextFlowType flow() const
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
void set_region_type(BlobRegionType new_type)
BlobSpecialTextType special_text_type() const
ColPartition * SingletonPartner(bool upper)
void set_flow(BlobTextFlowType value)
void RemoveBBox(BBC *bbox)
void RemoveBox(BLOBNBOX *box)
int RightBlobRule() const
void set_top_spacing(int spacing)
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
const double kMaxTopSpacingFraction
BLOBNBOX * OverlapSplitBlob(const TBOX &box)
bool MatchingSizes(const ColPartition &other) const
void CopyLeftTab(const ColPartition &src, bool take_box)
int SortKey(int x, int y) const
void set_poly_block(POLY_BLOCK *blk)
set the poly block
void SetColumnGoodness(WidthCallback *cb)
const double kMaxLeaderGapFractionOfMin
#define ELIST2IZE(CLASSNAME)
void RemovePartner(bool upper, ColPartition *partner)
int CountOverlappingBoxes(const TBOX &box)
void set_left_margin(int margin)
ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y, int left_margin, int right_margin, int *first_col, int *last_col, int *first_spanned_col)
static bool WithinTestRegion(int detail_level, int x, int y)
void Absorb(ColPartition *other, WidthCallback *cb)
void set_side_step(int step)
static int SortByBBox(const void *p1, const void *p2)
int base_char_top() const
static C_BLOB * FakeBlob(const TBOX &box)
void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
int RightAtY(int y) const
static ColPartition * MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
const double kMaxSpacingDrift
int XAtY(int sort_key, int y) const
const int kMaxRMSColorNoise