tesseract  4.1.0
tesseract::ColPartitionGrid Class Reference

#include <colpartitiongrid.h>

Inheritance diagram for tesseract::ColPartitionGrid:
tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT > tesseract::GridBase

Public Member Functions

 ColPartitionGrid ()=default
 
 ColPartitionGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~ColPartitionGrid () override=default
 
void HandleClick (int x, int y) override
 
void Merges (TessResultCallback2< bool, ColPartition *, TBOX * > *box_cb, TessResultCallback2< bool, const ColPartition *, const ColPartition * > *confirm_cb)
 
bool MergePart (TessResultCallback2< bool, ColPartition *, TBOX * > *box_cb, TessResultCallback2< bool, const ColPartition *, const ColPartition * > *confirm_cb, ColPartition *part)
 
int ComputeTotalOverlap (ColPartitionGrid **overlap_grid)
 
void FindOverlappingPartitions (const TBOX &box, const ColPartition *not_this, ColPartition_CLIST *parts)
 
ColPartition * BestMergeCandidate (const ColPartition *part, ColPartition_CLIST *candidates, bool debug, TessResultCallback2< bool, const ColPartition *, const ColPartition * > *confirm_cb, int *overlap_increase)
 
void SplitOverlappingPartitions (ColPartition_LIST *big_parts)
 
bool GridSmoothNeighbours (BlobTextFlowType source_type, Pix *nontext_map, const TBOX &im_box, const FCOORD &rerotation)
 
void ReflectInYAxis ()
 
void Deskew (const FCOORD &deskew)
 
void ExtractPartitionsAsBlocks (BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void SetTabStops (TabFind *tabgrid)
 
bool MakeColPartSets (PartSetVector *part_sets)
 
ColPartitionSet * MakeSingleColumnSet (WidthCallback *cb)
 
void ClaimBoxes ()
 
void ReTypeBlobs (BLOBNBOX_LIST *im_blobs)
 
void RecomputeBounds (int gridsize, const ICOORD &bleft, const ICOORD &tright, const ICOORD &vertical)
 
void GridFindMargins (ColPartitionSet **best_columns)
 
void ListFindMargins (ColPartitionSet **best_columns, ColPartition_LIST *parts)
 
void DeleteParts ()
 
void DeleteUnknownParts (TO_BLOCK *block)
 
void DeleteNonLeaderParts ()
 
void FindFigureCaptions ()
 
void FindPartitionPartners ()
 
void FindPartitionPartners (bool upper, ColPartition *part)
 
void FindVPartitionPartners (bool to_the_left, ColPartition *part)
 
void RefinePartitionPartners (bool get_desperate)
 
- Public Member Functions inherited from tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BBGrid () override
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(ColPartition *))
 
void InsertBBox (bool h_spread, bool v_spread, ColPartition *bbox)
 
void InsertPixPtBBox (int left, int bottom, Pix *pix, ColPartition *bbox)
 
void RemoveBBox (ColPartition *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()=default
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Additional Inherited Members

- Protected Attributes inherited from tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >
ColPartition_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

Definition at line 33 of file colpartitiongrid.h.

Constructor & Destructor Documentation

tesseract::ColPartitionGrid::ColPartitionGrid ( )
default
tesseract::ColPartitionGrid::ColPartitionGrid ( int  gridsize,
const ICOORD & bleft,
const ICOORD & tright
)

Definition at line 68 of file colpartitiongrid.cpp.

70  : BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(gridsize,
71  bleft, tright) {
72 }
const ICOORD & bleft() const
Definition: bbgrid.h:73
const ICOORD & tright() const
Definition: bbgrid.h:76
int gridsize() const
Definition: bbgrid.h:64
tesseract::ColPartitionGrid::~ColPartitionGrid ( )
override default

Member Function Documentation

ColPartition * tesseract::ColPartitionGrid::BestMergeCandidate ( const ColPartition * part,
ColPartition_CLIST *  candidates,
bool  debug,
TessResultCallback2< bool, const ColPartition *, const ColPartition * > *  confirm_cb,
int *  overlap_increase 
)

Definition at line 405 of file colpartitiongrid.cpp.

408  {
409  if (overlap_increase != nullptr)
410  *overlap_increase = 0;
411  if (candidates->empty())
412  return nullptr;
413  int ok_overlap =
414  static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
415  // The best neighbour to merge with is the one that causes least
416  // total pairwise overlap among all the neighbours.
417  // If more than one offers the same total overlap, choose the one
418  // with the least total area.
419  const TBOX& part_box = part->bounding_box();
420  ColPartition_C_IT it(candidates);
421  ColPartition* best_candidate = nullptr;
422  // Find the total combined box of all candidates and the original.
423  TBOX full_box(part_box);
424  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
425  ColPartition* candidate = it.data();
426  full_box += candidate->bounding_box();
427  }
428  // Keep valid neighbours in a list.
429  ColPartition_CLIST neighbours;
430  // Now run a rect search of the merged box for overlapping neighbours, as
431  // we need anything that might be overlapped by the merged box.
432  FindOverlappingPartitions(full_box, part, &neighbours);
433  if (debug) {
434  tprintf("Finding best merge candidate from %d, %d neighbours for box:",
435  candidates->length(), neighbours.length());
436  part_box.print();
437  }
438  // If the best increase in overlap is positive, then we also check the
439  // worst non-candidate overlap. This catches the case of multiple good
440  // candidates that overlap each other when merged. If the worst
441  // non-candidate overlap is better than the best overlap, then return
442  // the worst non-candidate overlap instead.
443  ColPartition_CLIST non_candidate_neighbours;
444  non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true,
445  &neighbours, candidates);
446  int worst_nc_increase = 0;
447  int best_increase = INT32_MAX;
448  int best_area = 0;
449  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
450  ColPartition* candidate = it.data();
451  if (confirm_cb != nullptr && !confirm_cb->Run(part, candidate)) {
452  if (debug) {
453  tprintf("Candidate not confirmed:");
454  candidate->bounding_box().print();
455  }
456  continue;
457  }
458  int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours);
459  const TBOX& cand_box = candidate->bounding_box();
460  if (best_candidate == nullptr || increase < best_increase) {
461  best_candidate = candidate;
462  best_increase = increase;
463  best_area = cand_box.bounding_union(part_box).area() - cand_box.area();
464  if (debug) {
465  tprintf("New best merge candidate has increase %d, area %d, over box:",
466  increase, best_area);
467  full_box.print();
468  candidate->Print();
469  }
470  } else if (increase == best_increase) {
471  int area = cand_box.bounding_union(part_box).area() - cand_box.area();
472  if (area < best_area) {
473  best_area = area;
474  best_candidate = candidate;
475  }
476  }
477  increase = IncreaseInOverlap(part, candidate, ok_overlap,
478  &non_candidate_neighbours);
479  if (increase > worst_nc_increase)
480  worst_nc_increase = increase;
481  }
482  if (best_increase > 0) {
483  // If the worst non-candidate increase is less than the best increase
484  // including the candidates, then all the candidates can merge together
485  // and the increase in outside overlap would be less, so use that result,
486  // but only if each candidate is either a good diacritic merge with part,
487  // or an ok merge candidate with all the others.
488  // See TestCompatibleCandidates for more explanation and a picture.
489  if (worst_nc_increase < best_increase &&
490  TestCompatibleCandidates(*part, debug, candidates)) {
491  best_increase = worst_nc_increase;
492  }
493  }
494  if (overlap_increase != nullptr)
495  *overlap_increase = best_increase;
496  return best_candidate;
497 }
int32_t area() const
Definition: rect.h:122
Definition: rect.h:34
void print() const
Definition: rect.h:278
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:129
void FindOverlappingPartitions(const TBOX &box, const ColPartition *not_this, ColPartition_CLIST *parts)
const double kTinyEnoughTextlineOverlapFraction
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
virtual R Run(A1, A2)=0
int gridsize() const
Definition: bbgrid.h:64
void tesseract::ColPartitionGrid::ClaimBoxes ( )

Definition at line 863 of file colpartitiongrid.cpp.

863  {
864  // Iterate the ColPartitions in the grid.
865  ColPartitionGridSearch gsearch(this);
866  gsearch.StartFullSearch();
867  ColPartition* part;
868  while ((part = gsearch.NextFullSearch()) != nullptr) {
869  part->ClaimBoxes();
870  }
871 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
int tesseract::ColPartitionGrid::ComputeTotalOverlap ( ColPartitionGrid **  overlap_grid)

Definition at line 318 of file colpartitiongrid.cpp.

318  {
319  int total_overlap = 0;
320  // Iterate the ColPartitions in the grid.
321  ColPartitionGridSearch gsearch(this);
322  gsearch.StartFullSearch();
323  ColPartition* part;
324  while ((part = gsearch.NextFullSearch()) != nullptr) {
325  ColPartition_CLIST neighbors;
326  const TBOX& part_box = part->bounding_box();
327  FindOverlappingPartitions(part_box, part, &neighbors);
328  ColPartition_C_IT n_it(&neighbors);
329  bool any_part_overlap = false;
330  for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
331  const TBOX& n_box = n_it.data()->bounding_box();
332  int overlap = n_box.intersection(part_box).area();
333  if (overlap > 0 && overlap_grid != nullptr) {
334  if (*overlap_grid == nullptr) {
335  *overlap_grid = new ColPartitionGrid(gridsize(), bleft(), tright());
336  }
337  (*overlap_grid)->InsertBBox(true, true, n_it.data()->ShallowCopy());
338  if (!any_part_overlap) {
339  (*overlap_grid)->InsertBBox(true, true, part->ShallowCopy());
340  }
341  }
342  any_part_overlap = true;
343  total_overlap += overlap;
344  }
345  }
346  return total_overlap;
347 }
const ICOORD & bleft() const
Definition: bbgrid.h:73
int32_t area() const
Definition: rect.h:122
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
Definition: rect.h:34
const ICOORD & tright() const
Definition: bbgrid.h:76
TBOX intersection(const TBOX &box) const
Definition: rect.cpp:87
void FindOverlappingPartitions(const TBOX &box, const ColPartition *not_this, ColPartition_CLIST *parts)
int gridsize() const
Definition: bbgrid.h:64
void tesseract::ColPartitionGrid::DeleteNonLeaderParts ( )

Definition at line 1042 of file colpartitiongrid.cpp.

1042  {
1043  ColPartitionGridSearch gsearch(this);
1044  gsearch.StartFullSearch();
1045  ColPartition* part;
1046  while ((part = gsearch.NextFullSearch()) != nullptr) {
1047  if (part->flow() != BTFT_LEADER) {
1048  gsearch.RemoveBBox();
1049  if (part->ReleaseNonLeaderBoxes()) {
1050  InsertBBox(true, true, part);
1051  gsearch.RepositionIterator();
1052  } else {
1053  delete part;
1054  }
1055  }
1056  }
1057 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
void tesseract::ColPartitionGrid::DeleteParts ( )

Definition at line 1008 of file colpartitiongrid.cpp.

1008  {
1009  ColPartition_LIST dead_parts;
1010  ColPartition_IT dead_it(&dead_parts);
1011  ColPartitionGridSearch gsearch(this);
1012  gsearch.StartFullSearch();
1013  ColPartition* part;
1014  while ((part = gsearch.NextFullSearch()) != nullptr) {
1015  part->DisownBoxes();
1016  dead_it.add_to_end(part); // Parts will be deleted on return.
1017  }
1018  Clear();
1019 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
void tesseract::ColPartitionGrid::DeleteUnknownParts ( TO_BLOCK * block)

Definition at line 1023 of file colpartitiongrid.cpp.

1023  {
1024  ColPartitionGridSearch gsearch(this);
1025  gsearch.StartFullSearch();
1026  ColPartition* part;
1027  while ((part = gsearch.NextFullSearch()) != nullptr) {
1028  if (part->blob_type() == BRT_UNKNOWN) {
1029  gsearch.RemoveBBox();
1030  // Once marked, the blobs will be swept up by DeleteUnownedNoise.
1031  part->set_flow(BTFT_NONTEXT);
1032  part->set_blob_type(BRT_NOISE);
1033  part->SetBlobTypes();
1034  part->DisownBoxes();
1035  delete part;
1036  }
1037  }
1038  block->DeleteUnownedNoise();
1039 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
void DeleteUnownedNoise()
Definition: blobbox.cpp:1037
void tesseract::ColPartitionGrid::Deskew ( const FCOORD & deskew)

Definition at line 735 of file colpartitiongrid.cpp.

735  {
736  ColPartition_LIST parts;
737  ColPartition_IT part_it(&parts);
738  // Iterate the ColPartitions in the grid to extract them.
739  ColPartitionGridSearch gsearch(this);
740  gsearch.StartFullSearch();
741  ColPartition* part;
742  while ((part = gsearch.NextFullSearch()) != nullptr) {
743  part_it.add_after_then_move(part);
744  }
745  // Rebuild the grid to the new size.
746  TBOX grid_box(bleft_, tright_);
747  grid_box.rotate_large(deskew);
748  Init(gridsize(), grid_box.botleft(), grid_box.topright());
749  // Reinitializing the grid with rotated coords also clears all the
750  // pointers, so parts will now own the ColPartitions. (Briefly).
751  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
752  part = part_it.extract();
753  part->ComputeLimits();
754  InsertBBox(true, true, part);
755  }
756 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
Definition: rect.h:34
ICOORD tright_
Definition: bbgrid.h:92
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
int gridsize() const
Definition: bbgrid.h:64
void tesseract::ColPartitionGrid::ExtractPartitionsAsBlocks ( BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 674 of file colpartitiongrid.cpp.

675  {
676  TO_BLOCK_IT to_block_it(to_blocks);
677  BLOCK_IT block_it(blocks);
678  // All partitions will be put on this list and deleted on return.
679  ColPartition_LIST parts;
680  ColPartition_IT part_it(&parts);
681  // Iterate the ColPartitions in the grid to extract them.
682  ColPartitionGridSearch gsearch(this);
683  gsearch.StartFullSearch();
684  ColPartition* part;
685  while ((part = gsearch.NextFullSearch()) != nullptr) {
686  part_it.add_after_then_move(part);
687  // The partition has to be at least vaguely like text.
688  BlobRegionType blob_type = part->blob_type();
689  if (BLOBNBOX::IsTextType(blob_type) ||
690  (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) {
691  PolyBlockType type = blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT
692  : PT_FLOWING_TEXT;
693  // Get metrics from the row that will be used for the block.
694  TBOX box = part->bounding_box();
695  int median_width = part->median_width();
696  int median_height = part->median_height();
697  // Turn the partition into a TO_ROW.
698  TO_ROW* row = part->MakeToRow();
699  if (row == nullptr) {
700  // This partition is dead.
701  part->DeleteBoxes();
702  continue;
703  }
704  auto* block = new BLOCK("", true, 0, 0, box.left(), box.bottom(),
705  box.right(), box.top());
706  block->pdblk.set_poly_block(new POLY_BLOCK(box, type));
707  auto* to_block = new TO_BLOCK(block);
708  TO_ROW_IT row_it(to_block->get_rows());
709  row_it.add_after_then_move(row);
710  // We haven't differentially rotated vertical and horizontal text at
711  // this point, so use width or height as appropriate.
712  if (blob_type == BRT_VERT_TEXT) {
713  to_block->line_size = static_cast<float>(median_width);
714  to_block->line_spacing = static_cast<float>(box.width());
715  to_block->max_blob_size = static_cast<float>(box.width() + 1);
716  } else {
717  to_block->line_size = static_cast<float>(median_height);
718  to_block->line_spacing = static_cast<float>(box.height());
719  to_block->max_blob_size = static_cast<float>(box.height() + 1);
720  }
721  if (to_block->line_size == 0) to_block->line_size = 1;
722  block_it.add_to_end(block);
723  to_block_it.add_to_end(to_block);
724  } else {
725  // This partition is dead.
726  part->DeleteBoxes();
727  }
728  }
729  Clear();
730  // Now it is safe to delete the ColPartitions as parts goes out of scope.
731 }
int16_t top() const
Definition: rect.h:58
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
Definition: rect.h:34
int16_t height() const
Definition: rect.h:108
PolyBlockType
Definition: publictypes.h:53
BlobRegionType
Definition: blobbox.h:72
int16_t width() const
Definition: rect.h:115
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:191
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65
static bool IsTextType(BlobRegionType type)
Definition: blobbox.h:418
int16_t left() const
Definition: rect.h:72
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:58
Definition: ocrblock.h:29
void tesseract::ColPartitionGrid::FindFigureCaptions ( )

Definition at line 1060 of file colpartitiongrid.cpp.

1060  {
1061  // For each image region find its best candidate text caption region,
1062  // if any and mark it as such.
1063  ColPartitionGridSearch gsearch(this);
1064  gsearch.StartFullSearch();
1065  ColPartition* part;
1066  while ((part = gsearch.NextFullSearch()) != nullptr) {
1067  if (part->IsImageType()) {
1068  const TBOX& part_box = part->bounding_box();
1069  bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(),
1070  part_box.bottom());
1071  ColPartition* best_caption = nullptr;
1072  int best_dist = 0; // Distance to best_caption.
1073  int best_upper = 0; // Direction of best_caption.
1074  // Handle both lower and upper directions.
1075  for (int upper = 0; upper < 2; ++upper) {
1076  ColPartition_C_IT partner_it(upper ? part->upper_partners()
1077  : part->lower_partners());
1078  // If there are no image partners, then this direction is ok.
1079  for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
1080  partner_it.forward()) {
1081  ColPartition* partner = partner_it.data();
1082  if (partner->IsImageType()) {
1083  break;
1084  }
1085  }
1086  if (!partner_it.cycled_list()) continue;
1087  // Find the nearest totally overlapping text partner.
1088  for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
1089  partner_it.forward()) {
1090  ColPartition* partner = partner_it.data();
1091  if (!partner->IsTextType() || partner->type() == PT_TABLE) continue;
1092  const TBOX& partner_box = partner->bounding_box();
1093  if (debug) {
1094  tprintf("Finding figure captions for image part:");
1095  part_box.print();
1096  tprintf("Considering partner:");
1097  partner_box.print();
1098  }
1099  if (partner_box.left() >= part_box.left() &&
1100  partner_box.right() <= part_box.right()) {
1101  int dist = partner_box.y_gap(part_box);
1102  if (best_caption == nullptr || dist < best_dist) {
1103  best_dist = dist;
1104  best_caption = partner;
1105  best_upper = upper;
1106  }
1107  }
1108  }
1109  }
1110  if (best_caption != nullptr) {
1111  if (debug) {
1112  tprintf("Best caption candidate:");
1113  best_caption->bounding_box().print();
1114  }
1115  // We have a candidate caption. Qualify it as being separable from
1116  // any body text. We are looking for either a small number of lines
1117  // or a big gap that indicates a separation from the body text.
1118  int line_count = 0;
1119  int biggest_gap = 0;
1120  int smallest_gap = INT16_MAX;
1121  int total_height = 0;
1122  int mean_height = 0;
1123  ColPartition* end_partner = nullptr;
1124  ColPartition* next_partner = nullptr;
1125  for (ColPartition* partner = best_caption; partner != nullptr &&
1126  line_count <= kMaxCaptionLines;
1127  partner = next_partner) {
1128  if (!partner->IsTextType()) {
1129  end_partner = partner;
1130  break;
1131  }
1132  ++line_count;
1133  total_height += partner->bounding_box().height();
1134  next_partner = partner->SingletonPartner(best_upper);
1135  if (next_partner != nullptr) {
1136  int gap = partner->bounding_box().y_gap(
1137  next_partner->bounding_box());
1138  if (gap > biggest_gap) {
1139  biggest_gap = gap;
1140  end_partner = next_partner;
1141  mean_height = total_height / line_count;
1142  } else if (gap < smallest_gap) {
1143  smallest_gap = gap;
1144  }
1145  // If the gap looks big compared to the text size and the smallest
1146  // gap seen so far, then we can stop.
1147  if (biggest_gap > mean_height * kMinCaptionGapHeightRatio &&
1148  biggest_gap > smallest_gap * kMinCaptionGapRatio)
1149  break;
1150  }
1151  }
1152  if (debug) {
1153  tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n",
1154  line_count, biggest_gap, smallest_gap, mean_height);
1155  if (end_partner != nullptr) {
1156  tprintf("End partner:");
1157  end_partner->bounding_box().print();
1158  }
1159  }
1160  if (next_partner == nullptr && line_count <= kMaxCaptionLines)
1161  end_partner = nullptr; // No gap, but line count is small.
1162  if (line_count <= kMaxCaptionLines) {
1163  // This is a qualified caption. Mark the text as caption.
1164  for (ColPartition* partner = best_caption; partner != nullptr &&
1165  partner != end_partner;
1166  partner = next_partner) {
1167  partner->set_type(PT_CAPTION_TEXT);
1168  partner->SetBlobTypes();
1169  if (debug) {
1170  tprintf("Set caption type for partition:");
1171  partner->bounding_box().print();
1172  }
1173  next_partner = partner->SingletonPartner(best_upper);
1174  }
1175  }
1176  }
1177  }
1178  }
1179 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
Definition: rect.h:34
void print() const
Definition: rect.h:278
const int kMaxCaptionLines
Definition: capi.h:134
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int y_gap(const TBOX &box) const
Definition: rect.h:233
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65
int16_t left() const
Definition: rect.h:72
const double kMinCaptionGapRatio
static bool WithinTestRegion(int detail_level, int x, int y)
const double kMinCaptionGapHeightRatio
void tesseract::ColPartitionGrid::FindOverlappingPartitions ( const TBOX & box,
const ColPartition * not_this,
ColPartition_CLIST *  parts 
)

Definition at line 352 of file colpartitiongrid.cpp.

354  {
355  ColPartitionGridSearch rsearch(this);
356  rsearch.StartRectSearch(box);
357  ColPartition* part;
358  while ((part = rsearch.NextRectSearch()) != nullptr) {
359  if (part != not_this)
360  parts->add_sorted(SortByBoxLeft<ColPartition>, true, part);
361  }
362 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
void tesseract::ColPartitionGrid::FindPartitionPartners ( )

Definition at line 1185 of file colpartitiongrid.cpp.

1185  {
1186  ColPartitionGridSearch gsearch(this);
1187  gsearch.StartFullSearch();
1188  ColPartition* part;
1189  while ((part = gsearch.NextFullSearch()) != nullptr) {
1190  if (part->IsVerticalType()) {
1191  FindVPartitionPartners(true, part);
1192  FindVPartitionPartners(false, part);
1193  } else {
1194  FindPartitionPartners(true, part);
1195  FindPartitionPartners(false, part);
1196  }
1197  }
1198 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
void FindVPartitionPartners(bool to_the_left, ColPartition *part)
void tesseract::ColPartitionGrid::FindPartitionPartners ( bool  upper,
ColPartition * part
)

Definition at line 1202 of file colpartitiongrid.cpp.

1202  {
1203  if (part->type() == PT_NOISE)
1204  return; // Noise is not allowed to partner anything.
1205  const TBOX& box = part->bounding_box();
1206  int top = part->median_top();
1207  int bottom = part->median_bottom();
1208  int height = top - bottom;
1209  int mid_y = (bottom + top) / 2;
1210  ColPartitionGridSearch vsearch(this);
1211  // Search down for neighbour below
1212  vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY());
1213  ColPartition* neighbour;
1214  ColPartition* best_neighbour = nullptr;
1215  int best_dist = INT32_MAX;
1216  while ((neighbour = vsearch.NextVerticalSearch(!upper)) != nullptr) {
1217  if (neighbour == part || neighbour->type() == PT_NOISE)
1218  continue; // Noise is not allowed to partner anything.
1219  int neighbour_bottom = neighbour->median_bottom();
1220  int neighbour_top = neighbour->median_top();
1221  int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
1222  if (upper != (neighbour_y > mid_y))
1223  continue;
1224  if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour))
1225  continue;
1226  if (!part->TypesMatch(*neighbour)) {
1227  if (best_neighbour == nullptr)
1228  best_neighbour = neighbour;
1229  continue;
1230  }
1231  int dist = upper ? neighbour_bottom - top : bottom - neighbour_top;
1232  if (dist <= kMaxPartitionSpacing * height) {
1233  if (dist < best_dist) {
1234  best_dist = dist;
1235  best_neighbour = neighbour;
1236  }
1237  } else {
1238  break;
1239  }
1240  }
1241  if (best_neighbour != nullptr)
1242  part->AddPartner(upper, best_neighbour);
1243 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
Definition: rect.h:34
const double kMaxPartitionSpacing
int16_t right() const
Definition: rect.h:79
int16_t left() const
Definition: rect.h:72
Definition: capi.h:142
void tesseract::ColPartitionGrid::FindVPartitionPartners ( bool  to_the_left,
ColPartition * part
)

Definition at line 1247 of file colpartitiongrid.cpp.

1248  {
1249  if (part->type() == PT_NOISE)
1250  return; // Noise is not allowed to partner anything.
1251  const TBOX& box = part->bounding_box();
1252  int left = part->median_left();
1253  int right = part->median_right();
1254  int width = right >= left ? right - left : -1;
1255  int mid_x = (left + right) / 2;
1256  ColPartitionGridSearch hsearch(this);
1257  // Search left for neighbour to_the_left
1258  hsearch.StartSideSearch(mid_x, box.bottom(), box.top());
1259  ColPartition* neighbour;
1260  ColPartition* best_neighbour = nullptr;
1261  int best_dist = INT32_MAX;
1262  while ((neighbour = hsearch.NextSideSearch(to_the_left)) != nullptr) {
1263  if (neighbour == part || neighbour->type() == PT_NOISE)
1264  continue; // Noise is not allowed to partner anything.
1265  int neighbour_left = neighbour->median_left();
1266  int neighbour_right = neighbour->median_right();
1267  int neighbour_x = (neighbour_left + neighbour_right) / 2;
1268  if (to_the_left != (neighbour_x < mid_x))
1269  continue;
1270  if (!part->VOverlaps(*neighbour))
1271  continue;
1272  if (!part->TypesMatch(*neighbour))
1273  continue; // Only match to other vertical text.
1274  int dist = to_the_left ? left - neighbour_right : neighbour_left - right;
1275  if (dist <= kMaxPartitionSpacing * width) {
1276  if (dist < best_dist || best_neighbour == nullptr) {
1277  best_dist = dist;
1278  best_neighbour = neighbour;
1279  }
1280  } else {
1281  break;
1282  }
1283  }
1284  // For vertical partitions, the upper partner is to the left, and lower is
1285  // to the right.
1286  if (best_neighbour != nullptr)
1287  part->AddPartner(to_the_left, best_neighbour);
1288 }
int16_t top() const
Definition: rect.h:58
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
Definition: rect.h:34
const double kMaxPartitionSpacing
int16_t bottom() const
Definition: rect.h:65
Definition: capi.h:142
void tesseract::ColPartitionGrid::GridFindMargins ( ColPartitionSet **  best_columns)

Definition at line 966 of file colpartitiongrid.cpp.

966  {
967  // Iterate the ColPartitions in the grid.
968  ColPartitionGridSearch gsearch(this);
969  gsearch.StartFullSearch();
970  ColPartition* part;
971  while ((part = gsearch.NextFullSearch()) != nullptr) {
972  // Set up a rectangle search x-bounded by the column and y by the part.
973  ColPartitionSet* columns = best_columns != nullptr
974  ? best_columns[gsearch.GridY()]
975  : nullptr;
976  FindPartitionMargins(columns, part);
977  const TBOX& box = part->bounding_box();
978  if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) {
979  tprintf("Computed margins for part:");
980  part->Print();
981  }
982  }
983 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
Definition: rect.h:34
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int16_t bottom() const
Definition: rect.h:65
int16_t left() const
Definition: rect.h:72
static bool WithinTestRegion(int detail_level, int x, int y)
bool tesseract::ColPartitionGrid::GridSmoothNeighbours ( BlobTextFlowType  source_type,
Pix *  nontext_map,
const TBOX & im_box,
const FCOORD & rerotation
)

Definition at line 624 of file colpartitiongrid.cpp.

627  {
628  // Iterate the ColPartitions in the grid.
629  ColPartitionGridSearch gsearch(this);
630  gsearch.StartFullSearch();
631  ColPartition* part;
632  bool any_changed = false;
633  while ((part = gsearch.NextFullSearch()) != nullptr) {
634  if (part->flow() != source_type || BLOBNBOX::IsLineType(part->blob_type()))
635  continue;
636  const TBOX& box = part->bounding_box();
637  bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
638  if (SmoothRegionType(nontext_map, im_box, rotation, debug, part))
639  any_changed = true;
640  }
641  return any_changed;
642 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
Definition: rect.h:34
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:426
int16_t bottom() const
Definition: rect.h:65
int16_t left() const
Definition: rect.h:72
static bool WithinTestRegion(int detail_level, int x, int y)
void tesseract::ColPartitionGrid::HandleClick ( int  x,
int  y 
)
override virtual

Reimplemented from tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >.

Definition at line 75 of file colpartitiongrid.cpp.

75  {
76  BBGrid<ColPartition,
77  ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x, y);
78  // Run a radial search for partitions that overlap.
79  ColPartitionGridSearch radsearch(this);
80  radsearch.SetUniqueMode(true);
81  radsearch.StartRadSearch(x, y, 1);
82  ColPartition* neighbour;
83  FCOORD click(x, y);
84  while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
85  const TBOX& nbox = neighbour->bounding_box();
86  if (nbox.contains(click)) {
87  tprintf("Block box:");
88  neighbour->bounding_box().print();
89  neighbour->Print();
90  }
91  }
92 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
Definition: rect.h:34
Definition: points.h:188
bool contains(const FCOORD pt) const
Definition: rect.h:333
void HandleClick(int x, int y) override
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
void tesseract::ColPartitionGrid::ListFindMargins ( ColPartitionSet **  best_columns,
ColPartition_LIST *  parts 
)

Definition at line 990 of file colpartitiongrid.cpp.

991  {
992  ColPartition_IT part_it(parts);
993  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
994  ColPartition* part = part_it.data();
995  ColPartitionSet* columns = nullptr;
996  if (best_columns != nullptr) {
997  const TBOX& part_box = part->bounding_box();
998  // Get the columns from the y grid coord.
999  int grid_x, grid_y;
1000  GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
1001  columns = best_columns[grid_y];
1002  }
1003  FindPartitionMargins(columns, part);
1004  }
1005 }
Definition: rect.h:34
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
Definition: bbgrid.cpp:52
int16_t bottom() const
Definition: rect.h:65
int16_t left() const
Definition: rect.h:72
bool tesseract::ColPartitionGrid::MakeColPartSets ( PartSetVector *  part_sets)

Definition at line 783 of file colpartitiongrid.cpp.

783  {
784  auto* part_lists = new ColPartition_LIST[gridheight()];
785  part_sets->reserve(gridheight());
786  // Iterate the ColPartitions in the grid to get parts onto lists for the
787  // y bottom of each.
788  ColPartitionGridSearch gsearch(this);
789  gsearch.StartFullSearch();
790  ColPartition* part;
791  bool any_parts_found = false;
792  while ((part = gsearch.NextFullSearch()) != nullptr) {
793  BlobRegionType blob_type = part->blob_type();
794  if (blob_type != BRT_NOISE &&
795  (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
796  int grid_x, grid_y;
797  const TBOX& part_box = part->bounding_box();
798  GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
799  ColPartition_IT part_it(&part_lists[grid_y]);
800  part_it.add_to_end(part);
801  any_parts_found = true;
802  }
803  }
804  if (any_parts_found) {
805  for (int grid_y = 0; grid_y < gridheight(); ++grid_y) {
806  ColPartitionSet* line_set = nullptr;
807  if (!part_lists[grid_y].empty()) {
808  line_set = new ColPartitionSet(&part_lists[grid_y]);
809  }
810  part_sets->push_back(line_set);
811  }
812  }
813  delete [] part_lists;
814  return any_parts_found;
815 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
Definition: rect.h:34
int gridheight() const
Definition: bbgrid.h:70
BlobRegionType
Definition: blobbox.h:72
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
Definition: bbgrid.cpp:52
int16_t bottom() const
Definition: rect.h:65
int16_t left() const
Definition: rect.h:72
ColPartitionSet * tesseract::ColPartitionGrid::MakeSingleColumnSet ( WidthCallback *  cb)

Definition at line 821 of file colpartitiongrid.cpp.

821  {
822  ColPartition* single_column_part = nullptr;
823  // Iterate the ColPartitions in the grid to get parts onto lists for the
824  // y bottom of each.
825  ColPartitionGridSearch gsearch(this);
826  gsearch.StartFullSearch();
827  ColPartition* part;
828  while ((part = gsearch.NextFullSearch()) != nullptr) {
829  BlobRegionType blob_type = part->blob_type();
830  if (blob_type != BRT_NOISE &&
831  (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
832  // Consider for single column.
833  BlobTextFlowType flow = part->flow();
834  if ((blob_type == BRT_TEXT &&
835  (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
836  flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) ||
837  blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) {
838  if (single_column_part == nullptr) {
839  single_column_part = part->ShallowCopy();
840  single_column_part->set_blob_type(BRT_TEXT);
841  // Copy the tabs from itself to properly setup the margins.
842  single_column_part->CopyLeftTab(*single_column_part, false);
843  single_column_part->CopyRightTab(*single_column_part, false);
844  } else {
845  if (part->left_key() < single_column_part->left_key())
846  single_column_part->CopyLeftTab(*part, false);
847  if (part->right_key() > single_column_part->right_key())
848  single_column_part->CopyRightTab(*part, false);
849  }
850  }
851  }
852  }
853  if (single_column_part != nullptr) {
854  // Make a ColPartitionSet out of the single_column_part as a candidate
855  // for the single column case.
856  single_column_part->SetColumnGoodness(cb);
857  return new ColPartitionSet(single_column_part);
858  }
859  return nullptr;
860 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
BlobRegionType
Definition: blobbox.h:72
BlobTextFlowType
Definition: blobbox.h:114
bool tesseract::ColPartitionGrid::MergePart ( TessResultCallback2< bool, ColPartition *, TBOX * > *  box_cb,
TessResultCallback2< bool, const ColPartition *, const ColPartition * > *  confirm_cb,
ColPartition *  part 
)

Definition at line 122 of file colpartitiongrid.cpp.

126  {
127  if (part->IsUnMergeableType())
128  return false;
129  bool any_done = false;
130  // Repeatedly merge part while we find a best merge candidate that works.
131  bool merge_done = false;
132  do {
133  merge_done = false;
134  TBOX box = part->bounding_box();
135  bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
136  if (debug) {
137  tprintf("Merge candidate:");
138  box.print();
139  }
140  // Set up a rectangle search bounded by the part.
141  if (!box_cb->Run(part, &box))
142  continue;
143  // Create a list of merge candidates.
144  ColPartition_CLIST merge_candidates;
145  FindMergeCandidates(part, box, debug, &merge_candidates);
146  // Find the best merge candidate based on minimal overlap increase.
147  int overlap_increase;
148  ColPartition* neighbour = BestMergeCandidate(part, &merge_candidates, debug,
149  confirm_cb,
150  &overlap_increase);
151  if (neighbour != nullptr && overlap_increase <= 0) {
152  if (debug) {
153  tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
154  part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour),
155  overlap_increase);
156  }
157  // Looks like a good candidate so merge it.
158  RemoveBBox(neighbour);
159  // We will modify the box of part, so remove it from the grid, merge
160  // it and then re-insert it into the grid.
161  RemoveBBox(part);
162  part->Absorb(neighbour, nullptr);
163  InsertBBox(true, true, part);
164  merge_done = true;
165  any_done = true;
166  } else if (neighbour != nullptr) {
167  if (debug) {
168  tprintf("Overlapped when merged with increase %d: ", overlap_increase);
169  neighbour->bounding_box().print();
170  }
171  } else if (debug) {
172  tprintf("No candidate neighbour returned\n");
173  }
174  } while (merge_done);
175  return any_done;
176 }
Definition: rect.h:34
void print() const
Definition: rect.h:278
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
ColPartition * BestMergeCandidate(const ColPartition *part, ColPartition_CLIST *candidates, bool debug, TessResultCallback2< bool, const ColPartition *, const ColPartition * > *confirm_cb, int *overlap_increase)
int16_t bottom() const
Definition: rect.h:65
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
int16_t left() const
Definition: rect.h:72
virtual R Run(A1, A2)=0
static bool WithinTestRegion(int detail_level, int x, int y)
void tesseract::ColPartitionGrid::Merges ( TessResultCallback2< bool, ColPartition *, TBOX * > *  box_cb,
TessResultCallback2< bool, const ColPartition *, const ColPartition * > *  confirm_cb 
)

Definition at line 101 of file colpartitiongrid.cpp.

104  {
105  // Iterate the ColPartitions in the grid.
106  ColPartitionGridSearch gsearch(this);
107  gsearch.StartFullSearch();
108  ColPartition* part;
109  while ((part = gsearch.NextFullSearch()) != nullptr) {
110  if (MergePart(box_cb, confirm_cb, part))
111  gsearch.RepositionIterator();
112  }
113  delete box_cb;
114  delete confirm_cb;
115 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
bool MergePart(TessResultCallback2< bool, ColPartition *, TBOX * > *box_cb, TessResultCallback2< bool, const ColPartition *, const ColPartition * > *confirm_cb, ColPartition *part)
void tesseract::ColPartitionGrid::RecomputeBounds ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright,
const ICOORD &  vertical 
)

Definition at line 937 of file colpartitiongrid.cpp.

940  {
941  ColPartition_LIST saved_parts;
942  ColPartition_IT part_it(&saved_parts);
943  // Iterate the ColPartitions in the grid to get parts onto a list.
944  ColPartitionGridSearch gsearch(this);
945  gsearch.StartFullSearch();
946  ColPartition* part;
947  while ((part = gsearch.NextFullSearch()) != nullptr) {
948  part_it.add_to_end(part);
949  }
950  // Reinitialize grid to the new size.
951  Init(gridsize, bleft, tright);
952  // Recompute the bounds of the parts and put them back in the new grid.
953  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
954  part = part_it.extract();
955  part->set_vertical(vertical);
956  part->ComputeLimits();
957  InsertBBox(true, true, part);
958  }
959 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
int gridsize() const
Definition: bbgrid.h:64
void tesseract::ColPartitionGrid::RefinePartitionPartners ( bool  get_desperate)

Definition at line 1293 of file colpartitiongrid.cpp.

1293  {
1294  ColPartitionGridSearch gsearch(this);
1295  // Refine in type order so that chasing multiple partners can be done
1296  // before eliminating type mis-matching partners.
1297  for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) {
1298  // Iterate the ColPartitions in the grid.
1299  gsearch.StartFullSearch();
1300  ColPartition* part;
1301  while ((part = gsearch.NextFullSearch()) != nullptr) {
1302  part->RefinePartners(static_cast<PolyBlockType>(type),
1303  get_desperate, this);
1304  // Iterator may have been messed up by a merge.
1305  gsearch.RepositionIterator();
1306  }
1307  }
1308 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
Definition: capi.h:143
void tesseract::ColPartitionGrid::ReflectInYAxis ( )

Definition at line 646 of file colpartitiongrid.cpp.

646  {
647  ColPartition_LIST parts;
648  ColPartition_IT part_it(&parts);
649  // Iterate the ColPartitions in the grid to extract them.
650  ColPartitionGridSearch gsearch(this);
651  gsearch.StartFullSearch();
652  ColPartition* part;
653  while ((part = gsearch.NextFullSearch()) != nullptr) {
654  part_it.add_after_then_move(part);
655  }
656  ICOORD bot_left(-tright().x(), bleft().y());
657  ICOORD top_right(-bleft().x(), tright().y());
658  // Reinitializing the grid with reflected coords also clears all the
659  // pointers, so parts will now own the ColPartitions. (Briefly).
660  Init(gridsize(), bot_left, top_right);
661  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
662  part = part_it.extract();
663  part->ReflectInYAxis();
664  InsertBBox(true, true, part);
665  }
666 }
const ICOORD & bleft() const
Definition: bbgrid.h:73
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
const ICOORD & tright() const
Definition: bbgrid.h:76
integer coordinate
Definition: points.h:31
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
int gridsize() const
Definition: bbgrid.h:64
void tesseract::ColPartitionGrid::ReTypeBlobs ( BLOBNBOX_LIST *  im_blobs)

Definition at line 876 of file colpartitiongrid.cpp.

876  {
877  BLOBNBOX_IT im_blob_it(im_blobs);
878  ColPartition_LIST dead_parts;
879  ColPartition_IT dead_part_it(&dead_parts);
880  // Iterate the ColPartitions in the grid.
881  ColPartitionGridSearch gsearch(this);
882  gsearch.StartFullSearch();
883  ColPartition* part;
884  while ((part = gsearch.NextFullSearch()) != nullptr) {
885  BlobRegionType blob_type = part->blob_type();
886  BlobTextFlowType flow = part->flow();
887  bool any_blobs_moved = false;
888  if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) {
889  BLOBNBOX_C_IT blob_it(part->boxes());
890  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
891  BLOBNBOX* blob = blob_it.data();
892  im_blob_it.add_after_then_move(blob);
893  }
894  } else if (blob_type != BRT_NOISE) {
895  // Make sure the blobs are marked with the correct type and flow.
896  BLOBNBOX_C_IT blob_it(part->boxes());
897  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
898  BLOBNBOX* blob = blob_it.data();
899  if (blob->region_type() == BRT_NOISE) {
900  // TODO(rays) Deprecated. Change this section to an assert to verify
901  // and then delete.
902  ASSERT_HOST(blob->cblob()->area() != 0);
903  blob->set_owner(nullptr);
904  blob_it.extract();
905  any_blobs_moved = true;
906  } else {
907  blob->set_region_type(blob_type);
908  if (blob->flow() != BTFT_LEADER)
909  blob->set_flow(flow);
910  }
911  }
912  }
913  if (blob_type == BRT_NOISE || part->boxes()->empty()) {
914  BLOBNBOX_C_IT blob_it(part->boxes());
915  part->DisownBoxes();
916  dead_part_it.add_to_end(part);
917  gsearch.RemoveBBox();
918  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
919  BLOBNBOX* blob = blob_it.data();
920  if (blob->cblob()->area() == 0) {
921  // Any blob with zero area is a fake image blob and should be deleted.
922  delete blob->cblob();
923  delete blob;
924  }
925  }
926  } else if (any_blobs_moved) {
927  gsearch.RemoveBBox();
928  part->ComputeLimits();
929  InsertBBox(true, true, part);
930  gsearch.RepositionIterator();
931  }
932  }
933 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
C_BLOB * cblob() const
Definition: blobbox.h:268
BlobTextFlowType flow() const
Definition: blobbox.h:295
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:355
BlobRegionType
Definition: blobbox.h:72
BlobRegionType region_type() const
Definition: blobbox.h:283
BlobTextFlowType
Definition: blobbox.h:114
#define ASSERT_HOST(x)
Definition: errcode.h:88
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:286
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:298
int32_t area()
Definition: stepblob.cpp:273
void tesseract::ColPartitionGrid::SetTabStops ( TabFind *  tabgrid)

Definition at line 759 of file colpartitiongrid.cpp.

759  {
760  // Iterate the ColPartitions in the grid.
761  ColPartitionGridSearch gsearch(this);
762  gsearch.StartFullSearch();
763  ColPartition* part;
764  while ((part = gsearch.NextFullSearch()) != nullptr) {
765  const TBOX& part_box = part->bounding_box();
766  TabVector* left_line = tabgrid->LeftTabForBox(part_box, true, false);
767  // If the overlapping line is not a left tab, try for non-overlapping.
768  if (left_line != nullptr && !left_line->IsLeftTab())
769  left_line = tabgrid->LeftTabForBox(part_box, false, false);
770  if (left_line != nullptr && left_line->IsLeftTab())
771  part->SetLeftTab(left_line);
772  TabVector* right_line = tabgrid->RightTabForBox(part_box, true, false);
773  if (right_line != nullptr && !right_line->IsRightTab())
774  right_line = tabgrid->RightTabForBox(part_box, false, false);
775  if (right_line != nullptr && right_line->IsRightTab())
776  part->SetRightTab(right_line);
777  part->SetColumnGoodness(tabgrid->WidthCB());
778  }
779 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
Definition: rect.h:34
void tesseract::ColPartitionGrid::SplitOverlappingPartitions ( ColPartition_LIST *  big_parts)

Definition at line 514 of file colpartitiongrid.cpp.

515  {
516  int ok_overlap =
517  static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
518  // Iterate the ColPartitions in the grid.
519  ColPartitionGridSearch gsearch(this);
520  gsearch.StartFullSearch();
521  ColPartition* part;
522  while ((part = gsearch.NextFullSearch()) != nullptr) {
523  // Set up a rectangle search bounded by the part.
524  const TBOX& box = part->bounding_box();
525  ColPartitionGridSearch rsearch(this);
526  rsearch.SetUniqueMode(true);
527  rsearch.StartRectSearch(box);
528  int unresolved_overlaps = 0;
529 
530  ColPartition* neighbour;
531  while ((neighbour = rsearch.NextRectSearch()) != nullptr) {
532  if (neighbour == part)
533  continue;
534  const TBOX& neighbour_box = neighbour->bounding_box();
535  if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) &&
536  part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false))
537  continue; // The overlap is OK both ways.
538 
539  // If removal of the biggest box from either partition eliminates the
540  // overlap, and it is much bigger than the box left behind, then
541  // it is either a drop-cap, an inter-line join, or some junk that
542  // we don't want anyway, so put it in the big_parts list.
543  if (!part->IsSingleton()) {
544  BLOBNBOX* excluded = part->BiggestBox();
545  TBOX shrunken = part->BoundsWithoutBox(excluded);
546  if (!shrunken.overlap(neighbour_box) &&
547  excluded->bounding_box().height() >
548  kBigPartSizeRatio * shrunken.height()) {
549  // Removing the biggest box fixes the overlap, so do it!
550  gsearch.RemoveBBox();
551  RemoveBadBox(excluded, part, big_parts);
552  InsertBBox(true, true, part);
553  gsearch.RepositionIterator();
554  break;
555  }
556  } else if (box.contains(neighbour_box)) {
557  ++unresolved_overlaps;
558  continue; // No amount of splitting will fix it.
559  }
560  if (!neighbour->IsSingleton()) {
561  BLOBNBOX* excluded = neighbour->BiggestBox();
562  TBOX shrunken = neighbour->BoundsWithoutBox(excluded);
563  if (!shrunken.overlap(box) &&
564  excluded->bounding_box().height() >
565  kBigPartSizeRatio * shrunken.height()) {
566  // Removing the biggest box fixes the overlap, so do it!
567  rsearch.RemoveBBox();
568  RemoveBadBox(excluded, neighbour, big_parts);
569  InsertBBox(true, true, neighbour);
570  gsearch.RepositionIterator();
571  break;
572  }
573  }
574  int part_overlap_count = part->CountOverlappingBoxes(neighbour_box);
575  int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box);
576  ColPartition* right_part = nullptr;
577  if (neighbour_overlap_count <= part_overlap_count ||
578  part->IsSingleton()) {
579  // Try to split the neighbour to reduce overlap.
580  BLOBNBOX* split_blob = neighbour->OverlapSplitBlob(box);
581  if (split_blob != nullptr) {
582  rsearch.RemoveBBox();
583  right_part = neighbour->SplitAtBlob(split_blob);
584  InsertBBox(true, true, neighbour);
585  ASSERT_HOST(right_part != nullptr);
586  }
587  } else {
588  // Try to split part to reduce overlap.
589  BLOBNBOX* split_blob = part->OverlapSplitBlob(neighbour_box);
590  if (split_blob != nullptr) {
591  gsearch.RemoveBBox();
592  right_part = part->SplitAtBlob(split_blob);
593  InsertBBox(true, true, part);
594  ASSERT_HOST(right_part != nullptr);
595  }
596  }
597  if (right_part != nullptr) {
598  InsertBBox(true, true, right_part);
599  gsearch.RepositionIterator();
600  rsearch.RepositionIterator();
601  break;
602  }
603  }
604  if (unresolved_overlaps > 2 && part->IsSingleton()) {
605  // This part is no good so just add to big_parts.
606  RemoveBBox(part);
607  ColPartition_IT big_it(big_parts);
608  part->set_block_owned(true);
609  big_it.add_to_end(part);
610  gsearch.RepositionIterator();
611  }
612  }
613 }
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
Definition: rect.h:34
const TBOX & bounding_box() const
Definition: blobbox.h:230
bool contains(const FCOORD pt) const
Definition: rect.h:333
const double kBigPartSizeRatio
int16_t height() const
Definition: rect.h:108
const double kTinyEnoughTextlineOverlapFraction
bool overlap(const TBOX &box) const
Definition: rect.h:355
#define ASSERT_HOST(x)
Definition: errcode.h:88
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
int gridsize() const
Definition: bbgrid.h:64

The documentation for this class was generated from the following files: