tesseract  4.1.0
tesseract::ColPartition Class Reference

#include <colpartition.h>

Inheritance diagram for tesseract::ColPartition:
ELIST2_LINK

Public Member Functions

 ColPartition ()=default
 
 ColPartition (BlobRegionType blob_type, const ICOORD &vertical)
 
 ~ColPartition ()
 
const TBOX & bounding_box () const
 
int left_margin () const
 
void set_left_margin (int margin)
 
int right_margin () const
 
void set_right_margin (int margin)
 
int median_top () const
 
int median_bottom () const
 
int median_left () const
 
int median_right () const
 
int median_height () const
 
void set_median_height (int height)
 
int median_width () const
 
void set_median_width (int width)
 
BlobRegionType blob_type () const
 
void set_blob_type (BlobRegionType t)
 
BlobTextFlowType flow () const
 
void set_flow (BlobTextFlowType f)
 
int good_blob_score () const
 
bool good_width () const
 
bool good_column () const
 
bool left_key_tab () const
 
int left_key () const
 
bool right_key_tab () const
 
int right_key () const
 
PolyBlockType type () const
 
void set_type (PolyBlockType t)
 
BLOBNBOX_CLIST * boxes ()
 
int boxes_count () const
 
void set_vertical (const ICOORD &v)
 
ColPartition_CLIST * upper_partners ()
 
ColPartition_CLIST * lower_partners ()
 
void set_working_set (WorkingPartSet *working_set)
 
bool block_owned () const
 
void set_block_owned (bool owned)
 
bool desperately_merged () const
 
ColPartitionSet * column_set () const
 
void set_side_step (int step)
 
int bottom_spacing () const
 
void set_bottom_spacing (int spacing)
 
int top_spacing () const
 
void set_top_spacing (int spacing)
 
void set_table_type ()
 
void clear_table_type ()
 
bool inside_table_column ()
 
void set_inside_table_column (bool val)
 
ColPartition * nearest_neighbor_above () const
 
void set_nearest_neighbor_above (ColPartition *part)
 
ColPartition * nearest_neighbor_below () const
 
void set_nearest_neighbor_below (ColPartition *part)
 
int space_above () const
 
void set_space_above (int space)
 
int space_below () const
 
void set_space_below (int space)
 
int space_to_left () const
 
void set_space_to_left (int space)
 
int space_to_right () const
 
void set_space_to_right (int space)
 
uint8_t * color1 ()
 
uint8_t * color2 ()
 
bool owns_blobs () const
 
void set_owns_blobs (bool owns_blobs)
 
int MidY () const
 
int MedianY () const
 
int MidX () const
 
int SortKey (int x, int y) const
 
int XAtY (int sort_key, int y) const
 
int KeyWidth (int left_key, int right_key) const
 
int ColumnWidth () const
 
int BoxLeftKey () const
 
int BoxRightKey () const
 
int LeftAtY (int y) const
 
int RightAtY (int y) const
 
bool IsLeftOf (const ColPartition &other) const
 
bool ColumnContains (int x, int y) const
 
bool IsEmpty () const
 
bool IsSingleton () const
 
bool HOverlaps (const ColPartition &other) const
 
bool VOverlaps (const ColPartition &other) const
 
int VCoreOverlap (const ColPartition &other) const
 
int HCoreOverlap (const ColPartition &other) const
 
bool VSignificantCoreOverlap (const ColPartition &other) const
 
bool WithinSameMargins (const ColPartition &other) const
 
bool TypesMatch (const ColPartition &other) const
 
bool IsLineType () const
 
bool IsImageType () const
 
bool IsTextType () const
 
bool IsPulloutType () const
 
bool IsVerticalType () const
 
bool IsHorizontalType () const
 
bool IsUnMergeableType () const
 
bool IsVerticalLine () const
 
bool IsHorizontalLine () const
 
void AddBox (BLOBNBOX *box)
 
void RemoveBox (BLOBNBOX *box)
 
BLOBNBOX * BiggestBox ()
 
TBOX BoundsWithoutBox (BLOBNBOX *box)
 
void ClaimBoxes ()
 
void DisownBoxes ()
 
void DisownBoxesNoAssert ()
 
bool ReleaseNonLeaderBoxes ()
 
void DeleteBoxes ()
 
void ReflectInYAxis ()
 
bool IsLegal ()
 
bool MatchingColumns (const ColPartition &other) const
 
bool MatchingTextColor (const ColPartition &other) const
 
bool MatchingSizes (const ColPartition &other) const
 
bool ConfirmNoTabViolation (const ColPartition &other) const
 
bool MatchingStrokeWidth (const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
 
bool OKDiacriticMerge (const ColPartition &candidate, bool debug) const
 
void SetLeftTab (const TabVector *tab_vector)
 
void SetRightTab (const TabVector *tab_vector)
 
void CopyLeftTab (const ColPartition &src, bool take_box)
 
void CopyRightTab (const ColPartition &src, bool take_box)
 
int LeftBlobRule () const
 
int RightBlobRule () const
 
float SpecialBlobsDensity (const BlobSpecialTextType type) const
 
int SpecialBlobsCount (const BlobSpecialTextType type)
 
void SetSpecialBlobsDensity (const BlobSpecialTextType type, const float density)
 
void ComputeSpecialBlobsDensity ()
 
void AddPartner (bool upper, ColPartition *partner)
 
void RemovePartner (bool upper, ColPartition *partner)
 
ColPartition * SingletonPartner (bool upper)
 
void Absorb (ColPartition *other, WidthCallback *cb)
 
bool OKMergeOverlap (const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
 
BLOBNBOX * OverlapSplitBlob (const TBOX &box)
 
ColPartition * SplitAtBlob (BLOBNBOX *split_blob)
 
ColPartition * SplitAt (int split_x)
 
void ComputeLimits ()
 
int CountOverlappingBoxes (const TBOX &box)
 
void SetPartitionType (int resolution, ColPartitionSet *columns)
 
PolyBlockType PartitionType (ColumnSpanningType flow) const
 
void ColumnRange (int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
 
void SetColumnGoodness (WidthCallback *cb)
 
bool MarkAsLeaderIfMonospaced ()
 
void SetRegionAndFlowTypesFromProjectionValue (int value)
 
void SetBlobTypes ()
 
bool HasGoodBaseline ()
 
void AddToWorkingSet (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
 
TO_ROW * MakeToRow ()
 
ColPartition * ShallowCopy () const
 
ColPartition * CopyButDontOwnBlobs ()
 
ScrollView::Color BoxColor () const
 
void Print () const
 
void PrintColors ()
 
void SmoothPartnerRun (int working_set_count)
 
void RefinePartners (PolyBlockType type, bool get_desperate, ColPartitionGrid *grid)
 
bool IsInSameColumnAs (const ColPartition &part) const
 
void set_first_column (int column)
 
void set_last_column (int column)
 
- Public Member Functions inherited from ELIST2_LINK
 ELIST2_LINK ()
 
 ELIST2_LINK (const ELIST2_LINK &)
 
void operator= (const ELIST2_LINK &)
 

Static Public Member Functions

static ColPartition * MakeLinePartition (BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
 
static ColPartition * FakePartition (const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
 
static ColPartition * MakeBigPartition (BLOBNBOX *box, ColPartition_LIST *big_part_list)
 
static bool TypesMatch (BlobRegionType type1, BlobRegionType type2)
 
static bool TypesSimilar (PolyBlockType type1, PolyBlockType type2)
 
static void LineSpacingBlocks (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
 
static TO_BLOCK * MakeBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static TO_BLOCK * MakeVerticalTextBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static int SortByBBox (const void *p1, const void *p2)
 

Detailed Description

ColPartition is a partition of a horizontal slice of the page. It starts out as a collection of blobs at a particular y-coord in the grid, but ends up (after merging and uniquing) as an approximate text line. ColPartitions are also used to hold a partitioning of the page into columns, each representing one column. Although a ColPartition applies to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions emerges, which represents the columns over a wide y-coordinate range.

Definition at line 68 of file colpartition.h.

Constructor & Destructor Documentation

tesseract::ColPartition::ColPartition ( )
default
tesseract::ColPartition::ColPartition ( BlobRegionType  blob_type,
const ICOORD &  vertical 
)
Parameters
blob_type: is the blob_region_type_ of the blobs in this partition.
vertical: is the direction of logical vertical on the possibly skewed image.

Definition at line 81 of file colpartition.cpp.

82  : left_margin_(-INT32_MAX), right_margin_(INT32_MAX),
83  median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_height_(0),
84  median_left_(INT32_MAX), median_right_(-INT32_MAX), median_width_(0),
85  blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0),
86  good_width_(false), good_column_(false),
87  left_key_tab_(false), right_key_tab_(false),
88  left_key_(0), right_key_(0), type_(PT_UNKNOWN), vertical_(vertical),
89  working_set_(nullptr), last_add_was_vertical_(false), block_owned_(false),
90  desperately_merged_(false),
91  first_column_(-1), last_column_(-1), column_set_(nullptr),
92  side_step_(0), top_spacing_(0), bottom_spacing_(0),
93  type_before_table_(PT_UNKNOWN), inside_table_column_(false),
94  nearest_neighbor_above_(nullptr), nearest_neighbor_below_(nullptr),
95  space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0),
96  owns_blobs_(true) {
97  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
98 }
BlobRegionType blob_type() const
Definition: colpartition.h:149
tesseract::ColPartition::~ColPartition ( )

Definition at line 143 of file colpartition.cpp.

143  {
144  // Remove this as a partner of all partners, as we don't want them
145  // referring to a deleted object.
146  ColPartition_C_IT it(&upper_partners_);
147  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
148  it.data()->RemovePartner(false, this);
149  }
150  it.set_to_list(&lower_partners_);
151  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
152  it.data()->RemovePartner(true, this);
153  }
154 }

Member Function Documentation

void tesseract::ColPartition::Absorb ( ColPartition *  other,
WidthCallback *  cb 
)

Definition at line 648 of file colpartition.cpp.

648  {
649  // The result has to either own all of the blobs or none of them.
650  // Verify the flag is consistent.
651  ASSERT_HOST(owns_blobs() == other->owns_blobs());
652  // TODO(nbeato): check owns_blobs better. Right now owns_blobs
653  // should always be true when this is called. So there is no issues.
654  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
655  bounding_box_.bottom()) ||
656  TabFind::WithinTestRegion(2, other->bounding_box_.left(),
657  other->bounding_box_.bottom())) {
658  tprintf("Merging:");
659  Print();
660  other->Print();
661  }
662 
663  // Update the special_blobs_densities_.
664  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
665  for (int type = 0; type < BSTT_COUNT; ++type) {
666  unsigned w1 = boxes_.length();
667  unsigned w2 = other->boxes_.length();
668  float new_val = special_blobs_densities_[type] * w1 +
669  other->special_blobs_densities_[type] * w2;
670  if (!w1 || !w2) {
671  ASSERT_HOST((w1 + w2) > 0);
672  special_blobs_densities_[type] = new_val / (w1 + w2);
673  }
674  }
675 
676  // Merge the two sorted lists.
677  BLOBNBOX_C_IT it(&boxes_);
678  BLOBNBOX_C_IT it2(&other->boxes_);
679  for (; !it2.empty(); it2.forward()) {
680  BLOBNBOX* bbox2 = it2.extract();
681  ColPartition* prev_owner = bbox2->owner();
682  if (prev_owner != other && prev_owner != nullptr) {
683  // A blob on other's list is owned by someone else; let them have it.
684  continue;
685  }
686  ASSERT_HOST(prev_owner == other || prev_owner == nullptr);
687  if (prev_owner == other)
688  bbox2->set_owner(this);
689  it.add_to_end(bbox2);
690  }
691  left_margin_ = std::min(left_margin_, other->left_margin_);
692  right_margin_ = std::max(right_margin_, other->right_margin_);
693  if (other->left_key_ < left_key_) {
694  left_key_ = other->left_key_;
695  left_key_tab_ = other->left_key_tab_;
696  }
697  if (other->right_key_ > right_key_) {
698  right_key_ = other->right_key_;
699  right_key_tab_ = other->right_key_tab_;
700  }
701  // Combine the flow and blob_type in a sensible way.
702  // Dominant flows stay.
703  if (!DominatesInMerge(flow_, other->flow_)) {
704  flow_ = other->flow_;
705  blob_type_ = other->blob_type_;
706  }
707  SetBlobTypes();
708  if (IsVerticalType()) {
709  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
710  last_add_was_vertical_ = true;
711  } else {
712  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
713  last_add_was_vertical_ = false;
714  }
715  ComputeLimits();
716  // Fix partner lists. other is going away, so remove it as a
717  // partner of all its partners and add this in its place.
718  for (int upper = 0; upper < 2; ++upper) {
719  ColPartition_CLIST partners;
720  ColPartition_C_IT part_it(&partners);
721  part_it.add_list_after(upper ? &other->upper_partners_
722  : &other->lower_partners_);
723  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
724  ColPartition* partner = part_it.extract();
725  partner->RemovePartner(!upper, other);
726  partner->RemovePartner(!upper, this);
727  partner->AddPartner(!upper, this);
728  }
729  }
730  delete other;
731  if (cb != nullptr) {
732  SetColumnGoodness(cb);
733  }
734 }
bool IsVerticalType() const
Definition: colpartition.h:442
tesseract::ColPartition * owner() const
Definition: blobbox.h:352
PolyBlockType type() const
Definition: colpartition.h:182
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:355
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int16_t bottom() const
Definition: rect.h:65
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
Definition: blobbox.h:129
#define ASSERT_HOST(x)
Definition: errcode.h:88
int16_t left() const
Definition: rect.h:72
void SetColumnGoodness(WidthCallback *cb)
static bool WithinTestRegion(int detail_level, int x, int y)
bool owns_blobs() const
Definition: colpartition.h:292
void tesseract::ColPartition::AddBox ( BLOBNBOX * box)

Definition at line 179 of file colpartition.cpp.

179  {
180  TBOX box = bbox->bounding_box();
181  // Update the partition limits.
182  if (boxes_.length() == 0) {
183  bounding_box_ = box;
184  } else {
185  bounding_box_ += box;
186  }
187 
188  if (IsVerticalType()) {
189  if (!last_add_was_vertical_) {
190  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
191  last_add_was_vertical_ = true;
192  }
193  boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
194  } else {
195  if (last_add_was_vertical_) {
196  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
197  last_add_was_vertical_ = false;
198  }
199  boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
200  }
201  if (!left_key_tab_)
202  left_key_ = BoxLeftKey();
203  if (!right_key_tab_)
204  right_key_ = BoxRightKey();
205  if (TabFind::WithinTestRegion(2, box.left(), box.bottom()))
206  tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
207  box.left(), box.bottom(), box.right(), box.top(),
208  bounding_box_.left(), bounding_box_.right());
209 }
bool IsVerticalType() const
Definition: colpartition.h:442
int16_t top() const
Definition: rect.h:58
Definition: rect.h:34
const TBOX & bounding_box() const
Definition: blobbox.h:230
int BoxRightKey() const
Definition: colpartition.h:337
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65
int16_t left() const
Definition: rect.h:72
static bool WithinTestRegion(int detail_level, int x, int y)
void tesseract::ColPartition::AddPartner ( bool  upper,
ColPartition *  partner 
)

Definition at line 613 of file colpartition.cpp.

613  {
614  if (upper) {
615  partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
616  true, this);
617  upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
618  } else {
619  partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
620  true, this);
621  lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
622  }
623 }
void tesseract::ColPartition::AddToWorkingSet ( const ICOORD &  bleft,
const ICOORD &  tright,
int  resolution,
ColPartition_LIST *  used_parts,
WorkingPartSet_LIST *  working_set 
)

Definition at line 1357 of file colpartition.cpp.

1360  {
1361  if (block_owned_)
1362  return; // Done it already.
1363  block_owned_ = true;
1364  WorkingPartSet_IT it(working_sets);
1365  // If there is an upper partner use its working_set_ directly.
1366  ColPartition* partner = SingletonPartner(true);
1367  if (partner != nullptr && partner->working_set_ != nullptr) {
1368  working_set_ = partner->working_set_;
1369  working_set_->AddPartition(this);
1370  return;
1371  }
1372  if (partner != nullptr && textord_debug_bugs) {
1373  tprintf("Partition with partner has no working set!:");
1374  Print();
1375  partner->Print();
1376  }
1377  // Search for the column that the left edge fits in.
1378  WorkingPartSet* work_set = nullptr;
1379  it.move_to_first();
1380  int col_index = 0;
1381  for (it.mark_cycle_pt(); !it.cycled_list() &&
1382  col_index != first_column_;
1383  it.forward(), ++col_index);
1384  if (textord_debug_tabfind >= 2) {
1385  tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");
1386  Print();
1387  }
1388  if (it.cycled_list() && textord_debug_bugs) {
1389  tprintf("Target column=%d, only had %d\n", first_column_, col_index);
1390  }
1391  ASSERT_HOST(!it.cycled_list());
1392  work_set = it.data();
1393  // If last_column_ != first_column, then we need to scoop up all blocks
1394  // between here and the last_column_ and put back in work_set.
1395  if (!it.cycled_list() && last_column_ != first_column_ && !IsPulloutType()) {
1396  // Find the column that the right edge falls in.
1397  BLOCK_LIST completed_blocks;
1398  TO_BLOCK_LIST to_blocks;
1399  for (; !it.cycled_list() && col_index <= last_column_;
1400  it.forward(), ++col_index) {
1401  WorkingPartSet* end_set = it.data();
1402  end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
1403  &completed_blocks, &to_blocks);
1404  }
1405  work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1406  }
1407  working_set_ = work_set;
1408  work_set->AddPartition(this);
1409 }
bool IsPulloutType() const
Definition: colpartition.h:438
int textord_debug_bugs
Definition: alignedblob.cpp:28
void AddPartition(ColPartition *part)
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int textord_debug_tabfind
Definition: alignedblob.cpp:27
#define ASSERT_HOST(x)
Definition: errcode.h:88
ColPartition * SingletonPartner(bool upper)
BLOBNBOX * tesseract::ColPartition::BiggestBox ( )

Definition at line 225 of file colpartition.cpp.

225  {
226  BLOBNBOX* biggest = nullptr;
227  BLOBNBOX_C_IT bb_it(&boxes_);
228  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
229  BLOBNBOX* bbox = bb_it.data();
230  if (IsVerticalType()) {
231  if (biggest == nullptr ||
232  bbox->bounding_box().width() > biggest->bounding_box().width())
233  biggest = bbox;
234  } else {
235  if (biggest == nullptr ||
236  bbox->bounding_box().height() > biggest->bounding_box().height())
237  biggest = bbox;
238  }
239  }
240  return biggest;
241 }
bool IsVerticalType() const
Definition: colpartition.h:442
const TBOX & bounding_box() const
Definition: blobbox.h:230
int16_t height() const
Definition: rect.h:108
int16_t width() const
Definition: rect.h:115
BlobRegionType tesseract::ColPartition::blob_type ( ) const
inline

Definition at line 149 of file colpartition.h.

149  {
150  return blob_type_;
151  }
bool tesseract::ColPartition::block_owned ( ) const
inline

Definition at line 206 of file colpartition.h.

206  {
207  return block_owned_;
208  }
int tesseract::ColPartition::bottom_spacing ( ) const
inline

Definition at line 221 of file colpartition.h.

221  {
222  return bottom_spacing_;
223  }
const TBOX& tesseract::ColPartition::bounding_box ( ) const
inline

Definition at line 110 of file colpartition.h.

110  {
111  return bounding_box_;
112  }
TBOX tesseract::ColPartition::BoundsWithoutBox ( BLOBNBOX * box)

Definition at line 244 of file colpartition.cpp.

244  {
245  TBOX result;
246  BLOBNBOX_C_IT bb_it(&boxes_);
247  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
248  if (box != bb_it.data()) {
249  result += bb_it.data()->bounding_box();
250  }
251  }
252  return result;
253 }
Definition: rect.h:34
ScrollView::Color tesseract::ColPartition::BoxColor ( ) const

Definition at line 1781 of file colpartition.cpp.

1781  {
1782  if (type_ == PT_UNKNOWN)
1783  return BLOBNBOX::TextlineColor(blob_type_, flow_);
1784  return POLY_BLOCK::ColorForPolyBlockType(type_);
1785 }
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
Definition: blobbox.cpp:444
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
Definition: polyblk.cpp:393
BLOBNBOX_CLIST* tesseract::ColPartition::boxes ( )
inline

Definition at line 188 of file colpartition.h.

188  {
189  return &boxes_;
190  }
int tesseract::ColPartition::boxes_count ( ) const
inline

Definition at line 191 of file colpartition.h.

191  {
192  return boxes_.length();
193  }
int tesseract::ColPartition::BoxLeftKey ( ) const
inline

Definition at line 333 of file colpartition.h.

333  {
334  return SortKey(bounding_box_.left(), MidY());
335  }
int16_t left() const
Definition: rect.h:72
int SortKey(int x, int y) const
Definition: colpartition.h:317
int tesseract::ColPartition::BoxRightKey ( ) const
inline

Definition at line 337 of file colpartition.h.

337  {
338  return SortKey(bounding_box_.right(), MidY());
339  }
int16_t right() const
Definition: rect.h:79
int SortKey(int x, int y) const
Definition: colpartition.h:317
void tesseract::ColPartition::ClaimBoxes ( )

Definition at line 257 of file colpartition.cpp.

257  {
258  BLOBNBOX_C_IT bb_it(&boxes_);
259  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
260  BLOBNBOX* bblob = bb_it.data();
261  ColPartition* other = bblob->owner();
262  if (other == nullptr) {
263  // Normal case: ownership is available.
264  bblob->set_owner(this);
265  } else {
266  ASSERT_HOST(other == this);
267  }
268  }
269 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:352
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:355
#define ASSERT_HOST(x)
Definition: errcode.h:88
void tesseract::ColPartition::clear_table_type ( )
inline

Definition at line 240 of file colpartition.h.

240  {
241  if (type_ == PT_TABLE)
242  type_ = type_before_table_;
243  }
Definition: capi.h:134
uint8_t* tesseract::ColPartition::color1 ( )
inline

Definition at line 286 of file colpartition.h.

286  {
287  return color1_;
288  }
uint8_t* tesseract::ColPartition::color2 ( )
inline

Definition at line 289 of file colpartition.h.

289  {
290  return color2_;
291  }
ColPartitionSet* tesseract::ColPartition::column_set ( ) const
inline

Definition at line 215 of file colpartition.h.

215  {
216  return column_set_;
217  }
bool tesseract::ColPartition::ColumnContains ( int  x,
int  y 
) const
inline

Definition at line 354 of file colpartition.h.

354  {
355  return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
356  }
int LeftAtY(int y) const
Definition: colpartition.h:341
int RightAtY(int y) const
Definition: colpartition.h:345
void tesseract::ColPartition::ColumnRange ( int  resolution,
ColPartitionSet *  columns,
int *  first_col,
int *  last_col 
)

Definition at line 1066 of file colpartition.cpp.

1067  {
1068  int first_spanned_col = -1;
1069  ColumnSpanningType span_type =
1070  columns->SpanningType(resolution,
1071  bounding_box_.left(), bounding_box_.right(),
1072  std::min(bounding_box_.height(), bounding_box_.width()),
1073  MidY(), left_margin_, right_margin_,
1074  first_col, last_col,
1075  &first_spanned_col);
1076  type_ = PartitionType(span_type);
1077 }
int16_t height() const
Definition: rect.h:108
int16_t width() const
Definition: rect.h:115
int16_t right() const
Definition: rect.h:79
PolyBlockType PartitionType(ColumnSpanningType flow) const
int16_t left() const
Definition: rect.h:72
int tesseract::ColPartition::ColumnWidth ( ) const
inline

Definition at line 329 of file colpartition.h.

329  {
330  return KeyWidth(left_key_, right_key_);
331  }
int KeyWidth(int left_key, int right_key) const
Definition: colpartition.h:325
void tesseract::ColPartition::ComputeLimits ( )

Definition at line 871 of file colpartition.cpp.

871  {
872  bounding_box_ = TBOX(); // Clear it
873  BLOBNBOX_C_IT it(&boxes_);
874  BLOBNBOX* bbox = nullptr;
875  int non_leader_count = 0;
876  if (it.empty()) {
877  bounding_box_.set_left(left_margin_);
878  bounding_box_.set_right(right_margin_);
879  bounding_box_.set_bottom(0);
880  bounding_box_.set_top(0);
881  } else {
882  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
883  bbox = it.data();
884  bounding_box_ += bbox->bounding_box();
885  if (bbox->flow() != BTFT_LEADER)
886  ++non_leader_count;
887  }
888  }
889  if (!left_key_tab_)
890  left_key_ = BoxLeftKey();
891  if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
892  // TODO(rays) investigate the causes of these error messages, to find
893  // out if they are genuinely harmful, or just indicative of junk input.
894  tprintf("Computed left-illegal partition\n");
895  Print();
896  }
897  if (!right_key_tab_)
898  right_key_ = BoxRightKey();
899  if (right_key_ < BoxRightKey() && textord_debug_bugs) {
900  tprintf("Computed right-illegal partition\n");
901  Print();
902  }
903  if (it.empty())
904  return;
905  if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
906  blob_type() == BRT_POLYIMAGE) {
907  median_top_ = bounding_box_.top();
908  median_bottom_ = bounding_box_.bottom();
909  median_height_ = bounding_box_.height();
910  median_left_ = bounding_box_.left();
911  median_right_ = bounding_box_.right();
912  median_width_ = bounding_box_.width();
913  } else {
914  STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
915  STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
916  STATS height_stats(0, bounding_box_.height() + 1);
917  STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
918  STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
919  STATS width_stats(0, bounding_box_.width() + 1);
920  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
921  bbox = it.data();
922  if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
923  const TBOX& box = bbox->bounding_box();
924  int area = box.area();
925  top_stats.add(box.top(), area);
926  bottom_stats.add(box.bottom(), area);
927  height_stats.add(box.height(), area);
928  left_stats.add(box.left(), area);
929  right_stats.add(box.right(), area);
930  width_stats.add(box.width(), area);
931  }
932  }
933  median_top_ = static_cast<int>(top_stats.median() + 0.5);
934  median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
935  median_height_ = static_cast<int>(height_stats.median() + 0.5);
936  median_left_ = static_cast<int>(left_stats.median() + 0.5);
937  median_right_ = static_cast<int>(right_stats.median() + 0.5);
938  median_width_ = static_cast<int>(width_stats.median() + 0.5);
939  }
940 
941  if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
942  tprintf("Made partition with bad right coords");
943  Print();
944  }
945  if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
946  tprintf("Made partition with bad left coords");
947  Print();
948  }
949  // Fix partner lists. The bounding box has changed and partners are stored
950  // in bounding box order, so remove and reinsert this as a partner
951  // of all its partners.
952  for (int upper = 0; upper < 2; ++upper) {
953  ColPartition_CLIST partners;
954  ColPartition_C_IT part_it(&partners);
955  part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
956  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
957  ColPartition* partner = part_it.extract();
958  partner->RemovePartner(!upper, this);
959  partner->AddPartner(!upper, this);
960  }
961  }
962  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
963  bounding_box_.bottom())) {
964  tprintf("Recomputed box for partition %p\n", this);
965  Print();
966  }
967 }
int32_t area() const
Definition: rect.h:122
int16_t top() const
Definition: rect.h:58
void set_top(int y)
Definition: rect.h:61
Definition: rect.h:34
void set_right(int x)
Definition: rect.h:82
int textord_debug_bugs
Definition: alignedblob.cpp:28
const TBOX & bounding_box() const
Definition: blobbox.h:230
BlobRegionType blob_type() const
Definition: colpartition.h:149
BlobTextFlowType flow() const
Definition: blobbox.h:295
void set_left(int x)
Definition: rect.h:75
int16_t height() const
Definition: rect.h:108
int BoxRightKey() const
Definition: colpartition.h:337
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int16_t width() const
Definition: rect.h:115
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65
int16_t left() const
Definition: rect.h:72
bool IsImageType() const
Definition: colpartition.h:430
Definition: statistc.h:31
static bool WithinTestRegion(int detail_level, int x, int y)
void set_bottom(int y)
Definition: rect.h:68
void tesseract::ColPartition::ComputeSpecialBlobsDensity ( )

Definition at line 592 of file colpartition.cpp.

592  {
593  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
594  if (boxes_.empty()) {
595  return;
596  }
597 
598  BLOBNBOX_C_IT blob_it(&boxes_);
599  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
600  BLOBNBOX* blob = blob_it.data();
601  BlobSpecialTextType type = blob->special_text_type();
602  special_blobs_densities_[type]++;
603  }
604 
605  for (float& special_blobs_density : special_blobs_densities_) {
606  special_blobs_density /= boxes_.length();
607  }
608 }
PolyBlockType type() const
Definition: colpartition.h:182
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:289
BlobSpecialTextType
Definition: blobbox.h:96
bool tesseract::ColPartition::ConfirmNoTabViolation ( const ColPartition & other) const

Definition at line 423 of file colpartition.cpp.

423  {
424  if (bounding_box_.right() < other.bounding_box_.left() &&
425  bounding_box_.right() < other.LeftBlobRule())
426  return false;
427  if (other.bounding_box_.right() < bounding_box_.left() &&
428  other.bounding_box_.right() < LeftBlobRule())
429  return false;
430  if (bounding_box_.left() > other.bounding_box_.right() &&
431  bounding_box_.left() > other.RightBlobRule())
432  return false;
433  if (other.bounding_box_.left() > bounding_box_.right() &&
434  other.bounding_box_.left() > RightBlobRule())
435  return false;
436  return true;
437 }
int16_t right() const
Definition: rect.h:79
int16_t left() const
Definition: rect.h:72
ColPartition * tesseract::ColPartition::CopyButDontOwnBlobs ( )

Definition at line 1768 of file colpartition.cpp.

1768  {
1769  ColPartition* copy = ShallowCopy();
1770  copy->set_owns_blobs(false);
1771  BLOBNBOX_C_IT inserter(copy->boxes());
1772  BLOBNBOX_C_IT traverser(boxes());
1773  for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward())
1774  inserter.add_after_then_move(traverser.data());
1775  return copy;
1776 }
ColPartition * ShallowCopy() const
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:188
void tesseract::ColPartition::CopyLeftTab ( const ColPartition &  src,
bool  take_box 
)

Definition at line 529 of file colpartition.cpp.

529  {
530  left_key_tab_ = take_box ? false : src.left_key_tab_;
531  if (left_key_tab_) {
532  left_key_ = src.left_key_;
533  } else {
534  bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY()));
535  left_key_ = BoxLeftKey();
536  }
537  if (left_margin_ > bounding_box_.left())
538  left_margin_ = src.left_margin_;
539 }
void set_left(int x)
Definition: rect.h:75
int16_t left() const
Definition: rect.h:72
int XAtY(int sort_key, int y) const
Definition: colpartition.h:321
void tesseract::ColPartition::CopyRightTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 542 of file colpartition.cpp.

542  {
543  right_key_tab_ = take_box ? false : src.right_key_tab_;
544  if (right_key_tab_) {
545  right_key_ = src.right_key_;
546  } else {
547  bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY()));
548  right_key_ = BoxRightKey();
549  }
550  if (right_margin_ < bounding_box_.right())
551  right_margin_ = src.right_margin_;
552 }
void set_right(int x)
Definition: rect.h:82
int BoxRightKey() const
Definition: colpartition.h:337
int16_t right() const
Definition: rect.h:79
int XAtY(int sort_key, int y) const
Definition: colpartition.h:321
int tesseract::ColPartition::CountOverlappingBoxes ( const TBOX & box)

Definition at line 970 of file colpartition.cpp.

970  {
971  BLOBNBOX_C_IT it(&boxes_);
972  int overlap_count = 0;
973  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
974  BLOBNBOX* bbox = it.data();
975  if (box.overlap(bbox->bounding_box()))
976  ++overlap_count;
977  }
978  return overlap_count;
979 }
const TBOX & bounding_box() const
Definition: blobbox.h:230
bool overlap(const TBOX &box) const
Definition: rect.h:355
void tesseract::ColPartition::DeleteBoxes ( )

Definition at line 315 of file colpartition.cpp.

315  {
316  // Although the boxes_ list is a C_LIST, in some cases it owns the
317  // BLOBNBOXes, as the ColPartition takes ownership from the grid,
318  // and the BLOBNBOXes own the underlying C_BLOBs.
319  for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
320  BLOBNBOX* bblob = bb_it.extract();
321  delete bblob->cblob();
322  delete bblob;
323  }
324 }
C_BLOB * cblob() const
Definition: blobbox.h:268
bool tesseract::ColPartition::desperately_merged ( ) const
inline

Definition at line 212 of file colpartition.h.

212  {
213  return desperately_merged_;
214  }
void tesseract::ColPartition::DisownBoxes ( )

Definition at line 273 of file colpartition.cpp.

273  {
274  BLOBNBOX_C_IT bb_it(&boxes_);
275  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
276  BLOBNBOX* bblob = bb_it.data();
277  ASSERT_HOST(bblob->owner() == this || bblob->owner() == nullptr);
278  bblob->set_owner(nullptr);
279  }
280 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:352
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:355
#define ASSERT_HOST(x)
Definition: errcode.h:88
void tesseract::ColPartition::DisownBoxesNoAssert ( )

Definition at line 286 of file colpartition.cpp.

286  {
287  BLOBNBOX_C_IT bb_it(&boxes_);
288  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
289  BLOBNBOX* bblob = bb_it.data();
290  if (bblob->owner() == this)
291  bblob->set_owner(nullptr);
292  }
293 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:352
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:355
ColPartition * tesseract::ColPartition::FakePartition ( const TBOX & box,
PolyBlockType  block_type,
BlobRegionType  blob_type,
BlobTextFlowType  flow 
)
static

Definition at line 105 of file colpartition.cpp.

108  {
109  ColPartition* part = new ColPartition(blob_type, ICOORD(0, 1));
110  part->set_type(block_type);
111  part->set_flow(flow);
112  part->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(box)));
113  part->set_left_margin(box.left());
114  part->set_right_margin(box.right());
115  part->SetBlobTypes();
116  part->ComputeLimits();
117  part->ClaimBoxes();
118  return part;
119 }
integer coordinate
Definition: points.h:31
BlobRegionType blob_type() const
Definition: colpartition.h:149
int16_t right() const
Definition: rect.h:79
BlobTextFlowType flow() const
Definition: colpartition.h:155
int16_t left() const
Definition: rect.h:72
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:241
BlobTextFlowType tesseract::ColPartition::flow ( ) const
inline

Definition at line 155 of file colpartition.h.

155  {
156  return flow_;
157  }
int tesseract::ColPartition::good_blob_score ( ) const
inline

Definition at line 161 of file colpartition.h.

161  {
162  return good_blob_score_;
163  }
bool tesseract::ColPartition::good_column ( ) const
inline

Definition at line 167 of file colpartition.h.

167  {
168  return good_column_;
169  }
bool tesseract::ColPartition::good_width ( ) const
inline

Definition at line 164 of file colpartition.h.

164  {
165  return good_width_;
166  }
bool tesseract::ColPartition::HasGoodBaseline ( )

Definition at line 1290 of file colpartition.cpp.

1290  {
1291  // Approximation of the baseline.
1292  DetLineFit linepoints;
1293  // Calculation of the mean height on this line segment. Note that these
1294  // variable names apply to the context of a horizontal line, and work
1295  // analogously, rather than literally in the case of a vertical line.
1296  int total_height = 0;
1297  int coverage = 0;
1298  int height_count = 0;
1299  int width = 0;
1300  BLOBNBOX_C_IT it(&boxes_);
1301  TBOX box(it.data()->bounding_box());
1302  // Accumulate points representing the baseline at the middle of each blob,
1303  // but add an additional point for each end of the line. This makes it
1304  // harder to fit a severe skew angle, as it is most likely not right.
1305  if (IsVerticalType()) {
1306  // For a vertical line, use the right side as the baseline.
1307  ICOORD first_pt(box.right(), box.bottom());
1308  // Use the bottom-right of the first (bottom) box, the top-right of the
1309  // last, and the middle-right of all others.
1310  linepoints.Add(first_pt);
1311  for (it.forward(); !it.at_last(); it.forward()) {
1312  BLOBNBOX* blob = it.data();
1313  box = blob->bounding_box();
1314  ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1315  linepoints.Add(box_pt);
1316  total_height += box.width();
1317  coverage += box.height();
1318  ++height_count;
1319  }
1320  box = it.data()->bounding_box();
1321  ICOORD last_pt(box.right(), box.top());
1322  linepoints.Add(last_pt);
1323  width = last_pt.y() - first_pt.y();
1324 
1325  } else {
1326  // Horizontal lines use the bottom as the baseline.
1327  TBOX box(it.data()->bounding_box());
1328  // Use the bottom-left of the first box, the bottom-right of the last,
1329  // and the middle of all others.
1330  ICOORD first_pt(box.left(), box.bottom());
1331  linepoints.Add(first_pt);
1332  for (it.forward(); !it.at_last(); it.forward()) {
1333  BLOBNBOX* blob = it.data();
1334  box = blob->bounding_box();
1335  ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1336  linepoints.Add(box_pt);
1337  total_height += box.height();
1338  coverage += box.width();
1339  ++height_count;
1340  }
1341  box = it.data()->bounding_box();
1342  ICOORD last_pt(box.right(), box.bottom());
1343  linepoints.Add(last_pt);
1344  width = last_pt.x() - first_pt.x();
1345  }
1346  // Maximum median error allowed to be a good text line.
1347  if (height_count == 0)
1348  return false;
1349  double max_error = kMaxBaselineError * total_height / height_count;
1350  ICOORD start_pt, end_pt;
1351  double error = linepoints.Fit(&start_pt, &end_pt);
1352  return error < max_error && coverage >= kMinBaselineCoverage * width;
1353 }
bool IsVerticalType() const
Definition: colpartition.h:442
Definition: rect.h:34
const double kMaxBaselineError
integer coordinate
Definition: points.h:31
const TBOX & bounding_box() const
Definition: blobbox.h:230
int16_t y() const
access_function
Definition: points.h:56
const double kMinBaselineCoverage
int16_t x() const
access function
Definition: points.h:52
int tesseract::ColPartition::HCoreOverlap ( const ColPartition & other) const
inline

Definition at line 385 of file colpartition.h.

385  {
386  return std::min(median_right_, other.median_right_) -
387  std::max(median_left_, other.median_left_);
388  }
bool tesseract::ColPartition::HOverlaps ( const ColPartition & other) const
inline

Definition at line 366 of file colpartition.h.

366  {
367  return bounding_box_.x_overlap(other.bounding_box_);
368  }
bool x_overlap(const TBOX &box) const
Definition: rect.h:401
bool tesseract::ColPartition::inside_table_column ( )
inline

Definition at line 244 of file colpartition.h.

244  {
245  return inside_table_column_;
246  }
bool tesseract::ColPartition::IsEmpty ( ) const
inline

Definition at line 358 of file colpartition.h.

358  {
359  return boxes_.empty();
360  }
bool tesseract::ColPartition::IsHorizontalLine ( ) const
inline

Definition at line 460 of file colpartition.h.

460  {
461  return IsHorizontalType() && IsLineType();
462  }
bool IsLineType() const
Definition: colpartition.h:426
bool IsHorizontalType() const
Definition: colpartition.h:446
bool tesseract::ColPartition::IsHorizontalType ( ) const
inline

Definition at line 446 of file colpartition.h.

446  {
447  return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE;
448  }
bool tesseract::ColPartition::IsImageType ( ) const
inline

Definition at line 430 of file colpartition.h.

430  {
431  return PTIsImageType(type_);
432  }
bool PTIsImageType(PolyBlockType type)
Definition: publictypes.h:77
bool tesseract::ColPartition::IsInSameColumnAs ( const ColPartition & part) const

Definition at line 2185 of file colpartition.cpp.

2185  {
2186  // Overlap does not occur when last < part.first or first > part.last.
2187  // In other words, one is completely to the side of the other.
2188  // This is just DeMorgan's law applied to that so the function returns true.
2189  return (last_column_ >= part.first_column_) &&
2190  (first_column_ <= part.last_column_);
2191 }
bool tesseract::ColPartition::IsLeftOf ( const ColPartition & other) const
inline

Definition at line 350 of file colpartition.h.

350  {
351  return bounding_box_.right() < other.bounding_box_.right();
352  }
int16_t right() const
Definition: rect.h:79
bool tesseract::ColPartition::IsLegal ( )

Definition at line 352 of file colpartition.cpp.

352  {
353  if (bounding_box_.left() > bounding_box_.right()) {
354  if (textord_debug_bugs) {
355  tprintf("Bounding box invalid\n");
356  Print();
357  }
358  return false; // Bounding box invalid.
359  }
360  if (left_margin_ > bounding_box_.left() ||
361  right_margin_ < bounding_box_.right()) {
362  if (textord_debug_bugs) {
363  tprintf("Margins invalid\n");
364  Print();
365  }
366  return false; // Margins invalid.
367  }
368  if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) {
369  if (textord_debug_bugs) {
370  tprintf("Key inside box: %d v %d or %d v %d\n",
371  left_key_, BoxLeftKey(), right_key_, BoxRightKey());
372  Print();
373  }
374  return false; // Keys inside the box.
375  }
376  return true;
377 }
int textord_debug_bugs
Definition: alignedblob.cpp:28
int BoxRightKey() const
Definition: colpartition.h:337
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int16_t right() const
Definition: rect.h:79
int16_t left() const
Definition: rect.h:72
bool tesseract::ColPartition::IsLineType ( ) const
inline

Definition at line 426 of file colpartition.h.

426  {
427  return PTIsLineType(type_);
428  }
bool PTIsLineType(PolyBlockType type)
Definition: publictypes.h:73
bool tesseract::ColPartition::IsPulloutType ( ) const
inline

Definition at line 438 of file colpartition.h.

438  {
439  return PTIsPulloutType(type_);
440  }
bool PTIsPulloutType(PolyBlockType type)
Definition: publictypes.h:89
bool tesseract::ColPartition::IsSingleton ( ) const
inline

Definition at line 362 of file colpartition.h.

362  {
363  return boxes_.singleton();
364  }
bool tesseract::ColPartition::IsTextType ( ) const
inline

Definition at line 434 of file colpartition.h.

434  {
435  return PTIsTextType(type_);
436  }
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:82
bool tesseract::ColPartition::IsUnMergeableType ( ) const
inline

Definition at line 450 of file colpartition.h.

450  {
451  return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;
452  }
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:430
Definition: capi.h:142
bool tesseract::ColPartition::IsVerticalLine ( ) const
inline

Definition at line 455 of file colpartition.h.

455  {
456  return IsVerticalType() && IsLineType();
457  }
bool IsVerticalType() const
Definition: colpartition.h:442
bool IsLineType() const
Definition: colpartition.h:426
bool tesseract::ColPartition::IsVerticalType ( ) const
inline

Definition at line 442 of file colpartition.h.

442  {
443  return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
444  }
int tesseract::ColPartition::KeyWidth ( int  left_key,
int  right_key 
) const
inline

Definition at line 325 of file colpartition.h.

325  {
326  return (right_key - left_key) / vertical_.y();
327  }
int16_t y() const
access_function
Definition: points.h:56
int tesseract::ColPartition::left_key ( ) const
inline

Definition at line 173 of file colpartition.h.

173  {
174  return left_key_;
175  }
bool tesseract::ColPartition::left_key_tab ( ) const
inline

Definition at line 170 of file colpartition.h.

170  {
171  return left_key_tab_;
172  }
int tesseract::ColPartition::left_margin ( ) const
inline

Definition at line 113 of file colpartition.h.

113  {
114  return left_margin_;
115  }
int tesseract::ColPartition::LeftAtY ( int  y) const
inline

Definition at line 341 of file colpartition.h.

341  {
342  return XAtY(left_key_, y);
343  }
int XAtY(int sort_key, int y) const
Definition: colpartition.h:321
int tesseract::ColPartition::LeftBlobRule ( ) const

Definition at line 555 of file colpartition.cpp.

555  {
556  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
557  return it.data()->left_rule();
558 }
void tesseract::ColPartition::LineSpacingBlocks ( const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts,
BLOCK_LIST *  completed_blocks,
TO_BLOCK_LIST *  to_blocks 
)
static

Definition at line 1417 of file colpartition.cpp.

1422  {
1423  int page_height = tright.y() - bleft.y();
1424  // Compute the initial spacing stats.
1425  ColPartition_IT it(block_parts);
1426  int part_count = 0;
1427  int max_line_height = 0;
1428 
1429  // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
1430  // because their line spacing with their neighbors maybe smaller and their
1431  // height may be slightly larger.
1432 
1433  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1434  ColPartition* part = it.data();
1435  ASSERT_HOST(!part->boxes()->empty());
1436  STATS side_steps(0, part->bounding_box().height());
1437  if (part->bounding_box().height() > max_line_height)
1438  max_line_height = part->bounding_box().height();
1439  BLOBNBOX_C_IT blob_it(part->boxes());
1440  int prev_bottom = blob_it.data()->bounding_box().bottom();
1441  for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1442  BLOBNBOX* blob = blob_it.data();
1443  int bottom = blob->bounding_box().bottom();
1444  int step = bottom - prev_bottom;
1445  if (step < 0)
1446  step = -step;
1447  side_steps.add(step, 1);
1448  prev_bottom = bottom;
1449  }
1450  part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
1451  if (!it.at_last()) {
1452  ColPartition* next_part = it.data_relative(1);
1453  part->set_bottom_spacing(part->median_bottom() -
1454  next_part->median_bottom());
1455  part->set_top_spacing(part->median_top() - next_part->median_top());
1456  } else {
1457  part->set_bottom_spacing(page_height);
1458  part->set_top_spacing(page_height);
1459  }
1460  if (textord_debug_tabfind) {
1461  part->Print();
1462  tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1463  side_steps.median(), part->top_spacing(), part->bottom_spacing());
1464  }
1465  ++part_count;
1466  }
1467  if (part_count == 0)
1468  return;
1469 
1470  SmoothSpacings(resolution, page_height, block_parts);
1471 
1472  // Move the partitions into individual block lists and make the blocks.
1473  BLOCK_IT block_it(completed_blocks);
1474  TO_BLOCK_IT to_block_it(to_blocks);
1475  ColPartition_LIST spacing_parts;
1476  ColPartition_IT sp_block_it(&spacing_parts);
1477  int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
1478  for (it.mark_cycle_pt(); !it.empty();) {
1479  ColPartition* part = it.extract();
1480  sp_block_it.add_to_end(part);
1481  it.forward();
1482  if (it.empty() || part->bottom_spacing() > same_block_threshold ||
1483  !part->SpacingsEqual(*it.data(), resolution)) {
1484  // There is a spacing boundary. Check to see if it.data() belongs
1485  // better in the current block or the next one.
1486  if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
1487  ColPartition* next_part = it.data();
1488  // If there is a size match one-way, then the middle line goes with
1489  // its matched size, otherwise it goes with the smallest spacing.
1490  ColPartition* third_part = it.at_last() ? nullptr : it.data_relative(1);
1491  if (textord_debug_tabfind) {
1492  tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d,"
1493  " sizes %d %d %d\n",
1494  part->top_spacing(), part->bottom_spacing(),
1495  next_part->top_spacing(), next_part->bottom_spacing(),
1496  part->median_height(), next_part->median_height(),
1497  third_part != nullptr ? third_part->median_height() : 0);
1498  }
1499  // We can only consider adding the next line to the block if the sizes
1500  // match and the lines are close enough for their size.
1501  if (part->SizesSimilar(*next_part) &&
1502  next_part->median_height() * kMaxSameBlockLineSpacing >
1503  part->bottom_spacing() &&
1504  part->median_height() * kMaxSameBlockLineSpacing >
1505  part->top_spacing()) {
1506  // Even now, we can only add it as long as the third line doesn't
1507  // match in the same way and have a smaller bottom spacing.
1508  if (third_part == nullptr ||
1509  !next_part->SizesSimilar(*third_part) ||
1510  third_part->median_height() * kMaxSameBlockLineSpacing <=
1511  next_part->bottom_spacing() ||
1512  next_part->median_height() * kMaxSameBlockLineSpacing <=
1513  next_part->top_spacing() ||
1514  next_part->bottom_spacing() > part->bottom_spacing()) {
1515  // Add to the current block.
1516  sp_block_it.add_to_end(it.extract());
1517  it.forward();
1518  if (textord_debug_tabfind) {
1519  tprintf("Added line to current block.\n");
1520  }
1521  }
1522  }
1523  }
1524  TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
1525  if (to_block != nullptr) {
1526  to_block_it.add_to_end(to_block);
1527  block_it.add_to_end(to_block->block);
1528  }
1529  sp_block_it.set_to_list(&spacing_parts);
1530  } else {
1531  if (textord_debug_tabfind && !it.empty()) {
1532  ColPartition* next_part = it.data();
1533  tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
1534  part->top_spacing(), part->bottom_spacing(),
1535  next_part->top_spacing(), next_part->bottom_spacing(),
1536  part->median_height(), next_part->median_height());
1537  }
1538  }
1539  }
1540 }
const TBOX & bounding_box() const
Definition: blobbox.h:230
const double kMaxSameBlockLineSpacing
int16_t y() const
access_function
Definition: points.h:56
BLOCK * block
Definition: blobbox.h:788
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int textord_debug_tabfind
Definition: alignedblob.cpp:27
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
int16_t bottom() const
Definition: rect.h:65
#define ASSERT_HOST(x)
Definition: errcode.h:88
Definition: statistc.h:31
ColPartition_CLIST* tesseract::ColPartition::lower_partners ( )
inline

Definition at line 200 of file colpartition.h.

200  {
201  return &lower_partners_;
202  }
ColPartition * tesseract::ColPartition::MakeBigPartition ( BLOBNBOX * box,
ColPartition_LIST *  big_part_list 
)
static

Definition at line 126 of file colpartition.cpp.

127  {
128  box->set_owner(nullptr);
129  ColPartition* single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
130  single->set_flow(BTFT_NONE);
131  single->AddBox(box);
132  single->ComputeLimits();
133  single->ClaimBoxes();
134  single->SetBlobTypes();
135  single->set_block_owned(true);
136  if (big_part_list != nullptr) {
137  ColPartition_IT part_it(big_part_list);
138  part_it.add_to_end(single);
139  }
140  return single;
141 }
integer coordinate
Definition: points.h:31
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:355
TO_BLOCK * tesseract::ColPartition::MakeBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1633 of file colpartition.cpp.

1635  {
1636  if (block_parts->empty())
1637  return nullptr; // Nothing to do.
1638  // If the block_parts are not in reading order, then it will make an invalid
1639  // block polygon and bounding_box, so sort by bounding box now just to make
1640  // sure.
1641  block_parts->sort(&ColPartition::SortByBBox);
1642  ColPartition_IT it(block_parts);
1643  ColPartition* part = it.data();
1644  PolyBlockType type = part->type();
1645  if (type == PT_VERTICAL_TEXT)
1646  return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);
1647  // LineSpacingBlocks has handed us a collection of evenly spaced lines and
1648  // put the average spacing in each partition, so we can just take the
1649  // linespacing from the first partition.
1650  int line_spacing = part->bottom_spacing();
1651  if (line_spacing < part->median_height())
1652  line_spacing = part->bounding_box().height();
1653  ICOORDELT_LIST vertices;
1654  ICOORDELT_IT vert_it(&vertices);
1655  ICOORD start, end;
1656  int min_x = INT32_MAX;
1657  int max_x = -INT32_MAX;
1658  int min_y = INT32_MAX;
1659  int max_y = -INT32_MAX;
1660  int iteration = 0;
1661  do {
1662  if (iteration == 0)
1663  ColPartition::LeftEdgeRun(&it, &start, &end);
1664  else
1665  ColPartition::RightEdgeRun(&it, &start, &end);
1666  ClipCoord(bleft, tright, &start);
1667  ClipCoord(bleft, tright, &end);
1668  vert_it.add_after_then_move(new ICOORDELT(start));
1669  vert_it.add_after_then_move(new ICOORDELT(end));
1670  UpdateRange(start.x(), &min_x, &max_x);
1671  UpdateRange(end.x(), &min_x, &max_x);
1672  UpdateRange(start.y(), &min_y, &max_y);
1673  UpdateRange(end.y(), &min_y, &max_y);
1674  if ((iteration == 0 && it.at_first()) ||
1675  (iteration == 1 && it.at_last())) {
1676  ++iteration;
1677  it.move_to_last();
1678  }
1679  } while (iteration < 2);
1680  if (textord_debug_tabfind)
1681  tprintf("Making block at (%d,%d)->(%d,%d)\n",
1682  min_x, min_y, max_x, max_y);
1683  auto* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y);
1684  block->pdblk.set_poly_block(new POLY_BLOCK(&vertices, type));
1685  return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts);
1686 }
int median_height() const
Definition: colpartition.h:137
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:120
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
integer coordinate
Definition: points.h:31
int16_t y() const
access_function
Definition: points.h:56
PolyBlockType type() const
Definition: colpartition.h:182
PolyBlockType
Definition: publictypes.h:53
int16_t x() const
access function
Definition: points.h:52
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int textord_debug_tabfind
Definition: alignedblob.cpp:27
static int SortByBBox(const void *p1, const void *p2)
Definition: colpartition.h:715
Definition: ocrblock.h:29
ColPartition * tesseract::ColPartition::MakeLinePartition ( BlobRegionType  blob_type,
const ICOORD & vertical,
int  left,
int  bottom,
int  right,
int  top 
)
static

Constructs a fake ColPartition with no BLOBNBOXes to represent a horizontal or vertical line, given a type and a bounding box.

Definition at line 158 of file colpartition.cpp.

161  {
162  auto* part = new ColPartition(blob_type, vertical);
163  part->bounding_box_ = TBOX(left, bottom, right, top);
164  part->median_bottom_ = bottom;
165  part->median_top_ = top;
166  part->median_height_ = top - bottom;
167  part->median_left_ = left;
168  part->median_right_ = right;
169  part->median_width_ = right - left;
170  part->left_key_ = part->BoxLeftKey();
171  part->right_key_ = part->BoxRightKey();
172  return part;
173 }
Definition: rect.h:34
BlobRegionType blob_type() const
Definition: colpartition.h:149
TO_ROW * tesseract::ColPartition::MakeToRow ( )

Definition at line 1716 of file colpartition.cpp.

1716  {
1717  BLOBNBOX_C_IT blob_it(&boxes_);
1718  TO_ROW* row = nullptr;
1719  int line_size = IsVerticalType() ? median_width_ : median_height_;
1720  // Add all the blobs to a single TO_ROW.
1721  for (; !blob_it.empty(); blob_it.forward()) {
1722  BLOBNBOX* blob = blob_it.extract();
1723 // blob->compute_bounding_box();
1724  int top = blob->bounding_box().top();
1725  int bottom = blob->bounding_box().bottom();
1726  if (row == nullptr) {
1727  row = new TO_ROW(blob, static_cast<float>(top),
1728  static_cast<float>(bottom),
1729  static_cast<float>(line_size));
1730  } else {
1731  row->add_blob(blob, static_cast<float>(top),
1732  static_cast<float>(bottom),
1733  static_cast<float>(line_size));
1734  }
1735  }
1736  return row;
1737 }
bool IsVerticalType() const
Definition: colpartition.h:442
int16_t top() const
Definition: rect.h:58
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
Definition: blobbox.cpp:733
const TBOX & bounding_box() const
Definition: blobbox.h:230
int16_t bottom() const
Definition: rect.h:65
TO_BLOCK * tesseract::ColPartition::MakeVerticalTextBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1690 of file colpartition.cpp.

1693  {
1694  if (block_parts->empty())
1695  return nullptr; // Nothing to do.
1696  ColPartition_IT it(block_parts);
1697  ColPartition* part = it.data();
1698  TBOX block_box = part->bounding_box();
1699  int line_spacing = block_box.width();
1700  PolyBlockType type = it.data()->type();
1701  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1702  block_box += it.data()->bounding_box();
1703  }
1704  if (textord_debug_tabfind) {
1705  tprintf("Making block at:");
1706  block_box.print();
1707  }
1708  auto* block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(),
1709  block_box.right(), block_box.top());
1710  block->pdblk.set_poly_block(new POLY_BLOCK(block_box, type));
1711  return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
1712 }
int16_t top() const
Definition: rect.h:58
Definition: rect.h:34
void print() const
Definition: rect.h:278
PolyBlockType type() const
Definition: colpartition.h:182
PolyBlockType
Definition: publictypes.h:53
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int textord_debug_tabfind
Definition: alignedblob.cpp:27
int16_t width() const
Definition: rect.h:115
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:191
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65
int16_t left() const
Definition: rect.h:72
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:58
Definition: ocrblock.h:29
bool tesseract::ColPartition::MarkAsLeaderIfMonospaced ( )

Definition at line 1093 of file colpartition.cpp.

1093  {
1094  bool result = false;
1095  // Gather statistics on the gaps between blobs and the widths of the blobs.
1096  int part_width = bounding_box_.width();
1097  STATS gap_stats(0, part_width);
1098  STATS width_stats(0, part_width);
1099  BLOBNBOX_C_IT it(&boxes_);
1100  BLOBNBOX* prev_blob = it.data();
1101  prev_blob->set_flow(BTFT_NEIGHBOURS);
1102  width_stats.add(prev_blob->bounding_box().width(), 1);
1103  int blob_count = 1;
1104  for (it.forward(); !it.at_first(); it.forward()) {
1105  BLOBNBOX* blob = it.data();
1106  int left = blob->bounding_box().left();
1107  int right = blob->bounding_box().right();
1108  gap_stats.add(left - prev_blob->bounding_box().right(), 1);
1109  width_stats.add(right - left, 1);
1110  blob->set_flow(BTFT_NEIGHBOURS);
1111  prev_blob = blob;
1112  ++blob_count;
1113  }
1114  double median_gap = gap_stats.median();
1115  double median_width = width_stats.median();
1116  double max_width = std::max(median_gap, median_width);
1117  double min_width = std::min(median_gap, median_width);
1118  double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
1119  if (textord_debug_tabfind >= 4) {
1120  tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n",
1121  gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax,
1122  min_width * kMaxLeaderGapFractionOfMin);
1123  }
1124  if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
1125  gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
1126  blob_count >= kMinLeaderCount) {
1127  // This is stable enough to be called a leader, so check the widths.
1128  // Since leader dashes can join, run a dp cutting algorithm and go
1129  // on the cost.
1130  int offset = static_cast<int>(ceil(gap_iqr * 2));
1131  int min_step = static_cast<int>(median_gap + median_width + 0.5);
1132  int max_step = min_step + offset;
1133  min_step -= offset;
1134  // Pad the buffer with min_step/2 on each end.
1135  int part_left = bounding_box_.left() - min_step / 2;
1136  part_width += min_step;
1137  auto* projection = new DPPoint[part_width];
1138  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1139  BLOBNBOX* blob = it.data();
1140  int left = blob->bounding_box().left();
1141  int right = blob->bounding_box().right();
1142  int height = blob->bounding_box().height();
1143  for (int x = left; x < right; ++x) {
1144  projection[left - part_left].AddLocalCost(height);
1145  }
1146  }
1147  DPPoint* best_end = DPPoint::Solve(min_step, max_step, false,
1148  &DPPoint::CostWithVariance,
1149  part_width, projection);
1150  if (best_end != nullptr && best_end->total_cost() < blob_count) {
1151  // Good enough. Call it a leader.
1152  result = true;
1153  bool modified_blob_list = false;
1154  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1155  BLOBNBOX* blob = it.data();
1156  // If the first or last blob is spaced too much, don't mark it.
1157  if (it.at_first()) {
1158  int gap = it.data_relative(1)->bounding_box().left() -
1159  blob->bounding_box().right();
1160  if (blob->bounding_box().width() + gap > max_step) {
1161  it.extract();
1162  modified_blob_list = true;
1163  continue;
1164  }
1165  }
1166  if (it.at_last()) {
1167  int gap = blob->bounding_box().left() -
1168  it.data_relative(-1)->bounding_box().right();
1169  if (blob->bounding_box().width() + gap > max_step) {
1170  it.extract();
1171  modified_blob_list = true;
1172  break;
1173  }
1174  }
1175  blob->set_region_type(BRT_TEXT);
1176  blob->set_flow(BTFT_LEADER);
1177  }
1178  if (modified_blob_list) ComputeLimits();
1179  blob_type_ = BRT_TEXT;
1180  flow_ = BTFT_LEADER;
1181  } else if (textord_debug_tabfind) {
1182  if (best_end == nullptr) {
1183  tprintf("No path\n");
1184  } else {
1185  tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(),
1186  blob_count);
1187  }
1188  }
1189  delete [] projection;
1190  }
1191  return result;
1192 }
const TBOX & bounding_box() const
Definition: blobbox.h:230
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
Definition: dppoint.cpp:31
int16_t height() const
Definition: rect.h:108
const double kMaxLeaderGapFractionOfMax
const int kMinLeaderCount
int median_width() const
Definition: colpartition.h:143
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int textord_debug_tabfind
Definition: alignedblob.cpp:27
int16_t width() const
Definition: rect.h:115
int64_t CostWithVariance(const DPPoint *prev)
Definition: dppoint.cpp:69
int16_t right() const
Definition: rect.h:79
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:286
int16_t left() const
Definition: rect.h:72
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:298
const double kMaxLeaderGapFractionOfMin
Definition: statistc.h:31
bool tesseract::ColPartition::MatchingColumns ( const ColPartition & other) const

Definition at line 380 of file colpartition.cpp.

380  {
381  int y = (MidY() + other.MidY()) / 2;
382  if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor,
383  LeftAtY(y) / kColumnWidthFactor, 1))
384  return false;
385  if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor,
386  RightAtY(y) / kColumnWidthFactor, 1))
387  return false;
388  return true;
389 }
int LeftAtY(int y) const
Definition: colpartition.h:341
const int kColumnWidthFactor
Definition: tabfind.h:42
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:37
int RightAtY(int y) const
Definition: colpartition.h:345
bool tesseract::ColPartition::MatchingSizes ( const ColPartition & other) const

Definition at line 415 of file colpartition.cpp.

415  {
416  if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT)
417  return !TabFind::DifferentSizes(median_width_, other.median_width_);
418  else
419  return !TabFind::DifferentSizes(median_height_, other.median_height_);
420 }
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:407
bool tesseract::ColPartition::MatchingStrokeWidth ( const ColPartition & other,
double  fractional_tolerance,
double  constant_tolerance 
) const

Definition at line 440 of file colpartition.cpp.

442  {
443  int match_count = 0;
444  int nonmatch_count = 0;
445  BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
446  BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
447  box_it.mark_cycle_pt();
448  other_it.mark_cycle_pt();
449  while (!box_it.cycled_list() && !other_it.cycled_list()) {
450  if (box_it.data()->MatchingStrokeWidth(*other_it.data(),
451  fractional_tolerance,
452  constant_tolerance))
453  ++match_count;
454  else
455  ++nonmatch_count;
456  box_it.forward();
457  other_it.forward();
458  }
459  return match_count > nonmatch_count;
460 }
bool tesseract::ColPartition::MatchingTextColor ( const ColPartition & other) const

Definition at line 392 of file colpartition.cpp.

392  {
393  if (color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise &&
394  other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise)
395  return false; // Too noisy.
396 
397  // Colors must match for other to count.
398  double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_,
399  other.color2_,
400  color1_);
401  double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_,
402  other.color2_,
403  color2_);
404  double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
405  other.color1_);
406  double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
407  other.color2_);
408 // All 4 distances must be small enough.
409  return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance &&
410  d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance;
411 }
const int kMaxColorDistance
static double ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
Definition: imagefind.cpp:355
const int kMaxRMSColorNoise
int tesseract::ColPartition::median_bottom ( ) const
inline

Definition at line 128 of file colpartition.h.

128  {
129  return median_bottom_;
130  }
int tesseract::ColPartition::median_height ( ) const
inline

Definition at line 137 of file colpartition.h.

137  {
138  return median_height_;
139  }
int tesseract::ColPartition::median_left ( ) const
inline

Definition at line 131 of file colpartition.h.

131  {
132  return median_left_;
133  }
int tesseract::ColPartition::median_right ( ) const
inline

Definition at line 134 of file colpartition.h.

134  {
135  return median_right_;
136  }
int tesseract::ColPartition::median_top ( ) const
inline

Definition at line 125 of file colpartition.h.

125  {
126  return median_top_;
127  }
int tesseract::ColPartition::median_width ( ) const
inline

Definition at line 143 of file colpartition.h.

143  {
144  return median_width_;
145  }
int tesseract::ColPartition::MedianY ( ) const
inline

Definition at line 309 of file colpartition.h.

309  {
310  return (median_top_ + median_bottom_) / 2;
311  }
int tesseract::ColPartition::MidX ( ) const
inline

Definition at line 313 of file colpartition.h.

313  {
314  return (bounding_box_.left() + bounding_box_.right()) / 2;
315  }
int16_t right() const
Definition: rect.h:79
int16_t left() const
Definition: rect.h:72
int tesseract::ColPartition::MidY ( ) const
inline

Definition at line 305 of file colpartition.h.

305  {
306  return (bounding_box_.top() + bounding_box_.bottom()) / 2;
307  }
int16_t top() const
Definition: rect.h:58
int16_t bottom() const
Definition: rect.h:65
ColPartition* tesseract::ColPartition::nearest_neighbor_above ( ) const
inline

Definition at line 250 of file colpartition.h.

250  {
251  return nearest_neighbor_above_;
252  }
ColPartition* tesseract::ColPartition::nearest_neighbor_below ( ) const
inline

Definition at line 256 of file colpartition.h.

256  {
257  return nearest_neighbor_below_;
258  }
bool tesseract::ColPartition::OKDiacriticMerge ( const ColPartition & candidate,
bool  debug 
) const

Definition at line 468 of file colpartition.cpp.

469  {
470  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
471  int min_top = INT32_MAX;
472  int max_bottom = -INT32_MAX;
473  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
474  BLOBNBOX* blob = it.data();
475  if (!blob->IsDiacritic()) {
476  if (debug) {
477  tprintf("Blob is not a diacritic:");
478  blob->bounding_box().print();
479  }
480  return false; // All blobs must have diacritic bases.
481  }
482  if (blob->base_char_top() < min_top)
483  min_top = blob->base_char_top();
484  if (blob->base_char_bottom() > max_bottom)
485  max_bottom = blob->base_char_bottom();
486  }
487  // If the intersection of all vertical ranges of all base characters
488  // overlaps the median range of this, then it is OK.
489  bool result = min_top > candidate.median_bottom_ &&
490  max_bottom < candidate.median_top_;
491  if (debug) {
492  if (result)
493  tprintf("OKDiacritic!\n");
494  else
495  tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n",
496  max_bottom, min_top, median_bottom_, median_top_);
497  }
498  return result;
499 }
bool IsDiacritic() const
Definition: blobbox.h:380
void print() const
Definition: rect.h:278
const TBOX & bounding_box() const
Definition: blobbox.h:230
int base_char_bottom() const
Definition: blobbox.h:386
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int base_char_top() const
Definition: blobbox.h:383
bool tesseract::ColPartition::OKMergeOverlap ( const ColPartition & merge1,
const ColPartition & merge2,
int  ok_box_overlap,
bool  debug 
)

Definition at line 746 of file colpartition.cpp.

748  {
749  // Vertical partitions are not allowed to be involved.
750  if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) {
751  if (debug)
752  tprintf("Vertical partition\n");
753  return false;
754  }
755  // The merging partitions must strongly overlap each other.
756  if (!merge1.VSignificantCoreOverlap(merge2)) {
757  if (debug)
758  tprintf("Voverlap %d (%d)\n",
759  merge1.VCoreOverlap(merge2),
760  merge1.VSignificantCoreOverlap(merge2));
761  return false;
762  }
763  // The merged box must not overlap the median bounds of this.
764  TBOX merged_box(merge1.bounding_box());
765  merged_box += merge2.bounding_box();
766  if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
767  merged_box.bottom() < bounding_box_.top() - ok_box_overlap &&
768  merged_box.top() > bounding_box_.bottom() + ok_box_overlap) {
769  if (debug)
770  tprintf("Excessive box overlap\n");
771  return false;
772  }
773  // Looks OK!
774  return true;
775 }
bool IsVerticalType() const
Definition: colpartition.h:442
int16_t top() const
Definition: rect.h:58
Definition: rect.h:34
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int16_t bottom() const
Definition: rect.h:65
BLOBNBOX * tesseract::ColPartition::OverlapSplitBlob ( const TBOX & box)

Definition at line 779 of file colpartition.cpp.

779  {
780  if (boxes_.empty() || boxes_.singleton())
781  return nullptr;
782  BLOBNBOX_C_IT it(&boxes_);
783  TBOX left_box(it.data()->bounding_box());
784  for (it.forward(); !it.at_first(); it.forward()) {
785  BLOBNBOX* bbox = it.data();
786  left_box += bbox->bounding_box();
787  if (left_box.overlap(box))
788  return bbox;
789  }
790  return nullptr;
791 }
Definition: rect.h:34
const TBOX & bounding_box() const
Definition: blobbox.h:230
bool tesseract::ColPartition::owns_blobs ( ) const
inline

Definition at line 292 of file colpartition.h.

292  {
293  return owns_blobs_;
294  }
PolyBlockType tesseract::ColPartition::PartitionType ( ColumnSpanningType  flow) const

Definition at line 1016 of file colpartition.cpp.

1016  {
1017  if (flow == CST_NOISE) {
1018  if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE &&
1019  blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT)
1020  return PT_NOISE;
1021  flow = CST_FLOWING;
1022  }
1023 
1024  switch (blob_type_) {
1025  case BRT_NOISE:
1026  return PT_NOISE;
1027  case BRT_HLINE:
1028  return PT_HORZ_LINE;
1029  case BRT_VLINE:
1030  return PT_VERT_LINE;
1031  case BRT_RECTIMAGE:
1032  case BRT_POLYIMAGE:
1033  switch (flow) {
1034  case CST_FLOWING:
1035  return PT_FLOWING_IMAGE;
1036  case CST_HEADING:
1037  return PT_HEADING_IMAGE;
1038  case CST_PULLOUT:
1039  return PT_PULLOUT_IMAGE;
1040  default:
1041  ASSERT_HOST(!"Undefined flow type for image!");
1042  }
1043  break;
1044  case BRT_VERT_TEXT:
1045  return PT_VERTICAL_TEXT;
1046  case BRT_TEXT:
1047  case BRT_UNKNOWN:
1048  default:
1049  switch (flow) {
1050  case CST_FLOWING:
1051  return PT_FLOWING_TEXT;
1052  case CST_HEADING:
1053  return PT_HEADING_TEXT;
1054  case CST_PULLOUT:
1055  return PT_PULLOUT_TEXT;
1056  default:
1057  ASSERT_HOST(!"Undefined flow type for text!");
1058  }
1059  }
1060  ASSERT_HOST(!"Should never get here!");
1061  return PT_NOISE;
1062 }
BlobTextFlowType flow() const
Definition: colpartition.h:155
#define ASSERT_HOST(x)
Definition: errcode.h:88
Definition: capi.h:142
void tesseract::ColPartition::Print ( ) const

Definition at line 1792 of file colpartition.cpp.

1792  {
1793  int y = MidY();
1794  tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
1795  " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
1796  " ts=%d bs=%d ls=%d rs=%d\n",
1797  boxes_.empty() ? 'E' : ' ',
1798  left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y),
1799  bounding_box_.left(), median_left_,
1800  bounding_box_.bottom(), median_bottom_,
1801  bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B',
1802  right_margin_, median_right_, bounding_box_.top(), median_top_,
1803  good_width_, good_column_, type_,
1804  kBlobTypes[blob_type_], flow_,
1805  first_column_, last_column_, boxes_.length(),
1806  space_above_, space_below_, space_to_left_, space_to_right_);
1807 }
int16_t top() const
Definition: rect.h:58
int LeftAtY(int y) const
Definition: colpartition.h:341
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65
int16_t left() const
Definition: rect.h:72
int RightAtY(int y) const
Definition: colpartition.h:345
void tesseract::ColPartition::PrintColors ( )

Definition at line 1810 of file colpartition.cpp.

1810  {
1811  tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
1812  color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
1813  color1_[L_ALPHA_CHANNEL],
1814  color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1815 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
void tesseract::ColPartition::RefinePartners ( PolyBlockType  type,
bool  get_desperate,
ColPartitionGrid grid 
)

Definition at line 1887 of file colpartition.cpp.

1888  {
1889  if (TypesSimilar(type_, type)) {
1890  RefinePartnersInternal(true, get_desperate, grid);
1891  RefinePartnersInternal(false, get_desperate, grid);
1892  } else if (type == PT_COUNT) {
1893  // This is the final pass. Make sure only the correctly typed
1894  // partners survive, however many there are.
1895  RefinePartnersByType(true, &upper_partners_);
1896  RefinePartnersByType(false, &lower_partners_);
1897  // It is possible for a merge to have given a partition multiple
1898  // partners again, so the last resort is to use overlap which is
1899  // guaranteed to leave at most one partner left.
1900  if (!upper_partners_.empty() && !upper_partners_.singleton())
1901  RefinePartnersByOverlap(true, &upper_partners_);
1902  if (!lower_partners_.empty() && !lower_partners_.singleton())
1903  RefinePartnersByOverlap(false, &lower_partners_);
1904  }
1905 }
PolyBlockType type() const
Definition: colpartition.h:182
Definition: capi.h:143
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
Definition: colpartition.h:419
void tesseract::ColPartition::ReflectInYAxis ( )

Definition at line 330 of file colpartition.cpp.

330  {
331  BLOBNBOX_CLIST reversed_boxes;
332  BLOBNBOX_C_IT reversed_it(&reversed_boxes);
333  // Reverse the order of the boxes_.
334  BLOBNBOX_C_IT bb_it(&boxes_);
335  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
336  reversed_it.add_before_then_move(bb_it.extract());
337  }
338  bb_it.add_list_after(&reversed_boxes);
339  ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
340  int tmp = left_margin_;
341  left_margin_ = -right_margin_;
342  right_margin_ = -tmp;
343  ComputeLimits();
344 }
#define ASSERT_HOST(x)
Definition: errcode.h:88
bool tesseract::ColPartition::ReleaseNonLeaderBoxes ( )

Definition at line 299 of file colpartition.cpp.

299  {
300  BLOBNBOX_C_IT bb_it(&boxes_);
301  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
302  BLOBNBOX* bblob = bb_it.data();
303  if (bblob->flow() != BTFT_LEADER) {
304  if (bblob->owner() == this) bblob->set_owner(nullptr);
305  bb_it.extract();
306  }
307  }
308  if (bb_it.empty()) return false;
309  flow_ = BTFT_LEADER;
310  ComputeLimits();
311  return true;
312 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:352
BlobTextFlowType flow() const
Definition: blobbox.h:295
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:355
void tesseract::ColPartition::RemoveBox ( BLOBNBOX * box)

Definition at line 212 of file colpartition.cpp.

212  {
213  BLOBNBOX_C_IT bb_it(&boxes_);
214  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
215  if (box == bb_it.data()) {
216  bb_it.extract();
217  ComputeLimits();
218  return;
219  }
220  }
221 }
void tesseract::ColPartition::RemovePartner ( bool  upper,
ColPartition * partner 
)

Definition at line 628 of file colpartition.cpp.

628  {
629  ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
630  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
631  if (it.data() == partner) {
632  it.extract();
633  break;
634  }
635  }
636 }
int tesseract::ColPartition::right_key ( ) const
inline

Definition at line 179 of file colpartition.h.

179  {
180  return right_key_;
181  }
bool tesseract::ColPartition::right_key_tab ( ) const
inline

Definition at line 176 of file colpartition.h.

176  {
177  return right_key_tab_;
178  }
int tesseract::ColPartition::right_margin ( ) const
inline

Definition at line 119 of file colpartition.h.

119  {
120  return right_margin_;
121  }
int tesseract::ColPartition::RightAtY ( int  y) const
inline

Definition at line 345 of file colpartition.h.

345  {
346  return XAtY(right_key_, y);
347  }
int XAtY(int sort_key, int y) const
Definition: colpartition.h:321
int tesseract::ColPartition::RightBlobRule ( ) const

Definition at line 560 of file colpartition.cpp.

560  {
561  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
562  it.move_to_last();
563  return it.data()->right_rule();
564 }
void tesseract::ColPartition::set_blob_type ( BlobRegionType  t)
inline

Definition at line 152 of file colpartition.h.

152  {
153  blob_type_ = t;
154  }
void tesseract::ColPartition::set_block_owned ( bool  owned)
inline

Definition at line 209 of file colpartition.h.

209  {
210  block_owned_ = owned;
211  }
void tesseract::ColPartition::set_bottom_spacing ( int  spacing)
inline

Definition at line 224 of file colpartition.h.

224  {
225  bottom_spacing_ = spacing;
226  }
void tesseract::ColPartition::set_first_column ( int  column)
inline

Definition at line 732 of file colpartition.h.

732  {
733  first_column_ = column;
734  }
void tesseract::ColPartition::set_flow ( BlobTextFlowType  f)
inline

Definition at line 158 of file colpartition.h.

158  {
159  flow_ = f;
160  }
void tesseract::ColPartition::set_inside_table_column ( bool  val)
inline

Definition at line 247 of file colpartition.h.

247  {
248  inside_table_column_ = val;
249  }
void tesseract::ColPartition::set_last_column ( int  column)
inline

Definition at line 735 of file colpartition.h.

735  {
736  last_column_ = column;
737  }
void tesseract::ColPartition::set_left_margin ( int  margin)
inline

Definition at line 116 of file colpartition.h.

116  {
117  left_margin_ = margin;
118  }
void tesseract::ColPartition::set_median_height ( int  height)
inline

Definition at line 140 of file colpartition.h.

140  {
141  median_height_ = height;
142  }
void tesseract::ColPartition::set_median_width ( int  width)
inline

Definition at line 146 of file colpartition.h.

146  {
147  median_width_ = width;
148  }
void tesseract::ColPartition::set_nearest_neighbor_above ( ColPartition * part)
inline

Definition at line 253 of file colpartition.h.

253  {
254  nearest_neighbor_above_ = part;
255  }
void tesseract::ColPartition::set_nearest_neighbor_below ( ColPartition * part)
inline

Definition at line 259 of file colpartition.h.

259  {
260  nearest_neighbor_below_ = part;
261  }
void tesseract::ColPartition::set_owns_blobs ( bool  owns_blobs)
inline

Definition at line 295 of file colpartition.h.

295  {
296  // Do NOT change ownership flag when there are blobs in the list.
297  // Immediately set the ownership flag when creating copies.
298  ASSERT_HOST(boxes_.empty());
299  owns_blobs_ = owns_blobs;
300  }
#define ASSERT_HOST(x)
Definition: errcode.h:88
bool owns_blobs() const
Definition: colpartition.h:292
void tesseract::ColPartition::set_right_margin ( int  margin)
inline

Definition at line 122 of file colpartition.h.

122  {
123  right_margin_ = margin;
124  }
void tesseract::ColPartition::set_side_step ( int  step)
inline

Definition at line 218 of file colpartition.h.

218  {
219  side_step_ = step;
220  }
void tesseract::ColPartition::set_space_above ( int  space)
inline

Definition at line 265 of file colpartition.h.

265  {
266  space_above_ = space;
267  }
void tesseract::ColPartition::set_space_below ( int  space)
inline

Definition at line 271 of file colpartition.h.

271  {
272  space_below_ = space;
273  }
void tesseract::ColPartition::set_space_to_left ( int  space)
inline

Definition at line 277 of file colpartition.h.

277  {
278  space_to_left_ = space;
279  }
void tesseract::ColPartition::set_space_to_right ( int  space)
inline

Definition at line 283 of file colpartition.h.

283  {
284  space_to_right_ = space;
285  }
void tesseract::ColPartition::set_table_type ( )
inline

Definition at line 234 of file colpartition.h.

234  {
235  if (type_ != PT_TABLE) {
236  type_before_table_ = type_;
237  type_ = PT_TABLE;
238  }
239  }
Definition: capi.h:134
void tesseract::ColPartition::set_top_spacing ( int  spacing)
inline

Definition at line 230 of file colpartition.h.

230  {
231  top_spacing_ = spacing;
232  }
void tesseract::ColPartition::set_type ( PolyBlockType  t)
inline

Definition at line 185 of file colpartition.h.

185  {
186  type_ = t;
187  }
void tesseract::ColPartition::set_vertical ( const ICOORD & v)
inline

Definition at line 194 of file colpartition.h.

194  {
195  vertical_ = v;
196  }
void tesseract::ColPartition::set_working_set ( WorkingPartSet * working_set)
inline

Definition at line 203 of file colpartition.h.

203  {
204  working_set_ = working_set;
205  }
void tesseract::ColPartition::SetBlobTypes ( )

Definition at line 1275 of file colpartition.cpp.

1275  {
1276  if (!owns_blobs())
1277  return;
1278  BLOBNBOX_C_IT it(&boxes_);
1279  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1280  BLOBNBOX* blob = it.data();
1281  if (blob->flow() != BTFT_LEADER)
1282  blob->set_flow(flow_);
1283  blob->set_region_type(blob_type_);
1284  ASSERT_HOST(blob->owner() == nullptr || blob->owner() == this);
1285  }
1286 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:352
BlobTextFlowType flow() const
Definition: blobbox.h:295
#define ASSERT_HOST(x)
Definition: errcode.h:88
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:286
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:298
bool owns_blobs() const
Definition: colpartition.h:292
void tesseract::ColPartition::SetColumnGoodness ( WidthCallback * cb)

Definition at line 1080 of file colpartition.cpp.

1080  {
1081  int y = MidY();
1082  int width = RightAtY(y) - LeftAtY(y);
1083  good_width_ = cb->Run(width);
1084  good_column_ = blob_type_ == BRT_TEXT && left_key_tab_ && right_key_tab_;
1085 }
int LeftAtY(int y) const
Definition: colpartition.h:341
int RightAtY(int y) const
Definition: colpartition.h:345
void tesseract::ColPartition::SetLeftTab ( const TabVector * tab_vector)

Definition at line 504 of file colpartition.cpp.

504  {
505  if (tab_vector != nullptr) {
506  left_key_ = tab_vector->sort_key();
507  left_key_tab_ = left_key_ <= BoxLeftKey();
508  } else {
509  left_key_tab_ = false;
510  }
511  if (!left_key_tab_)
512  left_key_ = BoxLeftKey();
513 }
void tesseract::ColPartition::SetPartitionType ( int  resolution,
ColPartitionSet columns 
)

Definition at line 983 of file colpartition.cpp.

983  {
984  int first_spanned_col = -1;
985  ColumnSpanningType span_type =
986  columns->SpanningType(resolution,
987  bounding_box_.left(), bounding_box_.right(),
988  std::min(bounding_box_.height(), bounding_box_.width()),
989  MidY(), left_margin_, right_margin_,
990  &first_column_, &last_column_,
991  &first_spanned_col);
992  column_set_ = columns;
993  if (first_column_ < last_column_ && span_type == CST_PULLOUT &&
994  !IsLineType()) {
995  // Unequal columns may indicate that the pullout spans one of the columns
996  // it lies in, so force it to be allocated to just that column.
997  if (first_spanned_col >= 0) {
998  first_column_ = first_spanned_col;
999  last_column_ = first_spanned_col;
1000  } else {
1001  if ((first_column_ & 1) == 0)
1002  last_column_ = first_column_;
1003  else if ((last_column_ & 1) == 0)
1004  first_column_ = last_column_;
1005  else
1006  first_column_ = last_column_ = (first_column_ + last_column_) / 2;
1007  }
1008  }
1009  type_ = PartitionType(span_type);
1010 }
bool IsLineType() const
Definition: colpartition.h:426
int16_t height() const
Definition: rect.h:108
int16_t width() const
Definition: rect.h:115
int16_t right() const
Definition: rect.h:79
PolyBlockType PartitionType(ColumnSpanningType flow) const
int16_t left() const
Definition: rect.h:72
void tesseract::ColPartition::SetRegionAndFlowTypesFromProjectionValue ( int  value)

Definition at line 1201 of file colpartition.cpp.

1201  {
1202  int blob_count = 0; // Total # blobs.
1203  int good_blob_score_ = 0; // Total # good strokewidth neighbours.
1204  int noisy_count = 0; // Total # neighbours marked as noise.
1205  int hline_count = 0;
1206  int vline_count = 0;
1207  BLOBNBOX_C_IT it(&boxes_);
1208  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1209  BLOBNBOX* blob = it.data();
1210  ++blob_count;
1211  noisy_count += blob->NoisyNeighbours();
1212  good_blob_score_ += blob->GoodTextBlob();
1213  if (blob->region_type() == BRT_HLINE) ++hline_count;
1214  if (blob->region_type() == BRT_VLINE) ++vline_count;
1215  }
1216  flow_ = BTFT_NEIGHBOURS;
1217  blob_type_ = BRT_UNKNOWN;
1218  if (hline_count > vline_count) {
1219  flow_ = BTFT_NONE;
1220  blob_type_ = BRT_HLINE;
1221  } else if (vline_count > hline_count) {
1222  flow_ = BTFT_NONE;
1223  blob_type_ = BRT_VLINE;
1224  } else if (value < -1 || 1 < value) {
1225  int long_side;
1226  int short_side;
1227  if (value > 0) {
1228  long_side = bounding_box_.width();
1229  short_side = bounding_box_.height();
1230  blob_type_ = BRT_TEXT;
1231  } else {
1232  long_side = bounding_box_.height();
1233  short_side = bounding_box_.width();
1234  blob_type_ = BRT_VERT_TEXT;
1235  }
1236  // We will combine the old metrics using aspect ratio and blob counts
1237  // with the input value by allowing a strong indication to flip the
1238  // STRONG_CHAIN/CHAIN flow values.
1239  int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0;
1240  if (short_side > kHorzStrongTextlineHeight) ++strong_score;
1241  if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score;
1242  if (abs(value) >= kMinStrongTextValue)
1243  flow_ = BTFT_STRONG_CHAIN;
1244  else if (abs(value) >= kMinChainTextValue)
1245  flow_ = BTFT_CHAIN;
1246  else
1247  flow_ = BTFT_NEIGHBOURS;
1248  // Upgrade chain to strong chain if the other indicators are good
1249  if (flow_ == BTFT_CHAIN && strong_score == 3)
1250  flow_ = BTFT_STRONG_CHAIN;
1251  // Downgrade strong vertical text to chain if the indicators are bad.
1252  if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2)
1253  flow_ = BTFT_CHAIN;
1254  }
1255  if (flow_ == BTFT_NEIGHBOURS) {
1256  // Check for noisy neighbours.
1257  if (noisy_count >= blob_count) {
1258  flow_ = BTFT_NONTEXT;
1259  blob_type_= BRT_NOISE;
1260  }
1261  }
1262  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
1263  bounding_box_.bottom())) {
1264  tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1265  blob_count, noisy_count, good_blob_score_);
1266  tprintf(" Projection value=%d, flow=%d, blob_type=%d\n",
1267  value, flow_, blob_type_);
1268  Print();
1269  }
1270  SetBlobTypes();
1271 }
const int kHorzStrongTextlineHeight
const int kMinStrongTextValue
int GoodTextBlob() const
Definition: blobbox.cpp:226
int NoisyNeighbours() const
Definition: blobbox.cpp:237
int16_t height() const
Definition: rect.h:108
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
const int kHorzStrongTextlineCount
int16_t width() const
Definition: rect.h:115
const int kMinChainTextValue
int16_t bottom() const
Definition: rect.h:65
BlobRegionType region_type() const
Definition: blobbox.h:283
const int kHorzStrongTextlineAspect
int16_t left() const
Definition: rect.h:72
static bool WithinTestRegion(int detail_level, int x, int y)
void tesseract::ColPartition::SetRightTab ( const TabVector * tab_vector)

Definition at line 516 of file colpartition.cpp.

516  {
517  if (tab_vector != nullptr) {
518  right_key_ = tab_vector->sort_key();
519  right_key_tab_ = right_key_ >= BoxRightKey();
520  } else {
521  right_key_tab_ = false;
522  }
523  if (!right_key_tab_)
524  right_key_ = BoxRightKey();
525 }
int BoxRightKey() const
Definition: colpartition.h:337
void tesseract::ColPartition::SetSpecialBlobsDensity ( const BlobSpecialTextType  type,
const float  density 
)

Definition at line 586 of file colpartition.cpp.

587  {
589  special_blobs_densities_[type] = density;
590 }
PolyBlockType type() const
Definition: colpartition.h:182
#define ASSERT_HOST(x)
Definition: errcode.h:88
ColPartition * tesseract::ColPartition::ShallowCopy ( ) const

Definition at line 1741 of file colpartition.cpp.

1741  {
1742  auto* part = new ColPartition(blob_type_, vertical_);
1743  part->left_margin_ = left_margin_;
1744  part->right_margin_ = right_margin_;
1745  part->bounding_box_ = bounding_box_;
1746  memcpy(part->special_blobs_densities_, special_blobs_densities_,
1747  sizeof(special_blobs_densities_));
1748  part->median_bottom_ = median_bottom_;
1749  part->median_top_ = median_top_;
1750  part->median_height_ = median_height_;
1751  part->median_left_ = median_left_;
1752  part->median_right_ = median_right_;
1753  part->median_width_ = median_width_;
1754  part->good_width_ = good_width_;
1755  part->good_column_ = good_column_;
1756  part->left_key_tab_ = left_key_tab_;
1757  part->right_key_tab_ = right_key_tab_;
1758  part->type_ = type_;
1759  part->flow_ = flow_;
1760  part->left_key_ = left_key_;
1761  part->right_key_ = right_key_;
1762  part->first_column_ = first_column_;
1763  part->last_column_ = last_column_;
1764  part->owns_blobs_ = false;
1765  return part;
1766 }
ColPartition * tesseract::ColPartition::SingletonPartner ( bool  upper)

Definition at line 639 of file colpartition.cpp.

639  {
640  ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
641  if (!partners->singleton())
642  return nullptr;
643  ColPartition_C_IT it(partners);
644  return it.data();
645 }
void tesseract::ColPartition::SmoothPartnerRun ( int  working_set_count)

Definition at line 1818 of file colpartition.cpp.

1818  {
1819  STATS left_stats(0, working_set_count);
1820  STATS right_stats(0, working_set_count);
1821  PolyBlockType max_type = type_;
1822  ColPartition* partner;
1823  for (partner = SingletonPartner(false); partner != nullptr;
1824  partner = partner->SingletonPartner(false)) {
1825  if (partner->type_ > max_type)
1826  max_type = partner->type_;
1827  if (column_set_ == partner->column_set_) {
1828  left_stats.add(partner->first_column_, 1);
1829  right_stats.add(partner->last_column_, 1);
1830  }
1831  }
1832  type_ = max_type;
1833  // TODO(rays) Either establish that it isn't necessary to set the columns,
1834  // or find a way to do it that does not cause an assert failure in
1835  // AddToWorkingSet.
1836 #if 0
1837  first_column_ = left_stats.mode();
1838  last_column_ = right_stats.mode();
1839  if (last_column_ < first_column_)
1840  last_column_ = first_column_;
1841 #endif
1842 
1843  for (partner = SingletonPartner(false); partner != nullptr;
1844  partner = partner->SingletonPartner(false)) {
1845  partner->type_ = max_type;
1846 #if 0 // See TODO above
1847  if (column_set_ == partner->column_set_) {
1848  partner->first_column_ = first_column_;
1849  partner->last_column_ = last_column_;
1850  }
1851 #endif
1852  }
1853 }
PolyBlockType
Definition: publictypes.h:53
ColPartition * SingletonPartner(bool upper)
Definition: statistc.h:31
static int tesseract::ColPartition::SortByBBox ( const void *  p1,
const void *  p2 
)
inline static

Definition at line 715 of file colpartition.h.

715  {
716  const ColPartition* part1 = *static_cast<const ColPartition* const*>(p1);
717  const ColPartition* part2 = *static_cast<const ColPartition* const*>(p2);
718  int mid_y1 = part1->bounding_box_.y_middle();
719  int mid_y2 = part2->bounding_box_.y_middle();
720  if ((part2->bounding_box_.bottom() <= mid_y1 &&
721  mid_y1 <= part2->bounding_box_.top()) ||
722  (part1->bounding_box_.bottom() <= mid_y2 &&
723  mid_y2 <= part1->bounding_box_.top())) {
724  // Sort by increasing x.
725  return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle();
726  }
727  // Sort by decreasing y.
728  return mid_y2 - mid_y1;
729  }
int tesseract::ColPartition::SortKey ( int  x,
int  y 
) const
inline

Definition at line 317 of file colpartition.h.

317  {
318  return TabVector::SortKey(vertical_, x, y);
319  }
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:280
int tesseract::ColPartition::space_above ( ) const
inline

Definition at line 262 of file colpartition.h.

262  {
263  return space_above_;
264  }
int tesseract::ColPartition::space_below ( ) const
inline

Definition at line 268 of file colpartition.h.

268  {
269  return space_below_;
270  }
int tesseract::ColPartition::space_to_left ( ) const
inline

Definition at line 274 of file colpartition.h.

274  {
275  return space_to_left_;
276  }
int tesseract::ColPartition::space_to_right ( ) const
inline

Definition at line 280 of file colpartition.h.

280  {
281  return space_to_right_;
282  }
int tesseract::ColPartition::SpecialBlobsCount ( const BlobSpecialTextType  type)

Definition at line 571 of file colpartition.cpp.

571  {
572  ASSERT_HOST(type < BSTT_COUNT);
573  BLOBNBOX_C_IT blob_it(&boxes_);
574  int count = 0;
575  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
576  BLOBNBOX* blob = blob_it.data();
577  BlobSpecialTextType blob_type = blob->special_text_type();
578  if (blob_type == type) {
579  count++;
580  }
581  }
582 
583  return count;
584 }
BlobRegionType blob_type() const
Definition: colpartition.h:149
PolyBlockType type() const
Definition: colpartition.h:182
#define ASSERT_HOST(x)
Definition: errcode.h:88
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:289
BlobSpecialTextType
Definition: blobbox.h:96
int count(LIST var_list)
Definition: oldlist.cpp:96
float tesseract::ColPartition::SpecialBlobsDensity ( const BlobSpecialTextType  type) const

Definition at line 566 of file colpartition.cpp.

566  {
567  ASSERT_HOST(type < BSTT_COUNT);
568  return special_blobs_densities_[type];
569 }
PolyBlockType type() const
Definition: colpartition.h:182
#define ASSERT_HOST(x)
Definition: errcode.h:88
ColPartition * tesseract::ColPartition::SplitAt ( int  split_x)

Definition at line 833 of file colpartition.cpp.

833  {
834  if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right())
835  return nullptr; // There will be no change.
836  ColPartition* split_part = ShallowCopy();
837  split_part->set_owns_blobs(owns_blobs());
838  BLOBNBOX_C_IT it(&boxes_);
839  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
840  BLOBNBOX* bbox = it.data();
841  ColPartition* prev_owner = bbox->owner();
842  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
843  const TBOX& box = bbox->bounding_box();
844  if (box.left() >= split_x) {
845  split_part->AddBox(it.extract());
846  if (owns_blobs() && prev_owner != nullptr)
847  bbox->set_owner(split_part);
848  }
849  }
850  if (it.empty()) {
851  // Possible if split-x passes through the first blob.
852  it.add_list_after(&split_part->boxes_);
853  }
854  ASSERT_HOST(!it.empty());
855  if (split_part->IsEmpty()) {
856  // Split part ended up with nothing. Possible if split_x passes
857  // through the last blob.
858  delete split_part;
859  return nullptr;
860  }
861  right_key_tab_ = false;
862  split_part->left_key_tab_ = false;
863  right_margin_ = split_x;
864  split_part->left_margin_ = split_x;
865  ComputeLimits();
866  split_part->ComputeLimits();
867  return split_part;
868 }
TBOX
Definition: rect.h:34
tesseract::ColPartition * owner() const
Definition: blobbox.h:352
const TBOX & bounding_box() const
Definition: blobbox.h:230
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:355
ColPartition * ShallowCopy() const
int16_t right() const
Definition: rect.h:79
#define ASSERT_HOST(x)
Definition: errcode.h:88
int16_t left() const
Definition: rect.h:72
bool owns_blobs() const
Definition: colpartition.h:292
ColPartition * tesseract::ColPartition::SplitAtBlob ( BLOBNBOX * split_blob)

Definition at line 797 of file colpartition.cpp.

797  {
798  ColPartition* split_part = ShallowCopy();
799  split_part->set_owns_blobs(owns_blobs());
800  BLOBNBOX_C_IT it(&boxes_);
801  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
802  BLOBNBOX* bbox = it.data();
803  ColPartition* prev_owner = bbox->owner();
804  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
805  if (bbox == split_blob || !split_part->boxes_.empty()) {
806  split_part->AddBox(it.extract());
807  if (owns_blobs() && prev_owner != nullptr)
808  bbox->set_owner(split_part);
809  }
810  }
811  ASSERT_HOST(!it.empty());
812  if (split_part->IsEmpty()) {
813  // Split part ended up with nothing. Possible if split_blob is not
814  // in the list of blobs.
815  delete split_part;
816  return nullptr;
817  }
818  right_key_tab_ = false;
819  split_part->left_key_tab_ = false;
820  ComputeLimits();
821  // TODO(nbeato) Merge Ray's CL like this:
822  // if (owns_blobs())
823  // SetBlobTextlineGoodness();
824  split_part->ComputeLimits();
825  // TODO(nbeato) Merge Ray's CL like this:
826  // if (split_part->owns_blobs())
827  // split_part->SetBlobTextlineGoodness();
828  return split_part;
829 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:352
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:355
ColPartition * ShallowCopy() const
#define ASSERT_HOST(x)
Definition: errcode.h:88
bool owns_blobs() const
Definition: colpartition.h:292
int tesseract::ColPartition::top_spacing ( ) const
inline

Definition at line 227 of file colpartition.h.

227  {
228  return top_spacing_;
229  }
PolyBlockType tesseract::ColPartition::type ( ) const
inline

Definition at line 182 of file colpartition.h.

182  {
183  return type_;
184  }
bool tesseract::ColPartition::TypesMatch ( const ColPartition & other) const
inline

Definition at line 410 of file colpartition.h.

410  {
411  return TypesMatch(blob_type_, other.blob_type_);
412  }
bool TypesMatch(const ColPartition &other) const
Definition: colpartition.h:410
static bool tesseract::ColPartition::TypesMatch ( BlobRegionType  type1,
BlobRegionType  type2 
)
inline static

Definition at line 413 of file colpartition.h.

413  {
414  return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) &&
415  !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
416  }
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:426
static bool tesseract::ColPartition::TypesSimilar ( PolyBlockType  type1,
PolyBlockType  type2 
)
inline static

Definition at line 419 of file colpartition.h.

419  {
420  return (type1 == type2 ||
421  (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
422  (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
423  }
ColPartition_CLIST* tesseract::ColPartition::upper_partners ( )
inline

Definition at line 197 of file colpartition.h.

197  {
198  return &upper_partners_;
199  }
int tesseract::ColPartition::VCoreOverlap ( const ColPartition & other) const
inline

Definition at line 376 of file colpartition.h.

376  {
377  if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
378  return 0;
379  }
380  return std::min(median_top_, other.median_top_) -
381  std::max(median_bottom_, other.median_bottom_);
382  }
bool tesseract::ColPartition::VOverlaps ( const ColPartition & other) const
inline

Definition at line 371 of file colpartition.h.

371  {
372  return bounding_box_.y_gap(other.bounding_box_) < 0;
373  }
int y_gap(const TBOX &box) const
Definition: rect.h:233
bool tesseract::ColPartition::VSignificantCoreOverlap ( const ColPartition & other) const
inline

Definition at line 391 of file colpartition.h.

391  {
392  if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
393  return false;
394  }
395  int overlap = VCoreOverlap(other);
396  int height = std::min(median_top_ - median_bottom_,
397  other.median_top_ - other.median_bottom_);
398  return overlap * 3 > height;
399  }
int VCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:376
bool tesseract::ColPartition::WithinSameMargins ( const ColPartition & other) const
inline

Definition at line 402 of file colpartition.h.

402  {
403  return left_margin_ <= other.bounding_box_.left() &&
404  bounding_box_.left() >= other.left_margin_ &&
405  bounding_box_.right() <= other.right_margin_ &&
406  right_margin_ >= other.bounding_box_.right();
407  }
int16_t right() const
Definition: rect.h:79
int16_t left() const
Definition: rect.h:72
int tesseract::ColPartition::XAtY ( int  sort_key,
int  y 
) const
inline

Definition at line 321 of file colpartition.h.

321  {
322  return TabVector::XAtY(vertical_, sort_key, y);
323  }
int XAtY(int y) const
Definition: tabvector.h:189

The documentation for this class was generated from the following files: