tesseract  4.1.0
cjkpitch.cpp
Go to the documentation of this file.
1 // File: cjkpitch.cpp
3 // Description: Code to determine fixed pitchness and the pitch if fixed,
4 // for CJK text.
5 // Author: takenaka@google.com (Hiroshi Takenaka)
6 //
7 // Copyright 2011 Google Inc. All Rights Reserved.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #include "cjkpitch.h"
21 #include "genericvector.h"
22 #include "topitch.h"
23 #include "tovars.h"
24 
25 #include <algorithm>
26 #include <vector> // for std::vector
27 
28 static BOOL_VAR(textord_space_size_is_variable, false,
29  "If true, word delimiter spaces are assumed to have "
30  "variable width, even though characters have fixed pitch.");
31 
32 namespace {
33 
34 // Allow +/-10% error for character pitch / body size.
35 static const float kFPTolerance = 0.1;
36 
37 // Minimum ratio of "good" character pitch for a row to be considered
38 // to be fixed-pitch.
39 static const float kFixedPitchThreshold = 0.35;
40 
41 // rank statistics for a small collection of float values.
42 class SimpleStats {
43  public:
44  SimpleStats(): finalized_(false), values_() { }
45  ~SimpleStats() { }
46 
47  void Clear() {
48  values_.clear();
49  finalized_ = false;
50  }
51 
52  void Add(float value) {
53  values_.push_back(value);
54  finalized_ = false;
55  }
56 
57  void Finish() {
58  values_.sort(float_compare);
59  finalized_ = true;
60  }
61 
62  float ile(double frac) {
63  if (!finalized_) Finish();
64  if (values_.empty()) return 0.0;
65  if (frac >= 1.0) return values_.back();
66  if (frac <= 0.0 || values_.size() == 1) return values_[0];
67  int index = static_cast<int>((values_.size() - 1) * frac);
68  float reminder = (values_.size() - 1) * frac - index;
69 
70  return values_[index] * (1.0 - reminder) +
71  values_[index + 1] * reminder;
72  }
73 
74  float median() {
75  return ile(0.5);
76  }
77 
78  float minimum() {
79  if (!finalized_) Finish();
80  if (values_.empty()) return 0.0;
81  return values_[0];
82  }
83 
84  int size() const {
85  return values_.size();
86  }
87 
88  private:
89  static int float_compare(const void* a, const void* b) {
90  const auto* f_a = static_cast<const float*>(a);
91  const auto* f_b = static_cast<const float*>(b);
92  return (*f_a > *f_b) ? 1 : ((*f_a < *f_b) ? -1 : 0);
93  }
94 
95  bool finalized_;
96  GenericVector<float> values_;
97 };
98 
99 // statistics for a small collection of float pairs (x, y).
100 // EstimateYFor(x, r) returns the estimated y at x, based on
101 // existing samples between x*(1-r) ~ x*(1+r).
102 class LocalCorrelation {
103  public:
104  struct float_pair {
105  float x, y;
106  int vote;
107  };
108 
109  LocalCorrelation(): finalized_(false) { }
110  ~LocalCorrelation() { }
111 
112  void Finish() {
113  values_.sort(float_pair_compare);
114  finalized_ = true;
115  }
116 
117  void Clear() {
118  finalized_ = false;
119  }
120 
121  void Add(float x, float y, int v) {
122  struct float_pair value;
123  value.x = x;
124  value.y = y;
125  value.vote = v;
126  values_.push_back(value);
127  finalized_ = false;
128  }
129 
130  float EstimateYFor(float x, float r) {
131  ASSERT_HOST(finalized_);
132  int start = 0, end = values_.size();
133  // Because the number of samples (used_) is assumed to be small,
134  // just use linear search to find values within the range.
135  while (start < values_.size() && values_[start].x < x * (1.0 - r)) start++;
136  while (end - 1 >= 0 && values_[end - 1].x > x * (1.0 + r)) end--;
137 
138  // Fall back to the global average if there are no data within r
139  // of x.
140  if (start >= end) {
141  start = 0;
142  end = values_.size();
143  }
144 
145  // Compute weighted average of the values.
146  float rc = 0;
147  int vote = 0;
148  for (int i = start; i < end; i++) {
149  rc += values_[i].vote * x * values_[i].y / values_[i].x;
150  vote += values_[i].vote;
151  }
152 
153  return rc / vote;
154  }
155 
156  private:
157  static int float_pair_compare(const void* a, const void* b) {
158  const auto* f_a = static_cast<const float_pair*>(a);
159  const auto* f_b = static_cast<const float_pair*>(b);
160  return (f_a->x > f_b->x) ? 1 : ((f_a->x < f_b->x) ? -1 : 0);
161  }
162 
163  bool finalized_;
165 };
166 
167 // Class to represent a character on a fixed pitch row. A FPChar may
168 // consist of multiple blobs (BLOBNBOX's).
169 class FPChar {
170  public:
171  enum Alignment {
172  ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD
173  };
174 
175  FPChar(): box_(), real_body_(),
176  from_(nullptr), to_(nullptr), num_blobs_(0), max_gap_(0),
177  final_(false), alignment_(ALIGN_UNKNOWN),
178  merge_to_prev_(false), delete_flag_(false) {
179  }
180 
181  // Initialize from blob.
182  void Init(BLOBNBOX *blob) {
183  box_ = blob->bounding_box();
184  real_body_ = box_;
185  from_ = to_ = blob;
186  num_blobs_ = 1;
187  }
188 
189  // Merge this character with "next". The "next" character should
190  // consist of succeeding blobs on the same row.
191  void Merge(const FPChar &next) {
192  int gap = real_body_.x_gap(next.real_body_);
193  if (gap > max_gap_) max_gap_ = gap;
194 
195  box_ += next.box_;
196  real_body_ += next.real_body_;
197  to_ = next.to_;
198  num_blobs_ += next.num_blobs_;
199  }
200 
201  // Accessors.
202  const TBOX &box() const { return box_; }
203  void set_box(const TBOX &box) {
204  box_ = box;
205  }
206  const TBOX &real_body() const { return real_body_; }
207 
208  bool is_final() const { return final_; }
209  void set_final(bool flag) {
210  final_ = flag;
211  }
212 
213  const Alignment& alignment() const {
214  return alignment_;
215  }
216  void set_alignment(Alignment alignment) {
217  alignment_ = alignment;
218  }
219 
220  bool merge_to_prev() const {
221  return merge_to_prev_;
222  }
223  void set_merge_to_prev(bool flag) {
224  merge_to_prev_ = flag;
225  }
226 
227  bool delete_flag() const {
228  return delete_flag_;
229  }
230  void set_delete_flag(bool flag) {
231  delete_flag_ = flag;
232  }
233 
234  int max_gap() const {
235  return max_gap_;
236  }
237 
238  int num_blobs() const {
239  return num_blobs_;
240  }
241 
242  private:
243  TBOX box_; // Rectangle region considered to be occupied by this
244  // character. It could be bigger than the bounding box.
245  TBOX real_body_; // Real bounding box of this character.
246  BLOBNBOX *from_; // The first blob of this character.
247  BLOBNBOX *to_; // The last blob of this character.
248  int num_blobs_; // Number of blobs that belong to this character.
249  int max_gap_; // Maximum x gap between the blobs.
250 
251  bool final_; // True if alignment/fragmentation decision for this
252  // character is finalized.
253 
254  Alignment alignment_; // Alignment status.
255  bool merge_to_prev_; // True if this is a fragmented blob that
256  // needs to be merged to the previous
257  // character.
258 
259  int delete_flag_; // True if this character is merged to another
260  // one and needs to be deleted.
261 };
262 
263 // Class to represent a fixed pitch row, as a linear collection of
264 // FPChar's.
265 class FPRow {
266  public:
267  FPRow() : pitch_(0.0f), estimated_pitch_(0.0f),
268  all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(),
269  heights_(), characters_(), real_row_(nullptr) {
270  }
271 
272  ~FPRow() { }
273 
274  // Initialize from TD_ROW.
275  void Init(TO_ROW *row);
276 
277  // Estimate character pitch of this row, based on current alignment
278  // status of underlying FPChar's. The argument pass1 can be set to
279  // true if the function is called after Pass1Analyze(), to eliminate
280  // some redundant computation.
281  void EstimatePitch(bool pass1);
282 
283  // Check each character if it has good character pitches between its
284  // predecessor and its successor and set its alignment status. If
285  // we already calculated the estimated pitch for this row, the value
286  // is used. If we didn't, a character is considered to be good, if
287  // the pitches between its predecessor and its successor are almost
288  // equal.
289  void Pass1Analyze();
290 
291  // Find characters that fit nicely into one imaginary body next to a
292  // character which is already finalized. Then mark them as character
293  // fragments.
294  bool Pass2Analyze();
295 
296  // Merge FPChars marked as character fragments into one.
297  void MergeFragments();
298 
299  // Finalize characters that are already large enough and cannot be
300  // merged with others any more.
301  void FinalizeLargeChars();
302 
303  // Output pitch estimation results to attributes of TD_ROW.
304  void OutputEstimations();
305 
306  void DebugOutputResult(int row_index);
307 
308  int good_pitches() {
309  return good_pitches_.size();
310  }
311 
312  float pitch() {
313  return pitch_;
314  }
315 
316  float estimated_pitch() {
317  return estimated_pitch_;
318  }
319 
320  void set_estimated_pitch(float v) {
321  estimated_pitch_ = v;
322  }
323 
324  float height() {
325  return height_;
326  }
327 
328  float height_pitch_ratio() {
329  if (good_pitches_.size() < 2) return -1.0;
330  return height_ / good_pitches_.median();
331  }
332 
333  float gap() {
334  return gap_;
335  }
336 
337  size_t num_chars() {
338  return characters_.size();
339  }
340  FPChar *character(int i) {
341  return &characters_[i];
342  }
343 
344  const TBOX &box(int i) {
345  return characters_[i].box();
346  }
347 
348  const TBOX &real_body(int i) {
349  return characters_[i].real_body();
350  }
351 
352  bool is_box_modified(int i) {
353  return !(characters_[i].box() == characters_[i].real_body());
354  }
355 
356  float center_x(int i) {
357  return (characters_[i].box().left() + characters_[i].box().right()) / 2.0;
358  }
359 
360  bool is_final(int i) {
361  return characters_[i].is_final();
362  }
363 
364  void finalize(int i) {
365  characters_[i].set_final(true);
366  }
367 
368  bool is_good(int i) {
369  return characters_[i].alignment() == FPChar::ALIGN_GOOD;
370  }
371 
372  void mark_good(int i) {
373  characters_[i].set_alignment(FPChar::ALIGN_GOOD);
374  }
375 
376  void mark_bad(int i) {
377  characters_[i].set_alignment(FPChar::ALIGN_BAD);
378  }
379 
380  void clear_alignment(int i) {
381  characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN);
382  }
383 
384  private:
385  static float x_overlap_fraction(const TBOX& box1, const TBOX& box2) {
386  if (std::min(box1.width(), box2.width()) == 0) return 0.0;
387  return -box1.x_gap(box2) / static_cast<float>(std::min(box1.width(), box2.width()));
388  }
389 
390  static bool mostly_overlap(const TBOX& box1, const TBOX& box2) {
391  return x_overlap_fraction(box1, box2) > 0.9;
392  }
393 
394  static bool significant_overlap(const TBOX& box1, const TBOX& box2) {
395  if (std::min(box1.width(), box2.width()) == 0) return false;
396  int overlap = -box1.x_gap(box2);
397  return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1;
398  }
399 
400  static float box_pitch(const TBOX& ref, const TBOX& box) {
401  return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0;
402  }
403 
404  // Check if two neighboring characters satisfy the fixed pitch model.
405  static bool is_good_pitch(float pitch, const TBOX& box1, const TBOX& box2) {
406  // Character box shouldn't exceed pitch.
407  if (box1.width() >= pitch * (1.0 + kFPTolerance) ||
408  box2.width() >= pitch * (1.0 + kFPTolerance) ||
409  box1.height() >= pitch * (1.0 + kFPTolerance) ||
410  box2.height() >= pitch * (1.0 + kFPTolerance)) return false;
411 
412  const float real_pitch = box_pitch(box1, box2);
413  if (fabs(real_pitch - pitch) < pitch * kFPTolerance) return true;
414 
415  if (textord_space_size_is_variable) {
416  // Hangul characters usually have fixed pitch, but words are
417  // delimited by space which can be narrower than characters.
418  if (real_pitch > pitch && real_pitch < pitch * 2.0 &&
419  real_pitch - box1.x_gap(box2) < pitch) {
420  return true;
421  }
422  }
423  return false;
424  }
425 
426  static bool is_interesting_blob(const BLOBNBOX *blob) {
427  return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER;
428  }
429 
430  // Cleanup chars that are already merged to others.
431  void DeleteChars() {
432  int index = 0;
433  for (int i = 0; i < characters_.size(); ++i) {
434  if (!characters_[i].delete_flag()) {
435  if (index != i) characters_[index] = characters_[i];
436  index++;
437  }
438  }
439  characters_.truncate(index);
440  }
441 
442  float pitch_; // Character pitch.
443  float estimated_pitch_; // equal to pitch_ if pitch_ is considered
444  // to be good enough.
445  float height_; // Character height.
446  float gap_; // Minimum gap between characters.
447 
448  // Pitches between any two successive characters.
449  SimpleStats all_pitches_;
450  // Gaps between any two successive characters.
451  SimpleStats all_gaps_;
452  // Pitches between any two successive characters that are consistent
453  // with the fixed pitch model.
454  SimpleStats good_pitches_;
455  // Gaps between any two successive characters that are consistent
456  // with the fixed pitch model.
457  SimpleStats good_gaps_;
458 
459  SimpleStats heights_;
460 
461  GenericVector<FPChar> characters_;
462  TO_ROW *real_row_; // Underlying TD_ROW for this row.
463 };
464 
465 void FPRow::Init(TO_ROW *row) {
466  ASSERT_HOST(row != nullptr);
467  ASSERT_HOST(row->xheight > 0);
468  real_row_ = row;
469  real_row_->pitch_decision = PITCH_CORR_PROP; // Default decision.
470 
471  BLOBNBOX_IT blob_it = row->blob_list();
472  // Initialize characters_ and compute the initial estimation of
473  // character height.
474  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
475  if (is_interesting_blob(blob_it.data())) {
476  FPChar fp_char;
477  fp_char.Init(blob_it.data());
478  // Merge unconditionally if two blobs overlap.
479  if (!characters_.empty() &&
480  significant_overlap(fp_char.box(), characters_.back().box())) {
481  characters_.back().Merge(fp_char);
482  } else {
483  characters_.push_back(fp_char);
484  }
485  TBOX bound = blob_it.data()->bounding_box();
486  if (bound.height() * 3.0 > bound.width()) {
487  heights_.Add(bound.height());
488  }
489  }
490  }
491  heights_.Finish();
492  height_ = heights_.ile(0.875);
493 }
494 
495 void FPRow::OutputEstimations() {
496  if (good_pitches_.size() == 0) {
497  pitch_ = 0.0f;
498  real_row_->pitch_decision = PITCH_CORR_PROP;
499  return;
500  }
501 
502  pitch_ = good_pitches_.median();
503  real_row_->fixed_pitch = pitch_;
504  // good_gaps_.ile(0.125) can be large if most characters on the row
505  // are skinny. Use pitch_ - height_ instead if it's smaller, but
506  // positive.
507  real_row_->kern_size = real_row_->pr_nonsp =
508  std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f));
509  real_row_->body_size = pitch_ - real_row_->kern_size;
510 
511  if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) {
512  // If more than half of the characters of a line don't fit to the
513  // fixed pitch model, consider the line to be proportional. 50%
514  // seems to be a good threshold in practice as well.
515  // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in
516  // real_row_ as a partial estimation result and try to use them in the
517  // normalization process.
518  real_row_->pitch_decision = PITCH_CORR_PROP;
519  return;
520  } else if (good_pitches_.size() > all_pitches_.size() * 0.75) {
521  real_row_->pitch_decision = PITCH_DEF_FIXED;
522  } else {
523  real_row_->pitch_decision = PITCH_CORR_FIXED;
524  }
525 
526  real_row_->space_size = real_row_->pr_space = pitch_;
527  // Set min_space to 50% of character pitch so that we can break CJK
528  // text at a half-width space after punctuation.
529  real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5;
530 
531  // Don't consider a quarter space as a real space, because it's used
532  // for line justification in traditional Japanese books.
533  real_row_->max_nonspace = std::max(pitch_ * 0.25 + good_gaps_.minimum(),
534  static_cast<double>(good_gaps_.ile(0.875)));
535 
536  int space_threshold =
537  std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
538  static_cast<int>(real_row_->xheight));
539 
540  // Make max_nonspace larger than any intra-character gap so that
541  // make_prop_words() won't break a row at the middle of a character.
542  for (size_t i = 0; i < num_chars(); ++i) {
543  if (characters_[i].max_gap() > real_row_->max_nonspace) {
544  real_row_->max_nonspace = characters_[i].max_gap();
545  }
546  }
547  real_row_->space_threshold =
548  std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
549  static_cast<int>(real_row_->xheight));
550  real_row_->used_dm_model = false;
551 
552  // Setup char_cells.
553  ICOORDELT_IT cell_it = &real_row_->char_cells;
554  auto *cell = new ICOORDELT(real_body(0).left(), 0);
555  cell_it.add_after_then_move(cell);
556 
557  int right = real_body(0).right();
558  for (size_t i = 1; i < num_chars(); ++i) {
559  // Put a word break if gap between two characters is bigger than
560  // space_threshold. Don't break if none of two characters
561  // couldn't be "finalized", because maybe they need to be merged
562  // to one character.
563  if ((is_final(i - 1) || is_final(i)) &&
564  real_body(i - 1).x_gap(real_body(i)) > space_threshold) {
565  cell = new ICOORDELT(right + 1, 0);
566  cell_it.add_after_then_move(cell);
567  while (right + pitch_ < box(i).left()) {
568  right += pitch_;
569  cell = new ICOORDELT(right + 1, 0);
570  cell_it.add_after_then_move(cell);
571  }
572  right = box(i).left();
573  }
574  cell = new ICOORDELT((right + real_body(i).left()) / 2, 0);
575  cell_it.add_after_then_move(cell);
576  right = real_body(i).right();
577  }
578 
579  cell = new ICOORDELT(right + 1, 0);
580  cell_it.add_after_then_move(cell);
581 
582  // TODO(takenaka): add code to store alignment/fragmentation
583  // information to blobs so that it can be reused later, e.g. in
584  // recognition phase.
585 }
586 
587 void FPRow::EstimatePitch(bool pass1) {
588  good_pitches_.Clear();
589  all_pitches_.Clear();
590  good_gaps_.Clear();
591  all_gaps_.Clear();
592  heights_.Clear();
593  if (num_chars() == 0) return;
594 
595  int32_t cx0, cx1;
596  bool prev_was_good = is_good(0);
597  cx0 = center_x(0);
598 
599  heights_.Add(box(0).height());
600  for (size_t i = 1; i < num_chars(); i++) {
601  cx1 = center_x(i);
602  int32_t pitch = cx1 - cx0;
603  int32_t gap = std::max(0, real_body(i - 1).x_gap(real_body(i)));
604 
605  heights_.Add(box(i).height());
606  // Ignore if the pitch is too close. But don't ignore wide pitch
607  // may be the result of large tracking.
608  if (pitch > height_ * 0.5) {
609  all_pitches_.Add(pitch);
610  all_gaps_.Add(gap);
611  if (is_good(i)) {
612  // In pass1 (after Pass1Analyze()), all characters marked as
613  // "good" have a good consistent pitch with their previous
614  // characters. However, it's not true in pass2 and a good
615  // character may have a good pitch only between its successor.
616  // So we collect only pitch values between two good
617  // characters. and within tolerance in pass2.
618  if (pass1 || (prev_was_good &&
619  fabs(estimated_pitch_ - pitch) <
620  kFPTolerance * estimated_pitch_)) {
621  good_pitches_.Add(pitch);
622  if (!is_box_modified(i - 1) && !is_box_modified(i)) {
623  good_gaps_.Add(gap);
624  }
625  }
626  prev_was_good = true;
627  } else {
628  prev_was_good = false;
629  }
630  }
631  cx0 = cx1;
632  }
633 
634  good_pitches_.Finish();
635  all_pitches_.Finish();
636  good_gaps_.Finish();
637  all_gaps_.Finish();
638  heights_.Finish();
639 
640  height_ = heights_.ile(0.875);
641  if (all_pitches_.size() == 0) {
642  pitch_ = 0.0f;
643  gap_ = 0.0f;
644  } else if (good_pitches_.size() < 2) {
645  // We don't have enough data to estimate the pitch of this row yet.
646  // Use median of all pitches as the initial guess.
647  pitch_ = all_pitches_.median();
648  ASSERT_HOST(pitch_ > 0.0f);
649  gap_ = all_gaps_.ile(0.125);
650  } else {
651  pitch_ = good_pitches_.median();
652  ASSERT_HOST(pitch_ > 0.0f);
653  gap_ = good_gaps_.ile(0.125);
654  }
655 }
656 
657 void FPRow::DebugOutputResult(int row_index) {
658  if (num_chars() > 0) {
659  tprintf("Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, "
660  "space_size=%f, space_threshold=%d, xheight=%f\n",
661  row_index, static_cast<int>(real_row_->pitch_decision),
662  real_row_->fixed_pitch, real_row_->max_nonspace,
663  real_row_->space_size, real_row_->space_threshold,
664  real_row_->xheight);
665 
666  for (unsigned i = 0; i < num_chars(); i++) {
667  tprintf("Char %u: is_final=%d is_good=%d num_blobs=%d: ",
668  i, is_final(i), is_good(i), character(i)->num_blobs());
669  box(i).print();
670  }
671  }
672 }
673 
674 void FPRow::Pass1Analyze() {
675  if (num_chars() < 2) return;
676 
677  if (estimated_pitch_ > 0.0f) {
678  for (size_t i = 2; i < num_chars(); i++) {
679  if (is_good_pitch(estimated_pitch_, box(i - 2), box(i-1)) &&
680  is_good_pitch(estimated_pitch_, box(i - 1), box(i))) {
681  mark_good(i - 1);
682  }
683  }
684  } else {
685  for (size_t i = 2; i < num_chars(); i++) {
686  if (is_good_pitch(box_pitch(box(i-2), box(i-1)), box(i - 1), box(i))) {
687  mark_good(i - 1);
688  }
689  }
690  }
691  character(0)->set_alignment(character(1)->alignment());
692  character(num_chars() - 1)->set_alignment(
693  character(num_chars() - 2)->alignment());
694 }
695 
696 bool FPRow::Pass2Analyze() {
697  bool changed = false;
698  if (num_chars() <= 1 || estimated_pitch_ == 0.0f) {
699  return false;
700  }
701  for (size_t i = 0; i < num_chars(); i++) {
702  if (is_final(i)) continue;
703 
704  FPChar::Alignment alignment = character(i)->alignment();
705  bool intersecting = false;
706  bool not_intersecting = false;
707 
708  if (i < num_chars() - 1 && is_final(i + 1)) {
709  // Next character is already finalized. Estimate the imaginary
710  // body including this character based on the character. Skip
711  // whitespace if necessary.
712  bool skipped_whitespaces = false;
713  float c1 = center_x(i + 1) - 1.5 * estimated_pitch_;
714  while (c1 > box(i).right()) {
715  skipped_whitespaces = true;
716  c1 -= estimated_pitch_;
717  }
718  TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top());
719 
720  // Collect all characters that mostly fit in the region.
721  // Also, their union height shouldn't be too big.
722  int j = i;
723  TBOX merged;
724  while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) &&
725  merged.bounding_union(box(j)).height() <
726  estimated_pitch_ * (1 + kFPTolerance)) {
727  merged += box(j);
728  j--;
729  }
730 
731  if (j >= 0 && significant_overlap(ibody, box(j))) {
732  // character(j) lies on the character boundary and doesn't fit
733  // well into the imaginary body.
734  if (!is_final(j)) intersecting = true;
735  } else {
736  not_intersecting = true;
737  if (i - j > 0) {
738  // Merge character(j+1) ... character(i) because they fit
739  // into the body nicely.
740  if (i - j == 1) {
741  // Only one char in the imaginary body.
742  if (!skipped_whitespaces) mark_good(i);
743  // set ibody as bounding box of this character to get
744  // better pitch analysis result for halfwidth glyphs
745  // followed by a halfwidth space.
746  if (box(i).width() <= estimated_pitch_ * 0.5) {
747  ibody += box(i);
748  character(i)->set_box(ibody);
749  }
750  character(i)->set_merge_to_prev(false);
751  finalize(i);
752  } else {
753  for (int k = i; k > j + 1; k--) {
754  character(k)->set_merge_to_prev(true);
755  }
756  }
757  }
758  }
759  }
760  if (i > 0 && is_final(i - 1)) {
761  // Now we repeat everything from the opposite side. Previous
762  // character is already finalized. Estimate the imaginary body
763  // including this character based on the character.
764  bool skipped_whitespaces = false;
765  float c1 = center_x(i - 1) + 1.5 * estimated_pitch_;
766  while (c1 < box(i).left()) {
767  skipped_whitespaces = true;
768  c1 += estimated_pitch_;
769  }
770  TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top());
771 
772  size_t j = i;
773  TBOX merged;
774  while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) &&
775  merged.bounding_union(box(j)).height() <
776  estimated_pitch_ * (1 + kFPTolerance)) {
777  merged += box(j);
778  j++;
779  }
780 
781  if (j < num_chars() && significant_overlap(ibody, box(j))) {
782  if (!is_final(j)) intersecting = true;
783  } else {
784  not_intersecting = true;
785  if (j - i > 0) {
786  if (j - i == 1) {
787  if (!skipped_whitespaces) mark_good(i);
788  if (box(i).width() <= estimated_pitch_ * 0.5) {
789  ibody += box(i);
790  character(i)->set_box(ibody);
791  }
792  character(i)->set_merge_to_prev(false);
793  finalize(i);
794  } else {
795  for (size_t k = i + 1; k < j; k++) {
796  character(k)->set_merge_to_prev(true);
797  }
798  }
799  }
800  }
801  }
802 
803  // This character doesn't fit well into the estimated imaginary
804  // bodies. Mark it as bad.
805  if (intersecting && !not_intersecting) mark_bad(i);
806  if (character(i)->alignment() != alignment ||
807  character(i)->merge_to_prev()) {
808  changed = true;
809  }
810  }
811 
812  return changed;
813 }
814 
815 void FPRow::MergeFragments() {
816  int last_char = 0;
817 
818  for (size_t j = 0; j < num_chars(); ++j) {
819  if (character(j)->merge_to_prev()) {
820  character(last_char)->Merge(*character(j));
821  character(j)->set_delete_flag(true);
822  clear_alignment(last_char);
823  character(j-1)->set_merge_to_prev(false);
824  } else {
825  last_char = j;
826  }
827  }
828  DeleteChars();
829 }
830 
831 void FPRow::FinalizeLargeChars() {
832  float row_pitch = estimated_pitch();
833  for (size_t i = 0; i < num_chars(); i++) {
834  if (is_final(i)) continue;
835 
836  // Finalize if both neighbors are finalized. We have no other choice.
837  if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) {
838  finalize(i);
839  continue;
840  }
841 
842  float cx = center_x(i);
843  TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1);
844  if (i > 0) {
845  // The preceding character significantly intersects with the
846  // imaginary body of this character. Let Pass2Analyze() handle
847  // this case.
848  if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) continue;
849  if (!is_final(i - 1)) {
850  TBOX merged = box(i);
851  merged += box(i - 1);
852  if (merged.width() < row_pitch) continue;
853  // This character cannot be finalized yet because it can be
854  // merged with the previous one. Again, let Pass2Analyze()
855  // handle this case.
856  }
857  }
858  if (i < num_chars() - 1) {
859  if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) continue;
860  if (!is_final(i + 1)) {
861  TBOX merged = box(i);
862  merged += box(i + 1);
863  if (merged.width() < row_pitch) continue;
864  }
865  }
866  finalize(i);
867  }
868 
869  // Update alignment decision. We only consider finalized characters
870  // in pass2. E.g. if a finalized character C has another finalized
871  // character L on its left and a not-finalized character R on its
872  // right, we mark C as good if the pitch between C and L is good,
873  // regardless of the pitch between C and R.
874  for (size_t i = 0; i < num_chars(); i++) {
875  if (!is_final(i)) continue;
876  bool good_pitch = false;
877  bool bad_pitch = false;
878  if (i > 0 && is_final(i - 1)) {
879  if (is_good_pitch(row_pitch, box(i - 1), box(i))) {
880  good_pitch = true;
881  } else {
882  bad_pitch = true;
883  }
884  }
885  if (i < num_chars() - 1 && is_final(i + 1)) {
886  if (is_good_pitch(row_pitch, box(i), box(i + 1))) {
887  good_pitch = true;
888  } else {
889  bad_pitch = true;
890  }
891  }
892  if (good_pitch && !bad_pitch) mark_good(i);
893  else if (!good_pitch && bad_pitch) mark_bad(i);
894  }
895 }
896 
897 class FPAnalyzer {
898  public:
899  FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks);
900  ~FPAnalyzer() { }
901 
902  void Pass1Analyze() {
903  for (auto & row : rows_) row.Pass1Analyze();
904  }
905 
906  // Estimate character pitch for each row. The argument pass1 can be
907  // set to true if the function is called after Pass1Analyze(), to
908  // eliminate some redundant computation.
909  void EstimatePitch(bool pass1);
910 
911  bool maybe_fixed_pitch() {
912  if (rows_.empty() ||
913  rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) return false;
914  return true;
915  }
916 
917  void MergeFragments() {
918  for (auto & row : rows_) row.MergeFragments();
919  }
920 
921  void FinalizeLargeChars() {
922  for (auto & row : rows_) row.FinalizeLargeChars();
923  }
924 
925  bool Pass2Analyze() {
926  bool changed = false;
927  for (auto & row : rows_) {
928  if (row.Pass2Analyze()) {
929  changed = true;
930  }
931  }
932  return changed;
933  }
934 
935  void OutputEstimations() {
936  for (auto & row : rows_) row.OutputEstimations();
937  // Don't we need page-level estimation of gaps/spaces?
938  }
939 
940  void DebugOutputResult() {
941  tprintf("FPAnalyzer: final result\n");
942  for (size_t i = 0; i < rows_.size(); i++) rows_[i].DebugOutputResult(i);
943  }
944 
945  size_t num_rows() {
946  return rows_.size();
947  }
948 
949  // Returns the upper limit for pass2 loop iteration.
950  unsigned max_iteration() {
951  // We're fixing at least one character per iteration. So basically
952  // we shouldn't require more than max_chars_per_row_ iterations.
953  return max_chars_per_row_ + 100;
954  }
955 
956  private:
957  ICOORD page_tr_;
958  std::vector<FPRow> rows_;
959  unsigned num_tall_rows_;
960  unsigned num_bad_rows_;
961  // TODO: num_empty_rows_ is incremented, but never used otherwise.
962  unsigned num_empty_rows_;
963  unsigned max_chars_per_row_;
964 };
965 
966 FPAnalyzer::FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
967 : page_tr_(page_tr),
968  num_tall_rows_(0),
969  num_bad_rows_(0),
970  num_empty_rows_(0),
971  max_chars_per_row_(0)
972 {
973  TO_BLOCK_IT block_it(port_blocks);
974 
975  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
976  block_it.forward()) {
977  TO_BLOCK *block = block_it.data();
978  if (!block->get_rows()->empty()) {
979  ASSERT_HOST(block->xheight > 0);
980  find_repeated_chars(block, false);
981  }
982  }
983 
984  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
985  block_it.forward()) {
986  TO_ROW_IT row_it = block_it.data()->get_rows();
987  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
988  FPRow row;
989  row.Init(row_it.data());
990  rows_.push_back(row);
991  size_t num_chars = rows_.back().num_chars();
992  if (num_chars <= 1) num_empty_rows_++;
993  if (num_chars > max_chars_per_row_) max_chars_per_row_ = num_chars;
994  }
995  }
996 }
997 
998 void FPAnalyzer::EstimatePitch(bool pass1) {
999  LocalCorrelation pitch_height_stats;
1000 
1001  num_tall_rows_ = 0;
1002  num_bad_rows_ = 0;
1003  pitch_height_stats.Clear();
1004  for (auto & row : rows_) {
1005  row.EstimatePitch(pass1);
1006  if (row.good_pitches()) {
1007  pitch_height_stats.Add(row.height() + row.gap(),
1008  row.pitch(), row.good_pitches());
1009  if (row.height_pitch_ratio() > 1.1) num_tall_rows_++;
1010  } else {
1011  num_bad_rows_++;
1012  }
1013  }
1014 
1015  pitch_height_stats.Finish();
1016  for (auto & row : rows_) {
1017  if (row.good_pitches() >= 5) {
1018  // We have enough evidences. Just use the pitch estimation
1019  // from this row.
1020  row.set_estimated_pitch(row.pitch());
1021  } else if (row.num_chars() > 1) {
1022  float estimated_pitch =
1023  pitch_height_stats.EstimateYFor(row.height() + row.gap(),
1024  0.1);
1025  // CJK characters are more likely to be fragmented than poorly
1026  // chopped. So trust the page-level estimation of character
1027  // pitch only if it's larger than row-level estimation or
1028  // row-level estimation is too large (2x bigger than row height).
1029  if (estimated_pitch > row.pitch() ||
1030  row.pitch() > row.height() * 2.0) {
1031  row.set_estimated_pitch(estimated_pitch);
1032  } else {
1033  row.set_estimated_pitch(row.pitch());
1034  }
1035  }
1036  }
1037 }
1038 
1039 } // namespace
1040 
1042  TO_BLOCK_LIST *port_blocks) {
1043  FPAnalyzer analyzer(page_tr, port_blocks);
1044  if (analyzer.num_rows() == 0) return;
1045 
1046  analyzer.Pass1Analyze();
1047  analyzer.EstimatePitch(true);
1048 
1049  // Perform pass1 analysis again with the initial estimation of row
1050  // pitches, for better estimation.
1051  analyzer.Pass1Analyze();
1052  analyzer.EstimatePitch(true);
1053 
1054  // Early exit if the page doesn't seem to contain fixed pitch rows.
1055  if (!analyzer.maybe_fixed_pitch()) {
1057  tprintf("Page doesn't seem to contain fixed pitch rows\n");
1058  }
1059  return;
1060  }
1061 
1062  unsigned iteration = 0;
1063  do {
1064  analyzer.MergeFragments();
1065  analyzer.FinalizeLargeChars();
1066  analyzer.EstimatePitch(false);
1067  iteration++;
1068  } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration());
1069 
1071  tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n",
1072  iteration, analyzer.max_iteration());
1073  }
1074 
1075  analyzer.OutputEstimations();
1076  if (textord_debug_pitch_test) analyzer.DebugOutputResult();
1077 }
PITCH_TYPE pitch_decision
Definition: blobbox.h:661
float xheight
Definition: blobbox.h:668
Definition: rect.h:34
void print() const
Definition: rect.h:278
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: cjkpitch.cpp:1041
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:129
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
Definition: topitch.cpp:1759
integer coordinate
Definition: points.h:31
const TBOX & bounding_box() const
Definition: blobbox.h:230
BlobTextFlowType flow() const
Definition: blobbox.h:295
int16_t height() const
Definition: rect.h:108
int x_gap(const TBOX &box) const
Definition: rect.h:225
bool textord_debug_pitch_test
Definition: topitch.cpp:39
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
TO_ROW_LIST * get_rows()
Definition: blobbox.h:715
int16_t width() const
Definition: rect.h:115
int16_t right() const
Definition: rect.h:79
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:611
#define ASSERT_HOST(x)
Definition: errcode.h:88
int16_t left() const
Definition: rect.h:72
#define BOOL_VAR(name, val, comment)
Definition: params.h:306
bool joined_to_prev() const
Definition: blobbox.h:256
float xheight
Definition: blobbox.h:799