tesseract  4.1.0
PAGE_RES_IT Class Reference

#include <pageres.h>

Public Member Functions

 PAGE_RES_IT ()=default
 
 PAGE_RES_IT (PAGE_RES *the_page_res)
 
bool operator== (const PAGE_RES_IT &other) const
 
bool operator!= (const PAGE_RES_IT &other) const
 
int cmp (const PAGE_RES_IT &other) const
 
WERD_RES * restart_page ()
 
WERD_RES * restart_page_with_empties ()
 
WERD_RES * start_page (bool empty_ok)
 
WERD_RES * restart_row ()
 
WERD_RES * InsertSimpleCloneWord (const WERD_RES &clone_res, WERD *new_word)
 
void ReplaceCurrentWord (tesseract::PointerVector< WERD_RES > *words)
 
void DeleteCurrentWord ()
 
void MakeCurrentWordFuzzy ()
 
WERD_RES * forward ()
 
WERD_RES * forward_with_empties ()
 
WERD_RES * forward_paragraph ()
 
WERD_RES * forward_block ()
 
WERD_RES * prev_word () const
 
ROW_RES * prev_row () const
 
BLOCK_RES * prev_block () const
 
WERD_RES * word () const
 
ROW_RES * row () const
 
BLOCK_RES * block () const
 
WERD_RES * next_word () const
 
ROW_RES * next_row () const
 
BLOCK_RES * next_block () const
 
void rej_stat_word ()
 
void ResetWordIterator ()
 

Public Attributes

PAGE_RES * page_res
 

Detailed Description

Definition at line 676 of file pageres.h.

Constructor & Destructor Documentation

PAGE_RES_IT::PAGE_RES_IT ( )
default
PAGE_RES_IT::PAGE_RES_IT ( PAGE_RES * the_page_res)
inline

Definition at line 682 of file pageres.h.

682  { // page result
683  page_res = the_page_res;
684  restart_page(); // ready to scan
685  }
WERD_RES * restart_page()
Definition: pageres.h:702
PAGE_RES * page_res
Definition: pageres.h:678

Member Function Documentation

BLOCK_RES* PAGE_RES_IT::block ( ) const
inline

Definition at line 761 of file pageres.h.

761  { // block of cur. word
762  return block_res;
763  }
int PAGE_RES_IT::cmp ( const PAGE_RES_IT & other) const

Definition at line 1192 of file pageres.cpp.

1192  {
1193  ASSERT_HOST(page_res == other.page_res);
1194  if (other.block_res == nullptr) {
1195  // other points to the end of the page.
1196  if (block_res == nullptr)
1197  return 0;
1198  return -1;
1199  }
1200  if (block_res == nullptr) {
1201  return 1; // we point to the end of the page.
1202  }
1203  if (block_res == other.block_res) {
1204  if (other.row_res == nullptr || row_res == nullptr) {
1205  // this should only happen if we hit an image block.
1206  return 0;
1207  }
1208  if (row_res == other.row_res) {
1209  // we point to the same block and row.
1210  ASSERT_HOST(other.word_res != nullptr && word_res != nullptr);
1211  if (word_res == other.word_res) {
1212  // we point to the same word!
1213  return 0;
1214  }
1215 
1216  WERD_RES_IT word_res_it(&row_res->word_res_list);
1217  for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
1218  word_res_it.forward()) {
1219  if (word_res_it.data() == word_res) {
1220  return -1;
1221  } else if (word_res_it.data() == other.word_res) {
1222  return 1;
1223  }
1224  }
1225  ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == nullptr);
1226  }
1227 
1228  // we both point to the same block, but different rows.
1229  ROW_RES_IT row_res_it(&block_res->row_res_list);
1230  for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
1231  row_res_it.forward()) {
1232  if (row_res_it.data() == row_res) {
1233  return -1;
1234  } else if (row_res_it.data() == other.row_res) {
1235  return 1;
1236  }
1237  }
1238  ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == nullptr);
1239  }
1240 
1241  // We point to different blocks.
1242  BLOCK_RES_IT block_res_it(&page_res->block_res_list);
1243  for (block_res_it.mark_cycle_pt();
1244  !block_res_it.cycled_list(); block_res_it.forward()) {
1245  if (block_res_it.data() == block_res) {
1246  return -1;
1247  } else if (block_res_it.data() == other.block_res) {
1248  return 1;
1249  }
1250  }
1251  // Shouldn't happen...
1252  ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == nullptr);
1253  return 0;
1254 }
WERD_RES_LIST word_res_list
Definition: pageres.h:146
ROW_RES_LIST row_res_list
Definition: pageres.h:127
PAGE_RES * page_res
Definition: pageres.h:678
#define ASSERT_HOST(x)
Definition: errcode.h:88
BLOCK_RES_LIST block_res_list
Definition: pageres.h:80
void PAGE_RES_IT::DeleteCurrentWord ( )

Definition at line 1487 of file pageres.cpp.

1487  {
1488  // Check that this word is as we expect. part_of_combos are NEVER iterated
1489  // by the normal iterator, so we should never be trying to delete them.
1490  ASSERT_HOST(!word_res->part_of_combo);
1491  if (!word_res->combination) {
1492  // Combinations own their own word, so we won't find the word on the
1493  // row's word_list, but it is legitimate to try to delete them.
1494  // Delete word from the ROW when not a combination.
1495  WERD_IT w_it(row()->row->word_list());
1496  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
1497  if (w_it.data() == word_res->word) {
1498  break;
1499  }
1500  }
1501  ASSERT_HOST(!w_it.cycled_list());
1502  delete w_it.extract();
1503  }
1504  // Remove the WERD_RES for the new_word.
1505  // Remove the WORD_RES from the ROW_RES.
1506  WERD_RES_IT wr_it(&row()->word_res_list);
1507  for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1508  if (wr_it.data() == word_res) {
1509  word_res = nullptr;
1510  break;
1511  }
1512  }
1513  ASSERT_HOST(!wr_it.cycled_list());
1514  delete wr_it.extract();
1515  ResetWordIterator();
1516 }
void ResetWordIterator()
Definition: pageres.cpp:1570
ROW_RES * row() const
Definition: pageres.h:758
bool combination
Definition: pageres.h:333
#define ASSERT_HOST(x)
Definition: errcode.h:88
bool part_of_combo
Definition: pageres.h:334
WERD * word
Definition: pageres.h:188
WERD_RES* PAGE_RES_IT::forward ( )
inline

Definition at line 735 of file pageres.h.

735  { // Get next word.
736  return internal_forward(false, false);
737  }
WERD_RES * PAGE_RES_IT::forward_block ( )

Definition at line 1707 of file pageres.cpp.

1707  {
1708  while (block_res == next_block_res) {
1709  internal_forward(false, true);
1710  }
1711  return internal_forward(false, true);
1712 }
WERD_RES * PAGE_RES_IT::forward_paragraph ( )

Definition at line 1692 of file pageres.cpp.

1692  {
1693  while (block_res == next_block_res &&
1694  (next_row_res != nullptr && next_row_res->row != nullptr &&
1695  row_res->row->para() == next_row_res->row->para())) {
1696  internal_forward(false, true);
1697  }
1698  return internal_forward(false, true);
1699 }
PARA * para() const
Definition: ocrrow.h:118
ROW * row
Definition: pageres.h:142
WERD_RES* PAGE_RES_IT::forward_with_empties ( )
inline

Definition at line 739 of file pageres.h.

739  {
740  return internal_forward(false, true);
741  }
WERD_RES * PAGE_RES_IT::InsertSimpleCloneWord ( const WERD_RES & clone_res,
WERD * new_word 
)

Definition at line 1260 of file pageres.cpp.

1261  {
1262  // Make a WERD_RES for the new_word.
1263  auto* new_res = new WERD_RES(new_word);
1264  new_res->CopySimpleFields(clone_res);
1265  new_res->combination = true;
1266  // Insert into the appropriate place in the ROW_RES.
1267  WERD_RES_IT wr_it(&row()->word_res_list);
1268  for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1269  WERD_RES* word = wr_it.data();
1270  if (word == word_res)
1271  break;
1272  }
1273  ASSERT_HOST(!wr_it.cycled_list());
1274  wr_it.add_before_then_move(new_res);
1275  if (wr_it.at_first()) {
1276  // This is the new first word, so reset the member iterator so it
1277  // detects the cycled_list state correctly.
1278  ResetWordIterator();
1279  }
1280  return new_res;
1281 }
void ResetWordIterator()
Definition: pageres.cpp:1570
ROW_RES * row() const
Definition: pageres.h:758
WERD_RES * word() const
Definition: pageres.h:755
#define ASSERT_HOST(x)
Definition: errcode.h:88
void PAGE_RES_IT::MakeCurrentWordFuzzy ( )

Definition at line 1520 of file pageres.cpp.

1520  {
1521  WERD* real_word = word_res->word;
1522  if (!real_word->flag(W_FUZZY_SP) && !real_word->flag(W_FUZZY_NON)) {
1523  real_word->set_flag(W_FUZZY_SP, true);
1524  if (word_res->combination) {
1525  // The next word should be the corresponding part of combo, but we have
1526  // already stepped past it, so find it by search.
1527  WERD_RES_IT wr_it(&row()->word_res_list);
1528  for (wr_it.mark_cycle_pt();
1529  !wr_it.cycled_list() && wr_it.data() != word_res; wr_it.forward()) {
1530  }
1531  wr_it.forward();
1532  ASSERT_HOST(wr_it.data()->part_of_combo);
1533  real_word = wr_it.data()->word;
1534  ASSERT_HOST(!real_word->flag(W_FUZZY_SP) &&
1535  !real_word->flag(W_FUZZY_NON));
1536  real_word->set_flag(W_FUZZY_SP, true);
1537  }
1538  }
1539 }
WERD
Definition: werd.h:56
W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:40
ROW_RES * row() const
Definition: pageres.h:758
bool combination
Definition: pageres.h:333
W_FUZZY_SP
fuzzy space
Definition: werd.h:39
#define ASSERT_HOST(x)
Definition: errcode.h:88
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117
WERD * word
Definition: pageres.h:188
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118
BLOCK_RES* PAGE_RES_IT::next_block ( ) const
inline

Definition at line 770 of file pageres.h.

770  { // block of next word
771  return next_block_res;
772  }
ROW_RES* PAGE_RES_IT::next_row ( ) const
inline

Definition at line 767 of file pageres.h.

767  { // row of next word
768  return next_row_res;
769  }
WERD_RES* PAGE_RES_IT::next_word ( ) const
inline

Definition at line 764 of file pageres.h.

764  { // next word
765  return next_word_res;
766  }
bool PAGE_RES_IT::operator!= ( const PAGE_RES_IT & other) const
inline

Definition at line 694 of file pageres.h.

694 {return !(*this == other); }
bool PAGE_RES_IT::operator== ( const PAGE_RES_IT & other) const
inline

Definition at line 689 of file pageres.h.

689  {
690  return word_res == other.word_res && row_res == other.row_res &&
691  block_res == other.block_res;
692  }
BLOCK_RES* PAGE_RES_IT::prev_block ( ) const
inline

Definition at line 752 of file pageres.h.

752  { // block of prev word
753  return prev_block_res;
754  }
ROW_RES* PAGE_RES_IT::prev_row ( ) const
inline

Definition at line 749 of file pageres.h.

749  { // row of prev word
750  return prev_row_res;
751  }
WERD_RES* PAGE_RES_IT::prev_word ( ) const
inline

Definition at line 746 of file pageres.h.

746  { // previous word
747  return prev_word_res;
748  }
void PAGE_RES_IT::rej_stat_word ( )

Definition at line 1714 of file pageres.cpp.

1714  {
1715  int16_t chars_in_word;
1716  int16_t rejects_in_word = 0;
1717 
1718  chars_in_word = word_res->reject_map.length ();
1719  page_res->char_count += chars_in_word;
1720  block_res->char_count += chars_in_word;
1721  row_res->char_count += chars_in_word;
1722 
1723  rejects_in_word = word_res->reject_map.reject_count ();
1724 
1725  page_res->rej_count += rejects_in_word;
1726  block_res->rej_count += rejects_in_word;
1727  row_res->rej_count += rejects_in_word;
1728  if (chars_in_word == rejects_in_word)
1729  row_res->whole_word_rej_count += rejects_in_word;
1730 }
int32_t length() const
Definition: rejctmap.h:223
int32_t char_count
Definition: pageres.h:143
int32_t whole_word_rej_count
Definition: pageres.h:145
REJMAP reject_map
Definition: pageres.h:286
int32_t rej_count
Definition: pageres.h:118
int32_t char_count
Definition: pageres.h:78
PAGE_RES * page_res
Definition: pageres.h:678
int32_t rej_count
Definition: pageres.h:79
int16_t reject_count()
Definition: rejctmap.h:229
int32_t char_count
Definition: pageres.h:117
int32_t rej_count
Definition: pageres.h:144
void PAGE_RES_IT::ReplaceCurrentWord ( tesseract::PointerVector< WERD_RES > *  words)

Definition at line 1380 of file pageres.cpp.

1381  {
1382  if (words->empty()) {
1383  DeleteCurrentWord();
1384  return;
1385  }
1386  WERD_RES* input_word = word();
1387  // Set the BOL/EOL flags on the words from the input word.
1388  if (input_word->word->flag(W_BOL)) {
1389  (*words)[0]->word->set_flag(W_BOL, true);
1390  } else {
1391  (*words)[0]->word->set_blanks(input_word->word->space());
1392  }
1393  words->back()->word->set_flag(W_EOL, input_word->word->flag(W_EOL));
1394 
1395  // Move the blobs from the input word to the new set of words.
1396  // If the input word_res is a combination, then the replacements will also be
1397  // combinations, and will own their own words. If the input word_res is not a
1398  // combination, then the final replacements will not be either, (although it
1399  // is allowed for the input words to be combinations) and their words
1400  // will get put on the row list. This maintains the ownership rules.
1401  WERD_IT w_it(row()->row->word_list());
1402  if (!input_word->combination) {
1403  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
1404  WERD* word = w_it.data();
1405  if (word == input_word->word)
1406  break;
1407  }
1408  // w_it is now set to the input_word's word.
1409  ASSERT_HOST(!w_it.cycled_list());
1410  }
1411  // Insert into the appropriate place in the ROW_RES.
1412  WERD_RES_IT wr_it(&row()->word_res_list);
1413  for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1414  WERD_RES* word = wr_it.data();
1415  if (word == input_word)
1416  break;
1417  }
1418  ASSERT_HOST(!wr_it.cycled_list());
1419  // Since we only have an estimate of the bounds between blobs, use the blob
1420  // x-middle as the determiner of where to put the blobs
1421  C_BLOB_IT src_b_it(input_word->word->cblob_list());
1422  src_b_it.sort(&C_BLOB::SortByXMiddle);
1423  C_BLOB_IT rej_b_it(input_word->word->rej_cblob_list());
1424  rej_b_it.sort(&C_BLOB::SortByXMiddle);
1425  TBOX clip_box;
1426  for (int w = 0; w < words->size(); ++w) {
1427  WERD_RES* word_w = (*words)[w];
1428  clip_box = ComputeWordBounds(*words, w, clip_box, wr_it_of_current_word);
1429  // Compute blob boundaries.
1430  GenericVector<int> blob_ends;
1431  C_BLOB_LIST* next_word_blobs =
1432  w + 1 < words->size() ? (*words)[w + 1]->word->cblob_list() : nullptr;
1433  ComputeBlobEnds(*word_w, clip_box, next_word_blobs, &blob_ends);
1434  // Remove the fake blobs on the current word, but keep safe for back-up if
1435  // no blob can be found.
1436  C_BLOB_LIST fake_blobs;
1437  C_BLOB_IT fake_b_it(&fake_blobs);
1438  fake_b_it.add_list_after(word_w->word->cblob_list());
1439  fake_b_it.move_to_first();
1440  word_w->word->cblob_list()->clear();
1441  C_BLOB_IT dest_it(word_w->word->cblob_list());
1442  // Build the box word as we move the blobs.
1443  auto* box_word = new tesseract::BoxWord;
1444  for (int i = 0; i < blob_ends.size(); ++i, fake_b_it.forward()) {
1445  int end_x = blob_ends[i];
1446  TBOX blob_box;
1447  // Add the blobs up to end_x.
1448  while (!src_b_it.empty() &&
1449  src_b_it.data()->bounding_box().x_middle() < end_x) {
1450  blob_box += MoveAndClipBlob(&src_b_it, &dest_it, clip_box);
1451  src_b_it.forward();
1452  }
1453  while (!rej_b_it.empty() &&
1454  rej_b_it.data()->bounding_box().x_middle() < end_x) {
1455  blob_box += MoveAndClipBlob(&rej_b_it, &dest_it, clip_box);
1456  rej_b_it.forward();
1457  }
1458  if (blob_box.null_box()) {
1459  // Use the original box as a back-up.
1460  blob_box = MoveAndClipBlob(&fake_b_it, &dest_it, clip_box);
1461  }
1462  box_word->InsertBox(i, blob_box);
1463  }
1464  delete word_w->box_word;
1465  word_w->box_word = box_word;
1466  if (!input_word->combination) {
1467  // Insert word_w->word into the ROW. It doesn't own its word, so the
1468  // ROW needs to own it.
1469  w_it.add_before_stay_put(word_w->word);
1470  word_w->combination = false;
1471  }
1472  (*words)[w] = nullptr; // We are taking ownership.
1473  wr_it.add_before_stay_put(word_w);
1474  }
1475  // We have taken ownership of the words.
1476  words->clear();
1477  // Delete the current word, which has been replaced. We could just call
1478  // DeleteCurrentWord, but that would iterate both lists again, and we know
1479  // we are already in the right place.
1480  if (!input_word->combination)
1481  delete w_it.extract();
1482  delete wr_it.extract();
1483  ResetWordIterator();
1484 }
WERD
Definition: werd.h:56
TBOX
Definition: rect.h:34
void ResetWordIterator()
Definition: pageres.cpp:1570
C_BLOB_LIST * rej_cblob_list()
Definition: werd.h:90
ROW_RES * row() const
Definition: pageres.h:758
W_BOL
start of line
Definition: werd.h:32
bool combination
Definition: pageres.h:333
W_EOL
end of line
Definition: werd.h:33
static int SortByXMiddle(const void *v1, const void *v2)
Definition: stepblob.h:125
bool empty() const
Definition: genericvector.h:89
tesseract::BoxWord * box_word
Definition: pageres.h:265
WERD_RES * word() const
Definition: pageres.h:755
uint8_t space()
Definition: werd.h:99
#define ASSERT_HOST(x)
Definition: errcode.h:88
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117
int size() const
Definition: genericvector.h:70
bool null_box() const
Definition: rect.h:50
WERD * word
Definition: pageres.h:188
void DeleteCurrentWord()
Definition: pageres.cpp:1487
T & back() const
C_BLOB_LIST * cblob_list()
Definition: werd.h:95
void PAGE_RES_IT::ResetWordIterator ( )

Definition at line 1570 of file pageres.cpp.

1570  {
1571  if (row_res == next_row_res) {
1572  // Reset the member iterator so it can move forward and detect the
1573  // cycled_list state correctly.
1574  word_res_it.move_to_first();
1575  for (word_res_it.mark_cycle_pt();
1576  !word_res_it.cycled_list() && word_res_it.data() != next_word_res;
1577  word_res_it.forward()) {
1578  if (!word_res_it.data()->part_of_combo) {
1579  if (prev_row_res == row_res) prev_word_res = word_res;
1580  word_res = word_res_it.data();
1581  }
1582  }
1583  ASSERT_HOST(!word_res_it.cycled_list());
1584  wr_it_of_next_word = word_res_it;
1585  word_res_it.forward();
1586  } else {
1587  // word_res_it is OK, but reset word_res and prev_word_res if needed.
1588  WERD_RES_IT wr_it(&row_res->word_res_list);
1589  for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1590  if (!wr_it.data()->part_of_combo) {
1591  if (prev_row_res == row_res) prev_word_res = word_res;
1592  word_res = wr_it.data();
1593  }
1594  }
1595  }
1596 }
WERD_RES_LIST word_res_list
Definition: pageres.h:146
#define ASSERT_HOST(x)
Definition: errcode.h:88
WERD_RES* PAGE_RES_IT::restart_page ( )
inline

Definition at line 702 of file pageres.h.

702  {
703  return start_page(false); // Skip empty blocks.
704  }
WERD_RES * start_page(bool empty_ok)
Definition: pageres.cpp:1547
WERD_RES* PAGE_RES_IT::restart_page_with_empties ( )
inline

Definition at line 705 of file pageres.h.

705  {
706  return start_page(true); // Allow empty blocks.
707  }
WERD_RES * start_page(bool empty_ok)
Definition: pageres.cpp:1547
WERD_RES * PAGE_RES_IT::restart_row ( )

Definition at line 1677 of file pageres.cpp.

1677  {
1678  ROW_RES *row = this->row();
1679  if (!row) return nullptr;
1680  for (restart_page(); this->row() != row; forward()) {
1681  // pass
1682  }
1683  return word();
1684 }
WERD_RES * restart_page()
Definition: pageres.h:702
ROW_RES * row() const
Definition: pageres.h:758
WERD_RES * word() const
Definition: pageres.h:755
WERD_RES * forward()
Definition: pageres.h:735
ROW_RES* PAGE_RES_IT::row ( ) const
inline

Definition at line 758 of file pageres.h.

758  { // row of current word
759  return row_res;
760  }
WERD_RES * PAGE_RES_IT::start_page ( bool  empty_ok)

Definition at line 1547 of file pageres.cpp.

1547  {
1548  block_res_it.set_to_list(&page_res->block_res_list);
1549  block_res_it.mark_cycle_pt();
1550  prev_block_res = nullptr;
1551  prev_row_res = nullptr;
1552  prev_word_res = nullptr;
1553  block_res = nullptr;
1554  row_res = nullptr;
1555  word_res = nullptr;
1556  next_block_res = nullptr;
1557  next_row_res = nullptr;
1558  next_word_res = nullptr;
1559  internal_forward(true, empty_ok);
1560  return internal_forward(false, empty_ok);
1561 }
PAGE_RES * page_res
Definition: pageres.h:678
BLOCK_RES_LIST block_res_list
Definition: pageres.h:80
WERD_RES* PAGE_RES_IT::word ( ) const
inline

Definition at line 755 of file pageres.h.

755  { // current word
756  return word_res;
757  }

Member Data Documentation

PAGE_RES* PAGE_RES_IT::page_res

Definition at line 678 of file pageres.h.


The documentation for this class was generated from the following files: