tesseract 3.04.01

PAGE_RES_IT Class Reference

#include <pageres.h>

List of all members.

Public Member Functions

 PAGE_RES_IT ()
 PAGE_RES_IT (PAGE_RES *the_page_res)
bool operator== (const PAGE_RES_IT &other) const
bool operator!= (const PAGE_RES_IT &other) const
int cmp (const PAGE_RES_IT &other) const
WERD_RES * restart_page ()
WERD_RES * restart_page_with_empties ()
WERD_RES * start_page (bool empty_ok)
WERD_RES * restart_row ()
WERD_RES * InsertSimpleCloneWord (const WERD_RES &clone_res, WERD *new_word)
void ReplaceCurrentWord (tesseract::PointerVector< WERD_RES > *words)
void DeleteCurrentWord ()
void MakeCurrentWordFuzzy ()
WERD_RES * forward ()
WERD_RES * forward_with_empties ()
WERD_RES * forward_paragraph ()
WERD_RES * forward_block ()
WERD_RES * prev_word () const
ROW_RES * prev_row () const
BLOCK_RES * prev_block () const
WERD_RES * word () const
ROW_RES * row () const
BLOCK_RES * block () const
WERD_RES * next_word () const
ROW_RES * next_row () const
BLOCK_RES * next_block () const
void rej_stat_word ()
void ResetWordIterator ()

Public Attributes

PAGE_RES * page_res

Detailed Description

Definition at line 656 of file pageres.h.


Constructor & Destructor Documentation

PAGE_RES_IT::PAGE_RES_IT ( ) [inline]

Definition at line 660 of file pageres.h.

                {
    // Default constructor: the body is empty, so no setup happens here.
    // NOTE(review): the iterator members appear to stay unset until page_res
    // is assigned and start_page() is run - confirm before relying on a
    // default-constructed iterator.
  }                            // empty constructor
PAGE_RES_IT::PAGE_RES_IT ( PAGE_RES * the_page_res) [inline]

Definition at line 663 of file pageres.h.

                                      {    // page result
    // Remember the page to iterate, then start the scan. restart_page()
    // forwards to start_page(false), the variant that skips empty blocks.
    page_res = the_page_res;
    restart_page();  // ready to scan
  }

Member Function Documentation

BLOCK_RES* PAGE_RES_IT::block ( ) const [inline]

Definition at line 739 of file pageres.h.

                           {  // block of cur. word
    // Read-only accessor for the block_res member (block of the current
    // word).
    return block_res;
  }
int PAGE_RES_IT::cmp ( const PAGE_RES_IT & other) const

Definition at line 1200 of file pageres.cpp.

                                                   {
  // Three-way comparison of two iterators over the same PAGE_RES (asserted
  // on the next line).
  // Returns 0 when both refer to the same position, -1 when this iterator
  // comes before `other`, and 1 when it comes after. A NULL block_res is
  // treated as the end of the page, so it sorts after every real position.
  // Otherwise the order is found by walking the relevant word, row or block
  // list and reporting whichever of the two targets is met first.
  ASSERT_HOST(page_res == other.page_res);
  if (other.block_res == NULL) {
    // other points to the end of the page.
    if (block_res == NULL)
      return 0;
    return -1;
  }
  if (block_res == NULL) {
    return 1; // we point to the end of the page.
  }
  if (block_res == other.block_res) {
    if (other.row_res == NULL || row_res == NULL) {
      // this should only happen if we hit an image block.
      return 0;
    }
    if (row_res == other.row_res) {
      // we point to the same block and row.
      ASSERT_HOST(other.word_res != NULL && word_res != NULL);
      if (word_res == other.word_res) {
        // we point to the same word!
        return 0;
      }

      // Same row, different words: whichever word the list walk reaches
      // first is the earlier one.
      WERD_RES_IT word_res_it(&row_res->word_res_list);
      for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
           word_res_it.forward()) {
        if (word_res_it.data() == word_res) {
          return -1;
        } else if (word_res_it.data() == other.word_res) {
          return 1;
        }
      }
      // A string literal never compares equal to NULL, so this assertion
      // always fails if reached: neither word was found in the row.
      ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == NULL);
    }

    // we both point to the same block, but different rows.
    ROW_RES_IT row_res_it(&block_res->row_res_list);
    for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
         row_res_it.forward()) {
      if (row_res_it.data() == row_res) {
        return -1;
      } else if (row_res_it.data() == other.row_res) {
        return 1;
      }
    }
    // Always-failing assertion: neither row was found in the block.
    ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == NULL);
  }

  // We point to different blocks.
  BLOCK_RES_IT block_res_it(&page_res->block_res_list);
  for (block_res_it.mark_cycle_pt();
       !block_res_it.cycled_list(); block_res_it.forward()) {
    if (block_res_it.data() == block_res) {
      return -1;
    } else if (block_res_it.data() == other.block_res) {
      return 1;
    }
  }
  // Shouldn't happen...
  ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == NULL);
  // Only reached if the assertion above returns; gives the function a value
  // on every path.
  return 0;
}
void PAGE_RES_IT::DeleteCurrentWord ( )

Definition at line 1449 of file pageres.cpp.

                                    {
  // Deletes the current word: its WERD is removed from the row's word list
  // (unless the word is a combination) and its WERD_RES is removed from the
  // row's word_res_list. The iterator's word pointers are then rebuilt by
  // ResetWordIterator().
  //
  // Check that this word is as we expect. part_of_combos are NEVER iterated
  // by the normal iterator, so we should never be trying to delete them.
  ASSERT_HOST(!word_res->part_of_combo);
  if (!word_res->combination) {
    // Combinations own their own word, so we won't find the word on the
    // row's word_list, but it is legitimate to try to delete them.
    // Delete word from the ROW when not a combination.
    WERD_IT w_it(row()->row->word_list());
    for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
      if (w_it.data() == word_res->word) {
        break;
      }
    }
    // The word must have been found before the list wrapped around.
    ASSERT_HOST(!w_it.cycled_list());
    delete w_it.extract();
  }
  // Remove the WERD_RES of the current word from the ROW_RES.
  // word_res is cleared as soon as the entry is located, because the object
  // it points to is deleted just below.
  WERD_RES_IT wr_it(&row()->word_res_list);
  for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
    if (wr_it.data() == word_res) {
      word_res = NULL;
      break;
    }
  }
  ASSERT_HOST(!wr_it.cycled_list());
  delete wr_it.extract();
  ResetWordIterator();
}
WERD_RES* PAGE_RES_IT::forward ( ) [inline]

Definition at line 713 of file pageres.h.

                      {  // Get next word.
    // Delegates to internal_forward(false, false).
    // NOTE(review): internal_forward is not shown on this page; judging by
    // start_page(), the second argument is the empty_ok flag, so this variant
    // presumably skips empty blocks - confirm.
    return internal_forward(false, false);
  }
WERD_RES * PAGE_RES_IT::forward_block ( )

Definition at line 1666 of file pageres.cpp.

                                     {
  // Keep stepping while the next word is still in the current block, i.e.
  // until the current word is the last one of its block.
  while (block_res == next_block_res) {
    internal_forward(false, true);
  }
  // One more step then leaves the block; its result is returned.
  // NOTE(review): assumes each internal_forward call makes the old "next"
  // position the current one - confirm against its definition.
  return internal_forward(false, true);
}
WERD_RES * PAGE_RES_IT::forward_paragraph ( )

Definition at line 1651 of file pageres.cpp.

                                         {
  // Keep stepping while the next word is in the same block, has a row with a
  // non-NULL ROW, and that row reports the same para() as the current row.
  while (block_res == next_block_res &&
         (next_row_res != NULL && next_row_res->row != NULL &&
          row_res->row->para() == next_row_res->row->para())) {
    internal_forward(false, true);
  }
  // One further step moves past the paragraph; its result is returned.
  // NOTE(review): assumes each internal_forward call makes the old "next"
  // position the current one - confirm against its definition.
  return internal_forward(false, true);
}
WERD_RES* PAGE_RES_IT::forward_with_empties ( ) [inline]

Definition at line 717 of file pageres.h.

                                   {
    // Same as forward(), but passes true as the second argument of
    // internal_forward (the position start_page() uses for empty_ok).
    return internal_forward(false, true);
  }
WERD_RES * PAGE_RES_IT::InsertSimpleCloneWord ( const WERD_RES & clone_res,
WERD * new_word 
)

Definition at line 1268 of file pageres.cpp.

                                                             {
  // Creates a WERD_RES for new_word, copies the simple fields of clone_res
  // into it, flags it as a combination, and inserts it into the current
  // row's word_res_list immediately before the current word.
  // Returns the newly created WERD_RES.
  //
  // Make a WERD_RES for the new_word.
  WERD_RES* new_res = new WERD_RES(new_word);
  new_res->CopySimpleFields(clone_res);
  new_res->combination = true;
  // Insert into the appropriate place in the ROW_RES.
  // First locate the current word in the row's list.
  WERD_RES_IT wr_it(&row()->word_res_list);
  for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
    WERD_RES* word = wr_it.data();
    if (word == word_res)
      break;
  }
  // The current word must be present in its own row.
  ASSERT_HOST(!wr_it.cycled_list());
  wr_it.add_before_then_move(new_res);
  if (wr_it.at_first()) {
    // This is the new first word, so reset the member iterator so it
    // detects the cycled_list state correctly.
    ResetWordIterator();
  }
  return new_res;
}
void PAGE_RES_IT::MakeCurrentWordFuzzy ( )

Definition at line 1482 of file pageres.cpp.

                                       {
  // Sets the W_FUZZY_SP flag on the current word's WERD, but only when
  // neither W_FUZZY_SP nor W_FUZZY_NON is already set. When the current word
  // is a combination, the flag is also set on the WERD of the entry that
  // follows it in the row's word_res_list.
  WERD* real_word = word_res->word;
  if (!real_word->flag(W_FUZZY_SP) && !real_word->flag(W_FUZZY_NON)) {
    real_word->set_flag(W_FUZZY_SP, true);
    if (word_res->combination) {
      // The next word should be the corresponding part of combo, but we have
      // already stepped past it, so find it by search.
      WERD_RES_IT wr_it(&row()->word_res_list);
      // Empty-bodied loop: it only advances wr_it to the current word.
      for (wr_it.mark_cycle_pt();
           !wr_it.cycled_list() && wr_it.data() != word_res; wr_it.forward()) {
      }
      // Step on to the entry after the current word and check that it is
      // the part_of_combo entry and is not already marked fuzzy.
      wr_it.forward();
      ASSERT_HOST(wr_it.data()->part_of_combo);
      real_word = wr_it.data()->word;
      ASSERT_HOST(!real_word->flag(W_FUZZY_SP) &&
                  !real_word->flag(W_FUZZY_NON));
      real_word->set_flag(W_FUZZY_SP, true);
    }
  }
}
BLOCK_RES* PAGE_RES_IT::next_block ( ) const [inline]

Definition at line 748 of file pageres.h.

                                {  // block of next word
    // Read-only accessor for the next_block_res member.
    return next_block_res;
  }
ROW_RES* PAGE_RES_IT::next_row ( ) const [inline]

Definition at line 745 of file pageres.h.

                            {  // row of next word
    // Read-only accessor for the next_row_res member.
    return next_row_res;
  }
WERD_RES* PAGE_RES_IT::next_word ( ) const [inline]

Definition at line 742 of file pageres.h.

                              {  // next word
    // Read-only accessor for the next_word_res member.
    return next_word_res;
  }
bool PAGE_RES_IT::operator!= ( const PAGE_RES_IT & other) const [inline]

Definition at line 672 of file pageres.h.

// Inequality is defined as the negation of operator==.
{return !(*this == other); }
bool PAGE_RES_IT::operator== ( const PAGE_RES_IT & other) const

Definition at line 1194 of file pageres.cpp.

                                                            {
  // Two iterators are equal when their current word, row and block pointers
  // are all identical. page_res and the prev/next pointers are not compared.
  return word_res == other.word_res &&
      row_res == other.row_res &&
      block_res == other.block_res;
}
BLOCK_RES* PAGE_RES_IT::prev_block ( ) const [inline]

Definition at line 730 of file pageres.h.

                                {  // block of prev word
    // Read-only accessor for the prev_block_res member.
    return prev_block_res;
  }
ROW_RES* PAGE_RES_IT::prev_row ( ) const [inline]

Definition at line 727 of file pageres.h.

                            {  // row of prev word
    // Read-only accessor for the prev_row_res member.
    return prev_row_res;
  }
WERD_RES* PAGE_RES_IT::prev_word ( ) const [inline]

Definition at line 724 of file pageres.h.

                              {  // previous word
    // Read-only accessor for the prev_word_res member.
    return prev_word_res;
  }
void PAGE_RES_IT::rej_stat_word ( )

Definition at line 1673 of file pageres.cpp.

                                {
  // Adds the current word's character and reject counts to the running
  // totals kept on the page, the current block and the current row.
  inT16 chars_in_word;
  inT16 rejects_in_word = 0;

  // The word's character count is the length of its reject_map.
  chars_in_word = word_res->reject_map.length ();
  page_res->char_count += chars_in_word;
  block_res->char_count += chars_in_word;
  row_res->char_count += chars_in_word;

  rejects_in_word = word_res->reject_map.reject_count ();

  page_res->rej_count += rejects_in_word;
  block_res->rej_count += rejects_in_word;
  row_res->rej_count += rejects_in_word;
  // When every character of the word is rejected, the rejects are also
  // added to the whole-word counter. Only the row keeps this counter here.
  if (chars_in_word == rejects_in_word)
    row_res->whole_word_rej_count += rejects_in_word;
}
void PAGE_RES_IT::ReplaceCurrentWord ( tesseract::PointerVector< WERD_RES > *  words)

Definition at line 1321 of file pageres.cpp.

                                             {
  // Replaces the current word with the WERD_RESs in *words.
  //  - An empty vector simply deletes the current word.
  //  - Otherwise the blobs (and rejected blobs) of the current word are
  //    distributed over the replacement words by x-middle, a BoxWord is
  //    rebuilt for each replacement, and the replacements are inserted
  //    before the current word in the row's lists.
  //  - Ownership of every element of *words is taken; the vector is emptied.
  //  - Finally the original word is deleted and the iterator state rebuilt
  //    via ResetWordIterator().
  if (words->empty()) {
    DeleteCurrentWord();
    return;
  }
  WERD_RES* input_word = word();
  // Set the BOL/EOL flags on the words from the input word.
  if (input_word->word->flag(W_BOL)) {
    (*words)[0]->word->set_flag(W_BOL, true);
  } else {
    (*words)[0]->word->set_blanks(1);
  }
  words->back()->word->set_flag(W_EOL, input_word->word->flag(W_EOL));

  // Move the blobs from the input word to the new set of words.
  // If the input word_res is a combination, then the replacements will also be
  // combinations, and will own their own words. If the input word_res is not a
  // combination, then the final replacements will not be either, (although it
  // is allowed for the input words to be combinations) and their words
  // will get put on the row list. This maintains the ownership rules.
  WERD_IT w_it(row()->row->word_list());
  if (!input_word->combination) {
    for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
      WERD* word = w_it.data();
      if (word == input_word->word)
        break;
    }
    // w_it is now set to the input_word's word.
    ASSERT_HOST(!w_it.cycled_list());
  }
  // Insert into the appropriate place in the ROW_RES.
  WERD_RES_IT wr_it(&row()->word_res_list);
  for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
    WERD_RES* word = wr_it.data();
    if (word == input_word)
      break;
  }
  // wr_it now sits on the input word's WERD_RES.
  ASSERT_HOST(!wr_it.cycled_list());
  // Since we only have an estimate of the bounds between blobs, use the blob
  // x-middle as the determiner of where to put the blobs
  C_BLOB_IT src_b_it(input_word->word->cblob_list());
  src_b_it.sort(&C_BLOB::SortByXMiddle);
  C_BLOB_IT rej_b_it(input_word->word->rej_cblob_list());
  rej_b_it.sort(&C_BLOB::SortByXMiddle);
  for (int w = 0; w < words->size(); ++w) {
    WERD_RES* word_w = (*words)[w];
    // Compute blob boundaries.
    GenericVector<int> blob_ends;
    // The last replacement word has no successor, hence NULL.
    C_BLOB_LIST* next_word_blobs =
        w + 1 < words->size() ? (*words)[w + 1]->word->cblob_list() : NULL;
    ComputeBlobEnds(*word_w, next_word_blobs, &blob_ends);
    // Delete the fake blobs on the current word.
    word_w->word->cblob_list()->clear();
    C_BLOB_IT dest_it(word_w->word->cblob_list());
    // Build the box word as we move the blobs.
    tesseract::BoxWord* box_word = new tesseract::BoxWord;
    for (int i = 0; i < blob_ends.size(); ++i) {
      int end_x = blob_ends[i];
      TBOX blob_box;
      // Add the blobs up to end_x.
      while (!src_b_it.empty() &&
             src_b_it.data()->bounding_box().x_middle() < end_x) {
        blob_box += src_b_it.data()->bounding_box();
        dest_it.add_after_then_move(src_b_it.extract());
        src_b_it.forward();
      }
      // Rejected blobs are moved across in the same way.
      while (!rej_b_it.empty() &&
             rej_b_it.data()->bounding_box().x_middle() < end_x) {
        blob_box += rej_b_it.data()->bounding_box();
        dest_it.add_after_then_move(rej_b_it.extract());
        rej_b_it.forward();
      }
      // Clip to the previously computed bounds. Although imperfectly accurate,
      // it is good enough, and much more complicated to determine where else
      // to clip.
      if (i > 0 && blob_box.left() < blob_ends[i - 1])
        blob_box.set_left(blob_ends[i - 1]);
      if (blob_box.right() > end_x)
        blob_box.set_right(end_x);
      box_word->InsertBox(i, blob_box);
    }
    // Fix empty boxes. If a very joined blob sits over multiple characters,
    // then we will have some empty boxes from using the middle, so look for
    // overlaps.
    for (int i = 0; i < box_word->length(); ++i) {
      TBOX box = box_word->BlobBox(i);
      if (box.null_box()) {
        // Nothing has its middle in the bounds of this blob, so use anything
        // that overlaps.
        for (dest_it.mark_cycle_pt(); !dest_it.cycled_list();
             dest_it.forward()) {
          TBOX blob_box = dest_it.data()->bounding_box();
          if (blob_box.left() < blob_ends[i] &&
              (i == 0 || blob_box.right() >= blob_ends[i - 1])) {
            // Clip the overlapping blob's box to this slot's bounds and use
            // the first such blob found.
            if (i > 0 && blob_box.left() < blob_ends[i - 1])
              blob_box.set_left(blob_ends[i - 1]);
            if (blob_box.right() > blob_ends[i])
              blob_box.set_right(blob_ends[i]);
            box_word->ChangeBox(i, blob_box);
            break;
          }
        }
      }
    }
    // Swap in the rebuilt box word, releasing the old one.
    delete word_w->box_word;
    word_w->box_word = box_word;
    if (!input_word->combination) {
      // Insert word_w->word into the ROW. It doesn't own its word, so the
      // ROW needs to own it.
      w_it.add_before_stay_put(word_w->word);
      word_w->combination = false;
    }
    (*words)[w] = NULL;  // We are taking ownership.
    wr_it.add_before_stay_put(word_w);
  }
  // We have taken ownership of the words.
  words->clear();
  // Delete the current word, which has been replaced. We could just call
  // DeleteCurrentWord, but that would iterate both lists again, and we know
  // we are already in the right place.
  if (!input_word->combination)
    delete w_it.extract();
  delete wr_it.extract();
  ResetWordIterator();
}
void PAGE_RES_IT::ResetWordIterator ( )

Definition at line 1532 of file pageres.cpp.

                                    {
  // Rebuilds word_res (and, when the previous word is in the same row,
  // prev_word_res) after the current row's word_res_list has been edited.
  // Entries flagged part_of_combo are skipped in both branches.
  if (row_res == next_row_res) {
    // Reset the member iterator so it can move forward and detect the
    // cycled_list state correctly.
    word_res_it.move_to_first();
    // Walk from the first entry up to (not including) next_word_res. The
    // last eligible entry seen becomes word_res and, if the previous word is
    // in this row, the one before it becomes prev_word_res.
    for (word_res_it.mark_cycle_pt();
         !word_res_it.cycled_list() && word_res_it.data() != next_word_res;
         word_res_it.forward()) {
      if (!word_res_it.data()->part_of_combo) {
        if (prev_row_res == row_res) prev_word_res = word_res;
        word_res = word_res_it.data();
      }
    }
    // next_word_res must have been found in this row.
    ASSERT_HOST(!word_res_it.cycled_list());
    // Step the member iterator past next_word_res.
    word_res_it.forward();
  } else {
    // word_res_it is OK, but reset word_res and prev_word_res if needed.
    // The whole row is walked with a local iterator, so word_res ends up as
    // the last eligible entry of the row.
    WERD_RES_IT wr_it(&row_res->word_res_list);
    for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
      if (!wr_it.data()->part_of_combo) {
        if (prev_row_res == row_res) prev_word_res = word_res;
        word_res = wr_it.data();
      }
    }
  }
}
WERD_RES* PAGE_RES_IT::restart_page ( ) [inline]

Definition at line 680 of file pageres.h.

                           {
    // Restarts the scan of the page via start_page with empty_ok == false
    // and returns its result.
    return start_page(false);  // Skip empty blocks.
  }
WERD_RES* PAGE_RES_IT::restart_page_with_empties ( ) [inline]

Definition at line 683 of file pageres.h.

                                        {
    // Restarts the scan of the page via start_page with empty_ok == true
    // and returns its result.
    return start_page(true);  // Allow empty blocks.
  }
WERD_RES * PAGE_RES_IT::restart_row ( )

Definition at line 1636 of file pageres.cpp.

                                   {
  // Moves the iterator back to the first word reached in the current row.
  // Returns NULL (leaving the iterator untouched) if there is no current
  // row; otherwise returns the word the iterator ends up on.
  ROW_RES *row = this->row();
  if (!row) return NULL;
  // Rewind to the start of the page and step forward until the remembered
  // row is reached again.
  for (restart_page(); this->row() != row; forward()) {
    // pass
  }
  return word();
}
ROW_RES* PAGE_RES_IT::row ( ) const [inline]

Definition at line 736 of file pageres.h.

                       {  // row of current word
    // Read-only accessor for the row_res member.
    return row_res;
  }
WERD_RES * PAGE_RES_IT::start_page ( bool  empty_ok)

Definition at line 1509 of file pageres.cpp.

                                               {
  // (Re)starts iteration over page_res. empty_ok is passed straight through
  // to internal_forward; restart_page() / restart_page_with_empties()
  // describe it as skipping vs. allowing empty blocks.
  // Returns the result of the second internal_forward call.
  block_res_it.set_to_list(&page_res->block_res_list);
  block_res_it.mark_cycle_pt();
  // Clear the previous / current / next position pointers.
  prev_block_res = NULL;
  prev_row_res = NULL;
  prev_word_res = NULL;
  block_res = NULL;
  row_res = NULL;
  word_res = NULL;
  next_block_res = NULL;
  next_row_res = NULL;
  next_word_res = NULL;
  // NOTE(review): internal_forward is not shown on this page. Presumably the
  // first call (first argument true) loads the "next" slot and the second
  // shifts it into the current slot - confirm against its definition.
  internal_forward(true, empty_ok);
  return internal_forward(false, empty_ok);
}
WERD_RES* PAGE_RES_IT::word ( ) const [inline]

Definition at line 733 of file pageres.h.

                         {  // current word
    // Read-only accessor for the word_res member.
    return word_res;
  }

Member Data Documentation

PAGE_RES* PAGE_RES_IT::page_res

Definition at line 658 of file pageres.h.


The documentation for this class was generated from the following files:
 pageres.h
 pageres.cpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines