tesseract 3.04.01

ccmain/pageiterator.cpp

Go to the documentation of this file.
00001 
00002 // File:        pageiterator.cpp
00003 // Description: Iterator for tesseract page structure that avoids using
00004 //              tesseract internal data structures.
00005 // Author:      Ray Smith
00006 // Created:     Fri Feb 26 14:32:09 PST 2010
00007 //
00008 // (C) Copyright 2010, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #include "pageiterator.h"
00022 #include "allheaders.h"
00023 #include "helpers.h"
00024 #include "pageres.h"
00025 #include "tesseractclass.h"
00026 
00027 namespace tesseract {
00028 
00029 PageIterator::PageIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale,
00030                            int scaled_yres, int rect_left, int rect_top,
00031                            int rect_width, int rect_height)
00032     : page_res_(page_res),
00033       tesseract_(tesseract),
00034       word_(NULL),
00035       word_length_(0),
00036       blob_index_(0),
00037       cblob_it_(NULL),
00038       include_upper_dots_(false),
00039       include_lower_dots_(false),
00040       scale_(scale),
00041       scaled_yres_(scaled_yres),
00042       rect_left_(rect_left),
00043       rect_top_(rect_top),
00044       rect_width_(rect_width),
00045       rect_height_(rect_height) {
00046   it_ = new PAGE_RES_IT(page_res);
00047   PageIterator::Begin();
00048 }
00049 
00050 PageIterator::~PageIterator() {
00051   delete it_;
00052   delete cblob_it_;
00053 }
00054 
00060 PageIterator::PageIterator(const PageIterator& src)
00061     : page_res_(src.page_res_),
00062       tesseract_(src.tesseract_),
00063       word_(NULL),
00064       word_length_(src.word_length_),
00065       blob_index_(src.blob_index_),
00066       cblob_it_(NULL),
00067       include_upper_dots_(src.include_upper_dots_),
00068       include_lower_dots_(src.include_lower_dots_),
00069       scale_(src.scale_),
00070       scaled_yres_(src.scaled_yres_),
00071       rect_left_(src.rect_left_),
00072       rect_top_(src.rect_top_),
00073       rect_width_(src.rect_width_),
00074       rect_height_(src.rect_height_) {
00075   it_ = new PAGE_RES_IT(*src.it_);
00076   BeginWord(src.blob_index_);
00077 }
00078 
00079 const PageIterator& PageIterator::operator=(const PageIterator& src) {
00080   page_res_ = src.page_res_;
00081   tesseract_ = src.tesseract_;
00082   include_upper_dots_ = src.include_upper_dots_;
00083   include_lower_dots_ = src.include_lower_dots_;
00084   scale_ = src.scale_;
00085   scaled_yres_ = src.scaled_yres_;
00086   rect_left_ = src.rect_left_;
00087   rect_top_ = src.rect_top_;
00088   rect_width_ = src.rect_width_;
00089   rect_height_ = src.rect_height_;
00090   if (it_ != NULL) delete it_;
00091   it_ = new PAGE_RES_IT(*src.it_);
00092   BeginWord(src.blob_index_);
00093   return *this;
00094 }
00095 
00096 bool PageIterator::PositionedAtSameWord(const PAGE_RES_IT* other) const {
00097   return (it_ == NULL && it_ == other) ||
00098      ((other != NULL) && (it_ != NULL) && (*it_ == *other));
00099 }
00100 
00101 // ============= Moving around within the page ============.
00102 
00104 void PageIterator::Begin() {
00105   it_->restart_page_with_empties();
00106   BeginWord(0);
00107 }
00108 
00109 void PageIterator::RestartParagraph() {
00110   if (it_->block() == NULL) return; // At end of the document.
00111   PAGE_RES_IT para(page_res_);
00112   PAGE_RES_IT next_para(para);
00113   next_para.forward_paragraph();
00114   while (next_para.cmp(*it_) <= 0) {
00115     para = next_para;
00116     next_para.forward_paragraph();
00117   }
00118   *it_ = para;
00119   BeginWord(0);
00120 }
00121 
00122 bool PageIterator::IsWithinFirstTextlineOfParagraph() const {
00123   PageIterator p_start(*this);
00124   p_start.RestartParagraph();
00125   return p_start.it_->row() == it_->row();
00126 }
00127 
00128 void PageIterator::RestartRow() {
00129   it_->restart_row();
00130   BeginWord(0);
00131 }
00132 
00146 bool PageIterator::Next(PageIteratorLevel level) {
00147   if (it_->block() == NULL) return false;  // Already at the end!
00148   if (it_->word() == NULL)
00149     level = RIL_BLOCK;
00150 
00151   switch (level) {
00152     case RIL_BLOCK:
00153       it_->forward_block();
00154       break;
00155     case RIL_PARA:
00156       it_->forward_paragraph();
00157       break;
00158     case RIL_TEXTLINE:
00159       for (it_->forward_with_empties(); it_->row() == it_->prev_row();
00160            it_->forward_with_empties());
00161       break;
00162     case RIL_WORD:
00163       it_->forward_with_empties();
00164       break;
00165     case RIL_SYMBOL:
00166       if (cblob_it_ != NULL)
00167         cblob_it_->forward();
00168       ++blob_index_;
00169       if (blob_index_ >= word_length_)
00170         it_->forward_with_empties();
00171       else
00172         return true;
00173       break;
00174   }
00175   BeginWord(0);
00176   return it_->block() != NULL;
00177 }
00178 
00184 bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const {
00185   if (it_->block() == NULL) return false;  // Already at the end!
00186   if (it_->word() == NULL) return true;  // In an image block.
00187   switch (level) {
00188     case RIL_BLOCK:
00189       return blob_index_ == 0 && it_->block() != it_->prev_block();
00190     case RIL_PARA:
00191       return blob_index_ == 0 &&
00192           (it_->block() != it_->prev_block() ||
00193            it_->row()->row->para() != it_->prev_row()->row->para());
00194     case RIL_TEXTLINE:
00195       return blob_index_ == 0 && it_->row() != it_->prev_row();
00196     case RIL_WORD:
00197       return blob_index_ == 0;
00198     case RIL_SYMBOL:
00199       return true;
00200   }
00201   return false;
00202 }
00203 
00208 bool PageIterator::IsAtFinalElement(PageIteratorLevel level,
00209                                     PageIteratorLevel element) const {
00210   if (Empty(element)) return true;  // Already at the end!
00211   // The result is true if we step forward by element and find we are
00212   // at the the end of the page or at beginning of *all* levels in:
00213   // [level, element).
00214   // When there is more than one level difference between element and level,
00215   // we could for instance move forward one symbol and still be at the first
00216   // word on a line, so we also have to be at the first symbol in a word.
00217   PageIterator next(*this);
00218   next.Next(element);
00219   if (next.Empty(element)) return true;  // Reached the end of the page.
00220   while (element > level) {
00221     element = static_cast<PageIteratorLevel>(element - 1);
00222     if (!next.IsAtBeginningOf(element))
00223       return false;
00224   }
00225   return true;
00226 }
00227 
00234 int PageIterator::Cmp(const PageIterator &other) const {
00235   int word_cmp = it_->cmp(*other.it_);
00236   if (word_cmp != 0)
00237     return word_cmp;
00238   if (blob_index_ < other.blob_index_)
00239     return -1;
00240   if (blob_index_ == other.blob_index_)
00241     return 0;
00242   return 1;
00243 }
00244 
00245 // ============= Accessing data ==============.
00246 // Coordinate system:
00247 // Integer coordinates are at the cracks between the pixels.
00248 // The top-left corner of the top-left pixel in the image is at (0,0).
00249 // The bottom-right corner of the bottom-right pixel in the image is at
00250 // (width, height).
00251 // Every bounding box goes from the top-left of the top-left contained
00252 // pixel to the bottom-right of the bottom-right contained pixel, so
00253 // the bounding box of the single top-left pixel in the image is:
00254 // (0,0)->(1,1).
00255 // If an image rectangle has been set in the API, then returned coordinates
00256 // relate to the original (full) image, rather than the rectangle.
00257 
00264 bool PageIterator::BoundingBoxInternal(PageIteratorLevel level,
00265                                        int* left, int* top,
00266                                        int* right, int* bottom) const {
00267   if (Empty(level))
00268     return false;
00269   TBOX box;
00270   PARA *para = NULL;
00271   switch (level) {
00272     case RIL_BLOCK:
00273       box = it_->block()->block->restricted_bounding_box(include_upper_dots_,
00274                                                          include_lower_dots_);
00275       break;
00276     case RIL_PARA:
00277       para = it_->row()->row->para();
00278       // explicit fall-through.
00279     case RIL_TEXTLINE:
00280       box = it_->row()->row->restricted_bounding_box(include_upper_dots_,
00281                                                      include_lower_dots_);
00282       break;
00283     case RIL_WORD:
00284       box = it_->word()->word->restricted_bounding_box(include_upper_dots_,
00285                                                        include_lower_dots_);
00286       break;
00287     case RIL_SYMBOL:
00288       if (cblob_it_ == NULL)
00289         box = it_->word()->box_word->BlobBox(blob_index_);
00290       else
00291         box = cblob_it_->data()->bounding_box();
00292   }
00293   if (level == RIL_PARA) {
00294     PageIterator other = *this;
00295     other.Begin();
00296     do {
00297       if (other.it_->block() &&
00298           other.it_->block()->block == it_->block()->block &&
00299           other.it_->row() && other.it_->row()->row &&
00300           other.it_->row()->row->para() == para) {
00301         box = box.bounding_union(other.it_->row()->row->bounding_box());
00302       }
00303     } while (other.Next(RIL_TEXTLINE));
00304   }
00305   if (level != RIL_SYMBOL || cblob_it_ != NULL)
00306     box.rotate(it_->block()->block->re_rotation());
00307   // Now we have a box in tesseract coordinates relative to the image rectangle,
00308   // we have to convert the coords to a top-down system.
00309   const int pix_height = pixGetHeight(tesseract_->pix_binary());
00310   const int pix_width = pixGetWidth(tesseract_->pix_binary());
00311   *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width);
00312   *top = ClipToRange(pix_height - box.top(), 0, pix_height);
00313   *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width);
00314   *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height);
00315   return true;
00316 }
00317 
00324 bool PageIterator::BoundingBox(PageIteratorLevel level,
00325                                int* left, int* top,
00326                                int* right, int* bottom) const {
00327   return BoundingBox(level, 0, left, top, right, bottom);
00328 }
00329 
00330 bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding,
00331                                int* left, int* top,
00332                                int* right, int* bottom) const {
00333   if (!BoundingBoxInternal(level, left, top, right, bottom))
00334     return false;
00335   // Convert to the coordinate system of the original image.
00336   *left = ClipToRange(*left / scale_ + rect_left_ - padding,
00337                       rect_left_, rect_left_ + rect_width_);
00338   *top = ClipToRange(*top / scale_ + rect_top_ - padding,
00339                      rect_top_, rect_top_ + rect_height_);
00340   *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding,
00341                        *left, rect_left_ + rect_width_);
00342   *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding,
00343                         *top, rect_top_ + rect_height_);
00344   return true;
00345 }
00346 
00348 bool PageIterator::Empty(PageIteratorLevel level) const {
00349   if (it_->block() == NULL) return true;  // Already at the end!
00350   if (it_->word() == NULL && level != RIL_BLOCK) return true;  // image block
00351   if (level == RIL_SYMBOL && blob_index_ >= word_length_)
00352     return true;  // Zero length word, or already at the end of it.
00353   return false;
00354 }
00355 
00357 PolyBlockType PageIterator::BlockType() const {
00358   if (it_->block() == NULL || it_->block()->block == NULL)
00359     return PT_UNKNOWN;  // Already at the end!
00360   if (it_->block()->block->poly_block() == NULL)
00361     return PT_FLOWING_TEXT;  // No layout analysis used - assume text.
00362   return it_->block()->block->poly_block()->isA();
00363 }
00364 
00367 Pta* PageIterator::BlockPolygon() const {
00368   if (it_->block() == NULL || it_->block()->block == NULL)
00369     return NULL;  // Already at the end!
00370   if (it_->block()->block->poly_block() == NULL)
00371     return NULL;  // No layout analysis used - no polygon.
00372   ICOORDELT_IT it(it_->block()->block->poly_block()->points());
00373   Pta* pta = ptaCreate(it.length());
00374   int num_pts = 0;
00375   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++num_pts) {
00376     ICOORD* pt = it.data();
00377     // Convert to top-down coords within the input image.
00378     float x = static_cast<float>(pt->x()) / scale_ + rect_left_;
00379     float y = rect_top_ + rect_height_ - static_cast<float>(pt->y()) / scale_;
00380     ptaAddPt(pta, x, y);
00381   }
00382   return pta;
00383 }
00384 
00407 Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const {
00408   int left, top, right, bottom;
00409   if (!BoundingBoxInternal(level, &left, &top, &right, &bottom))
00410     return NULL;
00411   if (level == RIL_SYMBOL && cblob_it_ != NULL &&
00412       cblob_it_->data()->area() != 0)
00413     return cblob_it_->data()->render();
00414   Box* box = boxCreate(left, top, right - left, bottom - top);
00415   Pix* pix = pixClipRectangle(tesseract_->pix_binary(), box, NULL);
00416   boxDestroy(&box);
00417   if (level == RIL_BLOCK || level == RIL_PARA) {
00418     // Clip to the block polygon as well.
00419     TBOX mask_box;
00420     Pix* mask = it_->block()->block->render_mask(&mask_box);
00421     int mask_x = left - mask_box.left();
00422     int mask_y = top - (tesseract_->ImageHeight() - mask_box.top());
00423     // AND the mask and pix, putting the result in pix.
00424     pixRasterop(pix, MAX(0, -mask_x), MAX(0, -mask_y), pixGetWidth(pix),
00425                 pixGetHeight(pix), PIX_SRC & PIX_DST, mask, MAX(0, mask_x),
00426                 MAX(0, mask_y));
00427     pixDestroy(&mask);
00428   }
00429   return pix;
00430 }
00431 
00443 Pix* PageIterator::GetImage(PageIteratorLevel level, int padding,
00444                             Pix* original_img,
00445                             int* left, int* top) const {
00446   int right, bottom;
00447   if (!BoundingBox(level, left, top, &right, &bottom))
00448     return NULL;
00449   if (original_img == NULL)
00450     return GetBinaryImage(level);
00451 
00452   // Expand the box.
00453   *left = MAX(*left - padding, 0);
00454   *top = MAX(*top - padding, 0);
00455   right = MIN(right + padding, rect_width_);
00456   bottom = MIN(bottom + padding, rect_height_);
00457   Box* box = boxCreate(*left, *top, right - *left, bottom - *top);
00458   Pix* grey_pix = pixClipRectangle(original_img, box, NULL);
00459   boxDestroy(&box);
00460   if (level == RIL_BLOCK || level == RIL_PARA) {
00461     // Clip to the block polygon as well.
00462     TBOX mask_box;
00463     Pix* mask = it_->block()->block->render_mask(&mask_box);
00464     // Copy the mask registered correctly into an image the size of grey_pix.
00465     int mask_x = *left - mask_box.left();
00466     int mask_y = *top - (pixGetHeight(original_img) - mask_box.top());
00467     int width = pixGetWidth(grey_pix);
00468     int height = pixGetHeight(grey_pix);
00469     Pix* resized_mask = pixCreate(width, height, 1);
00470     pixRasterop(resized_mask, MAX(0, -mask_x), MAX(0, -mask_y), width, height,
00471                 PIX_SRC, mask, MAX(0, mask_x), MAX(0, mask_y));
00472     pixDestroy(&mask);
00473     pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1,
00474                    2 * padding + 1);
00475     pixInvert(resized_mask, resized_mask);
00476     pixSetMasked(grey_pix, resized_mask, MAX_UINT32);
00477     pixDestroy(&resized_mask);
00478   }
00479   return grey_pix;
00480 }
00481 
00487 bool PageIterator::Baseline(PageIteratorLevel level,
00488                             int* x1, int* y1, int* x2, int* y2) const {
00489   if (it_->word() == NULL) return false;  // Already at the end!
00490   ROW* row = it_->row()->row;
00491   WERD* word = it_->word()->word;
00492   TBOX box = (level == RIL_WORD || level == RIL_SYMBOL)
00493            ? word->bounding_box()
00494            : row->bounding_box();
00495   int left = box.left();
00496   ICOORD startpt(left, static_cast<inT16>(row->base_line(left) + 0.5));
00497   int right = box.right();
00498   ICOORD endpt(right, static_cast<inT16>(row->base_line(right) + 0.5));
00499   // Rotate to image coordinates and convert to global image coords.
00500   startpt.rotate(it_->block()->block->re_rotation());
00501   endpt.rotate(it_->block()->block->re_rotation());
00502   *x1 = startpt.x() / scale_ + rect_left_;
00503   *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_;
00504   *x2 = endpt.x() / scale_ + rect_left_;
00505   *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_;
00506   return true;
00507 }
00508 
00509 void PageIterator::Orientation(tesseract::Orientation *orientation,
00510                                tesseract::WritingDirection *writing_direction,
00511                                tesseract::TextlineOrder *textline_order,
00512                                float *deskew_angle) const {
00513   BLOCK* block = it_->block()->block;
00514 
00515   // Orientation
00516   FCOORD up_in_image(0.0, 1.0);
00517   up_in_image.unrotate(block->classify_rotation());
00518   up_in_image.rotate(block->re_rotation());
00519 
00520   if (up_in_image.x() == 0.0F) {
00521     if (up_in_image.y() > 0.0F) {
00522       *orientation = ORIENTATION_PAGE_UP;
00523     } else {
00524       *orientation = ORIENTATION_PAGE_DOWN;
00525     }
00526   } else if (up_in_image.x() > 0.0F) {
00527     *orientation = ORIENTATION_PAGE_RIGHT;
00528   } else {
00529     *orientation = ORIENTATION_PAGE_LEFT;
00530   }
00531 
00532   // Writing direction
00533   bool is_vertical_text = (block->classify_rotation().x() == 0.0);
00534   bool right_to_left = block->right_to_left();
00535   *writing_direction =
00536       is_vertical_text
00537           ? WRITING_DIRECTION_TOP_TO_BOTTOM
00538           : (right_to_left
00539                 ? WRITING_DIRECTION_RIGHT_TO_LEFT
00540                 : WRITING_DIRECTION_LEFT_TO_RIGHT);
00541 
00542   // Textline Order
00543   bool is_mongolian = false;  // TODO(eger): fix me
00544   *textline_order = is_vertical_text
00545       ? (is_mongolian
00546          ? TEXTLINE_ORDER_LEFT_TO_RIGHT
00547          : TEXTLINE_ORDER_RIGHT_TO_LEFT)
00548       : TEXTLINE_ORDER_TOP_TO_BOTTOM;
00549 
00550   // Deskew angle
00551   FCOORD skew = block->skew();  // true horizontal for textlines
00552   *deskew_angle = -skew.angle();
00553 }
00554 
00555 void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just,
00556                                  bool *is_list_item,
00557                                  bool *is_crown,
00558                                  int *first_line_indent) const {
00559   *just = tesseract::JUSTIFICATION_UNKNOWN;
00560   if (!it_->row() || !it_->row()->row || !it_->row()->row->para() ||
00561       !it_->row()->row->para()->model)
00562     return;
00563 
00564   PARA *para = it_->row()->row->para();
00565   *is_list_item = para->is_list_item;
00566   *is_crown = para->is_very_first_or_continuation;
00567   *first_line_indent = para->model->first_indent() -
00568       para->model->body_indent();
00569 }
00570 
00575 void PageIterator::BeginWord(int offset) {
00576   WERD_RES* word_res = it_->word();
00577   if (word_res == NULL) {
00578     // This is a non-text block, so there is no word.
00579     word_length_ = 0;
00580     blob_index_ = 0;
00581     word_ = NULL;
00582     return;
00583   }
00584   if (word_res->best_choice != NULL) {
00585     // Recognition has been done, so we are using the box_word, which
00586     // is already baseline denormalized.
00587     word_length_ = word_res->best_choice->length();
00588     if (word_res->box_word != NULL) {
00589       if (word_res->box_word->length() != word_length_) {
00590         tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ",
00591                 word_length_, word_res->best_choice->unichar_string().string(),
00592                 word_res->box_word->length());
00593         word_res->box_word->bounding_box().print();
00594       }
00595       ASSERT_HOST(word_res->box_word->length() == word_length_);
00596     }
00597     word_ = NULL;
00598     // We will be iterating the box_word.
00599     if (cblob_it_ != NULL) {
00600       delete cblob_it_;
00601       cblob_it_ = NULL;
00602     }
00603   } else {
00604     // No recognition yet, so a "symbol" is a cblob.
00605     word_ = word_res->word;
00606     ASSERT_HOST(word_->cblob_list() != NULL);
00607     word_length_ = word_->cblob_list()->length();
00608     if (cblob_it_ == NULL) cblob_it_ = new C_BLOB_IT;
00609     cblob_it_->set_to_list(word_->cblob_list());
00610   }
00611   for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) {
00612     if (cblob_it_ != NULL)
00613       cblob_it_->forward();
00614   }
00615 }
00616 
00617 bool PageIterator::SetWordBlamerBundle(BlamerBundle *blamer_bundle) {
00618   if (it_->word() != NULL) {
00619     it_->word()->blamer_bundle = blamer_bundle;
00620     return true;
00621   } else {
00622     return false;
00623   }
00624 }
00625 
00626 }  // namespace tesseract.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines