|
tesseract 3.04.01
|
00001 00002 // File: pageiterator.cpp 00003 // Description: Iterator for tesseract page structure that avoids using 00004 // tesseract internal data structures. 00005 // Author: Ray Smith 00006 // Created: Fri Feb 26 14:32:09 PST 2010 00007 // 00008 // (C) Copyright 2010, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #include "pageiterator.h" 00022 #include "allheaders.h" 00023 #include "helpers.h" 00024 #include "pageres.h" 00025 #include "tesseractclass.h" 00026 00027 namespace tesseract { 00028 00029 PageIterator::PageIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale, 00030 int scaled_yres, int rect_left, int rect_top, 00031 int rect_width, int rect_height) 00032 : page_res_(page_res), 00033 tesseract_(tesseract), 00034 word_(NULL), 00035 word_length_(0), 00036 blob_index_(0), 00037 cblob_it_(NULL), 00038 include_upper_dots_(false), 00039 include_lower_dots_(false), 00040 scale_(scale), 00041 scaled_yres_(scaled_yres), 00042 rect_left_(rect_left), 00043 rect_top_(rect_top), 00044 rect_width_(rect_width), 00045 rect_height_(rect_height) { 00046 it_ = new PAGE_RES_IT(page_res); 00047 PageIterator::Begin(); 00048 } 00049 00050 PageIterator::~PageIterator() { 00051 delete it_; 00052 delete cblob_it_; 00053 } 00054 00060 PageIterator::PageIterator(const PageIterator& src) 00061 : page_res_(src.page_res_), 00062 tesseract_(src.tesseract_), 00063 word_(NULL), 00064 word_length_(src.word_length_), 00065 blob_index_(src.blob_index_), 00066 cblob_it_(NULL), 00067 include_upper_dots_(src.include_upper_dots_), 00068 include_lower_dots_(src.include_lower_dots_), 00069 scale_(src.scale_), 00070 scaled_yres_(src.scaled_yres_), 00071 rect_left_(src.rect_left_), 00072 rect_top_(src.rect_top_), 00073 rect_width_(src.rect_width_), 00074 rect_height_(src.rect_height_) { 00075 it_ = new PAGE_RES_IT(*src.it_); 00076 BeginWord(src.blob_index_); 00077 } 00078 00079 const PageIterator& PageIterator::operator=(const PageIterator& src) { 00080 page_res_ = src.page_res_; 00081 tesseract_ = src.tesseract_; 00082 include_upper_dots_ = src.include_upper_dots_; 00083 include_lower_dots_ = src.include_lower_dots_; 00084 scale_ = src.scale_; 00085 scaled_yres_ = src.scaled_yres_; 00086 rect_left_ = src.rect_left_; 00087 rect_top_ = src.rect_top_; 00088 rect_width_ = src.rect_width_; 00089 rect_height_ = src.rect_height_; 00090 if (it_ != NULL) delete it_; 00091 it_ = new PAGE_RES_IT(*src.it_); 00092 BeginWord(src.blob_index_); 00093 return *this; 00094 } 00095 00096 bool PageIterator::PositionedAtSameWord(const PAGE_RES_IT* other) const { 00097 return (it_ == NULL && it_ == other) || 00098 ((other != NULL) && (it_ != NULL) && (*it_ == *other)); 00099 } 00100 00101 // ============= Moving around within the page ============. 00102 00104 void PageIterator::Begin() { 00105 it_->restart_page_with_empties(); 00106 BeginWord(0); 00107 } 00108 00109 void PageIterator::RestartParagraph() { 00110 if (it_->block() == NULL) return; // At end of the document. 00111 PAGE_RES_IT para(page_res_); 00112 PAGE_RES_IT next_para(para); 00113 next_para.forward_paragraph(); 00114 while (next_para.cmp(*it_) <= 0) { 00115 para = next_para; 00116 next_para.forward_paragraph(); 00117 } 00118 *it_ = para; 00119 BeginWord(0); 00120 } 00121 00122 bool PageIterator::IsWithinFirstTextlineOfParagraph() const { 00123 PageIterator p_start(*this); 00124 p_start.RestartParagraph(); 00125 return p_start.it_->row() == it_->row(); 00126 } 00127 00128 void PageIterator::RestartRow() { 00129 it_->restart_row(); 00130 BeginWord(0); 00131 } 00132 00146 bool PageIterator::Next(PageIteratorLevel level) { 00147 if (it_->block() == NULL) return false; // Already at the end! 00148 if (it_->word() == NULL) 00149 level = RIL_BLOCK; 00150 00151 switch (level) { 00152 case RIL_BLOCK: 00153 it_->forward_block(); 00154 break; 00155 case RIL_PARA: 00156 it_->forward_paragraph(); 00157 break; 00158 case RIL_TEXTLINE: 00159 for (it_->forward_with_empties(); it_->row() == it_->prev_row(); 00160 it_->forward_with_empties()); 00161 break; 00162 case RIL_WORD: 00163 it_->forward_with_empties(); 00164 break; 00165 case RIL_SYMBOL: 00166 if (cblob_it_ != NULL) 00167 cblob_it_->forward(); 00168 ++blob_index_; 00169 if (blob_index_ >= word_length_) 00170 it_->forward_with_empties(); 00171 else 00172 return true; 00173 break; 00174 } 00175 BeginWord(0); 00176 return it_->block() != NULL; 00177 } 00178 00184 bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const { 00185 if (it_->block() == NULL) return false; // Already at the end! 00186 if (it_->word() == NULL) return true; // In an image block. 00187 switch (level) { 00188 case RIL_BLOCK: 00189 return blob_index_ == 0 && it_->block() != it_->prev_block(); 00190 case RIL_PARA: 00191 return blob_index_ == 0 && 00192 (it_->block() != it_->prev_block() || 00193 it_->row()->row->para() != it_->prev_row()->row->para()); 00194 case RIL_TEXTLINE: 00195 return blob_index_ == 0 && it_->row() != it_->prev_row(); 00196 case RIL_WORD: 00197 return blob_index_ == 0; 00198 case RIL_SYMBOL: 00199 return true; 00200 } 00201 return false; 00202 } 00203 00208 bool PageIterator::IsAtFinalElement(PageIteratorLevel level, 00209 PageIteratorLevel element) const { 00210 if (Empty(element)) return true; // Already at the end! 00211 // The result is true if we step forward by element and find we are 00212 // at the the end of the page or at beginning of *all* levels in: 00213 // [level, element). 00214 // When there is more than one level difference between element and level, 00215 // we could for instance move forward one symbol and still be at the first 00216 // word on a line, so we also have to be at the first symbol in a word. 00217 PageIterator next(*this); 00218 next.Next(element); 00219 if (next.Empty(element)) return true; // Reached the end of the page. 00220 while (element > level) { 00221 element = static_cast<PageIteratorLevel>(element - 1); 00222 if (!next.IsAtBeginningOf(element)) 00223 return false; 00224 } 00225 return true; 00226 } 00227 00234 int PageIterator::Cmp(const PageIterator &other) const { 00235 int word_cmp = it_->cmp(*other.it_); 00236 if (word_cmp != 0) 00237 return word_cmp; 00238 if (blob_index_ < other.blob_index_) 00239 return -1; 00240 if (blob_index_ == other.blob_index_) 00241 return 0; 00242 return 1; 00243 } 00244 00245 // ============= Accessing data ==============. 00246 // Coordinate system: 00247 // Integer coordinates are at the cracks between the pixels. 00248 // The top-left corner of the top-left pixel in the image is at (0,0). 00249 // The bottom-right corner of the bottom-right pixel in the image is at 00250 // (width, height). 00251 // Every bounding box goes from the top-left of the top-left contained 00252 // pixel to the bottom-right of the bottom-right contained pixel, so 00253 // the bounding box of the single top-left pixel in the image is: 00254 // (0,0)->(1,1). 00255 // If an image rectangle has been set in the API, then returned coordinates 00256 // relate to the original (full) image, rather than the rectangle. 00257 00264 bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, 00265 int* left, int* top, 00266 int* right, int* bottom) const { 00267 if (Empty(level)) 00268 return false; 00269 TBOX box; 00270 PARA *para = NULL; 00271 switch (level) { 00272 case RIL_BLOCK: 00273 box = it_->block()->block->restricted_bounding_box(include_upper_dots_, 00274 include_lower_dots_); 00275 break; 00276 case RIL_PARA: 00277 para = it_->row()->row->para(); 00278 // explicit fall-through. 00279 case RIL_TEXTLINE: 00280 box = it_->row()->row->restricted_bounding_box(include_upper_dots_, 00281 include_lower_dots_); 00282 break; 00283 case RIL_WORD: 00284 box = it_->word()->word->restricted_bounding_box(include_upper_dots_, 00285 include_lower_dots_); 00286 break; 00287 case RIL_SYMBOL: 00288 if (cblob_it_ == NULL) 00289 box = it_->word()->box_word->BlobBox(blob_index_); 00290 else 00291 box = cblob_it_->data()->bounding_box(); 00292 } 00293 if (level == RIL_PARA) { 00294 PageIterator other = *this; 00295 other.Begin(); 00296 do { 00297 if (other.it_->block() && 00298 other.it_->block()->block == it_->block()->block && 00299 other.it_->row() && other.it_->row()->row && 00300 other.it_->row()->row->para() == para) { 00301 box = box.bounding_union(other.it_->row()->row->bounding_box()); 00302 } 00303 } while (other.Next(RIL_TEXTLINE)); 00304 } 00305 if (level != RIL_SYMBOL || cblob_it_ != NULL) 00306 box.rotate(it_->block()->block->re_rotation()); 00307 // Now we have a box in tesseract coordinates relative to the image rectangle, 00308 // we have to convert the coords to a top-down system. 00309 const int pix_height = pixGetHeight(tesseract_->pix_binary()); 00310 const int pix_width = pixGetWidth(tesseract_->pix_binary()); 00311 *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width); 00312 *top = ClipToRange(pix_height - box.top(), 0, pix_height); 00313 *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width); 00314 *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height); 00315 return true; 00316 } 00317 00324 bool PageIterator::BoundingBox(PageIteratorLevel level, 00325 int* left, int* top, 00326 int* right, int* bottom) const { 00327 return BoundingBox(level, 0, left, top, right, bottom); 00328 } 00329 00330 bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding, 00331 int* left, int* top, 00332 int* right, int* bottom) const { 00333 if (!BoundingBoxInternal(level, left, top, right, bottom)) 00334 return false; 00335 // Convert to the coordinate system of the original image. 00336 *left = ClipToRange(*left / scale_ + rect_left_ - padding, 00337 rect_left_, rect_left_ + rect_width_); 00338 *top = ClipToRange(*top / scale_ + rect_top_ - padding, 00339 rect_top_, rect_top_ + rect_height_); 00340 *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding, 00341 *left, rect_left_ + rect_width_); 00342 *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding, 00343 *top, rect_top_ + rect_height_); 00344 return true; 00345 } 00346 00348 bool PageIterator::Empty(PageIteratorLevel level) const { 00349 if (it_->block() == NULL) return true; // Already at the end! 00350 if (it_->word() == NULL && level != RIL_BLOCK) return true; // image block 00351 if (level == RIL_SYMBOL && blob_index_ >= word_length_) 00352 return true; // Zero length word, or already at the end of it. 00353 return false; 00354 } 00355 00357 PolyBlockType PageIterator::BlockType() const { 00358 if (it_->block() == NULL || it_->block()->block == NULL) 00359 return PT_UNKNOWN; // Already at the end! 00360 if (it_->block()->block->poly_block() == NULL) 00361 return PT_FLOWING_TEXT; // No layout analysis used - assume text. 00362 return it_->block()->block->poly_block()->isA(); 00363 } 00364 00367 Pta* PageIterator::BlockPolygon() const { 00368 if (it_->block() == NULL || it_->block()->block == NULL) 00369 return NULL; // Already at the end! 00370 if (it_->block()->block->poly_block() == NULL) 00371 return NULL; // No layout analysis used - no polygon. 00372 ICOORDELT_IT it(it_->block()->block->poly_block()->points()); 00373 Pta* pta = ptaCreate(it.length()); 00374 int num_pts = 0; 00375 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++num_pts) { 00376 ICOORD* pt = it.data(); 00377 // Convert to top-down coords within the input image. 00378 float x = static_cast<float>(pt->x()) / scale_ + rect_left_; 00379 float y = rect_top_ + rect_height_ - static_cast<float>(pt->y()) / scale_; 00380 ptaAddPt(pta, x, y); 00381 } 00382 return pta; 00383 } 00384 00407 Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const { 00408 int left, top, right, bottom; 00409 if (!BoundingBoxInternal(level, &left, &top, &right, &bottom)) 00410 return NULL; 00411 if (level == RIL_SYMBOL && cblob_it_ != NULL && 00412 cblob_it_->data()->area() != 0) 00413 return cblob_it_->data()->render(); 00414 Box* box = boxCreate(left, top, right - left, bottom - top); 00415 Pix* pix = pixClipRectangle(tesseract_->pix_binary(), box, NULL); 00416 boxDestroy(&box); 00417 if (level == RIL_BLOCK || level == RIL_PARA) { 00418 // Clip to the block polygon as well. 00419 TBOX mask_box; 00420 Pix* mask = it_->block()->block->render_mask(&mask_box); 00421 int mask_x = left - mask_box.left(); 00422 int mask_y = top - (tesseract_->ImageHeight() - mask_box.top()); 00423 // AND the mask and pix, putting the result in pix. 00424 pixRasterop(pix, MAX(0, -mask_x), MAX(0, -mask_y), pixGetWidth(pix), 00425 pixGetHeight(pix), PIX_SRC & PIX_DST, mask, MAX(0, mask_x), 00426 MAX(0, mask_y)); 00427 pixDestroy(&mask); 00428 } 00429 return pix; 00430 } 00431 00443 Pix* PageIterator::GetImage(PageIteratorLevel level, int padding, 00444 Pix* original_img, 00445 int* left, int* top) const { 00446 int right, bottom; 00447 if (!BoundingBox(level, left, top, &right, &bottom)) 00448 return NULL; 00449 if (original_img == NULL) 00450 return GetBinaryImage(level); 00451 00452 // Expand the box. 00453 *left = MAX(*left - padding, 0); 00454 *top = MAX(*top - padding, 0); 00455 right = MIN(right + padding, rect_width_); 00456 bottom = MIN(bottom + padding, rect_height_); 00457 Box* box = boxCreate(*left, *top, right - *left, bottom - *top); 00458 Pix* grey_pix = pixClipRectangle(original_img, box, NULL); 00459 boxDestroy(&box); 00460 if (level == RIL_BLOCK || level == RIL_PARA) { 00461 // Clip to the block polygon as well. 00462 TBOX mask_box; 00463 Pix* mask = it_->block()->block->render_mask(&mask_box); 00464 // Copy the mask registered correctly into an image the size of grey_pix. 00465 int mask_x = *left - mask_box.left(); 00466 int mask_y = *top - (pixGetHeight(original_img) - mask_box.top()); 00467 int width = pixGetWidth(grey_pix); 00468 int height = pixGetHeight(grey_pix); 00469 Pix* resized_mask = pixCreate(width, height, 1); 00470 pixRasterop(resized_mask, MAX(0, -mask_x), MAX(0, -mask_y), width, height, 00471 PIX_SRC, mask, MAX(0, mask_x), MAX(0, mask_y)); 00472 pixDestroy(&mask); 00473 pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1, 00474 2 * padding + 1); 00475 pixInvert(resized_mask, resized_mask); 00476 pixSetMasked(grey_pix, resized_mask, MAX_UINT32); 00477 pixDestroy(&resized_mask); 00478 } 00479 return grey_pix; 00480 } 00481 00487 bool PageIterator::Baseline(PageIteratorLevel level, 00488 int* x1, int* y1, int* x2, int* y2) const { 00489 if (it_->word() == NULL) return false; // Already at the end! 00490 ROW* row = it_->row()->row; 00491 WERD* word = it_->word()->word; 00492 TBOX box = (level == RIL_WORD || level == RIL_SYMBOL) 00493 ? word->bounding_box() 00494 : row->bounding_box(); 00495 int left = box.left(); 00496 ICOORD startpt(left, static_cast<inT16>(row->base_line(left) + 0.5)); 00497 int right = box.right(); 00498 ICOORD endpt(right, static_cast<inT16>(row->base_line(right) + 0.5)); 00499 // Rotate to image coordinates and convert to global image coords. 00500 startpt.rotate(it_->block()->block->re_rotation()); 00501 endpt.rotate(it_->block()->block->re_rotation()); 00502 *x1 = startpt.x() / scale_ + rect_left_; 00503 *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_; 00504 *x2 = endpt.x() / scale_ + rect_left_; 00505 *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_; 00506 return true; 00507 } 00508 00509 void PageIterator::Orientation(tesseract::Orientation *orientation, 00510 tesseract::WritingDirection *writing_direction, 00511 tesseract::TextlineOrder *textline_order, 00512 float *deskew_angle) const { 00513 BLOCK* block = it_->block()->block; 00514 00515 // Orientation 00516 FCOORD up_in_image(0.0, 1.0); 00517 up_in_image.unrotate(block->classify_rotation()); 00518 up_in_image.rotate(block->re_rotation()); 00519 00520 if (up_in_image.x() == 0.0F) { 00521 if (up_in_image.y() > 0.0F) { 00522 *orientation = ORIENTATION_PAGE_UP; 00523 } else { 00524 *orientation = ORIENTATION_PAGE_DOWN; 00525 } 00526 } else if (up_in_image.x() > 0.0F) { 00527 *orientation = ORIENTATION_PAGE_RIGHT; 00528 } else { 00529 *orientation = ORIENTATION_PAGE_LEFT; 00530 } 00531 00532 // Writing direction 00533 bool is_vertical_text = (block->classify_rotation().x() == 0.0); 00534 bool right_to_left = block->right_to_left(); 00535 *writing_direction = 00536 is_vertical_text 00537 ? WRITING_DIRECTION_TOP_TO_BOTTOM 00538 : (right_to_left 00539 ? WRITING_DIRECTION_RIGHT_TO_LEFT 00540 : WRITING_DIRECTION_LEFT_TO_RIGHT); 00541 00542 // Textline Order 00543 bool is_mongolian = false; // TODO(eger): fix me 00544 *textline_order = is_vertical_text 00545 ? (is_mongolian 00546 ? TEXTLINE_ORDER_LEFT_TO_RIGHT 00547 : TEXTLINE_ORDER_RIGHT_TO_LEFT) 00548 : TEXTLINE_ORDER_TOP_TO_BOTTOM; 00549 00550 // Deskew angle 00551 FCOORD skew = block->skew(); // true horizontal for textlines 00552 *deskew_angle = -skew.angle(); 00553 } 00554 00555 void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just, 00556 bool *is_list_item, 00557 bool *is_crown, 00558 int *first_line_indent) const { 00559 *just = tesseract::JUSTIFICATION_UNKNOWN; 00560 if (!it_->row() || !it_->row()->row || !it_->row()->row->para() || 00561 !it_->row()->row->para()->model) 00562 return; 00563 00564 PARA *para = it_->row()->row->para(); 00565 *is_list_item = para->is_list_item; 00566 *is_crown = para->is_very_first_or_continuation; 00567 *first_line_indent = para->model->first_indent() - 00568 para->model->body_indent(); 00569 } 00570 00575 void PageIterator::BeginWord(int offset) { 00576 WERD_RES* word_res = it_->word(); 00577 if (word_res == NULL) { 00578 // This is a non-text block, so there is no word. 00579 word_length_ = 0; 00580 blob_index_ = 0; 00581 word_ = NULL; 00582 return; 00583 } 00584 if (word_res->best_choice != NULL) { 00585 // Recognition has been done, so we are using the box_word, which 00586 // is already baseline denormalized. 00587 word_length_ = word_res->best_choice->length(); 00588 if (word_res->box_word != NULL) { 00589 if (word_res->box_word->length() != word_length_) { 00590 tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ", 00591 word_length_, word_res->best_choice->unichar_string().string(), 00592 word_res->box_word->length()); 00593 word_res->box_word->bounding_box().print(); 00594 } 00595 ASSERT_HOST(word_res->box_word->length() == word_length_); 00596 } 00597 word_ = NULL; 00598 // We will be iterating the box_word. 00599 if (cblob_it_ != NULL) { 00600 delete cblob_it_; 00601 cblob_it_ = NULL; 00602 } 00603 } else { 00604 // No recognition yet, so a "symbol" is a cblob. 00605 word_ = word_res->word; 00606 ASSERT_HOST(word_->cblob_list() != NULL); 00607 word_length_ = word_->cblob_list()->length(); 00608 if (cblob_it_ == NULL) cblob_it_ = new C_BLOB_IT; 00609 cblob_it_->set_to_list(word_->cblob_list()); 00610 } 00611 for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) { 00612 if (cblob_it_ != NULL) 00613 cblob_it_->forward(); 00614 } 00615 } 00616 00617 bool PageIterator::SetWordBlamerBundle(BlamerBundle *blamer_bundle) { 00618 if (it_->word() != NULL) { 00619 it_->word()->blamer_bundle = blamer_bundle; 00620 return true; 00621 } else { 00622 return false; 00623 } 00624 } 00625 00626 } // namespace tesseract.