tesseract 3.04.01

ccstruct/ocrblock.cpp

Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        ocrblock.cpp  (Formerly block.c)
00003  * Description: BLOCK member functions and iterator functions.
00004  * Author:              Ray Smith
00005  * Created:             Fri Mar 15 09:41:28 GMT 1991
00006  *
00007  * (C) Copyright 1991, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include <stdlib.h>
00021 #include "blckerr.h"
00022 #include "ocrblock.h"
00023 #include "stepblob.h"
00024 #include "tprintf.h"
00025 
00026 #define BLOCK_LABEL_HEIGHT  150  //char height of block id
00027 
00028 ELISTIZE (BLOCK)
00034 BLOCK::BLOCK(const char *name,                //< filename
00035              BOOL8 prop,                      //< proportional
00036              inT16 kern,                      //< kerning
00037              inT16 space,                     //< spacing
00038              inT16 xmin,                      //< bottom left
00039              inT16 ymin, inT16 xmax,          //< top right
00040              inT16 ymax)
00041   : PDBLK (xmin, ymin, xmax, ymax),
00042     filename(name),
00043     re_rotation_(1.0f, 0.0f),
00044     classify_rotation_(1.0f, 0.0f),
00045     skew_(1.0f, 0.0f) {
00046   ICOORDELT_IT left_it = &leftside;
00047   ICOORDELT_IT right_it = &rightside;
00048 
00049   proportional = prop;
00050   right_to_left_ = false;
00051   kerning = kern;
00052   spacing = space;
00053   font_class = -1;               //not assigned
00054   cell_over_xheight_ = 2.0f;
00055   hand_poly = NULL;
00056   left_it.set_to_list (&leftside);
00057   right_it.set_to_list (&rightside);
00058                                  //make default box
00059   left_it.add_to_end (new ICOORDELT (xmin, ymin));
00060   left_it.add_to_end (new ICOORDELT (xmin, ymax));
00061   right_it.add_to_end (new ICOORDELT (xmax, ymin));
00062   right_it.add_to_end (new ICOORDELT (xmax, ymax));
00063 }
00064 
00071 int decreasing_top_order(  //
00072                          const void *row1,
00073                          const void *row2) {
00074   return (*(ROW **) row2)->bounding_box ().top () -
00075     (*(ROW **) row1)->bounding_box ().top ();
00076 }
00077 
00078 
00084 void BLOCK::rotate(const FCOORD& rotation) {
00085   poly_block()->rotate(rotation);
00086   box = *poly_block()->bounding_box();
00087 }
00088 
00089 // Returns the bounding box including the desired combination of upper and
00090 // lower noise/diacritic elements.
00091 TBOX BLOCK::restricted_bounding_box(bool upper_dots, bool lower_dots) const {
00092   TBOX box;
00093   // This is a read-only iteration of the rows in the block.
00094   ROW_IT it(const_cast<ROW_LIST*>(&rows));
00095   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00096     box += it.data()->restricted_bounding_box(upper_dots, lower_dots);
00097   }
00098   return box;
00099 }
00100 
00107 void BLOCK::reflect_polygon_in_y_axis() {
00108   poly_block()->reflect_in_y_axis();
00109   box = *poly_block()->bounding_box();
00110 }
00111 
00118 void BLOCK::sort_rows() {  // order on "top"
00119   ROW_IT row_it(&rows);
00120 
00121   row_it.sort (decreasing_top_order);
00122 }
00123 
00124 
00132 void BLOCK::compress() {  // squash it up
00133   #define           ROW_SPACING 5
00134 
00135   ROW_IT row_it(&rows);
00136   ROW *row;
00137   ICOORD row_spacing (0, ROW_SPACING);
00138 
00139   ICOORDELT_IT icoordelt_it;
00140 
00141   sort_rows();
00142 
00143   box = TBOX (box.topleft (), box.topleft ());
00144   box.move_bottom_edge (ROW_SPACING);
00145   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00146     row = row_it.data ();
00147     row->move (box.botleft () - row_spacing -
00148       row->bounding_box ().topleft ());
00149     box += row->bounding_box ();
00150   }
00151 
00152   leftside.clear ();
00153   icoordelt_it.set_to_list (&leftside);
00154   icoordelt_it.add_to_end (new ICOORDELT (box.left (), box.bottom ()));
00155   icoordelt_it.add_to_end (new ICOORDELT (box.left (), box.top ()));
00156   rightside.clear ();
00157   icoordelt_it.set_to_list (&rightside);
00158   icoordelt_it.add_to_end (new ICOORDELT (box.right (), box.bottom ()));
00159   icoordelt_it.add_to_end (new ICOORDELT (box.right (), box.top ()));
00160 }
00161 
00162 
00170 void BLOCK::check_pitch() {  // check prop
00171   //      tprintf("Missing FFT fixed pitch stuff!\n");
00172   pitch = -1;
00173 }
00174 
00175 
00182 void BLOCK::compress(                  // squash it up
00183                      const ICOORD vec  // and move
00184                     ) {
00185   box.move (vec);
00186   compress();
00187 }
00188 
00189 
00196 void BLOCK::print(            //print list of sides
00197                   FILE *,     //< file to print on
00198                   BOOL8 dump  //< print full detail
00199                  ) {
00200   ICOORDELT_IT it = &leftside;   //iterator
00201 
00202   box.print ();
00203   tprintf ("Proportional= %s\n", proportional ? "TRUE" : "FALSE");
00204   tprintf ("Kerning= %d\n", kerning);
00205   tprintf ("Spacing= %d\n", spacing);
00206   tprintf ("Fixed_pitch=%d\n", pitch);
00207   tprintf ("Filename= %s\n", filename.string ());
00208 
00209   if (dump) {
00210     tprintf ("Left side coords are:\n");
00211     for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
00212       tprintf ("(%d,%d) ", it.data ()->x (), it.data ()->y ());
00213     tprintf ("\n");
00214     tprintf ("Right side coords are:\n");
00215     it.set_to_list (&rightside);
00216     for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
00217       tprintf ("(%d,%d) ", it.data ()->x (), it.data ()->y ());
00218     tprintf ("\n");
00219   }
00220 }
00221 
00228 BLOCK & BLOCK::operator= (       //assignment
00229 const BLOCK & source             //from this
00230 ) {
00231   this->ELIST_LINK::operator= (source);
00232   this->PDBLK::operator= (source);
00233   proportional = source.proportional;
00234   kerning = source.kerning;
00235   spacing = source.spacing;
00236   filename = source.filename;    //STRINGs assign ok
00237   if (!rows.empty ())
00238     rows.clear ();
00239   re_rotation_ = source.re_rotation_;
00240   classify_rotation_ = source.classify_rotation_;
00241   skew_ = source.skew_;
00242   return *this;
00243 }
00244 
00245 // This function is for finding the approximate (horizontal) distance from
00246 // the x-coordinate of the left edge of a symbol to the left edge of the
00247 // text block which contains it.  We are passed:
00248 //   segments - output of PB_LINE_IT::get_line() which contains x-coordinate
00249 //       intervals for the scan line going through the symbol's y-coordinate.
00250 //       Each element of segments is of the form (x()=start_x, y()=length).
00251 //   x - the x coordinate of the symbol we're interested in.
00252 //   margin - return value, the distance from x,y to the left margin of the
00253 //       block containing it.
00254 // If all segments were to the right of x, we return false and 0.
00255 bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) {
00256   bool found = false;
00257   *margin = 0;
00258   if (segments->empty())
00259     return found;
00260   ICOORDELT_IT seg_it(segments);
00261   for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
00262     int cur_margin = x - seg_it.data()->x();
00263     if (cur_margin >= 0) {
00264       if (!found) {
00265         *margin = cur_margin;
00266       } else if (cur_margin < *margin) {
00267         *margin = cur_margin;
00268       }
00269       found = true;
00270     }
00271   }
00272   return found;
00273 }
00274 
00275 // This function is for finding the approximate (horizontal) distance from
00276 // the x-coordinate of the right edge of a symbol to the right edge of the
00277 // text block which contains it.  We are passed:
00278 //   segments - output of PB_LINE_IT::get_line() which contains x-coordinate
00279 //       intervals for the scan line going through the symbol's y-coordinate.
00280 //       Each element of segments is of the form (x()=start_x, y()=length).
00281 //   x - the x coordinate of the symbol we're interested in.
00282 //   margin - return value, the distance from x,y to the right margin of the
00283 //       block containing it.
00284 // If all segments were to the left of x, we return false and 0.
00285 bool RightMargin(ICOORDELT_LIST *segments, int x, int *margin) {
00286   bool found = false;
00287   *margin = 0;
00288   if (segments->empty())
00289     return found;
00290   ICOORDELT_IT seg_it(segments);
00291   for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
00292     int cur_margin = seg_it.data()->x() + seg_it.data()->y() - x;
00293     if (cur_margin >= 0) {
00294       if (!found) {
00295         *margin = cur_margin;
00296       } else if (cur_margin < *margin) {
00297         *margin = cur_margin;
00298       }
00299       found = true;
00300     }
00301   }
00302   return found;
00303 }
00304 
00305 // Compute the distance from the left and right ends of each row to the
00306 // left and right edges of the block's polyblock.  Illustration:
00307 //  ____________________________   _______________________
00308 //  |  Howdy neighbor!         |  |rectangular blocks look|
00309 //  |  This text is  written to|  |more like stacked pizza|
00310 //  |illustrate how useful poly-  |boxes.                 |
00311 //  |blobs  are   in -----------  ------   The    polyblob|
00312 //  |dealing    with|     _________     |for a BLOCK  rec-|
00313 //  |harder   layout|   /===========\   |ords the possibly|
00314 //  |issues.        |    |  _    _  |   |skewed    pseudo-|
00315 //  |  You  see this|    | |_| \|_| |   |rectangular      |
00316 //  |text is  flowed|    |      }   |   |boundary     that|
00317 //  |around  a  mid-|     \   ____  |   |forms the  ideal-|
00318 //  |cloumn portrait._____ \       /  __|ized  text margin|
00319 //  |  Polyblobs     exist| \    /   |from which we should|
00320 //  |to account for insets|  |   |   |measure    paragraph|
00321 //  |which make  otherwise|  -----   |indentation.        |
00322 //  -----------------------          ----------------------
00323 //
00324 // If we identify a drop-cap, we measure the left margin for the lines
00325 // below the first line relative to one space past the drop cap.  The
00326 // first line's margin and those past the drop cap area are measured
00327 // relative to the enclosing polyblock.
00328 //
00329 // TODO(rays): Before this will work well, we'll need to adjust the
00330 //             polyblob tighter around the text near images, as in:
00331 //             UNLV_AUTO:mag.3G0  page 2
00332 //             UNLV_AUTO:mag.3G4  page 16
00333 void BLOCK::compute_row_margins() {
00334   if (row_list()->empty() || row_list()->singleton()) {
00335     return;
00336   }
00337 
00338   // If Layout analysis was not called, default to this.
00339   POLY_BLOCK rect_block(bounding_box(), PT_FLOWING_TEXT);
00340   POLY_BLOCK *pblock = &rect_block;
00341   if (poly_block() != NULL) {
00342     pblock = poly_block();
00343   }
00344 
00345   // Step One: Determine if there is a drop-cap.
00346   //           TODO(eger): Fix up drop cap code for RTL languages.
00347   ROW_IT r_it(row_list());
00348   ROW *first_row = r_it.data();
00349   ROW *second_row = r_it.data_relative(1);
00350 
00351   // initialize the bottom of a fictitious drop cap far above the first line.
00352   int drop_cap_bottom = first_row->bounding_box().top() +
00353                         first_row->bounding_box().height();
00354   int drop_cap_right = first_row->bounding_box().left();
00355   int mid_second_line = second_row->bounding_box().top() -
00356                         second_row->bounding_box().height() / 2;
00357   WERD_IT werd_it(r_it.data()->word_list());  // words of line one
00358   if (!werd_it.empty()) {
00359     C_BLOB_IT cblob_it(werd_it.data()->cblob_list());
00360     for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list();
00361          cblob_it.forward()) {
00362       TBOX bbox = cblob_it.data()->bounding_box();
00363       if (bbox.bottom() <= mid_second_line) {
00364         // we found a real drop cap
00365         first_row->set_has_drop_cap(true);
00366         if (drop_cap_bottom >  bbox.bottom())
00367           drop_cap_bottom = bbox.bottom();
00368         if (drop_cap_right < bbox.right())
00369           drop_cap_right = bbox.right();
00370       }
00371     }
00372   }
00373 
00374   // Step Two: Calculate the margin from the text of each row to the block
00375   //           (or drop-cap) boundaries.
00376   PB_LINE_IT lines(pblock);
00377   r_it.set_to_list(row_list());
00378   for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
00379     ROW *row = r_it.data();
00380     TBOX row_box = row->bounding_box();
00381     int left_y = row->base_line(row_box.left()) + row->x_height();
00382     int left_margin;
00383     ICOORDELT_LIST *segments = lines.get_line(left_y);
00384     LeftMargin(segments, row_box.left(), &left_margin);
00385     delete segments;
00386 
00387     if (row_box.top() >= drop_cap_bottom) {
00388       int drop_cap_distance = row_box.left() - row->space() - drop_cap_right;
00389       if (drop_cap_distance < 0)
00390         drop_cap_distance = 0;
00391       if (drop_cap_distance < left_margin)
00392         left_margin = drop_cap_distance;
00393     }
00394 
00395     int right_y = row->base_line(row_box.right()) + row->x_height();
00396     int right_margin;
00397     segments = lines.get_line(right_y);
00398     RightMargin(segments, row_box.right(), &right_margin);
00399     delete segments;
00400     row->set_lmargin(left_margin);
00401     row->set_rmargin(right_margin);
00402   }
00403 }
00404 
00405 /**********************************************************************
00406  * PrintSegmentationStats
00407  *
00408  * Prints segmentation stats for the given block list.
00409  **********************************************************************/
00410 
00411 void PrintSegmentationStats(BLOCK_LIST* block_list) {
00412   int num_blocks = 0;
00413   int num_rows = 0;
00414   int num_words = 0;
00415   int num_blobs = 0;
00416   BLOCK_IT block_it(block_list);
00417   for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
00418     BLOCK* block = block_it.data();
00419     ++num_blocks;
00420     ROW_IT row_it(block->row_list());
00421     for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
00422       ++num_rows;
00423       ROW* row = row_it.data();
00424       // Iterate over all werds in the row.
00425       WERD_IT werd_it(row->word_list());
00426       for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
00427         WERD* werd = werd_it.data();
00428         ++num_words;
00429         num_blobs += werd->cblob_list()->length();
00430       }
00431     }
00432   }
00433   tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n",
00434           num_blocks, num_rows, num_words, num_blobs);
00435 }
00436 
00437 /**********************************************************************
00438  * ExtractBlobsFromSegmentation
00439  *
00440  * Extracts blobs from the given block list and adds them to the output list.
00441  * The block list must have been created by performing a page segmentation.
00442  **********************************************************************/
00443 
00444 void ExtractBlobsFromSegmentation(BLOCK_LIST* blocks,
00445                                   C_BLOB_LIST* output_blob_list) {
00446   C_BLOB_IT return_list_it(output_blob_list);
00447   BLOCK_IT block_it(blocks);
00448   for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
00449     BLOCK* block = block_it.data();
00450     ROW_IT row_it(block->row_list());
00451     for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
00452       ROW* row = row_it.data();
00453       // Iterate over all werds in the row.
00454       WERD_IT werd_it(row->word_list());
00455       for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
00456         WERD* werd = werd_it.data();
00457         return_list_it.move_to_last();
00458         return_list_it.add_list_after(werd->cblob_list());
00459         return_list_it.move_to_last();
00460         return_list_it.add_list_after(werd->rej_cblob_list());
00461       }
00462     }
00463   }
00464 }
00465 
00466 /**********************************************************************
00467  * RefreshWordBlobsFromNewBlobs()
00468  *
00469  * Refreshes the words in the block_list by using blobs in the
00470  * new_blobs list.
00471  * Block list must have word segmentation in it.
00472  * It consumes the blobs provided in the new_blobs list. The blobs leftover in
00473  * the new_blobs list after the call weren't matched to any blobs of the words
00474  * in block list.
00475  * The output not_found_blobs is a list of blobs from the original segmentation
00476  * in the block_list for which no corresponding new blobs were found.
00477  **********************************************************************/
00478 
00479 void RefreshWordBlobsFromNewBlobs(BLOCK_LIST* block_list,
00480                                   C_BLOB_LIST* new_blobs,
00481                                   C_BLOB_LIST* not_found_blobs) {
00482   // Now iterate over all the blobs in the segmentation_block_list_, and just
00483   // replace the corresponding c-blobs inside the werds.
00484   BLOCK_IT block_it(block_list);
00485   for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
00486     BLOCK* block = block_it.data();
00487     if (block->poly_block() != NULL && !block->poly_block()->IsText())
00488       continue;  // Don't touch non-text blocks.
00489     // Iterate over all rows in the block.
00490     ROW_IT row_it(block->row_list());
00491     for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
00492       ROW* row = row_it.data();
00493       // Iterate over all werds in the row.
00494       WERD_IT werd_it(row->word_list());
00495       WERD_LIST new_words;
00496       WERD_IT new_words_it(&new_words);
00497       for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
00498         WERD* werd = werd_it.extract();
00499         WERD* new_werd = werd->ConstructWerdWithNewBlobs(new_blobs,
00500                                                          not_found_blobs);
00501         if (new_werd) {
00502           // Insert this new werd into the actual row's werd-list. Remove the
00503           // existing one.
00504           new_words_it.add_after_then_move(new_werd);
00505           delete werd;
00506         } else {
00507           // Reinsert the older word back, for lack of better options.
00508           // This is critical since dropping the words messes up segmentation:
00509           // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on.
00510           new_words_it.add_after_then_move(werd);
00511         }
00512       }
00513       // Get rid of the old word list & replace it with the new one.
00514       row->word_list()->clear();
00515       werd_it.move_to_first();
00516       werd_it.add_list_after(&new_words);
00517     }
00518   }
00519 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines