tesseract 3.04.01

ccstruct/stepblob.cpp

Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        stepblob.cpp  (Formerly cblob.c)
00003  * Description: Code for C_BLOB class.
00004  * Author:              Ray Smith
00005  * Created:             Tue Oct 08 10:41:13 BST 1991
00006  *
00007  * (C) Copyright 1991, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include "stepblob.h"
00021 #include "allheaders.h"
00022 
00023 // Include automatically generated configuration file if running autoconf.
00024 #ifdef HAVE_CONFIG_H
00025 #include "config_auto.h"
00026 #endif
00027 
00028 // Max perimeter to width ratio for a baseline position above box bottom.
00029 const double kMaxPerimeterWidthRatio = 8.0;
00030 
00031 ELISTIZE (C_BLOB)
00032 /**********************************************************************
00033  * position_outline
00034  *
00035  * Position the outline in the given list at the relevant place
00036  * according to its nesting.
00037  **********************************************************************/
00038 static void position_outline(                          //put in place
00039                              C_OUTLINE *outline,       //thing to place
00040                              C_OUTLINE_LIST *destlist  //desstination list
00041                             ) {
00042   C_OUTLINE *dest_outline;       //outline from dest list
00043   C_OUTLINE_IT it = destlist;    //iterator
00044                                  //iterator on children
00045   C_OUTLINE_IT child_it = outline->child ();
00046 
00047   if (!it.empty ()) {
00048     do {
00049       dest_outline = it.data (); //get destination
00050                                  //encloses dest
00051       if (*dest_outline < *outline) {
00052                                  //take off list
00053         dest_outline = it.extract ();
00054                                  //put this in place
00055         it.add_after_then_move (outline);
00056                                  //make it a child
00057         child_it.add_to_end (dest_outline);
00058         while (!it.at_last ()) {
00059           it.forward ();         //do rest of list
00060                                  //check for other children
00061           dest_outline = it.data ();
00062           if (*dest_outline < *outline) {
00063                                  //take off list
00064             dest_outline = it.extract ();
00065             child_it.add_to_end (dest_outline);
00066             //make it a child
00067             if (it.empty ())
00068               break;
00069           }
00070         }
00071         return;                  //finished
00072       }
00073                                  //enclosed by dest
00074       else if (*outline < *dest_outline) {
00075         position_outline (outline, dest_outline->child ());
00076         //place in child list
00077         return;                  //finished
00078       }
00079       it.forward ();
00080     }
00081     while (!it.at_first ());
00082   }
00083   it.add_to_end (outline);       //at outer level
00084 }
00085 
00086 
00087 /**********************************************************************
00088  * plot_outline_list
00089  *
00090  * Draw a list of outlines in the given colour and their children
00091  * in the child colour.
00092  **********************************************************************/
00093 
00094 #ifndef GRAPHICS_DISABLED
00095 static void plot_outline_list(                       //draw outlines
00096                               C_OUTLINE_LIST *list,  //outline to draw
00097                               ScrollView* window,         //window to draw in
00098                               ScrollView::Color colour,         //colour to use
00099                               ScrollView::Color child_colour    //colour of children
00100                              ) {
00101   C_OUTLINE *outline;            //current outline
00102   C_OUTLINE_IT it = list;        //iterator
00103 
00104   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
00105     outline = it.data ();
00106                                  //draw it
00107     outline->plot (window, colour);
00108     if (!outline->child ()->empty ())
00109       plot_outline_list (outline->child (), window,
00110         child_colour, child_colour);
00111   }
00112 }
00113 // Draws the outlines in the given colour, and child_colour, normalized
00114 // using the given denorm, making use of sub-pixel accurate information
00115 // if available.
00116 static void plot_normed_outline_list(const DENORM& denorm,
00117                                      C_OUTLINE_LIST *list,
00118                                      ScrollView::Color colour,
00119                                      ScrollView::Color child_colour,
00120                                      ScrollView* window) {
00121   C_OUTLINE_IT it(list);
00122   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00123     C_OUTLINE* outline = it.data();
00124     outline->plot_normed(denorm, colour, window);
00125     if (!outline->child()->empty())
00126       plot_normed_outline_list(denorm, outline->child(), child_colour,
00127                                child_colour, window);
00128   }
00129 }
00130 #endif
00131 
00132 
00133 /**********************************************************************
00134  * reverse_outline_list
00135  *
00136  * Reverse a list of outlines and their children.
00137  **********************************************************************/
00138 
00139 static void reverse_outline_list(C_OUTLINE_LIST *list) {
00140   C_OUTLINE_IT it = list;        // iterator
00141 
00142   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00143     C_OUTLINE* outline = it.data();
00144     outline->reverse();         // reverse it
00145     outline->set_flag(COUT_INVERSE, TRUE);
00146     if (!outline->child()->empty())
00147       reverse_outline_list(outline->child());
00148   }
00149 }
00150 
00151 
00152 /**********************************************************************
00153  * C_BLOB::C_BLOB
00154  *
00155  * Constructor to build a C_BLOB from a list of C_OUTLINEs.
00156  * The C_OUTLINEs are not copied so the source list is emptied.
00157  * The C_OUTLINEs are nested correctly in the blob.
00158  **********************************************************************/
00159 
00160 C_BLOB::C_BLOB(C_OUTLINE_LIST *outline_list) {
00161   for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) {
00162     C_OUTLINE* outline = ol_it.extract();
00163     // Position this outline in appropriate position in the hierarchy.
00164     position_outline(outline, &outlines);
00165   }
00166   CheckInverseFlagAndDirection();
00167 }
00168 
00169 // Simpler constructor to build a blob from a single outline that has
00170 // already been fully initialized.
00171 C_BLOB::C_BLOB(C_OUTLINE* outline) {
00172   C_OUTLINE_IT it(&outlines);
00173   it.add_to_end(outline);
00174 }
00175 
00176 // Builds a set of one or more blobs from a list of outlines.
00177 // Input: one outline on outline_list contains all the others, but the
00178 // nesting and order are undefined.
00179 // If good_blob is true, the blob is added to good_blobs_it, unless
00180 // an illegal (generation-skipping) parent-child relationship is found.
00181 // If so, the parent blob goes to bad_blobs_it, and the immediate children
00182 // are promoted to the top level, recursively being sent to good_blobs_it.
00183 // If good_blob is false, all created blobs will go to the bad_blobs_it.
00184 // Output: outline_list is empty. One or more blobs are added to
00185 // good_blobs_it and/or bad_blobs_it.
00186 void C_BLOB::ConstructBlobsFromOutlines(bool good_blob,
00187                                         C_OUTLINE_LIST* outline_list,
00188                                         C_BLOB_IT* good_blobs_it,
00189                                         C_BLOB_IT* bad_blobs_it) {
00190   // List of top-level outlines with correctly nested children.
00191   C_OUTLINE_LIST nested_outlines;
00192   for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) {
00193     C_OUTLINE* outline = ol_it.extract();
00194     // Position this outline in appropriate position in the hierarchy.
00195     position_outline(outline, &nested_outlines);
00196   }
00197   // Check for legal nesting and reassign as required.
00198   for (C_OUTLINE_IT ol_it(&nested_outlines); !ol_it.empty(); ol_it.forward()) {
00199     C_OUTLINE* outline = ol_it.extract();
00200     bool blob_is_good = good_blob;
00201     if (!outline->IsLegallyNested()) {
00202       // The blob is illegally nested.
00203       // Mark it bad, and add all its children to the top-level list.
00204       blob_is_good = false;
00205       ol_it.add_list_after(outline->child());
00206     }
00207     C_BLOB* blob = new C_BLOB(outline);
00208     // Set inverse flag and reverse if needed.
00209     blob->CheckInverseFlagAndDirection();
00210     // Put on appropriate list.
00211     if (!blob_is_good && bad_blobs_it != NULL)
00212       bad_blobs_it->add_after_then_move(blob);
00213     else
00214       good_blobs_it->add_after_then_move(blob);
00215   }
00216 }
00217 
00218 // Sets the COUT_INVERSE flag appropriately on the outlines and their
00219 // children recursively, reversing the outlines if needed so that
00220 // everything has an anticlockwise top-level.
00221 void C_BLOB::CheckInverseFlagAndDirection() {
00222   C_OUTLINE_IT ol_it(&outlines);
00223   for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
00224     C_OUTLINE* outline = ol_it.data();
00225     if (outline->turn_direction() < 0) {
00226       outline->reverse();
00227       reverse_outline_list(outline->child());
00228       outline->set_flag(COUT_INVERSE, TRUE);
00229     } else {
00230       outline->set_flag(COUT_INVERSE, FALSE);
00231     }
00232   }
00233 }
00234 
00235 
00236 // Build and return a fake blob containing a single fake outline with no
00237 // steps.
00238 C_BLOB* C_BLOB::FakeBlob(const TBOX& box) {
00239   C_OUTLINE_LIST outlines;
00240   C_OUTLINE::FakeOutline(box, &outlines);
00241   return new C_BLOB(&outlines);
00242 }
00243 
00244 /**********************************************************************
00245  * C_BLOB::bounding_box
00246  *
00247  * Return the bounding box of the blob.
00248  **********************************************************************/
00249 
00250 TBOX C_BLOB::bounding_box() const {  // bounding box
00251   C_OUTLINE *outline;                // current outline
00252   // This is a read-only iteration of the outlines.
00253   C_OUTLINE_IT it = const_cast<C_OUTLINE_LIST*>(&outlines);
00254   TBOX box;                          // bounding box
00255 
00256   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
00257     outline = it.data ();
00258     box += outline->bounding_box ();
00259   }
00260   return box;
00261 }
00262 
00263 
00264 /**********************************************************************
00265  * C_BLOB::area
00266  *
00267  * Return the area of the blob.
00268  **********************************************************************/
00269 
00270 inT32 C_BLOB::area() {  //area
00271   C_OUTLINE *outline;            //current outline
00272   C_OUTLINE_IT it = &outlines;   //outlines of blob
00273   inT32 total;                   //total area
00274 
00275   total = 0;
00276   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
00277     outline = it.data ();
00278     total += outline->area ();
00279   }
00280   return total;
00281 }
00282 
00283 /**********************************************************************
00284  * C_BLOB::perimeter
00285  *
00286  * Return the perimeter of the top and 2nd level outlines.
00287  **********************************************************************/
00288 
00289 inT32 C_BLOB::perimeter() {
00290   C_OUTLINE *outline;            // current outline
00291   C_OUTLINE_IT it = &outlines;   // outlines of blob
00292   inT32 total;                   // total perimeter
00293 
00294   total = 0;
00295   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00296     outline = it.data();
00297     total += outline->perimeter();
00298   }
00299   return total;
00300 }
00301 
00302 
00303 /**********************************************************************
00304  * C_BLOB::outer_area
00305  *
00306  * Return the area of the blob.
00307  **********************************************************************/
00308 
00309 inT32 C_BLOB::outer_area() {  //area
00310   C_OUTLINE *outline;            //current outline
00311   C_OUTLINE_IT it = &outlines;   //outlines of blob
00312   inT32 total;                   //total area
00313 
00314   total = 0;
00315   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
00316     outline = it.data ();
00317     total += outline->outer_area ();
00318   }
00319   return total;
00320 }
00321 
00322 
00323 /**********************************************************************
00324  * C_BLOB::count_transitions
00325  *
00326  * Return the total x and y maxes and mins in the blob.
00327  * Chlid outlines are not counted.
00328  **********************************************************************/
00329 
00330 inT32 C_BLOB::count_transitions(                 //area
00331                                 inT32 threshold  //on size
00332                                ) {
00333   C_OUTLINE *outline;            //current outline
00334   C_OUTLINE_IT it = &outlines;   //outlines of blob
00335   inT32 total;                   //total area
00336 
00337   total = 0;
00338   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
00339     outline = it.data ();
00340     total += outline->count_transitions (threshold);
00341   }
00342   return total;
00343 }
00344 
00345 
00346 /**********************************************************************
00347  * C_BLOB::move
00348  *
00349  * Move C_BLOB by vector
00350  **********************************************************************/
00351 
00352 void C_BLOB::move(                  // reposition blob
00353                   const ICOORD vec  // by vector
00354                  ) {
00355   C_OUTLINE_IT it(&outlines);  // iterator
00356 
00357   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
00358     it.data ()->move (vec);      // move each outline
00359 }
00360 
00361 // Static helper for C_BLOB::rotate to allow recursion of child outlines.
00362 void RotateOutlineList(const FCOORD& rotation, C_OUTLINE_LIST* outlines) {
00363   C_OUTLINE_LIST new_outlines;
00364   C_OUTLINE_IT src_it(outlines);
00365   C_OUTLINE_IT dest_it(&new_outlines);
00366   while (!src_it.empty()) {
00367     C_OUTLINE* old_outline = src_it.extract();
00368     src_it.forward();
00369     C_OUTLINE* new_outline = new C_OUTLINE(old_outline, rotation);
00370     if (!old_outline->child()->empty()) {
00371       RotateOutlineList(rotation, old_outline->child());
00372       C_OUTLINE_IT child_it(new_outline->child());
00373       child_it.add_list_after(old_outline->child());
00374     }
00375     delete old_outline;
00376     dest_it.add_to_end(new_outline);
00377   }
00378   src_it.add_list_after(&new_outlines);
00379 }
00380 
00381 /**********************************************************************
00382  * C_BLOB::rotate
00383  *
00384  * Rotate C_BLOB by rotation.
00385  * Warning! has to rebuild all the C_OUTLINEs.
00386  **********************************************************************/
00387 void C_BLOB::rotate(const FCOORD& rotation) {
00388   RotateOutlineList(rotation, &outlines);
00389 }
00390 
00391 // Helper calls ComputeEdgeOffsets or ComputeBinaryOffsets recursively on the
00392 // outline list and its children.
00393 static void ComputeEdgeOffsetsOutlineList(int threshold, Pix* pix,
00394                                           C_OUTLINE_LIST *list) {
00395   C_OUTLINE_IT it(list);
00396   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00397     C_OUTLINE* outline = it.data();
00398     if (pix != NULL && pixGetDepth(pix) == 8)
00399       outline->ComputeEdgeOffsets(threshold, pix);
00400     else
00401       outline->ComputeBinaryOffsets();
00402     if (!outline->child()->empty())
00403       ComputeEdgeOffsetsOutlineList(threshold, pix, outline->child());
00404   }
00405 }
00406 
00407 // Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
00408 // if the supplied pix is 8-bit or the binary edges if NULL.
00409 void C_BLOB::ComputeEdgeOffsets(int threshold, Pix* pix) {
00410   ComputeEdgeOffsetsOutlineList(threshold, pix, &outlines);
00411 }
00412 
00413 // Estimates and returns the baseline position based on the shape of the
00414 // outlines.
00415 // We first find the minimum y-coord (y_mins) at each x-coord within the blob.
00416 // If there is a run of some y or y+1 in y_mins that is longer than the total
00417 // number of positions at bottom or bottom+1, subject to the additional
00418 // condition that at least one side of the y/y+1 run is higher than y+1, so it
00419 // is not a local minimum, then y, not the bottom, makes a good candidate
00420 // baseline position for this blob. Eg
00421 //   |                  ---|
00422 //   |                  |
00423 //   |-      -----------|        <=  Good candidate baseline position.
00424 //    |-    -|
00425 //     |   -|
00426 //     |---|                     <=  Bottom of blob
00427 inT16 C_BLOB::EstimateBaselinePosition() {
00428   TBOX box = bounding_box();
00429   int left = box.left();
00430   int width = box.width();
00431   int bottom = box.bottom();
00432   if (outlines.empty() || perimeter() > width * kMaxPerimeterWidthRatio)
00433     return bottom;  // This is only for non-CJK blobs.
00434   // Get the minimum y coordinate at each x-coordinate.
00435   GenericVector<int> y_mins;
00436   y_mins.init_to_size(width + 1, box.top());
00437   C_OUTLINE_IT it(&outlines);
00438   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00439     C_OUTLINE* outline = it.data();
00440     ICOORD pos = outline->start_pos();
00441     for (int s = 0; s < outline->pathlength(); ++s) {
00442       if (pos.y() < y_mins[pos.x() - left])
00443         y_mins[pos.x() - left] = pos.y();
00444       pos += outline->step(s);
00445     }
00446   }
00447   // Find the total extent of the bottom or bottom + 1.
00448   int bottom_extent = 0;
00449   for (int x = 0; x <= width; ++x) {
00450     if (y_mins[x] == bottom || y_mins[x] == bottom + 1)
00451       ++bottom_extent;
00452   }
00453   // Find the lowest run longer than the bottom extent that is not the bottom.
00454   int best_min = box.top();
00455   int prev_run = 0;
00456   int prev_y = box.top();
00457   int prev_prev_y = box.top();
00458   for (int x = 0; x < width; x += prev_run) {
00459     // Find the length of the current run.
00460     int y_at_x = y_mins[x];
00461     int run = 1;
00462     while (x + run <= width && y_mins[x + run] == y_at_x) ++run;
00463     if (y_at_x > bottom + 1) {
00464       // Possible contender.
00465       int total_run = run;
00466       // Find extent of current value or +1 to the right of x.
00467       while (x + total_run <= width &&
00468           (y_mins[x + total_run] == y_at_x ||
00469               y_mins[x + total_run] == y_at_x + 1)) ++total_run;
00470       // At least one end has to be higher so it is not a local max.
00471       if (prev_prev_y > y_at_x + 1 || x + total_run > width ||
00472           y_mins[x + total_run] > y_at_x + 1) {
00473         // If the prev_run is at y + 1, then we can add that too. There cannot
00474         // be a suitable run at y before that or we would have found it already.
00475         if (prev_run > 0 && prev_y == y_at_x + 1) total_run += prev_run;
00476         if (total_run > bottom_extent && y_at_x < best_min) {
00477           best_min = y_at_x;
00478         }
00479       }
00480     }
00481     prev_run = run;
00482     prev_prev_y = prev_y;
00483     prev_y = y_at_x;
00484   }
00485   return best_min == box.top() ? bottom : best_min;
00486 }
00487 
00488 static void render_outline_list(C_OUTLINE_LIST *list,
00489                                 int left, int top, Pix* pix) {
00490   C_OUTLINE_IT it(list);
00491   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00492     C_OUTLINE* outline = it.data();
00493     outline->render(left, top, pix);
00494     if (!outline->child()->empty())
00495       render_outline_list(outline->child(), left, top, pix);
00496   }
00497 }
00498 
00499 static void render_outline_list_outline(C_OUTLINE_LIST *list,
00500                                         int left, int top, Pix* pix) {
00501   C_OUTLINE_IT it(list);
00502   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00503     C_OUTLINE* outline = it.data();
00504     outline->render_outline(left, top, pix);
00505   }
00506 }
00507 
00508 // Returns a Pix rendering of the blob. pixDestroy after use.
00509 Pix* C_BLOB::render() {
00510   TBOX box = bounding_box();
00511   Pix* pix = pixCreate(box.width(), box.height(), 1);
00512   render_outline_list(&outlines, box.left(), box.top(), pix);
00513   return pix;
00514 }
00515 
00516 // Returns a Pix rendering of the outline of the blob. (no fill).
00517 // pixDestroy after use.
00518 Pix* C_BLOB::render_outline() {
00519   TBOX box = bounding_box();
00520   Pix* pix = pixCreate(box.width(), box.height(), 1);
00521   render_outline_list_outline(&outlines, box.left(), box.top(), pix);
00522   return pix;
00523 }
00524 
00525 /**********************************************************************
00526  * C_BLOB::plot
00527  *
00528  * Draw the C_BLOB in the given colour.
00529  **********************************************************************/
00530 
00531 #ifndef GRAPHICS_DISABLED
00532 void C_BLOB::plot(ScrollView* window,                // window to draw in
00533                   ScrollView::Color blob_colour,     // main colour
00534                   ScrollView::Color child_colour) {  // for holes
00535   plot_outline_list(&outlines, window, blob_colour, child_colour);
00536 }
00537 // Draws the blob in the given colour, and child_colour, normalized
00538 // using the given denorm, making use of sub-pixel accurate information
00539 // if available.
00540 void C_BLOB::plot_normed(const DENORM& denorm,
00541                          ScrollView::Color blob_colour,
00542                          ScrollView::Color child_colour,
00543                          ScrollView* window) {
00544   plot_normed_outline_list(denorm, &outlines, blob_colour, child_colour,
00545                            window);
00546 }
00547 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines