tesseract 3.04.01

textord/tabfind.h

Go to the documentation of this file.
00001 
00002 // File:        tabfind.h
00003 // Description: Subclass of BBGrid to find tabstops.
00004 // Author:      Ray Smith
00005 // Created:     Fri Mar 21 15:03:01 PST 2008
00006 //
00007 // (C) Copyright 2008, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_TEXTORD_TABFIND_H__
00021 #define TESSERACT_TEXTORD_TABFIND_H__
00022 
00023 #include "alignedblob.h"
00024 #include "tesscallback.h"
00025 #include "tabvector.h"
00026 #include "linefind.h"
00027 
00028 class BLOBNBOX;
00029 class BLOBNBOX_LIST;
00030 class TO_BLOCK;
00031 class ScrollView;
00032 struct Pix;
00033 
00034 namespace tesseract {
00035 
00036 typedef TessResultCallback1<bool, int> WidthCallback;
      // WidthCallback is the callback type handed out by TabFind::WidthCB().
      // NOTE(review): presumably it takes one int (a width) and returns bool -
      // confirm the template argument order against tesscallback.h.
00037 
00038 struct AlignedBlobParams;
00039 class ColPartitionGrid;
00040 
      // NOTE(review): the original comment for this constant is absent from this
      // listing; presumably it is the pixel granularity used when accumulating
      // column width estimates - confirm against its uses in tabfind.cpp.
00042 const int kColumnWidthFactor = 20;
00043 
00053 class TabFind : public AlignedBlob {
        // Finds tabstops (see the file header) and holds the resulting lists of
        // tab vectors (vectors_ and dead_vectors_). Derives from AlignedBlob.
        // NOTE(review): the embedded listing numbers skip in front of several
        // declarations, apparently where comment blocks were stripped. Notes
        // below marked "NOTE(review)" / "presumably" are inferred from names and
        // signatures only and must be confirmed against tabfind.cpp.
00054  public:
        // Constructor.
        // NOTE(review): parameter docs are absent from this listing; presumably
        // gridsize/bleft/tright describe the grid, vlines holds vertical lines,
        // (vertical_x, vertical_y) is the initial vertical direction, and
        // resolution is kept in resolution_ - confirm in tabfind.cpp.
00055   TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
00056           TabVector_LIST* vlines, int vertical_x, int vertical_y,
00057           int resolution);
00058   virtual ~TabFind();
00059 
        // NOTE(review): presumably inserts every blob in the list into the given
        // grid, with h_spread/v_spread choosing whether a blob is spread over
        // neighbouring cells horizontally/vertically - confirm in tabfind.cpp.
00068   void InsertBlobsToGrid(bool h_spread, bool v_spread,
00069                          BLOBNBOX_LIST* blobs,
00070                          BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
00071 
        // NOTE(review): presumably the single-blob counterpart of
        // InsertBlobsToGrid; what the bool result signals is not visible here.
00079   bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob,
00080                   BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
00081   // Calls SetBlobRuleEdges for all the blobs in the given block.
00082   void SetBlockRuleEdges(TO_BLOCK* block);
00083   // Sets the left and right rule and crossing_rules for the blobs in the given
00084   // list by finding the next outermost tabvectors for each blob.
00085   void SetBlobRuleEdges(BLOBNBOX_LIST* blobs);
00086 
00087   // Returns the gutter width of the given TabVector between the given y limits.
00088   // Also returns x-shift to be added to the vector to clear any intersecting
00089   // blobs. The shift is deducted from the returned gutter.
00090   // If ignore_unmergeables is true, then blobs of UnMergeableType are
00091   // ignored as if they don't exist. (Used for text on image.)
00092   // max_gutter_width is used as the maximum width worth searching for in case
00093   // there is nothing near the TabVector.
00094   int GutterWidth(int bottom_y, int top_y, const TabVector& v,
00095                   bool ignore_unmergeables, int max_gutter_width,
00096                   int* required_shift);
        // NOTE(review): presumably computes, for bbox at tab position tab_x, the
        // gutter width and the gap to its neighbour, written through the
        // gutter_width and neighbour_gap pointers - confirm in tabfind.cpp.
00100   void GutterWidthAndNeighbourGap(int tab_x, int mean_height,
00101                                   int max_gutter, bool left,
00102                                   BLOBNBOX* bbox, int* gutter_width,
00103                                   int* neighbour_gap);
00104 
        // NOTE(review): Right/LeftEdgeForBox presumably return the x position of
        // the tab/rule edge found to the right/left of box - confirm.
00111   int RightEdgeForBox(const TBOX& box, bool crossing, bool extended);
00115   int LeftEdgeForBox(const TBOX& box, bool crossing, bool extended);
00116 
        // Right/LeftTabForBox return a TabVector* for the given box; their search
        // limits are set up by SetupTabSearch (see its comment below).
        // NOTE(review): the meaning of crossing/extended and whether NULL can be
        // returned are not visible in this header.
00133   TabVector* RightTabForBox(const TBOX& box, bool crossing, bool extended);
00137   TabVector* LeftTabForBox(const TBOX& box, bool crossing, bool extended);
00138 
        // NOTE(review): presumably true if width is one of the common column
        // widths recorded in column_widths_ - confirm.
00143   bool CommonWidth(int width);
        // NOTE(review): static size comparisons; the thresholds that make two
        // sizes "different" or "very different" are not visible in this header.
00148   static bool DifferentSizes(int size1, int size2);
00153   static bool VeryDifferentSizes(int size1, int size2);
00154 
        // Returns the width callback pointer held in width_cb_.
00158   WidthCallback* WidthCB() {
00159     return width_cb_;
00160   }
00161 
        // Returns image_origin_, the top-left of the image in deskewed coords.
00165   const ICOORD& image_origin() const {
00166     return image_origin_;
00167   }
00168 
00169  protected:
        // Accessors returning pointers to the vectors_ and dead_vectors_ members
        // (the lists themselves, not copies).
00173   TabVector_LIST* vectors() {
00174     return &vectors_;
00175   }
00176   TabVector_LIST* dead_vectors() {
00177     return &dead_vectors_;
00178   }
00179 
        // NOTE(review): presumably the top-level function that finds TabVectors
        // in an input page block (the counterpart of DontFindTabVectors below);
        // the meaning of the bool result is not visible here.
00187   bool FindTabVectors(TabVector_LIST* hlines,
00188                       BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
00189                       int min_gutter_width, double tabfind_aligned_gap_fraction,
00190                       ColPartitionGrid* part_grid,
00191                       FCOORD* deskew, FCOORD* reskew);
00192 
00193   // Top-level function to not find TabVectors in an input page block,
00194   // but setup for single column mode.
00195   void DontFindTabVectors(BLOBNBOX_LIST* image_blobs,
00196                           TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew);
00197 
00198   // Cleans up the lists of blobs in the block ready for use by TabFind.
00199   // Large blobs that look like text are moved to the main blobs list.
00200   // Main blobs that are superseded by the image blobs are deleted.
00201   void TidyBlobs(TO_BLOCK* block);
00202 
00203   // Helper function to setup search limits for *TabForBox.
00204   void SetupTabSearch(int x, int y, int* min_key, int* max_key);
00205 
        // NOTE(review): presumably draws the tab vectors into tab_win and returns
        // the window that was used - confirm in tabfind.cpp.
00209   ScrollView* DisplayTabVectors(ScrollView* tab_win);
00210 
00211   // First part of FindTabVectors, which may be used twice if the text
00212   // is mostly of vertical alignment.  If find_vertical_text flag is
00213   // true, this finds vertical textlines in possibly rotated blob space.
00214   // In other words, when the page has mostly vertical lines and is rotated,
00215   // setting this to true will find horizontal lines on the page.
00216   // tabfind_aligned_gap_fraction should be the value of parameter
00217   // textord_tabfind_aligned_gap_fraction
00218   ScrollView* FindInitialTabVectors(BLOBNBOX_LIST* image_blobs,
00219                                     int min_gutter_width,
00220                                     double tabfind_aligned_gap_fraction,
00221                                     TO_BLOCK* block);
00222 
00223   // Apply the given rotation to the given list of blobs.
00224   static void RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs);
00225 
00226   // Flip the vertical and horizontal lines and rotate the grid ready
00227   // for working on the rotated image.
00228   // The min_gutter_width will be adjusted to the median gutter width between
00229   // vertical tabs to set a better threshold for tabboxes in the 2nd pass.
00230   void ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate,
00231                             TabVector_LIST* horizontal_lines,
00232                             int* min_gutter_width);
00233 
00234   // Clear the grid and get rid of the tab vectors, but not separators,
00235   // ready to start again.
00236   void Reset();
00237 
00238   // Reflect the separator tab vectors and the grids in the y-axis.
00239   // Can only be called after Reset!
00240   void ReflectInYAxis();
00241 
00242  private:
00243   // For each box in the grid, decide whether it is a candidate tab-stop,
00244   // and if so add it to the left and right tab boxes.
00245   // tabfind_aligned_gap_fraction should be the value of parameter
00246   // textord_tabfind_aligned_gap_fraction
00247   ScrollView* FindTabBoxes(int min_gutter_width,
00248                            double tabfind_aligned_gap_fraction);
00249 
00250   // Return true if this box looks like a candidate tab stop, and set
00251   // the appropriate tab type(s) to TT_UNCONFIRMED.
00252   // tabfind_aligned_gap_fraction should be the value of parameter
00253   // textord_tabfind_aligned_gap_fraction
00254   bool TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width,
00255                       double tabfind_aligned_gap_fraction);
00256 
00257   // Returns true if there is nothing in the rectangle of width min_gutter to
00258   // the left of bbox.
00259   bool ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter);
00260   // Returns true if there is nothing in the rectangle of width min_gutter to
00261   // the right of bbox.
00262   bool ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter);
00263   // Returns true if there is nothing in the given search_box that vertically
00264   // overlaps target_box other than target_box itself.
00265   bool NothingYOverlapsInBox(const TBOX& search_box, const TBOX& target_box);
00266 
00267   // Fills the list of TabVector with the tabstops found in the grid,
00268   // and estimates the logical vertical direction.
00269   void FindAllTabVectors(int min_gutter_width);
00270   // Helper for FindAllTabVectors finds the vectors of a particular type.
00271   int FindTabVectors(int search_size_multiple,
00272                      TabAlignment alignment,
00273                      int min_gutter_width,
00274                      TabVector_LIST* vectors,
00275                      int* vertical_x, int* vertical_y);
00276   // Finds a vector corresponding to a tabstop running through the
00277   // given box of the given alignment type.
00278   // search_size_multiple is a multiple of height used to control
00279   // the size of the search.
00280   // vertical_x and y are updated with an estimate of the real
00281   // vertical direction. (skew finding.)
00282   // Returns NULL if no decent tabstop can be found.
00283   TabVector* FindTabVector(int search_size_multiple, int min_gutter_width,
00284                            TabAlignment alignment,
00285                            BLOBNBOX* bbox,
00286                            int* vertical_x, int* vertical_y);
00287 
00288   // Set the vertical_skew_ member from the given vector and refit
00289   // all vectors parallel to the skew vector.
00290   void SetVerticalSkewAndParellelize(int vertical_x, int vertical_y);
00291 
00292   // Sort all the current vectors using the vertical_skew_ vector.
00293   void SortVectors();
00294 
00295   // Evaluate all the current tab vectors.
00296   void EvaluateTabs();
00297 
00298   // Trace textlines from one side to the other of each tab vector, saving
00299   // the most frequent column widths found in a list so that a given width
00300   // can be tested for being a common width with a simple callback function.
00301   void ComputeColumnWidths(ScrollView* tab_win,
00302                            ColPartitionGrid* part_grid);
00303 
00304   // Finds column width and:
00305   //   if col_widths is not null (pass1):
00306   //     pair-up tab vectors with existing ColPartitions and accumulate widths.
00307   //   else (pass2):
00308   //     find the largest real partition width for each recorded column width,
00309   //     to be used as the minimum acceptable width.
00310   void ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
00311                                      STATS* col_widths);
00312 
00313   // Helper makes the list of common column widths in column_widths_ from the
00314   // input col_widths. Destroys the content of col_widths by repeatedly
00315   // finding the mode and erasing the peak.
00316   void MakeColumnWidths(int col_widths_size, STATS* col_widths);
00317 
00318   // Mark blobs as being in a vertical text line where that is the case.
00319   void MarkVerticalText();
00320 
00321   // Returns the median gutter width between pairs of matching tab vectors
00322   // assuming they are sorted left-to-right.  If there are too few data
00323   // points (< kMinLinesInColumn), then 0 is returned.
00324   int FindMedianGutterWidth(TabVector_LIST* tab_vectors);
00325 
00326   // Find the next adjacent (to left or right) blob on this text line,
00327   // with the constraint that it must vertically significantly overlap
00328   // the [top_y, bottom_y] range.
00329   // If ignore_images is true, then blobs with aligned_text() < 0 are treated
00330   // as if they do not exist.
00331   BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox,
00332                          bool look_left, bool ignore_images,
00333                          double min_overlap_fraction,
00334                          int gap_limit, int top_y, int bottom_y);
00335 
00336   // Add a bi-directional partner relationship between the left
00337   // and the right. If one (or both) of the vectors is a separator,
00338   // extend a nearby extendable vector or create a new one of the
00339   // correct type, using the given left or right blob as a guide.
00340   void AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
00341                         TabVector* left, TabVector* right);
00342 
        // NOTE(review): the original documentation for CleanupTabs is absent
        // from this listing; see tabfind.cpp for what it removes or merges.
00347   void CleanupTabs();
00348 
        // NOTE(review): presumably deskews the lines and blobs and outputs the
        // rotation and its inverse through deskew/reskew (compare
        // ComputeDeskewVectors below); the bool result's meaning is not visible.
00354   bool Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
00355               TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew);
00356 
00357   // Compute the rotation required to deskew, and its inverse rotation.
00358   void ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew);
00359 
        // NOTE(review): the original documentation for ApplyTabConstraints is
        // absent from this listing; see tabfind.cpp.
00364   void ApplyTabConstraints();
00365 
00366  protected:
00367   ICOORD vertical_skew_;          ///< Estimate of true vertical in this image.
00368   int resolution_;                ///< Of source image in pixels per inch.
00369  private:
00370   ICOORD image_origin_;           ///< Top-left of image in deskewed coords
00371   TabVector_LIST vectors_;        ///< List of rule line and tabstops.
00372   TabVector_IT v_it_;             ///< Iterator for searching vectors_.
00373   TabVector_LIST dead_vectors_;   ///< Separators and unpartnered tab vectors.
00374   // List of commonly occurring width ranges with x=min and y=max.
00375   ICOORDELT_LIST column_widths_;  ///< List of commonly occurring width ranges.
        // Callback pointer returned by WidthCB().
        // NOTE(review): who allocates and frees it is not visible in this header.
00377   WidthCallback* width_cb_;
00378   // Sets of bounding boxes that are candidate tab stops.
00379   GenericVector<BLOBNBOX*> left_tab_boxes_;
00380   GenericVector<BLOBNBOX*> right_tab_boxes_;
00381 };
00382 
00383 }  // namespace tesseract.
00384 
00385 #endif  // TESSERACT_TEXTORD_TABFIND_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines