tesseract 3.04.01

textord/colpartitiongrid.h

Go to the documentation of this file.
00001 
00002 // File:        colpartitiongrid.h
00003 // Description: Class collecting code that acts on a BBGrid of ColPartitions.
00004 // Author:      Ray Smith
00005 // Created:     Mon Oct 05 08:42:01 PDT 2009
00006 //
00007 // (C) Copyright 2009, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_TEXTORD_COLPARTITIONGRID_H__
00021 #define TESSERACT_TEXTORD_COLPARTITIONGRID_H__
00022 
00023 #include "bbgrid.h"
00024 #include "colpartition.h"
00025 #include "colpartitionset.h"
00026 
00027 namespace tesseract {
00028 
00029 class TabFind;
00030 
00031 // ColPartitionGrid is a BBGrid of ColPartition.
00032 // It collects functions that work on the grid.
00033 class ColPartitionGrid : public BBGrid<ColPartition,
00034                                        ColPartition_CLIST,
00035                                        ColPartition_C_IT> {
00036  public:
00037   ColPartitionGrid();
00038   ColPartitionGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
00039 
00040   virtual ~ColPartitionGrid();
00041 
00042   // Handles a click event in a display window.
00043   void HandleClick(int x, int y);
00044 
00045   // Merges ColPartitions in the grid that look like they belong in the same
00046   // textline.
00047   // For all partitions in the grid, calls the box_cb permanent callback
00048   // to compute the search box, searches the box, and if a candidate is found,
00049   // calls the confirm_cb to check any more rules. If the confirm_cb returns
00050   // true, then the partitions are merged.
00051   // Both callbacks are deleted before returning.
00052   void Merges(TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb,
00053               TessResultCallback2<bool, const ColPartition*,
00054                                   const ColPartition*>* confirm_cb);
00055 
00056   // For the given partition, calls the box_cb permanent callback
00057   // to compute the search box, searches the box, and if a candidate is found,
00058   // calls the confirm_cb to check any more rules. If the confirm_cb returns
00059   // true, then the partitions are merged.
00060   // Returns true if the partition is consumed by one or more merges.
00061   bool MergePart(TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb,
00062                  TessResultCallback2<bool, const ColPartition*,
00063                                      const ColPartition*>* confirm_cb,
00064                  ColPartition* part);
00065 
00066   // Computes and returns the total overlap of all partitions in the grid.
00067   // If overlap_grid is non-null, it is filled with a grid that holds empty
00068   // partitions representing the union of all overlapped partitions.
00069   int ComputeTotalOverlap(ColPartitionGrid** overlap_grid);
00070 
00071   // Finds all the ColPartitions in the grid that overlap with the given
00072   // box and returns them sorted with SortByBoxLeft and uniqued in parts.
00073   // Any partition equal to not_this (may be NULL) is excluded.
00074   void FindOverlappingPartitions(const TBOX& box, const ColPartition* not_this,
00075                                  ColPartition_CLIST* parts);
00076 
00077   // Finds and returns the best candidate ColPartition to merge with part,
00078   // selected from the candidates list, based on the minimum increase in
00079   // pairwise overlap among all the partitions overlapped by the combined box.
00080   // If overlap_increase is not NULL then it returns the increase in overlap
00081   // that would result from the merge.
00082   // See colpartitiongrid.cpp for a diagram.
00083   ColPartition* BestMergeCandidate(
00084       const ColPartition* part, ColPartition_CLIST* candidates, bool debug,
00085       TessResultCallback2<bool, const ColPartition*,
00086                           const ColPartition*>* confirm_cb,
00087       int* overlap_increase);
00088 
00089   // Split partitions where it reduces overlap between their bounding boxes.
00090   // ColPartitions are after all supposed to be a partitioning of the blobs
00091   // AND of the space on the page!
00092   // Blobs that cause overlaps get removed, put in individual partitions
00093   // and added to the big_parts list. They are most likely characters on
00094   // 2 textlines that touch, or something big like a dropcap.
00095   void SplitOverlappingPartitions(ColPartition_LIST* big_parts);
00096 
00097   // Filters partitions of source_type by looking at local neighbours.
00098   // Where a majority of neighbours have a text type, the partitions are
00099   // changed to text, where the neighbours have image type, they are changed
00100   // to image, and partitions that have no definite neighbourhood type are
00101   // left unchanged.
00102   // im_box and rerotation are used to map blob coordinates onto the
00103   // nontext_map, which is used to prevent the spread of text neighbourhoods
00104   // into images.
00105   // Returns true if anything was changed.
00106   bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix* nontext_map,
00107                             const TBOX& im_box, const FCOORD& rerotation);
00108 
00109   // Compute the mean RGB of the light and dark pixels in each ColPartition
00110   // and also the rms error in the linearity of color.
00111   void ComputePartitionColors(Pix* scaled_color, int scaled_factor,
00112                               const FCOORD& rerotation);
00113 
00114   // Reflects the grid and its colpartitions in the y-axis, assuming that
00115   // all blob boxes have already been done.
00116   void ReflectInYAxis();
00117 
00118   // Rotates the grid and its colpartitions by the given angle, assuming that
00119   // all blob boxes have already been done.
00120   void Deskew(const FCOORD& deskew);
00121 
00122   // Transforms the grid of partitions to the output blocks, putting each
00123   // partition into a separate block. We don't really care about the order,
00124   // as we just want to get as much text as possible without trying to organize
00125   // it into proper blocks or columns.
00126   void ExtractPartitionsAsBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
00127 
00128   // Sets the left and right tabs of the partitions in the grid.
00129   void SetTabStops(TabFind* tabgrid);
00130 
00131   // Makes the ColPartSets and puts them in the PartSetVector ready
00132   // for finding column bounds. Returns false if no partitions were found.
00133   // Each ColPartition in the grid is placed in a single ColPartSet based
00134   // on the bottom-left of its bounding box.
00135   bool MakeColPartSets(PartSetVector* part_sets);
00136 
00137   // Makes a single ColPartitionSet consisting of a single ColPartition that
00138   // represents the total horizontal extent of the significant content on the
00139   // page. Used for the single column setting in place of automatic detection.
00140   // Returns NULL if the page is empty of significant content.
00141   ColPartitionSet* MakeSingleColumnSet(WidthCallback* cb);
00142 
00143   // Mark the BLOBNBOXes in each partition as being owned by that partition.
00144   void ClaimBoxes();
00145 
00146   // Retypes all the blobs referenced by the partitions in the grid.
00147   // Image blobs are sliced on the grid boundaries to give the tab finder
00148   // a better handle on the edges of the images, and the actual blobs are
00149   // returned in the im_blobs list, as they are not owned by the block.
00150   void ReTypeBlobs(BLOBNBOX_LIST* im_blobs);
00151 
00152   // The boxes within the partitions have changed (by deskew) so recompute
00153   // the bounds of all the partitions and reinsert them into the grid.
00154   void RecomputeBounds(int gridsize, const ICOORD& bleft,
00155                        const ICOORD& tright, const ICOORD& vertical);
00156 
00157   // Improves the margins of the ColPartitions in the grid by calling
00158   // FindPartitionMargins on each.
00159   void GridFindMargins(ColPartitionSet** best_columns);
00160 
00161   // Improves the margins of the ColPartitions in the list by calling
00162   // FindPartitionMargins on each.
00163   void ListFindMargins(ColPartitionSet** best_columns,
00164                        ColPartition_LIST* parts);
00165 
00166   // Deletes all the partitions in the grid after disowning all the blobs.
00167   void DeleteParts();
00168 
00169   // Deletes all the partitions in the grid that are of type BRT_UNKNOWN and
00170   // all the blobs in them.
00171   void DeleteUnknownParts(TO_BLOCK* block);
00172 
00173   // Deletes all the partitions in the grid that are NOT of flow type
00174   // BTFT_LEADER.
00175   void DeleteNonLeaderParts();
00176 
00177   // Finds and marks text partitions that represent figure captions.
00178   void FindFigureCaptions();
00179 
00182   // For every ColPartition in the grid, finds its upper and lower neighbours.
00183   void FindPartitionPartners();
00184   // Finds the best upper or lower partner (selected by upper) for the given
00185   // partition. Stores the result with AddPartner.
00186   void FindPartitionPartners(bool upper, ColPartition* part);
00187   // Finds the best left or right partner (selected by to_the_left) for the
00188   // given partition. Stores the result with AddPartner.
00189   void FindVPartitionPartners(bool to_the_left, ColPartition* part);
00190   // For every ColPartition with multiple partners in the grid, reduces the
00191   // number of partners to 0 or 1. If get_desperate is true, goes to more
00192   // desperate merge methods to merge flowing text before breaking partnerships.
00193   void RefinePartitionPartners(bool get_desperate);
00194 
00195  private:
00196   // Finds and returns a list of candidate ColPartitions to merge with part.
00197   // The candidates must overlap search_box, and when merged must not
00198   // overlap any other partitions that are not overlapped by each individually.
00199   void FindMergeCandidates(const ColPartition* part, const TBOX& search_box,
00200                            bool debug, ColPartition_CLIST* candidates);
00201 
00202   // Smooths the region type/flow type of the given part by looking at local
00203   // neighbours and the given image mask. Searches a padded rectangle with the
00204   // padding truncated on one side of the part's box in turn for each side,
00205   // using the result (if any) that has the least distance to all neighbours
00206   // that contribute to the decision. This biases in favor of rectangular
00207   // regions without completely enforcing them.
00208   // If a good decision cannot be reached, the part is left unchanged.
00209   // im_box and rerotation are used to map blob coordinates onto the
00210   // nontext_map, which is used to prevent the spread of text neighbourhoods
00211   // into images.
00212   // Returns true if the partition was changed.
00213   bool SmoothRegionType(Pix* nontext_map,
00214                         const TBOX& im_box,
00215                         const FCOORD& rerotation,
00216                         bool debug,
00217                         ColPartition* part);
00218   // Executes the search for SmoothRegionType in a single direction.
00219   // Creates a bounding box that is padded in all directions except direction,
00220   // and searches it for other partitions. Finds the nearest collection of
00221   // partitions that makes a decisive result (if any) and returns the type
00222   // and the distance of the collection. If there are any pixels in the
00223   // nontext_map, then the decision is biased towards image.
00224   BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction,
00225                                       Pix* nontext_map,
00226                                       const TBOX& im_box,
00227                                       const FCOORD& rerotation,
00228                                       bool debug,
00229                                       const ColPartition& part,
00230                                       int* best_distance);
00231   // Counts the partitions in the given search_box by appending the gap
00232   // distance (scaled by dist_scaling) of the part from the base_part to the
00233   // vector of the appropriate type for the partition. Prior to return, the
00234   // vectors in the dists array are sorted in increasing order.
00235   // dists must be an array of GenericVectors of size NPT_COUNT.
00236   void AccumulatePartDistances(const ColPartition& base_part,
00237                                const ICOORD& dist_scaling,
00238                                const TBOX& search_box,
00239                                Pix* nontext_map,
00240                                const TBOX& im_box,
00241                                const FCOORD& rerotation,
00242                                bool debug,
00243                                GenericVector<int>* dists);
00244 
00245   // Improves the margins of the ColPartition by searching for
00246   // neighbours that vertically overlap significantly.
00247   void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part);
00248 
00249   // Starting at x, and going in the specified direction, up to x_limit, finds
00250   // the margin for the given y range by searching sideways,
00251   // and ignoring not_this.
00252   int FindMargin(int x, bool right_to_left, int x_limit,
00253                  int y_bottom, int y_top, const ColPartition* not_this);
00254 };
00255 
00256 }  // namespace tesseract.
00257 
00258 #endif  // TESSERACT_TEXTORD_COLPARTITIONGRID_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines