// tesseract 3.04.01
00001 00002 // File: colpartitionrid.h 00003 // Description: Class collecting code that acts on a BBGrid of ColPartitions. 00004 // Author: Ray Smith 00005 // Created: Mon Oct 05 08:42:01 PDT 2009 00006 // 00007 // (C) Copyright 2009, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_TEXTORD_COLPARTITIONGRID_H__ 00021 #define TESSERACT_TEXTORD_COLPARTITIONGRID_H__ 00022 00023 #include "bbgrid.h" 00024 #include "colpartition.h" 00025 #include "colpartitionset.h" 00026 00027 namespace tesseract { 00028 00029 class TabFind; 00030 00031 // ColPartitionGrid is a BBGrid of ColPartition. 00032 // It collects functions that work on the grid. 00033 class ColPartitionGrid : public BBGrid<ColPartition, 00034 ColPartition_CLIST, 00035 ColPartition_C_IT> { 00036 public: 00037 ColPartitionGrid(); 00038 ColPartitionGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); 00039 00040 virtual ~ColPartitionGrid(); 00041 00042 // Handles a click event in a display window. 00043 void HandleClick(int x, int y); 00044 00045 // Merges ColPartitions in the grid that look like they belong in the same 00046 // textline. 00047 // For all partitions in the grid, calls the box_cb permanent callback 00048 // to compute the search box, searches the box, and if a candidate is found, 00049 // calls the confirm_cb to check any more rules. 
If the confirm_cb returns 00050 // true, then the partitions are merged. 00051 // Both callbacks are deleted before returning. 00052 void Merges(TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb, 00053 TessResultCallback2<bool, const ColPartition*, 00054 const ColPartition*>* confirm_cb); 00055 00056 // For the given partition, calls the box_cb permanent callback 00057 // to compute the search box, searches the box, and if a candidate is found, 00058 // calls the confirm_cb to check any more rules. If the confirm_cb returns 00059 // true, then the partitions are merged. 00060 // Returns true if the partition is consumed by one or more merges. 00061 bool MergePart(TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb, 00062 TessResultCallback2<bool, const ColPartition*, 00063 const ColPartition*>* confirm_cb, 00064 ColPartition* part); 00065 00066 // Computes and returns the total overlap of all partitions in the grid. 00067 // If overlap_grid is non-null, it is filled with a grid that holds empty 00068 // partitions representing the union of all overlapped partitions. 00069 int ComputeTotalOverlap(ColPartitionGrid** overlap_grid); 00070 00071 // Finds all the ColPartitions in the grid that overlap with the given 00072 // box and returns them SortByBoxLeft(ed) and uniqued in the given list. 00073 // Any partition equal to not_this (may be NULL) is excluded. 00074 void FindOverlappingPartitions(const TBOX& box, const ColPartition* not_this, 00075 ColPartition_CLIST* parts); 00076 00077 // Finds and returns the best candidate ColPartition to merge with part, 00078 // selected from the candidates list, based on the minimum increase in 00079 // pairwise overlap among all the partitions overlapped by the combined box. 00080 // If overlap_increase is not NULL then it returns the increase in overlap 00081 // that would result from the merge. 00082 // See colpartitiongrid.cpp for a diagram. 
00083 ColPartition* BestMergeCandidate( 00084 const ColPartition* part, ColPartition_CLIST* candidates, bool debug, 00085 TessResultCallback2<bool, const ColPartition*, 00086 const ColPartition*>* confirm_cb, 00087 int* overlap_increase); 00088 00089 // Split partitions where it reduces overlap between their bounding boxes. 00090 // ColPartitions are after all supposed to be a partitioning of the blobs 00091 // AND of the space on the page! 00092 // Blobs that cause overlaps get removed, put in individual partitions 00093 // and added to the big_parts list. They are most likely characters on 00094 // 2 textlines that touch, or something big like a dropcap. 00095 void SplitOverlappingPartitions(ColPartition_LIST* big_parts); 00096 00097 // Filters partitions of source_type by looking at local neighbours. 00098 // Where a majority of neighbours have a text type, the partitions are 00099 // changed to text, where the neighbours have image type, they are changed 00100 // to image, and partitions that have no definite neighbourhood type are 00101 // left unchanged. 00102 // im_box and rerotation are used to map blob coordinates onto the 00103 // nontext_map, which is used to prevent the spread of text neighbourhoods 00104 // into images. 00105 // Returns true if anything was changed. 00106 bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix* nontext_map, 00107 const TBOX& im_box, const FCOORD& rerotation); 00108 00109 // Compute the mean RGB of the light and dark pixels in each ColPartition 00110 // and also the rms error in the linearity of color. 00111 void ComputePartitionColors(Pix* scaled_color, int scaled_factor, 00112 const FCOORD& rerotation); 00113 00114 // Reflects the grid and its colpartitions in the y-axis, assuming that 00115 // all blob boxes have already been done. 00116 void ReflectInYAxis(); 00117 00118 // Rotates the grid and its colpartitions by the given angle, assuming that 00119 // all blob boxes have already been done. 
00120 void Deskew(const FCOORD& deskew); 00121 00122 // Transforms the grid of partitions to the output blocks, putting each 00123 // partition into a separate block. We don't really care about the order, 00124 // as we just want to get as much text as possible without trying to organize 00125 // it into proper blocks or columns. 00126 void ExtractPartitionsAsBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); 00127 00128 // Sets the left and right tabs of the partitions in the grid. 00129 void SetTabStops(TabFind* tabgrid); 00130 00131 // Makes the ColPartSets and puts them in the PartSetVector ready 00132 // for finding column bounds. Returns false if no partitions were found. 00133 // Each ColPartition in the grid is placed in a single ColPartSet based 00134 // on the bottom-left of its bounding box. 00135 bool MakeColPartSets(PartSetVector* part_sets); 00136 00137 // Makes a single ColPartitionSet consisting of a single ColPartition that 00138 // represents the total horizontal extent of the significant content on the 00139 // page. Used for the single column setting in place of automatic detection. 00140 // Returns NULL if the page is empty of significant content. 00141 ColPartitionSet* MakeSingleColumnSet(WidthCallback* cb); 00142 00143 // Mark the BLOBNBOXes in each partition as being owned by that partition. 00144 void ClaimBoxes(); 00145 00146 // Retypes all the blobs referenced by the partitions in the grid. 00147 // Image blobs are sliced on the grid boundaries to give the tab finder 00148 // a better handle on the edges of the images, and the actual blobs are 00149 // returned in the im_blobs list, as they are not owned by the block. 00150 void ReTypeBlobs(BLOBNBOX_LIST* im_blobs); 00151 00152 // The boxes within the partitions have changed (by deskew) so recompute 00153 // the bounds of all the partitions and reinsert them into the grid. 
00154 void RecomputeBounds(int gridsize, const ICOORD& bleft, 00155 const ICOORD& tright, const ICOORD& vertical); 00156 00157 // Improves the margins of the ColPartitions in the grid by calling 00158 // FindPartitionMargins on each. 00159 void GridFindMargins(ColPartitionSet** best_columns); 00160 00161 // Improves the margins of the ColPartitions in the list by calling 00162 // FindPartitionMargins on each. 00163 void ListFindMargins(ColPartitionSet** best_columns, 00164 ColPartition_LIST* parts); 00165 00166 // Deletes all the partitions in the grid after disowning all the blobs. 00167 void DeleteParts(); 00168 00169 // Deletes all the partitions in the grid that are of type BRT_UNKNOWN and 00170 // all the blobs in them. 00171 void DeleteUnknownParts(TO_BLOCK* block); 00172 00173 // Deletes all the partitions in the grid that are NOT of flow type 00174 // BTFT_LEADER. 00175 void DeleteNonLeaderParts(); 00176 00177 // Finds and marks text partitions that represent figure captions. 00178 void FindFigureCaptions(); 00179 00182 // For every ColPartition in the grid, finds its upper and lower neighbours. 00183 void FindPartitionPartners(); 00184 // Finds the best partner in the given direction for the given partition. 00185 // Stores the result with AddPartner. 00186 void FindPartitionPartners(bool upper, ColPartition* part); 00187 // Finds the best partner in the given direction for the given partition. 00188 // Stores the result with AddPartner. 00189 void FindVPartitionPartners(bool to_the_left, ColPartition* part); 00190 // For every ColPartition with multiple partners in the grid, reduces the 00191 // number of partners to 0 or 1. If get_desperate is true, goes to more 00192 // desperate merge methods to merge flowing text before breaking partnerships. 00193 void RefinePartitionPartners(bool get_desperate); 00194 00195 private: 00196 // Finds and returns a list of candidate ColPartitions to merge with part. 
00197 // The candidates must overlap search_box, and when merged must not 00198 // overlap any other partitions that are not overlapped by each individually. 00199 void FindMergeCandidates(const ColPartition* part, const TBOX& search_box, 00200 bool debug, ColPartition_CLIST* candidates); 00201 00202 // Smoothes the region type/flow type of the given part by looking at local 00203 // neighbours and the given image mask. Searches a padded rectangle with the 00204 // padding truncated on one size of the part's box in turn for each side, 00205 // using the result (if any) that has the least distance to all neighbours 00206 // that contribute to the decision. This biases in favor of rectangular 00207 // regions without completely enforcing them. 00208 // If a good decision cannot be reached, the part is left unchanged. 00209 // im_box and rerotation are used to map blob coordinates onto the 00210 // nontext_map, which is used to prevent the spread of text neighbourhoods 00211 // into images. 00212 // Returns true if the partition was changed. 00213 bool SmoothRegionType(Pix* nontext_map, 00214 const TBOX& im_box, 00215 const FCOORD& rerotation, 00216 bool debug, 00217 ColPartition* part); 00218 // Executes the search for SmoothRegionType in a single direction. 00219 // Creates a bounding box that is padded in all directions except direction, 00220 // and searches it for other partitions. Finds the nearest collection of 00221 // partitions that makes a decisive result (if any) and returns the type 00222 // and the distance of the collection. If there are any pixels in the 00223 // nontext_map, then the decision is biased towards image. 
00224 BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, 00225 Pix* nontext_map, 00226 const TBOX& im_box, 00227 const FCOORD& rerotation, 00228 bool debug, 00229 const ColPartition& part, 00230 int* best_distance); 00231 // Counts the partitions in the given search_box by appending the gap 00232 // distance (scaled by dist_scaling) of the part from the base_part to the 00233 // vector of the appropriate type for the partition. Prior to return, the 00234 // vectors in the dists array are sorted in increasing order. 00235 // dists must be an array of GenericVectors of size NPT_COUNT. 00236 void AccumulatePartDistances(const ColPartition& base_part, 00237 const ICOORD& dist_scaling, 00238 const TBOX& search_box, 00239 Pix* nontext_map, 00240 const TBOX& im_box, 00241 const FCOORD& rerotation, 00242 bool debug, 00243 GenericVector<int>* dists); 00244 00245 // Improves the margins of the ColPartition by searching for 00246 // neighbours that vertically overlap significantly. 00247 void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part); 00248 00249 // Starting at x, and going in the specified direction, up to x_limit, finds 00250 // the margin for the given y range by searching sideways, 00251 // and ignoring not_this. 00252 int FindMargin(int x, bool right_to_left, int x_limit, 00253 int y_bottom, int y_top, const ColPartition* not_this); 00254 }; 00255 00256 } // namespace tesseract. 00257 00258 #endif // TESSERACT_TEXTORD_COLPARTITIONGRID_H__