// tesseract 3.04.01
00001 00002 // File: strokewidth.h 00003 // Description: Subclass of BBGrid to find uniformity of strokewidth. 00004 // Author: Ray Smith 00005 // Created: Mon Mar 31 16:17:01 PST 2008 00006 // 00007 // (C) Copyright 2008, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_TEXTORD_STROKEWIDTH_H__ 00021 #define TESSERACT_TEXTORD_STROKEWIDTH_H__ 00022 00023 #include "blobbox.h" // BlobNeighourDir. 00024 #include "blobgrid.h" // Base class. 00025 #include "colpartitiongrid.h" 00026 #include "textlineprojection.h" 00027 00028 class DENORM; 00029 class ScrollView; 00030 class TO_BLOCK; 00031 00032 namespace tesseract { 00033 00034 class ColPartition_LIST; 00035 class TabFind; 00036 class TextlineProjection; 00037 00038 // Misc enums to clarify bool arguments for direction-controlling args. 00039 enum LeftOrRight { 00040 LR_LEFT, 00041 LR_RIGHT 00042 }; 00043 00044 // Return value from FindInitialPartitions indicates detection of severe 00045 // skew or noise. 00046 enum PartitionFindResult { 00047 PFR_OK, // Everything is OK. 00048 PFR_SKEW, // Skew was detected and rotated. 00049 PFR_NOISE // Noise was detected and removed. 00050 }; 00051 00057 class StrokeWidth : public BlobGrid { 00058 public: 00059 StrokeWidth(int gridsize, const ICOORD& bleft, const ICOORD& tright); 00060 virtual ~StrokeWidth(); 00061 00062 // Sets the neighbours member of the medium-sized blobs in the block. 
00063 // Searches on 4 sides of each blob for similar-sized, similar-strokewidth 00064 // blobs and sets pointers to the good neighbours. 00065 void SetNeighboursOnMediumBlobs(TO_BLOCK* block); 00066 00067 // Sets the neighbour/textline writing direction members of the medium 00068 // and large blobs with optional repair of broken CJK characters first. 00069 // Repair of broken CJK is needed here because broken CJK characters 00070 // can fool the textline direction detection algorithm. 00071 void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, 00072 bool cjk_merge, 00073 TO_BLOCK* input_block); 00074 00075 // To save computation, the process of generating partitions is broken 00076 // into the following 4 steps: 00077 // TestVerticalTextDirection 00078 // CorrectForRotation (used only if a rotation is to be applied) 00079 // FindLeaderPartitions 00080 // GradeBlobsIntoPartitions. 00081 // These functions are all required, in sequence, except for 00082 // CorrectForRotation, which is not needed if no rotation is applied. 00083 00084 // Types all the blobs as vertical or horizontal text or unknown and 00085 // returns true if the majority are vertical. 00086 // If the blobs are rotated, it is necessary to call CorrectForRotation 00087 // after rotating everything, otherwise the work done here will be enough. 00088 // If osd_blobs is not null, a list of blobs from the dominant textline 00089 // direction are returned for use in orientation and script detection. 00090 // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio. 00091 bool TestVerticalTextDirection(double find_vertical_text_ratio, 00092 TO_BLOCK* block, 00093 BLOBNBOX_CLIST* osd_blobs); 00094 00095 // Corrects the data structures for the given rotation. 00096 void CorrectForRotation(const FCOORD& rerotation, 00097 ColPartitionGrid* part_grid); 00098 00099 // Finds leader partitions and inserts them into the give grid. 
00100 void FindLeaderPartitions(TO_BLOCK* block, 00101 ColPartitionGrid* part_grid); 00102 00103 // Finds and marks noise those blobs that look like bits of vertical lines 00104 // that would otherwise screw up layout analysis. 00105 void RemoveLineResidue(ColPartition_LIST* big_part_list); 00106 00107 // Types all the blobs as vertical text or horizontal text or unknown and 00108 // puts them into initial ColPartitions in the supplied part_grid. 00109 // rerotation determines how to get back to the image coordinates from the 00110 // blob coordinates (since they may have been rotated for vertical text). 00111 // block is the single block for the whole page or rectangle to be OCRed. 00112 // nontext_pix (full-size), is a binary mask used to prevent merges across 00113 // photo/text boundaries. It is not kept beyond this function. 00114 // denorm provides a mapping back to the image from the current blob 00115 // coordinate space. 00116 // projection provides a measure of textline density over the image and 00117 // provides functions to assist with diacritic detection. It should be a 00118 // pointer to a new TextlineProjection, and will be setup here. 00119 // part_grid is the output grid of textline partitions. 00120 // Large blobs that cause overlap are put in separate partitions and added 00121 // to the big_parts list. 00122 void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, 00123 const FCOORD& rerotation, TO_BLOCK* block, 00124 Pix* nontext_pix, const DENORM* denorm, 00125 bool cjk_script, TextlineProjection* projection, 00126 BLOBNBOX_LIST* diacritic_blobs, 00127 ColPartitionGrid* part_grid, 00128 ColPartition_LIST* big_parts); 00129 00130 // Handles a click event in a display window. 00131 virtual void HandleClick(int x, int y); 00132 00133 private: 00134 // Computes the noise_density_ by summing the number of elements in a 00135 // neighbourhood of each grid cell. 
00136 void ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid); 00137 00138 // Detects and marks leader dots/dashes. 00139 // Leaders are horizontal chains of small or noise blobs that look 00140 // monospace according to ColPartition::MarkAsLeaderIfMonospaced(). 00141 // Detected leaders become the only occupants of the block->small_blobs list. 00142 // Non-leader small blobs get moved to the blobs list. 00143 // Non-leader noise blobs remain singletons in the noise list. 00144 // All small and noise blobs in high density regions are marked BTFT_NONTEXT. 00145 // block is the single block for the whole page or rectangle to be OCRed. 00146 // leader_parts is the output. 00147 void FindLeadersAndMarkNoise(TO_BLOCK* block, 00148 ColPartition_LIST* leader_parts); 00149 00152 void InsertBlobs(TO_BLOCK* block); 00153 00154 // Fix broken CJK characters, using the fake joined blobs mechanism. 00155 // Blobs are really merged, ie the master takes all the outlines and the 00156 // others are deleted. 00157 // Returns true if sufficient blobs are merged that it may be worth running 00158 // again, due to a better estimate of character size. 00159 bool FixBrokenCJK(TO_BLOCK* block); 00160 00161 // Collect blobs that overlap or are within max_dist of the input bbox. 00162 // Return them in the list of blobs and expand the bbox to be the union 00163 // of all the boxes. not_this is excluded from the search, as are blobs 00164 // that cause the merged box to exceed max_size in either dimension. 00165 void AccumulateOverlaps(const BLOBNBOX* not_this, bool debug, 00166 int max_size, int max_dist, 00167 TBOX* bbox, BLOBNBOX_CLIST* blobs); 00168 00169 // For each blob in this grid, Finds the textline direction to be horizontal 00170 // or vertical according to distance to neighbours and 1st and 2nd order 00171 // neighbours. Non-text tends to end up without a definite direction. 
00172 // Result is setting of the neighbours and vert_possible/horz_possible 00173 // flags in the BLOBNBOXes currently in this grid. 00174 // This function is called more than once if page orientation is uncertain, 00175 // so display_if_debugging is true on the final call to display the results. 00176 void FindTextlineFlowDirection(PageSegMode pageseg_mode, 00177 bool display_if_debugging); 00178 00179 // Sets the neighbours and good_stroke_neighbours members of the blob by 00180 // searching close on all 4 sides. 00181 // When finding leader dots/dashes, there is a slightly different rule for 00182 // what makes a good neighbour. 00183 // If activate_line_trap, then line-like objects are found and isolated. 00184 void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX* blob); 00185 00186 // Sets the good_stroke_neighbours member of the blob if it has a 00187 // GoodNeighbour on the given side. 00188 // Also sets the neighbour in the blob, whether or not a good one is found. 00189 // Return value is the number of neighbours in the line trap size range. 00190 // Leaders get extra special lenient treatment. 00191 int FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX* blob); 00192 00193 // Makes the blob to be only horizontal or vertical where evidence 00194 // is clear based on gaps of 2nd order neighbours. 00195 void SetNeighbourFlows(BLOBNBOX* blob); 00196 00197 // Nullify the neighbours in the wrong directions where the direction 00198 // is clear-cut based on a distance margin. Good for isolating vertical 00199 // text from neighbouring horizontal text. 00200 void SimplifyObviousNeighbours(BLOBNBOX* blob); 00201 00202 // Smoothes the vertical/horizontal type of the blob based on the 00203 // 2nd-order neighbours. If reset_all is true, then all blobs are 00204 // changed. Otherwise, only ambiguous blobs are processed. 
00205 void SmoothNeighbourTypes(PageSegMode pageseg_mode, bool desperate, 00206 BLOBNBOX* blob); 00207 00208 // Checks the left or right side of the given leader partition and sets the 00209 // (opposite) leader_on_right or leader_on_left flags for blobs 00210 // that are next to the given side of the given leader partition. 00211 void MarkLeaderNeighbours(const ColPartition* part, LeftOrRight side); 00212 00213 // Partition creation. Accumulates vertical and horizontal text chains, 00214 // puts the remaining blobs in as unknowns, and then merges/splits to 00215 // minimize overlap and smoothes the types with neighbours and the color 00216 // image if provided. rerotation is used to rotate the coordinate space 00217 // back to the nontext_map_ image. 00218 // If find_problems is true, detects possible noise pollution by the amount 00219 // of partition overlap that is created by the diacritics. If excessive, the 00220 // noise is separated out into diacritic blobs, and PFR_NOISE is returned. 00221 // [TODO(rays): if the partition overlap is caused by heavy skew, deskews 00222 // the components, saves the skew_angle and returns PFR_SKEW.] If the return 00223 // is not PFR_OK, the job is incomplete, and FindInitialPartitions must be 00224 // called again after cleaning up the partly done work. 00225 PartitionFindResult FindInitialPartitions(PageSegMode pageseg_mode, 00226 const FCOORD& rerotation, 00227 bool find_problems, TO_BLOCK* block, 00228 BLOBNBOX_LIST* diacritic_blobs, 00229 ColPartitionGrid* part_grid, 00230 ColPartition_LIST* big_parts, 00231 FCOORD* skew_angle); 00232 // Detects noise by a significant increase in partition overlap from 00233 // pre_overlap to now, and removes noise from the union of all the overlapping 00234 // partitions, placing the blobs in diacritic_blobs. Returns true if any noise 00235 // was found and removed. 
00236 bool DetectAndRemoveNoise(int pre_overlap, const TBOX& grid_box, 00237 TO_BLOCK* block, ColPartitionGrid* part_grid, 00238 BLOBNBOX_LIST* diacritic_blobs); 00239 // Finds vertical chains of text-like blobs and puts them in ColPartitions. 00240 void FindVerticalTextChains(ColPartitionGrid* part_grid); 00241 // Finds horizontal chains of text-like blobs and puts them in ColPartitions. 00242 void FindHorizontalTextChains(ColPartitionGrid* part_grid); 00243 // Finds diacritics and saves their base character in the blob. 00244 void TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block); 00245 // Searches this grid for an appropriately close and sized neighbour of the 00246 // given [small] blob. If such a blob is found, the diacritic base is saved 00247 // in the blob and true is returned. 00248 // The small_grid is a secondary grid that contains the small/noise objects 00249 // that are not in this grid, but may be useful for determining a connection 00250 // between blob and its potential base character. (See DiacriticXGapFilled.) 00251 bool DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob); 00252 // Returns true if there is no gap between the base char and the diacritic 00253 // bigger than a fraction of the height of the base char: 00254 // Eg: line end.....' 00255 // The quote is a long way from the end of the line, yet it needs to be a 00256 // diacritic. To determine that the quote is not part of an image, or 00257 // a different text block, we check for other marks in the gap between 00258 // the base char and the diacritic. 00259 // '<--Diacritic 00260 // |---------| 00261 // | |<-toobig-gap-> 00262 // | Base |<ok gap> 00263 // |---------| x<-----Dot occupying gap 00264 // The grid is const really. 00265 bool DiacriticXGapFilled(BlobGrid* grid, const TBOX& diacritic_box, 00266 const TBOX& base_box); 00267 // Merges diacritics with the ColPartition of the base character blob. 
00268 void MergeDiacritics(TO_BLOCK* block, ColPartitionGrid* part_grid); 00269 // Any blobs on the large_blobs list of block that are still unowned by a 00270 // ColPartition, are probably drop-cap or vertically touching so the blobs 00271 // are removed to the big_parts list and treated separately. 00272 void RemoveLargeUnusedBlobs(TO_BLOCK* block, 00273 ColPartitionGrid* part_grid, 00274 ColPartition_LIST* big_parts); 00275 00276 // All remaining unused blobs are put in individual ColPartitions. 00277 void PartitionRemainingBlobs(PageSegMode pageseg_mode, 00278 ColPartitionGrid* part_grid); 00279 00280 // If combine, put all blobs in the cell_list into a single partition, 00281 // otherwise put each one into its own partition. 00282 void MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine, 00283 ColPartitionGrid* part_grid, 00284 BLOBNBOX_CLIST* cell_list); 00285 00286 // Helper function to finish setting up a ColPartition and insert into 00287 // part_grid. 00288 void CompletePartition(PageSegMode pageseg_mode, ColPartition* part, 00289 ColPartitionGrid* part_grid); 00290 00291 // Helper returns true if we are looking only for vertical textlines, 00292 // taking into account any rotation that has been done. 00293 bool FindingVerticalOnly(PageSegMode pageseg_mode) const { 00294 if (rerotation_.y() == 0.0f) { 00295 return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT; 00296 } 00297 return !PSM_ORIENTATION_ENABLED(pageseg_mode) && 00298 pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT; 00299 } 00300 // Helper returns true if we are looking only for horizontal textlines, 00301 // taking into account any rotation that has been done. 
00302 bool FindingHorizontalOnly(PageSegMode pageseg_mode) const { 00303 if (rerotation_.y() == 0.0f) { 00304 return !PSM_ORIENTATION_ENABLED(pageseg_mode) && 00305 pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT; 00306 } 00307 return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT; 00308 } 00309 00310 // Merge partitions where the merge appears harmless. 00311 void EasyMerges(ColPartitionGrid* part_grid); 00312 00313 // Compute a search box based on the orientation of the partition. 00314 // Returns true if a suitable box can be calculated. 00315 // Callback for EasyMerges. 00316 bool OrientationSearchBox(ColPartition* part, TBOX* box); 00317 00318 // Merge confirmation callback for EasyMerges. 00319 bool ConfirmEasyMerge(const ColPartition* p1, const ColPartition* p2); 00320 00321 // Returns true if there is no significant noise in between the boxes. 00322 bool NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const; 00323 00324 // Displays the blobs colored according to the number of good neighbours 00325 // and the vertical/horizontal flow. 00326 ScrollView* DisplayGoodBlobs(const char* window_name, int x, int y); 00327 00328 // Displays blobs colored according to whether or not they are diacritics. 00329 ScrollView* DisplayDiacritics(const char* window_name, 00330 int x, int y, TO_BLOCK* block); 00331 00332 private: 00333 // Image map of photo/noise areas on the page. Borrowed pointer (not owned.) 00334 Pix* nontext_map_; 00335 // Textline projection map. Borrowed pointer. 00336 TextlineProjection* projection_; 00337 // DENORM used by projection_ to get back to image coords. Borrowed pointer. 00338 const DENORM* denorm_; 00339 // Bounding box of the grid. 00340 TBOX grid_box_; 00341 // Rerotation to get back to the original image. 00342 FCOORD rerotation_; 00343 // Windows for debug display. 
00344 ScrollView* leaders_win_; 00345 ScrollView* initial_widths_win_; 00346 ScrollView* widths_win_; 00347 ScrollView* chains_win_; 00348 ScrollView* diacritics_win_; 00349 ScrollView* textlines_win_; 00350 ScrollView* smoothed_win_; 00351 }; 00352 00353 } // namespace tesseract. 00354 00355 #endif // TESSERACT_TEXTORD_STROKEWIDTH_H__