|
tesseract 3.04.01
|
00001 // Copyright 2008 Google Inc. All Rights Reserved. 00002 // Author: shobhitsaxena@google.com (Shobhit Saxena) 00003 00004 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ 00005 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ 00006 00007 #include "ocrblock.h" 00008 #include "params.h" 00009 00010 struct Pix; 00011 struct Box; 00012 struct Boxa; 00013 00014 extern 00015 INT_VAR_H(devanagari_split_debuglevel, 0, 00016 "Debug level for split shiro-rekha process."); 00017 00018 extern 00019 BOOL_VAR_H(devanagari_split_debugimage, 0, 00020 "Whether to create a debug image for split shiro-rekha process."); 00021 00022 class TBOX; 00023 00024 namespace tesseract { 00025 00026 class PixelHistogram { 00027 public: 00028 PixelHistogram() { 00029 hist_ = NULL; 00030 length_ = 0; 00031 } 00032 00033 ~PixelHistogram() { 00034 Clear(); 00035 } 00036 00037 void Clear() { 00038 if (hist_) { 00039 delete[] hist_; 00040 } 00041 length_ = 0; 00042 } 00043 00044 int* hist() const { 00045 return hist_; 00046 } 00047 00048 int length() const { 00049 return length_; 00050 } 00051 00052 // Methods to construct histograms from images. These clear any existing data. 00053 void ConstructVerticalCountHist(Pix* pix); 00054 void ConstructHorizontalCountHist(Pix* pix); 00055 00056 // This method returns the global-maxima for the histogram. The frequency of 00057 // the global maxima is returned in count, if specified. 00058 int GetHistogramMaximum(int* count) const; 00059 00060 private: 00061 int* hist_; 00062 int length_; 00063 }; 00064 00065 class ShiroRekhaSplitter { 00066 public: 00067 enum SplitStrategy { 00068 NO_SPLIT = 0, // No splitting is performed for the phase. 00069 MINIMAL_SPLIT, // Blobs are split minimally. 00070 MAXIMAL_SPLIT // Blobs are split maximally. 00071 }; 00072 00073 ShiroRekhaSplitter(); 00074 virtual ~ShiroRekhaSplitter(); 00075 00076 // Top-level method to perform splitting based on current settings. 00077 // Returns true if a split was actually performed. 00078 // If split_for_pageseg is true, the pageseg_split_strategy_ is used for 00079 // splitting. If false, the ocr_split_strategy_ is used. 00080 bool Split(bool split_for_pageseg); 00081 00082 // Clears the memory held by this object. 00083 void Clear(); 00084 00085 // Refreshes the words in the segmentation block list by using blobs in the 00086 // input blob list. 00087 // The segmentation block list must be set. 00088 void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs); 00089 00090 // Returns true if the split strategies for pageseg and ocr are different. 00091 bool HasDifferentSplitStrategies() const { 00092 return pageseg_split_strategy_ != ocr_split_strategy_; 00093 } 00094 00095 // This only keeps a copy of the block list pointer. At split call, the list 00096 // object should still be alive. This block list is used as a golden 00097 // segmentation when performing splitting. 00098 void set_segmentation_block_list(BLOCK_LIST* block_list) { 00099 segmentation_block_list_ = block_list; 00100 } 00101 00102 static const int kUnspecifiedXheight = -1; 00103 00104 void set_global_xheight(int xheight) { 00105 global_xheight_ = xheight; 00106 } 00107 00108 void set_perform_close(bool perform) { 00109 perform_close_ = perform; 00110 } 00111 00112 // Returns the image obtained from shiro-rekha splitting. The returned object 00113 // is owned by this class. Callers may want to clone the returned pix to keep 00114 // it alive beyond the life of ShiroRekhaSplitter object. 00115 Pix* splitted_image() { 00116 return splitted_image_; 00117 } 00118 00119 // On setting the input image, a clone of it is owned by this class. 00120 void set_orig_pix(Pix* pix); 00121 00122 // Returns the input image provided to the object. This object is owned by 00123 // this class. Callers may want to clone the returned pix to work with it. 00124 Pix* orig_pix() { 00125 return orig_pix_; 00126 } 00127 00128 SplitStrategy ocr_split_strategy() const { 00129 return ocr_split_strategy_; 00130 } 00131 00132 void set_ocr_split_strategy(SplitStrategy strategy) { 00133 ocr_split_strategy_ = strategy; 00134 } 00135 00136 SplitStrategy pageseg_split_strategy() const { 00137 return pageseg_split_strategy_; 00138 } 00139 00140 void set_pageseg_split_strategy(SplitStrategy strategy) { 00141 pageseg_split_strategy_ = strategy; 00142 } 00143 00144 BLOCK_LIST* segmentation_block_list() { 00145 return segmentation_block_list_; 00146 } 00147 00148 // This method dumps a debug image to the specified location. 00149 void DumpDebugImage(const char* filename) const; 00150 00151 // This method returns the computed mode-height of blobs in the pix. 00152 // It also prunes very small blobs from calculation. Could be used to provide 00153 // a global xheight estimate for images which have the same point-size text. 00154 static int GetModeHeight(Pix* pix); 00155 00156 private: 00157 // Method to perform a close operation on the input image. The xheight 00158 // estimate decides the size of sel used. 00159 static void PerformClose(Pix* pix, int xheight_estimate); 00160 00161 // This method resolves the cc bbox to a particular row and returns the row's 00162 // xheight. This uses block_list_ if available, else just returns the 00163 // global_xheight_ estimate currently set in the object. 00164 int GetXheightForCC(Box* cc_bbox); 00165 00166 // Returns a list of regions (boxes) which should be cleared in the original 00167 // image so as to perform shiro-rekha splitting. Pix is assumed to carry one 00168 // (or less) word only. Xheight measure could be the global estimate, the row 00169 // estimate, or unspecified. If unspecified, over splitting may occur, since a 00170 // conservative estimate of stroke width along with an associated multiplier 00171 // is used in its place. It is advisable to have a specified xheight when 00172 // splitting for classification/training. 00173 void SplitWordShiroRekha(SplitStrategy split_strategy, 00174 Pix* pix, 00175 int xheight, 00176 int word_left, 00177 int word_top, 00178 Boxa* regions_to_clear); 00179 00180 // Returns a new box object for the corresponding TBOX, based on the original 00181 // image's coordinate system. 00182 Box* GetBoxForTBOX(const TBOX& tbox) const; 00183 00184 // This method returns y-extents of the shiro-rekha computed from the input 00185 // word image. 00186 static void GetShiroRekhaYExtents(Pix* word_pix, 00187 int* shirorekha_top, 00188 int* shirorekha_bottom, 00189 int* shirorekha_ylevel); 00190 00191 Pix* orig_pix_; // Just a clone of the input image passed. 00192 Pix* splitted_image_; // Image produced after the last splitting round. The 00193 // object is owned by this class. 00194 SplitStrategy pageseg_split_strategy_; 00195 SplitStrategy ocr_split_strategy_; 00196 Pix* debug_image_; 00197 // This block list is used as a golden segmentation when performing splitting. 00198 BLOCK_LIST* segmentation_block_list_; 00199 int global_xheight_; 00200 bool perform_close_; // Whether a morphological close operation should be 00201 // performed before CCs are run through splitting. 00202 }; 00203 00204 } // namespace tesseract. 00205 00206 #endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_