tesseract 3.04.01

textord/devanagari_processing.h

Go to the documentation of this file.
00001 // Copyright 2008 Google Inc. All Rights Reserved.
00002 // Author: shobhitsaxena@google.com (Shobhit Saxena)
00003 
00004 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
00005 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
00006 
00007 #include "ocrblock.h"
00008 #include "params.h"
00009 
00010 struct Pix;
00011 struct Box;
00012 struct Boxa;
00013 
00014 extern
00015 INT_VAR_H(devanagari_split_debuglevel, 0,
00016           "Debug level for split shiro-rekha process.");
00017 
00018 extern
00019 BOOL_VAR_H(devanagari_split_debugimage, 0,
00020            "Whether to create a debug image for split shiro-rekha process.");
00021 
00022 class TBOX;
00023 
00024 namespace tesseract {
00025 
00026 class PixelHistogram {
00027  public:
00028   PixelHistogram() {
00029     hist_ = NULL;
00030     length_ = 0;
00031   }
00032 
00033   ~PixelHistogram() {
00034     Clear();
00035   }
00036 
00037   void Clear() {
00038     if (hist_) {
00039       delete[] hist_;
00040     }
00041     length_ = 0;
00042   }
00043 
00044   int* hist() const {
00045     return hist_;
00046   }
00047 
00048   int length() const {
00049     return length_;
00050   }
00051 
00052   // Methods to construct histograms from images. These clear any existing data.
00053   void ConstructVerticalCountHist(Pix* pix);
00054   void ConstructHorizontalCountHist(Pix* pix);
00055 
00056   // This method returns the global-maxima for the histogram. The frequency of
00057   // the global maxima is returned in count, if specified.
00058   int GetHistogramMaximum(int* count) const;
00059 
00060  private:
00061   int* hist_;
00062   int length_;
00063 };
00064 
00065 class ShiroRekhaSplitter {
00066  public:
00067   enum SplitStrategy {
00068     NO_SPLIT = 0,   // No splitting is performed for the phase.
00069     MINIMAL_SPLIT,  // Blobs are split minimally.
00070     MAXIMAL_SPLIT   // Blobs are split maximally.
00071   };
00072 
00073   ShiroRekhaSplitter();
00074   virtual ~ShiroRekhaSplitter();
00075 
00076   // Top-level method to perform splitting based on current settings.
00077   // Returns true if a split was actually performed.
00078   // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
00079   // splitting. If false, the ocr_split_strategy_ is used.
00080   bool Split(bool split_for_pageseg);
00081 
00082   // Clears the memory held by this object.
00083   void Clear();
00084 
00085   // Refreshes the words in the segmentation block list by using blobs in the
00086   // input blob list.
00087   // The segmentation block list must be set.
00088   void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs);
00089 
00090   // Returns true if the split strategies for pageseg and ocr are different.
00091   bool HasDifferentSplitStrategies() const {
00092     return pageseg_split_strategy_ != ocr_split_strategy_;
00093   }
00094 
00095   // This only keeps a copy of the block list pointer. At split call, the list
00096   // object should still be alive. This block list is used as a golden
00097   // segmentation when performing splitting.
00098   void set_segmentation_block_list(BLOCK_LIST* block_list) {
00099     segmentation_block_list_ = block_list;
00100   }
00101 
00102   static const int kUnspecifiedXheight = -1;
00103 
00104   void set_global_xheight(int xheight) {
00105     global_xheight_ = xheight;
00106   }
00107 
00108   void set_perform_close(bool perform) {
00109     perform_close_ = perform;
00110   }
00111 
00112   // Returns the image obtained from shiro-rekha splitting. The returned object
00113   // is owned by this class. Callers may want to clone the returned pix to keep
00114   // it alive beyond the life of ShiroRekhaSplitter object.
00115   Pix* splitted_image() {
00116     return splitted_image_;
00117   }
00118 
00119   // On setting the input image, a clone of it is owned by this class.
00120   void set_orig_pix(Pix* pix);
00121 
00122   // Returns the input image provided to the object. This object is owned by
00123   // this class. Callers may want to clone the returned pix to work with it.
00124   Pix* orig_pix() {
00125     return orig_pix_;
00126   }
00127 
00128   SplitStrategy ocr_split_strategy() const {
00129     return ocr_split_strategy_;
00130   }
00131 
00132   void set_ocr_split_strategy(SplitStrategy strategy) {
00133     ocr_split_strategy_ = strategy;
00134   }
00135 
00136   SplitStrategy pageseg_split_strategy() const {
00137     return pageseg_split_strategy_;
00138   }
00139 
00140   void set_pageseg_split_strategy(SplitStrategy strategy) {
00141     pageseg_split_strategy_ = strategy;
00142   }
00143 
00144   BLOCK_LIST* segmentation_block_list() {
00145     return segmentation_block_list_;
00146   }
00147 
00148   // This method dumps a debug image to the specified location.
00149   void DumpDebugImage(const char* filename) const;
00150 
00151   // This method returns the computed mode-height of blobs in the pix.
00152   // It also prunes very small blobs from calculation. Could be used to provide
00153   // a global xheight estimate for images which have the same point-size text.
00154   static int GetModeHeight(Pix* pix);
00155 
00156  private:
00157   // Method to perform a close operation on the input image. The xheight
00158   // estimate decides the size of sel used.
00159   static void PerformClose(Pix* pix, int xheight_estimate);
00160 
00161   // This method resolves the cc bbox to a particular row and returns the row's
00162   // xheight. This uses block_list_ if available, else just returns the
00163   // global_xheight_ estimate currently set in the object.
00164   int GetXheightForCC(Box* cc_bbox);
00165 
00166   // Returns a list of regions (boxes) which should be cleared in the original
00167   // image so as to perform shiro-rekha splitting. Pix is assumed to carry one
00168   // (or less) word only. Xheight measure could be the global estimate, the row
00169   // estimate, or unspecified. If unspecified, over splitting may occur, since a
00170   // conservative estimate of stroke width along with an associated multiplier
00171   // is used in its place. It is advisable to have a specified xheight when
00172   // splitting for classification/training.
00173   void SplitWordShiroRekha(SplitStrategy split_strategy,
00174                            Pix* pix,
00175                            int xheight,
00176                            int word_left,
00177                            int word_top,
00178                            Boxa* regions_to_clear);
00179 
00180   // Returns a new box object for the corresponding TBOX, based on the original
00181   // image's coordinate system.
00182   Box* GetBoxForTBOX(const TBOX& tbox) const;
00183 
00184   // This method returns y-extents of the shiro-rekha computed from the input
00185   // word image.
00186   static void GetShiroRekhaYExtents(Pix* word_pix,
00187                                     int* shirorekha_top,
00188                                     int* shirorekha_bottom,
00189                                     int* shirorekha_ylevel);
00190 
00191   Pix* orig_pix_;         // Just a clone of the input image passed.
00192   Pix* splitted_image_;   // Image produced after the last splitting round. The
00193                           // object is owned by this class.
00194   SplitStrategy pageseg_split_strategy_;
00195   SplitStrategy ocr_split_strategy_;
00196   Pix* debug_image_;
00197   // This block list is used as a golden segmentation when performing splitting.
00198   BLOCK_LIST* segmentation_block_list_;
00199   int global_xheight_;
00200   bool perform_close_;  // Whether a morphological close operation should be
00201                         // performed before CCs are run through splitting.
00202 };
00203 
00204 }  // namespace tesseract.
00205 
00206 #endif  // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines