tesseract 3.04.01

api/baseapi.h

Go to the documentation of this file.
00001 
00002 // File:        baseapi.h
00003 // Description: Simple API for calling tesseract.
00004 // Author:      Ray Smith
00005 // Created:     Fri Oct 06 15:35:01 PDT 2006
00006 //
00007 // (C) Copyright 2006, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_API_BASEAPI_H__
00021 #define TESSERACT_API_BASEAPI_H__
00022 // Version identifiers. MAKE_VERSION packs its arguments as (major << 16) | (minor << 8) | patch.
00023 #define TESSERACT_VERSION_STR "3.04.01"  // Version as a string literal.
00024 #define TESSERACT_VERSION 0x030401  // Equals MAKE_VERSION(3, 4, 1).
00025 #define MAKE_VERSION(major, minor, patch) (((major) << 16) | ((minor) << 8) | \
00026                                             (patch))
00027 
00028 #include <stdio.h>
00029 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
00030 // complexity of includes here. Use forward declarations wherever possible
00031 // and hide includes of complex types in baseapi.cpp.
00032 #include "platform.h"
00033 #include "apitypes.h"
00034 #include "thresholder.h"
00035 #include "unichar.h"
00036 #include "tesscallback.h"
00037 #include "publictypes.h"
00038 #include "pageiterator.h"
00039 #include "resultiterator.h"
00040 
00041 template <typename T> class GenericVector;
00042 class PAGE_RES;
00043 class PAGE_RES_IT;
00044 class ParagraphModel;
00045 struct BlamerBundle;
00046 class BLOCK_LIST;
00047 class DENORM;
00048 class MATRIX;
00049 class ROW;
00050 class STRING;
00051 class WERD;
00052 struct Pix;
00053 struct Box;
00054 struct Pixa;
00055 struct Boxa;
00056 class ETEXT_DESC;
00057 struct OSResults;
00058 class TBOX;
00059 class UNICHARSET;
00060 class WERD_CHOICE_LIST;
00061 
00062 struct INT_FEATURE_STRUCT;
00063 typedef INT_FEATURE_STRUCT *INT_FEATURE;
00064 struct TBLOB;
00065 
00066 namespace tesseract {
00067 
00068 #ifndef NO_CUBE_BUILD
00069 class CubeRecoContext;
00070 #endif  // NO_CUBE_BUILD
00071 class Dawg;
00072 class Dict;
00073 class EquationDetect;
00074 class PageIterator;
00075 class LTRResultIterator;
00076 class ResultIterator;
00077 class MutableIterator;
00078 class TessResultRenderer;
00079 class Tesseract;
00080 class Trie;
00081 class Wordrec;
00082 // Pointer-to-member and callback types. DictFunc, ProbabilityInContextFunc, FillLatticeFunc and TruthCallback are parameter types of TessBaseAPI methods below.
00083 typedef int (Dict::*DictFunc)(void* void_dawg_args,
00084                               UNICHAR_ID unichar_id, bool word_end) const;  // const member function of Dict
00085 typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
00086                                                  const char* context,
00087                                                  int context_bytes,
00088                                                  const char* character,
00089                                                  int character_bytes);  // non-const member function of Dict
00090 typedef float (Dict::*ParamsModelClassifyFunc)(
00091     const char *lang, void *path);  // non-const member function of Dict; not used elsewhere in this header
00092 typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,
00093                                          const WERD_CHOICE_LIST &best_choices,
00094                                          const UNICHARSET &unicharset,
00095                                          BlamerBundle *blamer_bundle);  // member function of Wordrec
00096 typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *>
00097     TruthCallback;  // TessCallback4 instantiated with four argument types
00098 
00107 class TESS_API TessBaseAPI {
00108  public:
00109   TessBaseAPI();
00110   virtual ~TessBaseAPI();
00111 
00115   static const char* Version();
00116 
00124   static size_t getOpenCLDevice(void **device);
00125 
00130   static void CatchSignals();
00131 
00136   void SetInputName(const char* name);
00144   const char* GetInputName();
00145   void SetInputImage(Pix *pix);
00146   Pix* GetInputImage();
00147   int GetSourceYResolution();
00148   const char* GetDatapath();
00149 
00151   void SetOutputName(const char* name);
00152 
00166   bool SetVariable(const char* name, const char* value);
00167   bool SetDebugVariable(const char* name, const char* value);
00168 
00173   bool GetIntVariable(const char *name, int *value) const;
00174   bool GetBoolVariable(const char *name, bool *value) const;
00175   bool GetDoubleVariable(const char *name, double *value) const;
00176 
00181   const char *GetStringVariable(const char *name) const;
00182 
00186   void PrintVariables(FILE *fp) const;
00187 
00191   bool GetVariableAsString(const char *name, STRING *val);
00192 
00231   int Init(const char* datapath, const char* language, OcrEngineMode mode,
00232            char **configs, int configs_size,
00233            const GenericVector<STRING> *vars_vec,
00234            const GenericVector<STRING> *vars_values,
00235            bool set_only_non_debug_params);
00236   int Init(const char* datapath, const char* language, OcrEngineMode oem) {
00237     return Init(datapath, language, oem, NULL, 0, NULL, NULL, false);
00238   }
00239   int Init(const char* datapath, const char* language) {
00240     return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
00241   }
00242 
00251   const char* GetInitLanguagesAsString() const;
00252 
00258   void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
00259 
00263   void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
00264 
00271   int InitLangMod(const char* datapath, const char* language);
00272 
00277   void InitForAnalysePage();
00278 
00285   void ReadConfigFile(const char* filename);
00287   void ReadDebugConfigFile(const char* filename);
00288 
00294   void SetPageSegMode(PageSegMode mode);
00295 
00297   PageSegMode GetPageSegMode() const;
00298 
00316   char* TesseractRect(const unsigned char* imagedata,
00317                       int bytes_per_pixel, int bytes_per_line,
00318                       int left, int top, int width, int height);
00319 
00324   void ClearAdaptiveClassifier();
00325 
00332    /* @{ */
00333 
00343   void SetImage(const unsigned char* imagedata, int width, int height,
00344                 int bytes_per_pixel, int bytes_per_line);
00345 
00356   void SetImage(Pix* pix);
00357 
00362   void SetSourceResolution(int ppi);
00363 
00369   void SetRectangle(int left, int top, int width, int height);
00370 
00378   void SetThresholder(ImageThresholder* thresholder) {
00379     if (thresholder_ != NULL)
00380       delete thresholder_;
00381     thresholder_ = thresholder;
00382     ClearResults();
00383   }
00384 
00390   Pix* GetThresholdedImage();
00391 
00397   Boxa* GetRegions(Pixa** pixa);
00398 
00410   Boxa* GetTextlines(const bool raw_image, const int raw_padding,
00411                      Pixa** pixa, int** blockids, int** paraids);
00412   /*
00413      Helper method to extract from the thresholded image. (most common usage)
00414   */
00415   Boxa* GetTextlines(Pixa** pixa, int** blockids) {
00416     return GetTextlines(false, 0, pixa, blockids, NULL);
00417   }
00418 
00427   Boxa* GetStrips(Pixa** pixa, int** blockids);
00428 
00434   Boxa* GetWords(Pixa** pixa);
00435 
00444   Boxa* GetConnectedComponents(Pixa** cc);
00445 
00459   Boxa* GetComponentImages(const PageIteratorLevel level,
00460                            const bool text_only, const bool raw_image,
00461                            const int raw_padding,
00462                            Pixa** pixa, int** blockids, int** paraids);
00463   // Helper function to get binary images with no padding (most common usage).
00464   Boxa* GetComponentImages(const PageIteratorLevel level,
00465                            const bool text_only,
00466                            Pixa** pixa, int** blockids) {
00467     return GetComponentImages(level, text_only, false, 0, pixa, blockids, NULL);
00468   }
00469 
00476   int GetThresholdedImageScaleFactor() const;
00477 
00483   void DumpPGM(const char* filename);
00484 
00500   PageIterator* AnalyseLayout() {
00501     return AnalyseLayout(false);
00502   }
00503   PageIterator* AnalyseLayout(bool merge_similar_words);
00504 
00511   int Recognize(ETEXT_DESC* monitor);
00512 
00519   int RecognizeForChopTest(ETEXT_DESC* monitor);
00520 
00543   bool ProcessPages(const char* filename, const char* retry_config,
00544                     int timeout_millisec, TessResultRenderer* renderer);
00545   // Does the real work of ProcessPages.
00546   bool ProcessPagesInternal(const char* filename, const char* retry_config,
00547                             int timeout_millisec, TessResultRenderer* renderer);
00548 
00558   bool ProcessPage(Pix* pix, int page_index, const char* filename,
00559                    const char* retry_config, int timeout_millisec,
00560                    TessResultRenderer* renderer);
00561 
00570   ResultIterator* GetIterator();
00571 
00580   MutableIterator* GetMutableIterator();
00581 
00586   char* GetUTF8Text();
00587 
00593   char* GetHOCRText(int page_number);
00594 
00602   char* GetBoxText(int page_number);
00603 
00609   char* GetUNLVText();
00610 
00616   char* GetOsdText(int page_number);
00617 
00619   int MeanTextConf();
00626   int* AllWordConfidences();
00627 
00638   bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
00639 
00646   void Clear();
00647 
00654   void End();
00655 
00663   static void ClearPersistentCache();
00664 
00671   int IsValidWord(const char *word);
00672   // Returns true if utf8_character is defined in the UniCharset.
00673   bool IsValidCharacter(const char *utf8_character);
00674 
00675 
00676   bool GetTextDirection(int* out_offset, float* out_slope);
00677 
00679   void SetDictFunc(DictFunc f);
00680 
00684   void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
00685 
00687   void SetFillLatticeFunc(FillLatticeFunc f);
00688 
00693   bool DetectOS(OSResults*);
00694 
00696   void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
00697                           int* num_features, int* feature_outline_index);
00698 
00703   static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
00704                             int right, int bottom);
00705 
00710   void RunAdaptiveClassifier(TBLOB* blob,
00711                              int num_max_matches,
00712                              int* unichar_ids,
00713                              float* ratings,
00714                              int* num_matches_returned);
00715 
00717   const char* GetUnichar(int unichar_id);
00718 
00720   const Dawg *GetDawg(int i) const;
00721 
00723   int NumDawgs() const;
00724 
00726   static ROW *MakeTessOCRRow(float baseline, float xheight,
00727                              float descender, float ascender);
00728 
00730   static TBLOB *MakeTBLOB(Pix *pix);
00731 
00737   static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
00738 
00739   Tesseract* tesseract() const {
00740     return tesseract_;
00741   }
00742 
00743   OcrEngineMode oem() const {
00744     return last_oem_requested_;
00745   }
00746 
00747   void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
00748 
00749 #ifndef NO_CUBE_BUILD
00750 
00751   CubeRecoContext *GetCubeRecoContext() const;
00752 #endif  // NO_CUBE_BUILD
00753 
00754   void set_min_orientation_margin(double margin);
00755 
00760   void GetBlockTextOrientations(int** block_orientation,
00761                                 bool** vertical_writing);
00762 
00764   BLOCK_LIST* FindLinesCreateBlockList();
00765 
00771   static void DeleteBlockList(BLOCK_LIST* block_list);
00772  /* @} */
00773 
00774  protected:
00775 
00777   TESS_LOCAL bool InternalSetImage();
00778 
00783   TESS_LOCAL virtual void Threshold(Pix** pix);
00784 
00789   TESS_LOCAL int FindLines();
00790 
00792   void ClearResults();
00793 
00799   TESS_LOCAL LTRResultIterator* GetLTRIterator();
00800 
00807   TESS_LOCAL int TextLength(int* blob_count);
00808 
00810   /* @{ */
00811 
00816   TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
00817                                    int length,
00818                                    float baseline,
00819                                    float xheight,
00820                                    float descender,
00821                                    float ascender);
00822 
00824   TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
00825   TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
00826                                         PAGE_RES* pass1_result);
00827 
00829   TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
00830 
00835   TESS_LOCAL static int TesseractExtractResult(char** text,
00836                                     int** lengths,
00837                                     float** costs,
00838                                     int** x0,
00839                                     int** y0,
00840                                     int** x1,
00841                                     int** y1,
00842                                     PAGE_RES* page_res);
00843 
00844   TESS_LOCAL const PAGE_RES* GetPageRes() const {
00845     return page_res_;
00846   };
00847   /* @} */
00848 
00849 
00850  protected:
00851   Tesseract*        tesseract_;       
00852   Tesseract*        osd_tesseract_;   
00853   EquationDetect*   equ_detect_;      
00854   ImageThresholder* thresholder_;     
00855   GenericVector<ParagraphModel *>* paragraph_models_;
00856   BLOCK_LIST*       block_list_;      
00857   PAGE_RES*         page_res_;        
00858   STRING*           input_file_;      
00859   Pix*              input_image_;     
00860   STRING*           output_file_;     
00861   STRING*           datapath_;        
00862   STRING*           language_;        
00863   OcrEngineMode last_oem_requested_;  
00864   bool          recognition_done_;   
00865   TruthCallback *truth_cb_;           
00866 
00871   /* @{ */
00872   int rect_left_;
00873   int rect_top_;
00874   int rect_width_;
00875   int rect_height_;
00876   int image_width_;
00877   int image_height_;
00878   /* @} */
00879 
00880  private:
00881   // A list of image filenames gets special consideration
00882   bool ProcessPagesFileList(FILE *fp,
00883                             STRING *buf,
00884                             const char* retry_config, int timeout_millisec,
00885                             TessResultRenderer* renderer,
00886                             int tessedit_page_number);
00887   // TIFF supports multipage so gets special consideration
00888   bool ProcessPagesMultipageTiff(const unsigned char *data,
00889                                  size_t size,
00890                                  const char* filename,
00891                                  const char* retry_config,
00892                                  int timeout_millisec,
00893                                  TessResultRenderer* renderer,
00894                                  int tessedit_page_number);
00895 };  // class TessBaseAPI.
00896 
00898 STRING HOcrEscape(const char* text);  // Returns a STRING by value. NOTE(review): presumably escapes text for hOCR output -- confirm in baseapi.cpp.
00899 }  // namespace tesseract.
00900 
00901 #endif  // TESSERACT_API_BASEAPI_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines