// tesseract 3.04.01
00001 00002 // File: baseapi.h 00003 // Description: Simple API for calling tesseract. 00004 // Author: Ray Smith 00005 // Created: Fri Oct 06 15:35:01 PDT 2006 00006 // 00007 // (C) Copyright 2006, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_API_BASEAPI_H__ 00021 #define TESSERACT_API_BASEAPI_H__ 00022 00023 #define TESSERACT_VERSION_STR "3.04.01" 00024 #define TESSERACT_VERSION 0x030401 00025 #define MAKE_VERSION(major, minor, patch) (((major) << 16) | ((minor) << 8) | \ 00026 (patch)) 00027 00028 #include <stdio.h> 00029 // To avoid collision with other typenames include the ABSOLUTE MINIMUM 00030 // complexity of includes here. Use forward declarations wherever possible 00031 // and hide includes of complex types in baseapi.cpp. 
00032 #include "platform.h" 00033 #include "apitypes.h" 00034 #include "thresholder.h" 00035 #include "unichar.h" 00036 #include "tesscallback.h" 00037 #include "publictypes.h" 00038 #include "pageiterator.h" 00039 #include "resultiterator.h" 00040 00041 template <typename T> class GenericVector; 00042 class PAGE_RES; 00043 class PAGE_RES_IT; 00044 class ParagraphModel; 00045 struct BlamerBundle; 00046 class BLOCK_LIST; 00047 class DENORM; 00048 class MATRIX; 00049 class ROW; 00050 class STRING; 00051 class WERD; 00052 struct Pix; 00053 struct Box; 00054 struct Pixa; 00055 struct Boxa; 00056 class ETEXT_DESC; 00057 struct OSResults; 00058 class TBOX; 00059 class UNICHARSET; 00060 class WERD_CHOICE_LIST; 00061 00062 struct INT_FEATURE_STRUCT; 00063 typedef INT_FEATURE_STRUCT *INT_FEATURE; 00064 struct TBLOB; 00065 00066 namespace tesseract { 00067 00068 #ifndef NO_CUBE_BUILD 00069 class CubeRecoContext; 00070 #endif // NO_CUBE_BUILD 00071 class Dawg; 00072 class Dict; 00073 class EquationDetect; 00074 class PageIterator; 00075 class LTRResultIterator; 00076 class ResultIterator; 00077 class MutableIterator; 00078 class TessResultRenderer; 00079 class Tesseract; 00080 class Trie; 00081 class Wordrec; 00082 00083 typedef int (Dict::*DictFunc)(void* void_dawg_args, 00084 UNICHAR_ID unichar_id, bool word_end) const; 00085 typedef double (Dict::*ProbabilityInContextFunc)(const char* lang, 00086 const char* context, 00087 int context_bytes, 00088 const char* character, 00089 int character_bytes); 00090 typedef float (Dict::*ParamsModelClassifyFunc)( 00091 const char *lang, void *path); 00092 typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings, 00093 const WERD_CHOICE_LIST &best_choices, 00094 const UNICHARSET &unicharset, 00095 BlamerBundle *blamer_bundle); 00096 typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *> 00097 TruthCallback; 00098 00107 class TESS_API TessBaseAPI { 00108 public: 00109 TessBaseAPI(); 00110 virtual ~TessBaseAPI(); 
00111 00115 static const char* Version(); 00116 00124 static size_t getOpenCLDevice(void **device); 00125 00130 static void CatchSignals(); 00131 00136 void SetInputName(const char* name); 00144 const char* GetInputName(); 00145 void SetInputImage(Pix *pix); 00146 Pix* GetInputImage(); 00147 int GetSourceYResolution(); 00148 const char* GetDatapath(); 00149 00151 void SetOutputName(const char* name); 00152 00166 bool SetVariable(const char* name, const char* value); 00167 bool SetDebugVariable(const char* name, const char* value); 00168 00173 bool GetIntVariable(const char *name, int *value) const; 00174 bool GetBoolVariable(const char *name, bool *value) const; 00175 bool GetDoubleVariable(const char *name, double *value) const; 00176 00181 const char *GetStringVariable(const char *name) const; 00182 00186 void PrintVariables(FILE *fp) const; 00187 00191 bool GetVariableAsString(const char *name, STRING *val); 00192 00231 int Init(const char* datapath, const char* language, OcrEngineMode mode, 00232 char **configs, int configs_size, 00233 const GenericVector<STRING> *vars_vec, 00234 const GenericVector<STRING> *vars_values, 00235 bool set_only_non_debug_params); 00236 int Init(const char* datapath, const char* language, OcrEngineMode oem) { 00237 return Init(datapath, language, oem, NULL, 0, NULL, NULL, false); 00238 } 00239 int Init(const char* datapath, const char* language) { 00240 return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false); 00241 } 00242 00251 const char* GetInitLanguagesAsString() const; 00252 00258 void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const; 00259 00263 void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const; 00264 00271 int InitLangMod(const char* datapath, const char* language); 00272 00277 void InitForAnalysePage(); 00278 00285 void ReadConfigFile(const char* filename); 00287 void ReadDebugConfigFile(const char* filename); 00288 00294 void SetPageSegMode(PageSegMode mode); 00295 00297 
PageSegMode GetPageSegMode() const; 00298 00316 char* TesseractRect(const unsigned char* imagedata, 00317 int bytes_per_pixel, int bytes_per_line, 00318 int left, int top, int width, int height); 00319 00324 void ClearAdaptiveClassifier(); 00325 00332 /* @{ */ 00333 00343 void SetImage(const unsigned char* imagedata, int width, int height, 00344 int bytes_per_pixel, int bytes_per_line); 00345 00356 void SetImage(Pix* pix); 00357 00362 void SetSourceResolution(int ppi); 00363 00369 void SetRectangle(int left, int top, int width, int height); 00370 00378 void SetThresholder(ImageThresholder* thresholder) { 00379 if (thresholder_ != NULL) 00380 delete thresholder_; 00381 thresholder_ = thresholder; 00382 ClearResults(); 00383 } 00384 00390 Pix* GetThresholdedImage(); 00391 00397 Boxa* GetRegions(Pixa** pixa); 00398 00410 Boxa* GetTextlines(const bool raw_image, const int raw_padding, 00411 Pixa** pixa, int** blockids, int** paraids); 00412 /* 00413 Helper method to extract from the thresholded image. (most common usage) 00414 */ 00415 Boxa* GetTextlines(Pixa** pixa, int** blockids) { 00416 return GetTextlines(false, 0, pixa, blockids, NULL); 00417 } 00418 00427 Boxa* GetStrips(Pixa** pixa, int** blockids); 00428 00434 Boxa* GetWords(Pixa** pixa); 00435 00444 Boxa* GetConnectedComponents(Pixa** cc); 00445 00459 Boxa* GetComponentImages(const PageIteratorLevel level, 00460 const bool text_only, const bool raw_image, 00461 const int raw_padding, 00462 Pixa** pixa, int** blockids, int** paraids); 00463 // Helper function to get binary images with no padding (most common usage). 
00464 Boxa* GetComponentImages(const PageIteratorLevel level, 00465 const bool text_only, 00466 Pixa** pixa, int** blockids) { 00467 return GetComponentImages(level, text_only, false, 0, pixa, blockids, NULL); 00468 } 00469 00476 int GetThresholdedImageScaleFactor() const; 00477 00483 void DumpPGM(const char* filename); 00484 00500 PageIterator* AnalyseLayout() { 00501 return AnalyseLayout(false); 00502 } 00503 PageIterator* AnalyseLayout(bool merge_similar_words); 00504 00511 int Recognize(ETEXT_DESC* monitor); 00512 00519 int RecognizeForChopTest(ETEXT_DESC* monitor); 00520 00543 bool ProcessPages(const char* filename, const char* retry_config, 00544 int timeout_millisec, TessResultRenderer* renderer); 00545 // Does the real work of ProcessPages. 00546 bool ProcessPagesInternal(const char* filename, const char* retry_config, 00547 int timeout_millisec, TessResultRenderer* renderer); 00548 00558 bool ProcessPage(Pix* pix, int page_index, const char* filename, 00559 const char* retry_config, int timeout_millisec, 00560 TessResultRenderer* renderer); 00561 00570 ResultIterator* GetIterator(); 00571 00580 MutableIterator* GetMutableIterator(); 00581 00586 char* GetUTF8Text(); 00587 00593 char* GetHOCRText(int page_number); 00594 00602 char* GetBoxText(int page_number); 00603 00609 char* GetUNLVText(); 00610 00616 char* GetOsdText(int page_number); 00617 00619 int MeanTextConf(); 00626 int* AllWordConfidences(); 00627 00638 bool AdaptToWordStr(PageSegMode mode, const char* wordstr); 00639 00646 void Clear(); 00647 00654 void End(); 00655 00663 static void ClearPersistentCache(); 00664 00671 int IsValidWord(const char *word); 00672 // Returns true if utf8_character is defined in the UniCharset. 
00673 bool IsValidCharacter(const char *utf8_character); 00674 00675 00676 bool GetTextDirection(int* out_offset, float* out_slope); 00677 00679 void SetDictFunc(DictFunc f); 00680 00684 void SetProbabilityInContextFunc(ProbabilityInContextFunc f); 00685 00687 void SetFillLatticeFunc(FillLatticeFunc f); 00688 00693 bool DetectOS(OSResults*); 00694 00696 void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features, 00697 int* num_features, int* feature_outline_index); 00698 00703 static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, 00704 int right, int bottom); 00705 00710 void RunAdaptiveClassifier(TBLOB* blob, 00711 int num_max_matches, 00712 int* unichar_ids, 00713 float* ratings, 00714 int* num_matches_returned); 00715 00717 const char* GetUnichar(int unichar_id); 00718 00720 const Dawg *GetDawg(int i) const; 00721 00723 int NumDawgs() const; 00724 00726 static ROW *MakeTessOCRRow(float baseline, float xheight, 00727 float descender, float ascender); 00728 00730 static TBLOB *MakeTBLOB(Pix *pix); 00731 00737 static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode); 00738 00739 Tesseract* tesseract() const { 00740 return tesseract_; 00741 } 00742 00743 OcrEngineMode oem() const { 00744 return last_oem_requested_; 00745 } 00746 00747 void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } 00748 00749 #ifndef NO_CUBE_BUILD 00750 00751 CubeRecoContext *GetCubeRecoContext() const; 00752 #endif // NO_CUBE_BUILD 00753 00754 void set_min_orientation_margin(double margin); 00755 00760 void GetBlockTextOrientations(int** block_orientation, 00761 bool** vertical_writing); 00762 00764 BLOCK_LIST* FindLinesCreateBlockList(); 00765 00771 static void DeleteBlockList(BLOCK_LIST* block_list); 00772 /* @} */ 00773 00774 protected: 00775 00777 TESS_LOCAL bool InternalSetImage(); 00778 00783 TESS_LOCAL virtual void Threshold(Pix** pix); 00784 00789 TESS_LOCAL int FindLines(); 00790 00792 void ClearResults(); 00793 00799 TESS_LOCAL 
LTRResultIterator* GetLTRIterator(); 00800 00807 TESS_LOCAL int TextLength(int* blob_count); 00808 00810 /* @{ */ 00811 00816 TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, 00817 int length, 00818 float baseline, 00819 float xheight, 00820 float descender, 00821 float ascender); 00822 00824 TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); 00825 TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, 00826 PAGE_RES* pass1_result); 00827 00829 TESS_LOCAL void DetectParagraphs(bool after_text_recognition); 00830 00835 TESS_LOCAL static int TesseractExtractResult(char** text, 00836 int** lengths, 00837 float** costs, 00838 int** x0, 00839 int** y0, 00840 int** x1, 00841 int** y1, 00842 PAGE_RES* page_res); 00843 00844 TESS_LOCAL const PAGE_RES* GetPageRes() const { 00845 return page_res_; 00846 }; 00847 /* @} */ 00848 00849 00850 protected: 00851 Tesseract* tesseract_; 00852 Tesseract* osd_tesseract_; 00853 EquationDetect* equ_detect_; 00854 ImageThresholder* thresholder_; 00855 GenericVector<ParagraphModel *>* paragraph_models_; 00856 BLOCK_LIST* block_list_; 00857 PAGE_RES* page_res_; 00858 STRING* input_file_; 00859 Pix* input_image_; 00860 STRING* output_file_; 00861 STRING* datapath_; 00862 STRING* language_; 00863 OcrEngineMode last_oem_requested_; 00864 bool recognition_done_; 00865 TruthCallback *truth_cb_; 00866 00871 /* @{ */ 00872 int rect_left_; 00873 int rect_top_; 00874 int rect_width_; 00875 int rect_height_; 00876 int image_width_; 00877 int image_height_; 00878 /* @} */ 00879 00880 private: 00881 // A list of image filenames gets special consideration 00882 bool ProcessPagesFileList(FILE *fp, 00883 STRING *buf, 00884 const char* retry_config, int timeout_millisec, 00885 TessResultRenderer* renderer, 00886 int tessedit_page_number); 00887 // TIFF supports multipage so gets special consideration 00888 bool ProcessPagesMultipageTiff(const unsigned char *data, 00889 size_t size, 00890 const char* filename, 00891 const char* 
retry_config, 00892 int timeout_millisec, 00893 TessResultRenderer* renderer, 00894 int tessedit_page_number); 00895 }; // class TessBaseAPI. 00896 00898 STRING HOcrEscape(const char* text); 00899 } // namespace tesseract. 00900 00901 #endif // TESSERACT_API_BASEAPI_H__