tesseract  4.1.0
tesseract::TessBaseAPI Class Reference

#include <baseapi.h>

Public Member Functions

 TessBaseAPI ()
 
virtual ~TessBaseAPI ()
 
void SetInputName (const char *name)
 
const char * GetInputName ()
 
void SetInputImage (Pix *pix)
 
Pix * GetInputImage ()
 
int GetSourceYResolution ()
 
const char * GetDatapath ()
 
void SetOutputName (const char *name)
 
bool SetVariable (const char *name, const char *value)
 
bool SetDebugVariable (const char *name, const char *value)
 
bool GetIntVariable (const char *name, int *value) const
 
bool GetBoolVariable (const char *name, bool *value) const
 
bool GetDoubleVariable (const char *name, double *value) const
 
const char * GetStringVariable (const char *name) const
 
void PrintVariables (FILE *fp) const
 
bool GetVariableAsString (const char *name, STRING *val)
 
int Init (const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
 
int Init (const char *datapath, const char *language, OcrEngineMode oem)
 
int Init (const char *datapath, const char *language)
 
int Init (const char *data, int data_size, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params, FileReader reader)
 
const char * GetInitLanguagesAsString () const
 
void GetLoadedLanguagesAsVector (GenericVector< STRING > *langs) const
 
void GetAvailableLanguagesAsVector (GenericVector< STRING > *langs) const
 
int InitLangMod (const char *datapath, const char *language)
 
void InitForAnalysePage ()
 
void ReadConfigFile (const char *filename)
 
void ReadDebugConfigFile (const char *filename)
 
void SetPageSegMode (PageSegMode mode)
 
PageSegMode GetPageSegMode () const
 
char * TesseractRect (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height)
 
void ClearAdaptiveClassifier ()
 
void SetImage (const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
 
void SetImage (Pix *pix)
 
void SetSourceResolution (int ppi)
 
void SetRectangle (int left, int top, int width, int height)
 
void SetThresholder (ImageThresholder *thresholder)
 
Pix * GetThresholdedImage ()
 
Boxa * GetRegions (Pixa **pixa)
 
Boxa * GetTextlines (bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * GetTextlines (Pixa **pixa, int **blockids)
 
Boxa * GetStrips (Pixa **pixa, int **blockids)
 
Boxa * GetWords (Pixa **pixa)
 
Boxa * GetConnectedComponents (Pixa **cc)
 
Boxa * GetComponentImages (PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * GetComponentImages (const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
 
int GetThresholdedImageScaleFactor () const
 
PageIterator * AnalyseLayout ()
 
PageIterator * AnalyseLayout (bool merge_similar_words)
 
int Recognize (ETEXT_DESC *monitor)
 
int RecognizeForChopTest (ETEXT_DESC *monitor)
 
bool ProcessPages (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool ProcessPagesInternal (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool ProcessPage (Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
ResultIterator * GetIterator ()
 
MutableIterator * GetMutableIterator ()
 
char * GetUTF8Text ()
 
char * GetHOCRText (ETEXT_DESC *monitor, int page_number)
 
char * GetHOCRText (int page_number)
 
char * GetAltoText (ETEXT_DESC *monitor, int page_number)
 
char * GetAltoText (int page_number)
 
char * GetTSVText (int page_number)
 
char * GetLSTMBoxText (int page_number)
 
char * GetBoxText (int page_number)
 
char * GetWordStrBoxText (int page_number)
 
char * GetUNLVText ()
 
bool DetectOrientationScript (int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
 
char * GetOsdText (int page_number)
 
int MeanTextConf ()
 
int * AllWordConfidences ()
 
bool AdaptToWordStr (PageSegMode mode, const char *wordstr)
 
void Clear ()
 
void End ()
 
int IsValidWord (const char *word)
 
bool IsValidCharacter (const char *utf8_character)
 
bool GetTextDirection (int *out_offset, float *out_slope)
 
void SetDictFunc (DictFunc f)
 
void SetProbabilityInContextFunc (ProbabilityInContextFunc f)
 
bool DetectOS (OSResults *)
 
void GetBlockTextOrientations (int **block_orientation, bool **vertical_writing)
 
void SetFillLatticeFunc (FillLatticeFunc f)
 
BLOCK_LIST * FindLinesCreateBlockList ()
 
void GetFeaturesForBlob (TBLOB *blob, INT_FEATURE_STRUCT *int_features, int *num_features, int *feature_outline_index)
 
void RunAdaptiveClassifier (TBLOB *blob, int num_max_matches, int *unichar_ids, float *ratings, int *num_matches_returned)
 
const char * GetUnichar (int unichar_id)
 
const Dawg * GetDawg (int i) const
 
int NumDawgs () const
 
Tesseract * tesseract () const
 
OcrEngineMode oem () const
 
void InitTruthCallback (TruthCallback *cb)
 
void set_min_orientation_margin (double margin)
 

Static Public Member Functions

static const char * Version ()
 
static size_t getOpenCLDevice (void **device)
 
static void CatchSignals ()
 
static void ClearPersistentCache ()
 
static void DeleteBlockList (BLOCK_LIST *block_list)
 
static ROW * MakeTessOCRRow (float baseline, float xheight, float descender, float ascender)
 
static TBLOB * MakeTBLOB (Pix *pix)
 
static void NormalizeTBLOB (TBLOB *tblob, ROW *row, bool numeric_mode)
 
static ROW * FindRowForBox (BLOCK_LIST *blocks, int left, int top, int right, int bottom)
 

Protected Member Functions

TESS_LOCAL bool InternalSetImage ()
 
virtual TESS_LOCAL bool Threshold (Pix **pix)
 
TESS_LOCAL int FindLines ()
 
void ClearResults ()
 
TESS_LOCAL LTRResultIterator * GetLTRIterator ()
 
TESS_LOCAL int TextLength (int *blob_count)
 
TESS_LOCAL void DetectParagraphs (bool after_text_recognition)
 
TESS_LOCAL void AdaptToCharacter (const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender)
 
TESS_LOCAL PAGE_RES * RecognitionPass1 (BLOCK_LIST *block_list)
 
TESS_LOCAL PAGE_RES * RecognitionPass2 (BLOCK_LIST *block_list, PAGE_RES *pass1_result)
 
TESS_LOCAL const PAGE_RES * GetPageRes () const
 

Static Protected Member Functions

static TESS_LOCAL int TesseractExtractResult (char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res)
 

Protected Attributes

Tesseract * tesseract_
 The underlying data object. More...
 
Tesseract * osd_tesseract_
 For orientation & script detection. More...
 
EquationDetect * equ_detect_
 The equation detector. More...
 
FileReader reader_
 Reads files from any filesystem. More...
 
ImageThresholder * thresholder_
 Image thresholding module. More...
 
GenericVector< ParagraphModel * > * paragraph_models_
 
BLOCK_LIST * block_list_
 The page layout. More...
 
PAGE_RES * page_res_
 The page-level data. More...
 
STRING * input_file_
 Name used by training code. More...
 
STRING * output_file_
 Name used by debug code. More...
 
STRING * datapath_
 Current location of tessdata. More...
 
STRING * language_
 Last initialized language. More...
 
OcrEngineMode last_oem_requested_
 Last OCR engine mode requested. More...
 
bool recognition_done_
 page_res_ contains recognition data. More...
 
TruthCallback * truth_cb_
 
int rect_left_
 
int rect_top_
 
int rect_width_
 
int rect_height_
 
int image_width_
 
int image_height_
 

Detailed Description

Base class for all tesseract APIs. Specific classes can add ability to work on different inputs or produce different outputs. This class is mostly an interface layer on top of the Tesseract instance class to hide the data types so that users of this class don't have to include any other Tesseract headers.

Definition at line 91 of file baseapi.h.

Constructor & Destructor Documentation

tesseract::TessBaseAPI::TessBaseAPI ( )

Definition at line 183 of file baseapi.cpp.

184  : tesseract_(nullptr),
185  osd_tesseract_(nullptr),
186  equ_detect_(nullptr),
187  reader_(nullptr),
188  // Thresholder is initialized to nullptr here, but will be set before use by:
189  // A constructor of a derived API, SetThresholder(), or
190  // created implicitly when used in InternalSetImage.
191  thresholder_(nullptr),
192  paragraph_models_(nullptr),
193  block_list_(nullptr),
194  page_res_(nullptr),
195  input_file_(nullptr),
196  output_file_(nullptr),
197  datapath_(nullptr),
198  language_(nullptr),
200  recognition_done_(false),
201  truth_cb_(nullptr),
202  rect_left_(0),
203  rect_top_(0),
204  rect_width_(0),
205  rect_height_(0),
206  image_width_(0),
207  image_height_(0) {
208 #if defined(DEBUG)
209  // The Tesseract executables would use the "C" locale by default,
210  // but other software which is linked against the Tesseract library
211  // typically uses the locale from the user's environment.
212  // Here the default is overridden to allow debugging of potential
213  // problems caused by the locale settings.
214 
215  // Use the current locale if building debug code.
216  std::locale::global(std::locale(""));
217 #endif
218 }
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:889
STRING * input_file_
Name used by training code.
Definition: baseapi.h:896
OcrEngineMode last_oem_requested_
Last OCR engine mode requested.
Definition: baseapi.h:900
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:898
STRING * language_
Last initialized language.
Definition: baseapi.h:899
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:891
TruthCallback * truth_cb_
Definition: baseapi.h:902
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:901
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:893
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:897
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:890
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:894
tesseract::TessBaseAPI::~TessBaseAPI ( )
virtual

Definition at line 220 of file baseapi.cpp.

220  {
221  End();
222 }

Member Function Documentation

void tesseract::TessBaseAPI::CatchSignals ( )
static

Writes the thresholded image to stderr as a PBM file on receipt of a SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only).

This method used to write the thresholded image to stderr as a PBM file on receipt of a SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only).

Definition at line 256 of file baseapi.cpp.

256  {
257  // Warn API users that an implementation is needed.
258  tprintf("Deprecated method CatchSignals has only a dummy implementation!\n");
259 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
void tesseract::TessBaseAPI::ClearAdaptiveClassifier ( )

Call between pages or documents etc to free up memory and forget adaptive data.

Definition at line 559 of file baseapi.cpp.

559  {
560  if (tesseract_ == nullptr)
561  return;
564 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
void tesseract::TessBaseAPI::ClearResults ( )
protected

Delete the pageres and block list ready for a new page.

Delete the pageres and clear the block list ready for a new page.

Definition at line 2102 of file baseapi.cpp.

2102  {
2103  if (tesseract_ != nullptr) {
2104  tesseract_->Clear();
2105  }
2106  delete page_res_;
2107  page_res_ = nullptr;
2108  recognition_done_ = false;
2109  if (block_list_ == nullptr)
2110  block_list_ = new BLOCK_LIST;
2111  else
2112  block_list_->clear();
2113  if (paragraph_models_ != nullptr) {
2114  paragraph_models_->delete_data_pointers();
2115  delete paragraph_models_;
2116  paragraph_models_ = nullptr;
2117  }
2118 }
void delete_data_pointers()
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:901
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:893
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:894
void tesseract::TessBaseAPI::DetectParagraphs ( bool  after_text_recognition)
protected

Definition at line 2236 of file baseapi.cpp.

2236  {
2237  int debug_level = 0;
2238  GetIntVariable("paragraph_debug_level", &debug_level);
2239  if (paragraph_models_ == nullptr)
2241  MutableIterator *result_it = GetMutableIterator();
2242  do { // Detect paragraphs for this block
2244  ::tesseract::DetectParagraphs(debug_level, after_text_recognition,
2245  result_it, &models);
2246  *paragraph_models_ += models;
2247  } while (result_it->Next(RIL_BLOCK));
2248  delete result_it;
2249 }
MutableIterator * GetMutableIterator()
Definition: baseapi.cpp:1299
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:292
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:893
int tesseract::TessBaseAPI::FindLines ( )
protected

Find lines from the image making the BLOCK_LIST.

Returns
0 on success.

Find lines from the image making the BLOCK_LIST.

Definition at line 2026 of file baseapi.cpp.

2026  {
2027  if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
2028  tprintf("Please call SetImage before attempting recognition.\n");
2029  return -1;
2030  }
2031  if (recognition_done_)
2032  ClearResults();
2033  if (!block_list_->empty()) {
2034  return 0;
2035  }
2036  if (tesseract_ == nullptr) {
2037  tesseract_ = new Tesseract;
2038  #ifndef DISABLED_LEGACY_ENGINE
2040  #endif
2041  }
2042  if (tesseract_->pix_binary() == nullptr &&
2044  return -1;
2045  }
2046 
2048 
2049 #ifndef DISABLED_LEGACY_ENGINE
2051  if (equ_detect_ == nullptr && datapath_ != nullptr) {
2052  equ_detect_ = new EquationDetect(datapath_->string(), nullptr);
2053  }
2054  if (equ_detect_ == nullptr) {
2055  tprintf("Warning: Could not set equation detector\n");
2056  } else {
2058  }
2059  }
2060 #endif // ndef DISABLED_LEGACY_ENGINE
2061 
2062  Tesseract* osd_tess = osd_tesseract_;
2063  OSResults osr;
2065  osd_tess == nullptr) {
2066  if (strcmp(language_->string(), "osd") == 0) {
2067  osd_tess = tesseract_;
2068  } else {
2069  osd_tesseract_ = new Tesseract;
2070  TessdataManager mgr(reader_);
2071  if (datapath_ == nullptr) {
2072  tprintf("Warning: Auto orientation and script detection requested,"
2073  " but data path is undefined\n");
2074  delete osd_tesseract_;
2075  osd_tesseract_ = nullptr;
2076  } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr,
2077  "osd", OEM_TESSERACT_ONLY,
2078  nullptr, 0, nullptr, nullptr,
2079  false, &mgr) == 0) {
2080  osd_tess = osd_tesseract_;
2083  } else {
2084  tprintf("Warning: Auto orientation and script detection requested,"
2085  " but osd language failed to load\n");
2086  delete osd_tesseract_;
2087  osd_tesseract_ = nullptr;
2088  }
2089  }
2090  }
2091 
2092  if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
2093  return -1;
2094 
2095  // If Devanagari is being recognized, we use different images for page seg
2096  // and for OCR.
2097  tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
2098  return 0;
2099 }
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:889
STRING * input_file_
Name used by training code.
Definition: baseapi.h:896
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:53
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:898
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
STRING * language_
Last initialized language.
Definition: baseapi.h:899
bool PSM_OSD_ENABLED(int pageseg_mode)
Definition: publictypes.h:191
void SetEquationDetect(EquationDetect *detector)
int GetSourceYResolution() const
Definition: thresholder.h:89
int SegmentPage(const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
Definition: pagesegmain.cpp:99
virtual TESS_LOCAL bool Threshold(Pix **pix)
Definition: baseapi.cpp:1972
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:891
const char * string() const
Definition: strngs.cpp:194
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:901
void InitAdaptiveClassifier(TessdataManager *mgr)
Definition: adaptmatch.cpp:527
Pix * pix_binary() const
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
Definition: tessedit.cpp:284
void set_source_resolution(int ppi)
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:890
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:894
void tesseract::TessBaseAPI::GetAvailableLanguagesAsVector ( GenericVector< STRING > *  langs) const

Returns the available languages in the sorted vector of STRINGs.

Definition at line 450 of file baseapi.cpp.

451  {
452  langs->clear();
453  if (tesseract_ != nullptr) {
454  addAvailableLanguages(tesseract_->datadir, "", langs);
455  langs->sort(CompareSTRING);
456  }
457 }
STRING datadir
Definition: ccutil.h:67
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
bool tesseract::TessBaseAPI::GetBoolVariable ( const char *  name,
bool *  value 
) const

Definition at line 300 of file baseapi.cpp.

300  {
301  auto *p = ParamUtils::FindParam<BoolParam>(
302  name, GlobalParams()->bool_params, tesseract_->params()->bool_params);
303  if (p == nullptr) return false;
304  *value = bool(*p);
305  return true;
306 }
ParamsVectors * params()
Definition: ccutil.h:65
tesseract::ParamsVectors * GlobalParams()
Definition: params.cpp:35
GenericVector< BoolParam * > bool_params
Definition: params.h:44
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
const char * tesseract::TessBaseAPI::GetDatapath ( )

Definition at line 958 of file baseapi.cpp.

958  {
959  return tesseract_->datadir.c_str();
960 }
STRING datadir
Definition: ccutil.h:67
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
const char * c_str() const
Definition: strngs.cpp:205
bool tesseract::TessBaseAPI::GetDoubleVariable ( const char *  name,
double *  value 
) const

Definition at line 314 of file baseapi.cpp.

314  {
315  auto *p = ParamUtils::FindParam<DoubleParam>(
316  name, GlobalParams()->double_params, tesseract_->params()->double_params);
317  if (p == nullptr) return false;
318  *value = (double)(*p);
319  return true;
320 }
ParamsVectors * params()
Definition: ccutil.h:65
tesseract::ParamsVectors * GlobalParams()
Definition: params.cpp:35
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
GenericVector< DoubleParam * > double_params
Definition: params.h:46
const char * tesseract::TessBaseAPI::GetInitLanguagesAsString ( ) const

Returns the languages string used in the last valid initialization. If the last initialization specified "deu+hin" then that will be returned. If hin loaded eng automatically as well, then that will not be included in this list. To find the languages actually loaded use GetLoadedLanguagesAsVector. The returned string should NOT be deleted.

Definition at line 426 of file baseapi.cpp.

426  {
427  return (language_ == nullptr || language_->string() == nullptr) ?
428  "" : language_->string();
429 }
STRING * language_
Last initialized language.
Definition: baseapi.h:899
const char * string() const
Definition: strngs.cpp:194
Pix * tesseract::TessBaseAPI::GetInputImage ( )

Definition at line 950 of file baseapi.cpp.

950 { return tesseract_->pix_original(); }
Pix * pix_original() const
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
const char * tesseract::TessBaseAPI::GetInputName ( )

These functions are required for searchable PDF output. We need our hands on the input file so that we can include it in the PDF without transcoding. If that is not possible, we need the original image. Finally, resolution metadata is stored in the PDF so we need that as well.

Definition at line 952 of file baseapi.cpp.

952  {
953  if (input_file_)
954  return input_file_->c_str();
955  return nullptr;
956 }
STRING * input_file_
Name used by training code.
Definition: baseapi.h:896
const char * c_str() const
Definition: strngs.cpp:205
bool tesseract::TessBaseAPI::GetIntVariable ( const char *  name,
int *  value 
) const

Returns true if the parameter was found among Tesseract parameters. Fills in value with the value of the parameter.

Definition at line 292 of file baseapi.cpp.

292  {
293  auto *p = ParamUtils::FindParam<IntParam>(
294  name, GlobalParams()->int_params, tesseract_->params()->int_params);
295  if (p == nullptr) return false;
296  *value = (int32_t)(*p);
297  return true;
298 }
ParamsVectors * params()
Definition: ccutil.h:65
GenericVector< IntParam * > int_params
Definition: params.h:43
tesseract::ParamsVectors * GlobalParams()
Definition: params.cpp:35
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
void tesseract::TessBaseAPI::GetLoadedLanguagesAsVector ( GenericVector< STRING > *  langs) const

Returns the loaded languages in the vector of STRINGs. Includes all languages loaded by the last Init, including those loaded as dependencies of other loaded languages.

Definition at line 436 of file baseapi.cpp.

437  {
438  langs->clear();
439  if (tesseract_ != nullptr) {
440  langs->push_back(tesseract_->lang);
441  int num_subs = tesseract_->num_sub_langs();
442  for (int i = 0; i < num_subs; ++i)
443  langs->push_back(tesseract_->get_sub_lang(i)->lang);
444  }
445 }
Tesseract * get_sub_lang(int index) const
int num_sub_langs() const
int push_back(T object)
STRING lang
Definition: ccutil.h:69
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
LTRResultIterator * tesseract::TessBaseAPI::GetLTRIterator ( )
protected

Return an LTR Result Iterator – used only for training, as we really want to ignore all BiDi smarts at that point. Delete once you're done with it.

Get a left-to-right iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use.

Definition at line 1265 of file baseapi.cpp.

1265  {
1266  if (tesseract_ == nullptr || page_res_ == nullptr)
1267  return nullptr;
1268  return new LTRResultIterator(
1272 }
int GetScaledYResolution() const
Definition: thresholder.h:92
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
size_t tesseract::TessBaseAPI::getOpenCLDevice ( void **  data)
static

If compiled with OpenCL AND an available OpenCL device is deemed faster than serial code, then "device" is populated with the cl_device_id and returns sizeof(cl_device_id) otherwise *device=nullptr and returns 0.

Definition at line 238 of file baseapi.cpp.

238  {
239 #ifdef USE_OPENCL
240  ds_device device = OpenclDevice::getDeviceSelection();
241  if (device.type == DS_DEVICE_OPENCL_DEVICE) {
242  *data = new cl_device_id;
243  memcpy(*data, &device.oclDeviceID, sizeof(cl_device_id));
244  return sizeof(cl_device_id);
245  }
246 #endif
247 
248  *data = nullptr;
249  return 0;
250 }
PageSegMode tesseract::TessBaseAPI::GetPageSegMode ( ) const

Return the current page segmentation mode.

Definition at line 516 of file baseapi.cpp.

516  {
517  if (tesseract_ == nullptr)
518  return PSM_SINGLE_BLOCK;
519  return static_cast<PageSegMode>(
520  static_cast<int>(tesseract_->tessedit_pageseg_mode));
521 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:172
int tesseract::TessBaseAPI::GetSourceYResolution ( )

Definition at line 962 of file baseapi.cpp.

962  {
963  return thresholder_->GetSourceYResolution();
964 }
int GetSourceYResolution() const
Definition: thresholder.h:89
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
const char * tesseract::TessBaseAPI::GetStringVariable ( const char *  name) const

Returns the pointer to the string that represents the value of the parameter if it was found among Tesseract parameters.

Definition at line 308 of file baseapi.cpp.

308  {
309  auto *p = ParamUtils::FindParam<StringParam>(
310  name, GlobalParams()->string_params, tesseract_->params()->string_params);
311  return (p != nullptr) ? p->string() : nullptr;
312 }
ParamsVectors * params()
Definition: ccutil.h:65
tesseract::ParamsVectors * GlobalParams()
Definition: params.cpp:35
GenericVector< StringParam * > string_params
Definition: params.h:45
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
bool tesseract::TessBaseAPI::GetVariableAsString ( const char *  name,
STRING *  val 
)

Get value of named variable as a string, if it exists.

Definition at line 323 of file baseapi.cpp.

323  {
324  return ParamUtils::GetParamAsString(name, tesseract_->params(), val);
325 }
ParamsVectors * params()
Definition: ccutil.h:65
static bool GetParamAsString(const char *name, const ParamsVectors *member_params, STRING *value)
Definition: params.cpp:142
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language,
OcrEngineMode  oem,
char **  configs,
int  configs_size,
const GenericVector< STRING > *  vars_vec,
const GenericVector< STRING > *  vars_values,
bool  set_only_non_debug_params 
)

Instances are now mostly thread-safe and totally independent, but some global parameters remain. Basically it is safe to use multiple TessBaseAPIs in different threads in parallel, UNLESS: you use SetVariable on some of the Params in classify and textord. If you do, then the effect will be to change it for all your instances.

Start tesseract. Returns zero on success and -1 on failure. NOTE that the only members that may be called before Init are those listed above here in the class definition.

The datapath must be the name of the tessdata directory. The language is (usually) an ISO 639-3 string or nullptr will default to eng. It is entirely safe (and eventually will be efficient too) to call Init multiple times on the same instance to change language, or just to reset the classifier. The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating that multiple languages are to be loaded. Eg hin+eng will load Hindi and English. Languages may specify internally that they want to be loaded with one or more other languages, so the ~ sign is available to override that. Eg if hin were set to load eng by default, then hin+~eng would force loading only hin. The number of loaded languages is limited only by memory, with the caveat that loading additional languages will impact both speed and accuracy, as there is more work to do to decide on the applicable language, and there is more chance of hallucinating incorrect words. WARNING: On changing languages, all Tesseract parameters are reset back to their default values. (Which may vary between languages.) If you have a rare need to set a Variable that controls initialization for a second call to Init you should explicitly call End() and then use SetVariable before Init. This is only a very rare use case, since there are very few uses that require any parameters to be set before Init.

If set_only_non_debug_params is true, only params that do not contain "debug" in the name will be set.

The datapath must be the name of the data directory or some other file in which the data directory resides (for instance argv[0].) The language is (usually) an ISO 639-3 string or nullptr will default to eng. If numeric_mode is true, then only digits and Roman numerals will be returned.

Returns
0 on success and -1 on initialization failure.

Definition at line 340 of file baseapi.cpp.

344  {
345  return Init(datapath, 0, language, oem, configs, configs_size, vars_vec,
346  vars_values, set_only_non_debug_params, nullptr);
347 }
OcrEngineMode oem() const
Definition: baseapi.h:803
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:340
int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language,
OcrEngineMode  oem 
)
inline

Definition at line 220 of file baseapi.h.

220  {
221  return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
222  }
OcrEngineMode oem() const
Definition: baseapi.h:803
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:340
int tesseract::TessBaseAPI::Init ( const char *  datapath,
const char *  language 
)
inline

Definition at line 223 of file baseapi.h.

223  {
224  return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);
225  }
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:340
int tesseract::TessBaseAPI::Init ( const char *  data,
int  data_size,
const char *  language,
OcrEngineMode  mode,
char **  configs,
int  configs_size,
const GenericVector< STRING > *  vars_vec,
const GenericVector< STRING > *  vars_values,
bool  set_only_non_debug_params,
FileReader  reader 
)

Definition at line 352 of file baseapi.cpp.

356  {
357  // Default language is "eng".
358  if (language == nullptr) language = "eng";
359  STRING datapath = data_size == 0 ? data : language;
360  // If the datapath, OcrEngineMode or the language have changed - start again.
361  // Note that the language_ field stores the last requested language that was
362  // initialized successfully, while tesseract_->lang stores the language
363  // actually used. They differ only if the requested language was nullptr, in
364  // which case tesseract_->lang is set to the Tesseract default ("eng").
365  if (tesseract_ != nullptr &&
366  (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath ||
368  (*language_ != language && tesseract_->lang != language))) {
369  delete tesseract_;
370  tesseract_ = nullptr;
371  }
372 #ifdef USE_OPENCL
373  OpenclDevice od;
374  od.InitEnv();
375 #endif
376  bool reset_classifier = true;
377  if (tesseract_ == nullptr) {
378  reset_classifier = false;
379  tesseract_ = new Tesseract;
380  if (reader != nullptr) reader_ = reader;
381  TessdataManager mgr(reader_);
382  if (data_size != 0) {
383  mgr.LoadMemBuffer(language, data, data_size);
384  }
386  datapath.string(),
387  output_file_ != nullptr ? output_file_->string() : nullptr,
388  language, oem, configs, configs_size, vars_vec, vars_values,
389  set_only_non_debug_params, &mgr) != 0) {
390  return -1;
391  }
392  }
393 
394  // Update datapath and language requested for the last valid initialization.
395  if (datapath_ == nullptr)
396  datapath_ = new STRING(datapath);
397  else
398  *datapath_ = datapath;
399  if ((strcmp(datapath_->string(), "") == 0) &&
400  (strcmp(tesseract_->datadir.string(), "") != 0))
402 
403  if (language_ == nullptr)
404  language_ = new STRING(language);
405  else
406  *language_ = language;
408 
409 #ifndef DISABLED_LEGACY_ENGINE
410  // For same language and datapath, just reset the adaptive classifier.
411  if (reset_classifier) {
413  }
414 #endif // ndef DISABLED_LEGACY_ENGINE
415  return 0;
416 }
OcrEngineMode last_oem_requested_
Last OCR engine mode requested.
Definition: baseapi.h:900
Definition: strngs.h:45
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:898
STRING * language_
Last initialized language.
Definition: baseapi.h:899
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:891
const char * string() const
Definition: strngs.cpp:194
STRING lang
Definition: ccutil.h:69
STRING datadir
Definition: ccutil.h:67
OcrEngineMode oem() const
Definition: baseapi.h:803
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
Definition: tessedit.cpp:284
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:897
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
void tesseract::TessBaseAPI::InitForAnalysePage ( )

Init only for page layout analysis. Use only for calls to SetImage and AnalysePage. Calls that attempt recognition will generate an error.

Definition at line 481 of file baseapi.cpp.

481  {
482  if (tesseract_ == nullptr) {
483  tesseract_ = new Tesseract;
484  #ifndef DISABLED_LEGACY_ENGINE
486  #endif
487  }
488 }
void InitAdaptiveClassifier(TessdataManager *mgr)
Definition: adaptmatch.cpp:527
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
int tesseract::TessBaseAPI::InitLangMod ( const char *  datapath,
const char *  language 
)

Init only the lang model component of Tesseract. The only functions that work after this init are SetVariable and IsValidWord. WARNING: temporary! This function will be removed from here and placed in a separate API at some future time.

Definition at line 467 of file baseapi.cpp.

467  {
468  if (tesseract_ == nullptr)
469  tesseract_ = new Tesseract;
470  else
472  TessdataManager mgr;
473  return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr);
474 }
ParamsVectors * params()
Definition: ccutil.h:65
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language, TessdataManager *mgr)
Definition: tessedit.cpp:450
static void ResetToDefaults(ParamsVectors *member_params)
Definition: params.cpp:205
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
bool tesseract::TessBaseAPI::InternalSetImage ( )
protected

Common code for setting the image. Returns true if Init has been called.

Common code for setting the image.

Definition at line 1955 of file baseapi.cpp.

1955  {
1956  if (tesseract_ == nullptr) {
1957  tprintf("Please call Init before attempting to set an image.\n");
1958  return false;
1959  }
1960  if (thresholder_ == nullptr)
1961  thresholder_ = new ImageThresholder;
1962  ClearResults();
1963  return true;
1964 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
void tesseract::TessBaseAPI::PrintVariables ( FILE *  fp) const

Print Tesseract parameters to the given file.

Definition at line 328 of file baseapi.cpp.

328  {
330 }
ParamsVectors * params()
Definition: ccutil.h:65
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
Definition: params.cpp:180
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
void tesseract::TessBaseAPI::ReadConfigFile ( const char *  filename)

Read a "config" file containing a set of param, value pairs. Searches the standard places: tessdata/configs, tessdata/tessconfigs and also accepts a relative or absolute path name. Note: only non-init params will be set (init params are set by Init()).

Read a "config" file containing a set of parameter name, value pairs. Searches the standard places: tessdata/configs, tessdata/tessconfigs and also accepts a relative or absolute path name.

Definition at line 495 of file baseapi.cpp.

495  {
497 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
void read_config_file(const char *filename, SetParamConstraint constraint)
Definition: tessedit.cpp:49
void tesseract::TessBaseAPI::ReadDebugConfigFile ( const char *  filename)

Same as above, but only set debug params from the given config file.

Definition at line 500 of file baseapi.cpp.

500  {
502 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
void read_config_file(const char *filename, SetParamConstraint constraint)
Definition: tessedit.cpp:49
bool tesseract::TessBaseAPI::SetDebugVariable ( const char *  name,
const char *  value 
)

Definition at line 286 of file baseapi.cpp.

286  {
287  if (tesseract_ == nullptr) tesseract_ = new Tesseract;
289  tesseract_->params());
290 }
ParamsVectors * params()
Definition: ccutil.h:65
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:92
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
void tesseract::TessBaseAPI::SetInputImage ( Pix *  pix)

Definition at line 948 of file baseapi.cpp.

948 { tesseract_->set_pix_original(pix); }
void set_pix_original(Pix *original_pix)
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
void tesseract::TessBaseAPI::SetInputName ( const char *  name)

Set the name of the input file. Needed for training and reading a UNLV zone file, and for searchable PDF output.

Set the name of the input file. Needed only for training and loading a UNLV zone file.

Definition at line 265 of file baseapi.cpp.

265  {
266  if (input_file_ == nullptr)
267  input_file_ = new STRING(name);
268  else
269  *input_file_ = name;
270 }
STRING * input_file_
Name used by training code.
Definition: baseapi.h:896
Definition: strngs.h:45
void tesseract::TessBaseAPI::SetOutputName ( const char *  name)

Set the name of the bonus output files. Needed only for debugging.

Set the name of the output files. Needed only for debugging.

Definition at line 273 of file baseapi.cpp.

273  {
274  if (output_file_ == nullptr)
275  output_file_ = new STRING(name);
276  else
277  *output_file_ = name;
278 }
Definition: strngs.h:45
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:897
void tesseract::TessBaseAPI::SetPageSegMode ( PageSegMode  mode)

Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. The mode is stored as an IntParam so it can also be modified by ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).

Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK for the API (PSM_AUTO is the default of the command-line tool, not of this class). The mode is stored as an IntParam so it can also be modified by ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).

Definition at line 509 of file baseapi.cpp.

509  {
510  if (tesseract_ == nullptr)
511  tesseract_ = new Tesseract;
512  tesseract_->tessedit_pageseg_mode.set_value(mode);
513 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
bool tesseract::TessBaseAPI::SetVariable ( const char *  name,
const char *  value 
)

Set the value of an internal "parameter." Supply the name of the parameter and the value as a string, just as you would in a config file. Returns false if the name lookup failed. Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z. Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode. SetVariable may be used before Init, but settings will revert to defaults on End().

Note: Must be called after Init(). Only works for non-init variables (init variables should be passed to Init()).

Definition at line 280 of file baseapi.cpp.

280  {
281  if (tesseract_ == nullptr) tesseract_ = new Tesseract;
283  tesseract_->params());
284 }
ParamsVectors * params()
Definition: ccutil.h:65
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:92
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
char * tesseract::TessBaseAPI::TesseractRect ( const unsigned char *  imagedata,
int  bytes_per_pixel,
int  bytes_per_line,
int  left,
int  top,
int  width,
int  height 
)

Recognize a rectangle from an image and return the result as a string. May be called many times for a single Init. Currently has no error checking. Greyscale of 8 and color of 24 or 32 bits per pixel may be given. Palette color images will not work properly and must be converted to 24 bit. Binary images of 1 bit per pixel may also be given but they must be byte packed with the MSB of the first byte being the first pixel, and a 1 represents WHITE. For binary images set bytes_per_pixel=0. The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Note that TesseractRect is the simplified convenience interface. For advanced uses, use SetImage, (optionally) SetRectangle, Recognize, and one or more of the Get*Text functions below.

Recognize a rectangle from an image and return the result as a string. May be called many times for a single Init. Currently has no error checking. Greyscale of 8 and color of 24 or 32 bits per pixel may be given. Palette color images will not work properly and must be converted to 24 bit. Binary images of 1 bit per pixel may also be given but they must be byte packed with the MSB of the first byte being the first pixel, and a one pixel is WHITE. For binary images set bytes_per_pixel=0. The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Definition at line 536 of file baseapi.cpp.

540  {
541  if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize)
542  return nullptr; // Nothing worth doing.
543 
544  // Since this original api didn't give the exact size of the image,
545  // we have to invent a reasonable value.
546  int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
547  SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
548  bytes_per_pixel, bytes_per_line);
549  SetRectangle(left, top, width, height);
550 
551  return GetUTF8Text();
552 }
void SetRectangle(int left, int top, int width, int height)
Definition: baseapi.cpp:618
const int kMinRectSize
Definition: baseapi.cpp:98
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:574
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
int tesseract::TessBaseAPI::TextLength ( int *  blob_count)
protected

Return the length of the output text string, as UTF8, assuming one newline per line and one per block, with a terminator, and assuming a single character reject marker for each rejected character. Also return the number of recognized blobs in blob_count.

Return the length of the output text string, as UTF8, assuming liberally two spacing marks after each word (as paragraphs end with two newlines), and assuming a single character reject marker for each rejected character. Also return the number of recognized blobs in blob_count.

Definition at line 2127 of file baseapi.cpp.

2127  {
2128  if (tesseract_ == nullptr || page_res_ == nullptr)
2129  return 0;
2130 
2131  PAGE_RES_IT page_res_it(page_res_);
2132  int total_length = 2;
2133  int total_blobs = 0;
2134  // Iterate over the data structures to extract the recognition result.
2135  for (page_res_it.restart_page(); page_res_it.word () != nullptr;
2136  page_res_it.forward()) {
2137  WERD_RES *word = page_res_it.word();
2138  WERD_CHOICE* choice = word->best_choice;
2139  if (choice != nullptr) {
2140  total_blobs += choice->length() + 2;
2141  total_length += choice->unichar_string().length() + 2;
2142  for (int i = 0; i < word->reject_map.length(); ++i) {
2143  if (word->reject_map[i].rejected())
2144  ++total_length;
2145  }
2146  }
2147  }
2148  if (blob_count != nullptr)
2149  *blob_count = total_blobs;
2150  return total_length;
2151 }
int32_t length() const
Definition: rejctmap.h:223
const STRING & unichar_string() const
Definition: ratngs.h:541
int32_t length() const
Definition: strngs.cpp:189
int length() const
Definition: ratngs.h:303
REJMAP reject_map
Definition: pageres.h:286
WERD_CHOICE * best_choice
Definition: pageres.h:234
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
WERD * word
Definition: pageres.h:188
bool tesseract::TessBaseAPI::Threshold ( Pix **  pix)
protected virtual

Run the thresholder to make the thresholded image. pix must not be nullptr (the implementation asserts this); the thresholded image is returned in *pix.

Run the thresholder to make the thresholded image, returned in pix, which must not be nullptr. *pix must be initialized to nullptr, or point to an existing pixDestroyable Pix. The usual argument to Threshold is Tesseract::mutable_pix_binary().

Definition at line 1972 of file baseapi.cpp.

1972  {
1973  ASSERT_HOST(pix != nullptr);
1974  if (*pix != nullptr)
1975  pixDestroy(pix);
1976  // Zero resolution messes up the algorithms, so make sure it is credible.
1977  int user_dpi = 0;
1978  GetIntVariable("user_defined_dpi", &user_dpi);
1979  int y_res = thresholder_->GetScaledYResolution();
1980  if (user_dpi && (user_dpi < kMinCredibleResolution ||
1981  user_dpi > kMaxCredibleResolution)) {
1982  tprintf("Warning: User defined image dpi is outside of expected range "
1983  "(%d - %d)!\n",
1985  }
1986  // Always use user defined dpi
1987  if (user_dpi) {
1989  } else if (y_res < kMinCredibleResolution ||
1990  y_res > kMaxCredibleResolution) {
1991  tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n",
1992  y_res, kMinCredibleResolution);
1994  }
1995  auto pageseg_mode =
1996  static_cast<PageSegMode>(
1997  static_cast<int>(tesseract_->tessedit_pageseg_mode));
1998  if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false;
2002  if (!thresholder_->IsBinary()) {
2005  } else {
2006  tesseract_->set_pix_thresholds(nullptr);
2007  tesseract_->set_pix_grey(nullptr);
2008  }
2009  // Set the internal resolution that is used for layout parameters from the
2010  // estimated resolution, rather than the image resolution, which may be
2011  // fabricated, but we will use the image resolution, if there is one, to
2012  // report output point sizes.
2013  int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
2016  if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
2017  tprintf("Estimated internal resolution %d out of range! "
2018  "Corrected to %d.\n",
2019  thresholder_->GetScaledEstimatedResolution(), estimated_res);
2020  }
2021  tesseract_->set_source_resolution(estimated_res);
2022  return true;
2023 }
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:292
bool IsBinary() const
Returns true if the source image is binary.
Definition: thresholder.h:74
void set_pix_grey(Pix *grey_pix)
int GetScaledEstimatedResolution() const
Definition: thresholder.h:105
void set_pix_thresholds(Pix *thresholds)
constexpr int kMaxCredibleResolution
Definition: publictypes.h:40
void SetSourceYResolution(int ppi)
Definition: thresholder.h:85
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
virtual Pix * GetPixRectGrey()
int GetScaledYResolution() const
Definition: thresholder.h:92
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix **pix)
Returns false on error.
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:108
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
#define ASSERT_HOST(x)
Definition: errcode.h:88
constexpr int kMinCredibleResolution
Definition: publictypes.h:38
void set_source_resolution(int ppi)
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
virtual Pix * GetPixRectThresholds()
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
const char * tesseract::TessBaseAPI::Version ( )
static

Returns the version identifier as a static string. Do not delete.

Definition at line 227 of file baseapi.cpp.

227  {
228  return PACKAGE_VERSION;
229 }

Member Data Documentation

BLOCK_LIST* tesseract::TessBaseAPI::block_list_
protected

The page layout.

Definition at line 894 of file baseapi.h.

STRING* tesseract::TessBaseAPI::datapath_
protected

Current location of tessdata.

Definition at line 898 of file baseapi.h.

EquationDetect* tesseract::TessBaseAPI::equ_detect_
protected

The equation detector.

Definition at line 890 of file baseapi.h.

STRING* tesseract::TessBaseAPI::input_file_
protected

Name used by training code.

Definition at line 896 of file baseapi.h.

STRING* tesseract::TessBaseAPI::language_
protected

Last initialized language.

Definition at line 899 of file baseapi.h.

OcrEngineMode tesseract::TessBaseAPI::last_oem_requested_
protected

Last ocr language mode requested.

Definition at line 900 of file baseapi.h.

Tesseract* tesseract::TessBaseAPI::osd_tesseract_
protected

For orientation & script detection.

Definition at line 889 of file baseapi.h.

STRING* tesseract::TessBaseAPI::output_file_
protected

Name used by debug code.

Definition at line 897 of file baseapi.h.

PAGE_RES* tesseract::TessBaseAPI::page_res_
protected

The page-level data.

Definition at line 895 of file baseapi.h.

GenericVector<ParagraphModel *>* tesseract::TessBaseAPI::paragraph_models_
protected

Definition at line 893 of file baseapi.h.

FileReader tesseract::TessBaseAPI::reader_
protected

Reads files from any filesystem.

Definition at line 891 of file baseapi.h.

bool tesseract::TessBaseAPI::recognition_done_
protected

page_res_ contains recognition data.

Definition at line 901 of file baseapi.h.

Tesseract* tesseract::TessBaseAPI::tesseract_
protected

The underlying data object.

Definition at line 888 of file baseapi.h.

ImageThresholder* tesseract::TessBaseAPI::thresholder_
protected

Image thresholding module.

Definition at line 892 of file baseapi.h.

TruthCallback* tesseract::TessBaseAPI::truth_cb_
protected

Definition at line 902 of file baseapi.h.


The documentation for this class was generated from the following files: