tesseract  4.1.3
Advanced API

Functions

void tesseract::TessBaseAPI::SetImage (const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
 
void tesseract::TessBaseAPI::SetImage (Pix *pix)
 
void tesseract::TessBaseAPI::SetSourceResolution (int ppi)
 
void tesseract::TessBaseAPI::SetRectangle (int left, int top, int width, int height)
 
void tesseract::TessBaseAPI::SetThresholder (ImageThresholder *thresholder)
 
Pix * tesseract::TessBaseAPI::GetThresholdedImage ()
 
Boxa * tesseract::TessBaseAPI::GetRegions (Pixa **pixa)
 
Boxa * tesseract::TessBaseAPI::GetTextlines (bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * tesseract::TessBaseAPI::GetTextlines (Pixa **pixa, int **blockids)
 
Boxa * tesseract::TessBaseAPI::GetStrips (Pixa **pixa, int **blockids)
 
Boxa * tesseract::TessBaseAPI::GetWords (Pixa **pixa)
 
Boxa * tesseract::TessBaseAPI::GetConnectedComponents (Pixa **cc)
 
Boxa * tesseract::TessBaseAPI::GetComponentImages (PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * tesseract::TessBaseAPI::GetComponentImages (const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
 
int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor () const
 
PageIterator * tesseract::TessBaseAPI::AnalyseLayout ()
 
PageIterator * tesseract::TessBaseAPI::AnalyseLayout (bool merge_similar_words)
 
int tesseract::TessBaseAPI::Recognize (ETEXT_DESC *monitor)
 
int tesseract::TessBaseAPI::RecognizeForChopTest (ETEXT_DESC *monitor)
 
bool tesseract::TessBaseAPI::ProcessPages (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool tesseract::TessBaseAPI::ProcessPagesInternal (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool tesseract::TessBaseAPI::ProcessPage (Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
ResultIterator * tesseract::TessBaseAPI::GetIterator ()
 
MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ()
 
char * tesseract::TessBaseAPI::GetUTF8Text ()
 
char * tesseract::TessBaseAPI::GetHOCRText (ETEXT_DESC *monitor, int page_number)
 
char * tesseract::TessBaseAPI::GetHOCRText (int page_number)
 
char * tesseract::TessBaseAPI::GetAltoText (ETEXT_DESC *monitor, int page_number)
 
char * tesseract::TessBaseAPI::GetAltoText (int page_number)
 
char * tesseract::TessBaseAPI::GetTSVText (int page_number)
 
char * tesseract::TessBaseAPI::GetLSTMBoxText (int page_number)
 
char * tesseract::TessBaseAPI::GetBoxText (int page_number)
 
char * tesseract::TessBaseAPI::GetWordStrBoxText (int page_number)
 
char * tesseract::TessBaseAPI::GetUNLVText ()
 
bool tesseract::TessBaseAPI::DetectOrientationScript (int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
 
char * tesseract::TessBaseAPI::GetOsdText (int page_number)
 
int tesseract::TessBaseAPI::MeanTextConf ()
 
int * tesseract::TessBaseAPI::AllWordConfidences ()
 
bool tesseract::TessBaseAPI::AdaptToWordStr (PageSegMode mode, const char *wordstr)
 
void tesseract::TessBaseAPI::Clear ()
 
void tesseract::TessBaseAPI::End ()
 
static void tesseract::TessBaseAPI::ClearPersistentCache ()
 
int tesseract::TessBaseAPI::IsValidWord (const char *word)
 
bool tesseract::TessBaseAPI::IsValidCharacter (const char *utf8_character)
 
bool tesseract::TessBaseAPI::GetTextDirection (int *out_offset, float *out_slope)
 
void tesseract::TessBaseAPI::SetDictFunc (DictFunc f)
 
void tesseract::TessBaseAPI::SetProbabilityInContextFunc (ProbabilityInContextFunc f)
 
bool tesseract::TessBaseAPI::DetectOS (OSResults *)
 
void tesseract::TessBaseAPI::GetBlockTextOrientations (int **block_orientation, bool **vertical_writing)
 
void tesseract::TessBaseAPI::SetFillLatticeFunc (FillLatticeFunc f)
 
BLOCK_LIST * tesseract::TessBaseAPI::FindLinesCreateBlockList ()
 
static void tesseract::TessBaseAPI::DeleteBlockList (BLOCK_LIST *block_list)
 
static ROW * tesseract::TessBaseAPI::MakeTessOCRRow (float baseline, float xheight, float descender, float ascender)
 
static TBLOB * tesseract::TessBaseAPI::MakeTBLOB (Pix *pix)
 
static void tesseract::TessBaseAPI::NormalizeTBLOB (TBLOB *tblob, ROW *row, bool numeric_mode)
 
void tesseract::TessBaseAPI::GetFeaturesForBlob (TBLOB *blob, INT_FEATURE_STRUCT *int_features, int *num_features, int *feature_outline_index)
 
static ROW * tesseract::TessBaseAPI::FindRowForBox (BLOCK_LIST *blocks, int left, int top, int right, int bottom)
 
void tesseract::TessBaseAPI::RunAdaptiveClassifier (TBLOB *blob, int num_max_matches, int *unichar_ids, float *ratings, int *num_matches_returned)
 
const char * tesseract::TessBaseAPI::GetUnichar (int unichar_id)
 
const Dawg * tesseract::TessBaseAPI::GetDawg (int i) const
 
int tesseract::TessBaseAPI::NumDawgs () const
 
Tesseract * tesseract::TessBaseAPI::tesseract () const
 
OcrEngineMode tesseract::TessBaseAPI::oem () const
 
void tesseract::TessBaseAPI::InitTruthCallback (TruthCallback *cb)
 
void tesseract::TessBaseAPI::set_min_orientation_margin (double margin)
 

Detailed Description

The following methods break TesseractRect into pieces, so you can get hold of the thresholded image, get the text in different formats, get bounding boxes, confidences etc.

Function Documentation

◆ AdaptToWordStr()

bool tesseract::TessBaseAPI::AdaptToWordStr ( PageSegMode  mode,
const char *  wordstr 
)

Applies the given word to the adaptive classifier if possible. The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can tell the boundaries of the graphemes. Assumes that SetImage/SetRectangle have been used to set the image to the given word. The mode arg should be PSM_SINGLE_WORD or PSM_CIRCLE_WORD, as that will be used to control layout analysis. The currently set PageSegMode is preserved. Returns false if adaption was not possible for some reason.

Definition at line 1798 of file baseapi.cpp.

1798  {
1799  int debug = 0;
1800  GetIntVariable("applybox_debug", &debug);
1801  bool success = true;
1802  PageSegMode current_psm = GetPageSegMode();
1803  SetPageSegMode(mode);
1804  SetVariable("classify_enable_learning", "0");
1805  const std::unique_ptr<const char[]> text(GetUTF8Text());
1806  if (debug) {
1807  tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
1808  }
1809  if (text != nullptr) {
1810  PAGE_RES_IT it(page_res_);
1811  WERD_RES* word_res = it.word();
1812  if (word_res != nullptr) {
1813  word_res->word->set_text(wordstr);
1814  // Check to see if text matches wordstr.
1815  int w = 0;
1816  int t;
1817  for (t = 0; text[t] != '\0'; ++t) {
1818  if (text[t] == '\n' || text[t] == ' ')
1819  continue;
1820  while (wordstr[w] == ' ') ++w;
1821  if (text[t] != wordstr[w])
1822  break;
1823  ++w;
1824  }
1825  if (text[t] != '\0' || wordstr[w] != '\0') {
1826  // No match.
1827  delete page_res_;
1828  GenericVector<TBOX> boxes;
1832  PAGE_RES_IT pr_it(page_res_);
1833  if (pr_it.word() == nullptr)
1834  success = false;
1835  else
1836  word_res = pr_it.word();
1837  } else {
1838  word_res->BestChoiceToCorrectText();
1839  }
1840  if (success) {
1841  tesseract_->EnableLearning = true;
1842  tesseract_->LearnWord(nullptr, word_res);
1843  }
1844  } else {
1845  success = false;
1846  }
1847  } else {
1848  success = false;
1849  }
1850  SetPageSegMode(current_psm);
1851  return success;
1852 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:286
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:515
void TidyUp(PAGE_RES *page_res)
void ReSegmentByClassification(PAGE_RES *page_res)
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:250
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:298
void set_text(const char *new_text)
Definition: werd.h:115
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:894
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
WERD * word
Definition: pageres.h:186
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:522
void BestChoiceToCorrectText()
Definition: pageres.cpp:923

◆ AllWordConfidences()

int * tesseract::TessBaseAPI::AllWordConfidences ( )

Returns all word confidences (between 0 and 100) in an array, terminated by -1. The calling function must delete [] after use. The number of confidences should correspond to the number of space-delimited words in GetUTF8Text.

Returns an array of all word confidences, terminated by -1.

Definition at line 1763 of file baseapi.cpp.

1763  {
1764  if (tesseract_ == nullptr ||
1765  (!recognition_done_ && Recognize(nullptr) < 0))
1766  return nullptr;
1767  int n_word = 0;
1768  PAGE_RES_IT res_it(page_res_);
1769  for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward())
1770  n_word++;
1771 
1772  int* conf = new int[n_word+1];
1773  n_word = 0;
1774  for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
1775  WERD_RES *word = res_it.word();
1776  WERD_CHOICE* choice = word->best_choice;
1777  int w_conf = static_cast<int>(100 + 5 * choice->certainty());
1778  // This is the eq for converting Tesseract confidence to 1..100
1779  if (w_conf < 0) w_conf = 0;
1780  if (w_conf > 100) w_conf = 100;
1781  conf[n_word++] = w_conf;
1782  }
1783  conf[n_word] = -1;
1784  return conf;
1785 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
WERD * word
Definition: pageres.h:186
float certainty() const
Definition: ratngs.h:320
WERD_CHOICE * best_choice
Definition: pageres.h:241
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:901

◆ AnalyseLayout() [1/2]

PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( )

Runs page layout analysis in the mode set by SetPageSegMode. May optionally be called prior to Recognize to get access to just the page layout results. Returns an iterator to the results. If merge_similar_words is true, words are combined where suitable for use with a line recognizer. Use if you want to use AnalyseLayout to find the textlines, and then want to process textline fragments with an external line recognizer. Returns nullptr on error or an empty page. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 810 of file baseapi.cpp.

810 { return AnalyseLayout(false); }
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:810

◆ AnalyseLayout() [2/2]

PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( bool  merge_similar_words)

Definition at line 812 of file baseapi.cpp.

812  {
813  if (FindLines() == 0) {
814  if (block_list_->empty())
815  return nullptr; // The page was empty.
816  page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr);
817  DetectParagraphs(false);
818  return new PageIterator(
822  }
823  return nullptr;
824 }
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2277
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:894
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2067
int GetScaledYResolution() const
Definition: thresholder.h:92
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895

◆ Clear()

void tesseract::TessBaseAPI::Clear ( )

Free up recognition results and any stored image data, without actually freeing any recognition data that would be time-consuming to reload. Afterwards, you must call SetImage or TesseractRect before doing any Recognize or Get* operation.

Definition at line 1861 of file baseapi.cpp.

1861  {
1862  if (thresholder_ != nullptr)
1863  thresholder_->Clear();
1864  ClearResults();
1865  if (tesseract_ != nullptr) SetInputImage(nullptr);
1866 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:956
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:48

◆ ClearPersistentCache()

void tesseract::TessBaseAPI::ClearPersistentCache ( )
static

Clear any library-level memory caches. There are a variety of expensive-to-load constant data structures (mostly language dictionaries) that are cached globally – surviving the Init() and End() of individual TessBaseAPI's. This function allows the clearing of these caches.

Definition at line 1909 of file baseapi.cpp.

1909  {
1911 }
void DeleteUnusedDawgs()
Definition: dawg_cache.h:43
static TESS_API DawgCache * GlobalDawgCache()
Definition: dict.cpp:184

◆ DeleteBlockList()

void tesseract::TessBaseAPI::DeleteBlockList ( BLOCK_LIST *  block_list)
static

Delete a block list. This is to keep BLOCK_LIST pointer opaque and let go of including the other headers.

Definition at line 2345 of file baseapi.cpp.

2345  {
2346  delete block_list;
2347 }

◆ DetectOrientationScript()

bool tesseract::TessBaseAPI::DetectOrientationScript ( int *  orient_deg,
float *  orient_conf,
const char **  script_name,
float *  script_conf 
)

Detect the orientation of the input image and apparent script (alphabet). orient_deg is the detected clockwise rotation of the input image in degrees (0, 90, 180, 270). orient_conf is the confidence (15.0 is reasonably confident). script_name is an ASCII string, the name of the script, e.g. "Latin". script_conf is the confidence level in the script. Returns true on success and writes values to each parameter as an output.

Definition at line 1685 of file baseapi.cpp.

1687  {
1688  OSResults osr;
1689 
1690  bool osd = DetectOS(&osr);
1691  if (!osd) {
1692  return false;
1693  }
1694 
1695  int orient_id = osr.best_result.orientation_id;
1696  int script_id = osr.get_best_script(orient_id);
1697  if (orient_conf) *orient_conf = osr.best_result.oconfidence;
1698  if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees
1699 
1700  if (script_name) {
1701  const char* script = osr.unicharset->get_script_from_script_id(script_id);
1702 
1703  *script_name = script;
1704  }
1705 
1706  if (script_conf) *script_conf = osr.best_result.sconfidence;
1707 
1708  return true;
1709 }
const char * get_script_from_script_id(int id) const
Definition: unicharset.h:854
OSBestResult best_result
Definition: osdetect.h:81
bool DetectOS(OSResults *)
Definition: baseapi.cpp:2199
int orientation_id
Definition: osdetect.h:43
TESS_API int get_best_script(int orientation_id) const
Definition: osdetect.cpp:112
float oconfidence
Definition: osdetect.h:46
float sconfidence
Definition: osdetect.h:45
UNICHARSET * unicharset
Definition: osdetect.h:80

◆ DetectOS()

bool tesseract::TessBaseAPI::DetectOS ( OSResults *  osr)

Estimates the Orientation And Script of the image.

Returns
true if the image was processed successfully.

Estimates the Orientation And Script of the image. Returns true if the image was processed successfully.

Definition at line 2199 of file baseapi.cpp.

2199  {
2200  if (tesseract_ == nullptr)
2201  return false;
2202  ClearResults();
2203  if (tesseract_->pix_binary() == nullptr &&
2205  return false;
2206  }
2207 
2208  if (input_file_ == nullptr)
2209  input_file_ = new STRING(kInputFile);
2211 }
int orientation_and_script_detection(STRING &filename, OSResults *osr, tesseract::Tesseract *tess)
Definition: osdetect.cpp:190
Pix * pix_binary() const
virtual TESS_LOCAL bool Threshold(Pix **pix)
Definition: baseapi.cpp:2013
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
STRING * input_file_
Name used by training code.
Definition: baseapi.h:896
Definition: strngs.h:45

◆ End()

void tesseract::TessBaseAPI::End ( )

Close down tesseract and free up all memory. End() is equivalent to destructing and reconstructing your TessBaseAPI. Once End() has been used, none of the other API functions may be used other than Init and anything declared above it in the class definition.

Definition at line 1874 of file baseapi.cpp.

1874  {
1875  Clear();
1876  delete thresholder_;
1877  thresholder_ = nullptr;
1878  delete page_res_;
1879  page_res_ = nullptr;
1880  delete block_list_;
1881  block_list_ = nullptr;
1882  if (paragraph_models_ != nullptr) {
1884  delete paragraph_models_;
1885  paragraph_models_ = nullptr;
1886  }
1887  if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr;
1888  delete tesseract_;
1889  tesseract_ = nullptr;
1890  delete osd_tesseract_;
1891  osd_tesseract_ = nullptr;
1892  delete equ_detect_;
1893  equ_detect_ = nullptr;
1894  delete input_file_;
1895  input_file_ = nullptr;
1896  delete output_file_;
1897  output_file_ = nullptr;
1898  delete datapath_;
1899  datapath_ = nullptr;
1900  delete language_;
1901  language_ = nullptr;
1902 }
STRING * language_
Last initialized language.
Definition: baseapi.h:899
void delete_data_pointers()
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:889
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:898
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:894
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:893
STRING * input_file_
Name used by training code.
Definition: baseapi.h:896
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:897
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:890

◆ FindLinesCreateBlockList()

BLOCK_LIST * tesseract::TessBaseAPI::FindLinesCreateBlockList ( )

Find lines from the image making the BLOCK_LIST.

Definition at line 2333 of file baseapi.cpp.

2333  {
2334  ASSERT_HOST(FindLines() == 0);
2335  BLOCK_LIST* result = block_list_;
2336  block_list_ = nullptr;
2337  return result;
2338 }
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:894
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2067
#define ASSERT_HOST(x)
Definition: errcode.h:88

◆ FindRowForBox()

ROW * tesseract::TessBaseAPI::FindRowForBox ( BLOCK_LIST *  blocks,
int  left,
int  top,
int  right,
int  bottom 
)
static

This method returns the row to which a box of specified dimensions would belong. If no good match is found, it returns nullptr.

Definition at line 2634 of file baseapi.cpp.

2635  {
2636  TBOX box(left, bottom, right, top);
2637  BLOCK_IT b_it(blocks);
2638  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
2639  BLOCK* block = b_it.data();
2640  if (!box.major_overlap(block->pdblk.bounding_box()))
2641  continue;
2642  ROW_IT r_it(block->row_list());
2643  for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
2644  ROW* row = r_it.data();
2645  if (!box.major_overlap(row->bounding_box()))
2646  continue;
2647  WERD_IT w_it(row->word_list());
2648  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
2649  WERD* word = w_it.data();
2650  if (box.major_overlap(word->bounding_box()))
2651  return row;
2652  }
2653  }
2654  }
2655  return nullptr;
2656 }
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:190
WERD_LIST * word_list()
Definition: ocrrow.h:55
Definition: ocrrow.h:36
TBOX bounding_box() const
Definition: werd.cpp:148
TBOX bounding_box() const
Definition: ocrrow.h:88
Definition: ocrblock.h:29
Definition: rect.h:34
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:116
Definition: werd.h:56

◆ GetAltoText() [1/2]

char * tesseract::TessBaseAPI::GetAltoText ( ETEXT_DESC *  monitor,
int  page_number 
)

Make an XML-formatted string with Alto markup from the internal data structures.

Make an XML-formatted string with ALTO markup from the internal data structures.

Definition at line 131 of file altorenderer.cpp.

131  {
132  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
133  return nullptr;
134 
135  int lcnt = 0, tcnt = 0, bcnt = 0, wcnt = 0;
136 
137  if (input_file_ == nullptr) SetInputName(nullptr);
138 
139 #ifdef _WIN32
140  // convert input name from ANSI encoding to utf-8
141  int str16_len =
142  MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0);
143  wchar_t* uni16_str = new WCHAR[str16_len];
144  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
145  uni16_str, str16_len);
146  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr,
147  0, nullptr, nullptr);
148  char* utf8_str = new char[utf8_len];
149  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len,
150  nullptr, nullptr);
151  *input_file_ = utf8_str;
152  delete[] uni16_str;
153  delete[] utf8_str;
154 #endif
155 
156  std::stringstream alto_str;
157  // Use "C" locale (needed for int values larger than 999).
158  alto_str.imbue(std::locale::classic());
159  alto_str
160  << "\t\t<Page WIDTH=\"" << rect_width_ << "\" HEIGHT=\""
161  << rect_height_
162  << "\" PHYSICAL_IMG_NR=\"" << page_number << "\""
163  << " ID=\"page_" << page_number << "\">\n"
164  << "\t\t\t<PrintSpace HPOS=\"0\" VPOS=\"0\""
165  << " WIDTH=\"" << rect_width_ << "\""
166  << " HEIGHT=\"" << rect_height_ << "\">\n";
167 
168  ResultIterator* res_it = GetIterator();
169  while (!res_it->Empty(RIL_BLOCK)) {
170  if (res_it->Empty(RIL_WORD)) {
171  res_it->Next(RIL_WORD);
172  continue;
173  }
174 
175  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
176  alto_str << "\t\t\t\t<ComposedBlock ID=\"cblock_" << bcnt << "\"";
177  AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
178  alto_str << "\n";
179  }
180 
181  if (res_it->IsAtBeginningOf(RIL_PARA)) {
182  alto_str << "\t\t\t\t\t<TextBlock ID=\"block_" << tcnt << "\"";
183  AddBoxToAlto(res_it, RIL_PARA, alto_str);
184  alto_str << "\n";
185  }
186 
187  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
188  alto_str << "\t\t\t\t\t\t<TextLine ID=\"line_" << lcnt << "\"";
189  AddBoxToAlto(res_it, RIL_TEXTLINE, alto_str);
190  alto_str << "\n";
191  }
192 
193  alto_str << "\t\t\t\t\t\t\t<String ID=\"string_" << wcnt << "\"";
194  AddBoxToAlto(res_it, RIL_WORD, alto_str);
195  alto_str << " CONTENT=\"";
196 
197  bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
198  bool last_word_in_tblock = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
199  bool last_word_in_cblock = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
200 
201 
202  int left, top, right, bottom;
203  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
204 
205  do {
206  const std::unique_ptr<const char[]> grapheme(
207  res_it->GetUTF8Text(RIL_SYMBOL));
208  if (grapheme && grapheme[0] != 0) {
209  alto_str << HOcrEscape(grapheme.get()).c_str();
210  }
211  res_it->Next(RIL_SYMBOL);
212  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
213 
214  alto_str << "\"/>";
215 
216  wcnt++;
217 
218  if (last_word_in_line) {
219  alto_str << "\n\t\t\t\t\t\t</TextLine>\n";
220  lcnt++;
221  } else {
222  int hpos = right;
223  int vpos = top;
224  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
225  int width = left - hpos;
226  alto_str << "<SP WIDTH=\"" << width << "\" VPOS=\"" << vpos
227  << "\" HPOS=\"" << hpos << "\"/>\n";
228  }
229 
230  if (last_word_in_tblock) {
231  alto_str << "\t\t\t\t\t</TextBlock>\n";
232  tcnt++;
233  }
234 
235  if (last_word_in_cblock) {
236  alto_str << "\t\t\t\t</ComposedBlock>\n";
237  bcnt++;
238  }
239  }
240 
241  alto_str << "\t\t\t</PrintSpace>\n"
242  << "\t\t</Page>\n";
243  const std::string& text = alto_str.str();
244 
245  char* result = new char[text.length() + 1];
246  strcpy(result, text.c_str());
247  delete res_it;
248  return result;
249 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
const char * string() const
Definition: strngs.cpp:194
ResultIterator * GetIterator()
Definition: baseapi.cpp:1323
STRING * input_file_
Name used by training code.
Definition: baseapi.h:896
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
void SetInputName(const char *name)
Definition: baseapi.cpp:271
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2309

◆ GetAltoText() [2/2]

char * tesseract::TessBaseAPI::GetAltoText ( int  page_number)

Make an XML-formatted string with Alto markup from the internal data structures.

Make an XML-formatted string with ALTO markup from the internal data structures.

Definition at line 123 of file altorenderer.cpp.

123  {
124  return GetAltoText(nullptr, page_number);
125 }
char * GetAltoText(ETEXT_DESC *monitor, int page_number)

◆ GetBlockTextOrientations()

void tesseract::TessBaseAPI::GetBlockTextOrientations ( int **  block_orientation,
bool **  vertical_writing 
)

Return text orientation of each block as determined by an earlier run of layout analysis.

Return text orientation of each block as determined in an earlier page layout analysis operation. Orientation is returned as the number of ccw 90-degree rotations (in [0..3]) required to make the text in the block upright (readable). Note that this may not necessarily be the block orientation preferred for recognition (such as the case of vertical CJK text).

Also returns whether the text in the block is believed to have vertical writing direction (when in an upright page orientation).

The returned array is of length equal to the number of text blocks, which may be less than the total number of blocks. The ordering is intended to be consistent with GetTextlines().

Definition at line 2232 of file baseapi.cpp.

2233  {
2234  delete[] *block_orientation;
2235  *block_orientation = nullptr;
2236  delete[] *vertical_writing;
2237  *vertical_writing = nullptr;
2238  BLOCK_IT block_it(block_list_);
2239 
2240  block_it.move_to_first();
2241  int num_blocks = 0;
2242  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2243  if (!block_it.data()->pdblk.poly_block()->IsText()) {
2244  continue;
2245  }
2246  ++num_blocks;
2247  }
2248  if (!num_blocks) {
2249  tprintf("WARNING: Found no blocks\n");
2250  return;
2251  }
2252  *block_orientation = new int[num_blocks];
2253  *vertical_writing = new bool[num_blocks];
2254  block_it.move_to_first();
2255  int i = 0;
2256  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
2257  block_it.forward()) {
2258  if (!block_it.data()->pdblk.poly_block()->IsText()) {
2259  continue;
2260  }
2261  FCOORD re_rotation = block_it.data()->re_rotation();
2262  float re_theta = re_rotation.angle();
2263  FCOORD classify_rotation = block_it.data()->classify_rotation();
2264  float classify_theta = classify_rotation.angle();
2265  double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI;
2266  if (rot_theta < 0) rot_theta += 4;
2267  int num_rotations = static_cast<int>(rot_theta + 0.5);
2268  (*block_orientation)[i] = num_rotations;
2269  // The classify_rotation is non-zero only if the text has vertical
2270  // writing direction.
2271  (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
2272  ++i;
2273  }
2274 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
Definition: points.h:188
float y() const
Definition: points.h:210
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:894
float angle() const
find angle
Definition: points.h:247

◆ GetBoxText()

char * tesseract::TessBaseAPI::GetBoxText ( int  page_number)

The recognized text is returned as a char* which is coded in the same format as a box file used in training. Constructs coordinates in the original image - not just the rectangle. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

The recognized text is returned as a char* which is coded as a UTF8 box file. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Definition at line 1519 of file baseapi.cpp.

1519  {
1520  if (tesseract_ == nullptr ||
1521  (!recognition_done_ && Recognize(nullptr) < 0))
1522  return nullptr;
1523  int blob_count;
1524  int utf8_length = TextLength(&blob_count);
1525  int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
1527  char* result = new char[total_length];
1528  result[0] = '\0';
1529  int output_length = 0;
1530  LTRResultIterator* it = GetLTRIterator();
1531  do {
1532  int left, top, right, bottom;
1533  if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
1534  const std::unique_ptr</*non-const*/ char[]> text(
1535  it->GetUTF8Text(RIL_SYMBOL));
1536  // Tesseract uses space for recognition failure. Fix to a reject
1537  // character, kTesseractReject so we don't create illegal box files.
1538  for (int i = 0; text[i] != '\0'; ++i) {
1539  if (text[i] == ' ')
1540  text[i] = kTesseractReject;
1541  }
1542  snprintf(result + output_length, total_length - output_length,
1543  "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom,
1544  right, image_height_ - top, page_number);
1545  output_length += strlen(result + output_length);
1546  // Just in case...
1547  if (output_length + kMaxBytesPerLine > total_length)
1548  break;
1549  }
1550  } while (it->Next(RIL_SYMBOL));
1551  delete it;
1552  return result;
1553 }
const int kBytesPerBoxFileLine
Definition: baseapi.cpp:1501
const int kMaxBytesPerLine
Definition: baseapi.cpp:1510
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2168
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
const char kTesseractReject
Definition: baseapi.cpp:106
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:901
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1306

◆ GetComponentImages() [1/2]

Boxa * tesseract::TessBaseAPI::GetComponentImages ( PageIteratorLevel  level,
bool  text_only,
bool  raw_image,
int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each component is also returned as an array of one element per component. delete [] after use. If paraids is not nullptr, the paragraph-id of each component within its block is also returned as an array of one element per component. delete [] after use. If raw_image is true, then portions of the original image are extracted instead of the thresholded image and padded with raw_padding. If text_only is true, then only text components are returned.

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each component is also returned as an array of one element per component. delete [] after use. If text_only is true, then only text components are returned.

Definition at line 706 of file baseapi.cpp.

710  {
711  PageIterator* page_it = GetIterator();
712  if (page_it == nullptr)
713  page_it = AnalyseLayout();
714  if (page_it == nullptr)
715  return nullptr; // Failed.
716 
717  // Count the components to get a size for the arrays.
718  int component_count = 0;
719  int left, top, right, bottom;
720 
721  TessResultCallback<bool>* get_bbox = nullptr;
722  if (raw_image) {
723  // Get bounding box in original raw image with padding.
725  level, raw_padding,
726  &left, &top, &right, &bottom);
727  } else {
728  // Get bounding box from binarized imaged. Note that this could be
729  // differently scaled from the original image.
730  get_bbox = NewPermanentTessCallback(page_it,
732  level, &left, &top, &right, &bottom);
733  }
734  do {
735  if (get_bbox->Run() &&
736  (!text_only || PTIsTextType(page_it->BlockType())))
737  ++component_count;
738  } while (page_it->Next(level));
739 
740  Boxa* boxa = boxaCreate(component_count);
741  if (pixa != nullptr)
742  *pixa = pixaCreate(component_count);
743  if (blockids != nullptr)
744  *blockids = new int[component_count];
745  if (paraids != nullptr)
746  *paraids = new int[component_count];
747 
748  int blockid = 0;
749  int paraid = 0;
750  int component_index = 0;
751  page_it->Begin();
752  do {
753  if (get_bbox->Run() &&
754  (!text_only || PTIsTextType(page_it->BlockType()))) {
755  Box* lbox = boxCreate(left, top, right - left, bottom - top);
756  boxaAddBox(boxa, lbox, L_INSERT);
757  if (pixa != nullptr) {
758  Pix* pix = nullptr;
759  if (raw_image) {
760  pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
761  &top);
762  } else {
763  pix = page_it->GetBinaryImage(level);
764  }
765  pixaAddPix(*pixa, pix, L_INSERT);
766  pixaAddBox(*pixa, lbox, L_CLONE);
767  }
768  if (paraids != nullptr) {
769  (*paraids)[component_index] = paraid;
770  if (page_it->IsAtFinalElement(RIL_PARA, level))
771  ++paraid;
772  }
773  if (blockids != nullptr) {
774  (*blockids)[component_index] = blockid;
775  if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
776  ++blockid;
777  paraid = 0;
778  }
779  }
780  ++component_index;
781  }
782  } while (page_it->Next(level));
783  delete page_it;
784  delete get_bbox;
785  return boxa;
786 }
virtual R Run()=0
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:810
ResultIterator * GetIterator()
Definition: baseapi.cpp:1323
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:82
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258

◆ GetComponentImages() [2/2]

Boxa* tesseract::TessBaseAPI::GetComponentImages ( const PageIteratorLevel  level,
const bool  text_only,
Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 450 of file baseapi.h.

452  {
453  return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
454  }
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:706

◆ GetConnectedComponents()

Boxa * tesseract::TessBaseAPI::GetConnectedComponents ( Pixa **  pixa)

Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. Note: the caller is responsible for calling boxaDestroy() on the returned Boxa array and pixaDestroy() on the cc array.

Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 694 of file baseapi.cpp.

694  {
695  return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr);
696 }
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:706

◆ GetDawg()

const Dawg * tesseract::TessBaseAPI::GetDawg ( int  i) const

Return the pointer to the i-th dawg loaded into tesseract_ object.

Definition at line 2298 of file baseapi.cpp.

2298  {
2299  if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr;
2300  return tesseract_->getDict().GetDawg(i);
2301 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
Dict & getDict() override
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
Definition: dict.h:432
int NumDawgs() const
Definition: baseapi.cpp:2304

◆ GetFeaturesForBlob()

void tesseract::TessBaseAPI::GetFeaturesForBlob ( TBLOB blob,
INT_FEATURE_STRUCT int_features,
int *  num_features,
int *  feature_outline_index 
)

This method returns the features associated with the input image.

This method returns the features associated with the input blob.

Definition at line 2606 of file baseapi.cpp.

2609  {
2610  GenericVector<int> outline_counts;
2613  INT_FX_RESULT_STRUCT fx_info;
2614  tesseract_->ExtractFeatures(*blob, false, &bl_features,
2615  &cn_features, &fx_info, &outline_counts);
2616  if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
2617  *num_features = 0;
2618  return; // Feature extraction failed.
2619  }
2620  *num_features = cn_features.size();
2621  memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
2622  // TODO(rays) Pass outline_counts back and simplify the calling code.
2623  if (feature_outline_index != nullptr) {
2624  int f = 0;
2625  for (int i = 0; i < outline_counts.size(); ++i) {
2626  while (f < outline_counts[i])
2627  feature_outline_index[f++] = i;
2628  }
2629  }
2630 }
bool empty() const
Definition: genericvector.h:91
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:129
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
Definition: intfx.cpp:442
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
int size() const
Definition: genericvector.h:72

◆ GetHOCRText() [1/2]

char * tesseract::TessBaseAPI::GetHOCRText ( ETEXT_DESC monitor,
int  page_number 
)

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. monitor can be used to cancel the recognition and to receive progress callbacks. Returned string must be freed with the delete [] operator.

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Image name/input_file_ can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays. Returned string must be freed with the delete [] operator.

Definition at line 132 of file hocrrenderer.cpp.

132  {
133  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
134  return nullptr;
135 
136  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, scnt = 1, tcnt = 1, gcnt = 1;
137  int page_id = page_number + 1; // hOCR uses 1-based page numbers.
138  bool para_is_ltr = true; // Default direction is LTR
139  const char* paragraph_lang = nullptr;
140  bool font_info = false;
141  bool hocr_boxes = false;
142  GetBoolVariable("hocr_font_info", &font_info);
143  GetBoolVariable("hocr_char_boxes", &hocr_boxes);
144 
145  if (input_file_ == nullptr) SetInputName(nullptr);
146 
147 #ifdef _WIN32
148  // convert input name from ANSI encoding to utf-8
149  int str16_len =
150  MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0);
151  wchar_t* uni16_str = new WCHAR[str16_len];
152  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
153  uni16_str, str16_len);
154  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr,
155  0, nullptr, nullptr);
156  char* utf8_str = new char[utf8_len];
157  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len,
158  nullptr, nullptr);
159  *input_file_ = utf8_str;
160  delete[] uni16_str;
161  delete[] utf8_str;
162 #endif
163 
164  std::stringstream hocr_str;
165  // Use "C" locale (needed for double values x_size and x_descenders).
166  hocr_str.imbue(std::locale::classic());
167  // Use 8 digits for double values.
168  hocr_str.precision(8);
169  hocr_str << " <div class='ocr_page'";
170  hocr_str << " id='"
171  << "page_" << page_id << "'";
172  hocr_str << " title='image \"";
173  if (input_file_) {
174  hocr_str << HOcrEscape(input_file_->string()).c_str();
175  } else {
176  hocr_str << "unknown";
177  }
178  hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " "
179  << rect_width_ << " " << rect_height_ << "; ppageno " << page_number
180  << "'>\n";
181 
182  std::unique_ptr<ResultIterator> res_it(GetIterator());
183  while (!res_it->Empty(RIL_BLOCK)) {
184  if (res_it->Empty(RIL_WORD)) {
185  res_it->Next(RIL_WORD);
186  continue;
187  }
188 
189  // Open any new block/paragraph/textline.
190  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
191  para_is_ltr = true; // reset to default direction
192  hocr_str << " <div class='ocr_carea'"
193  << " id='"
194  << "block_" << page_id << "_" << bcnt << "'";
195  AddBoxTohOCR(res_it.get(), RIL_BLOCK, hocr_str);
196  }
197  if (res_it->IsAtBeginningOf(RIL_PARA)) {
198  hocr_str << "\n <p class='ocr_par'";
199  para_is_ltr = res_it->ParagraphIsLtr();
200  if (!para_is_ltr) {
201  hocr_str << " dir='rtl'";
202  }
203  hocr_str << " id='"
204  << "par_" << page_id << "_" << pcnt << "'";
205  paragraph_lang = res_it->WordRecognitionLanguage();
206  if (paragraph_lang) {
207  hocr_str << " lang='" << paragraph_lang << "'";
208  }
209  AddBoxTohOCR(res_it.get(), RIL_PARA, hocr_str);
210  }
211  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
212  hocr_str << "\n <span class='";
213  switch (res_it->BlockType()) {
214  case PT_HEADING_TEXT:
215  hocr_str << "ocr_header";
216  break;
217  case PT_PULLOUT_TEXT:
218  hocr_str << "ocr_textfloat";
219  break;
220  case PT_CAPTION_TEXT:
221  hocr_str << "ocr_caption";
222  break;
223  default:
224  hocr_str << "ocr_line";
225  }
226  hocr_str << "' id='"
227  << "line_" << page_id << "_" << lcnt << "'";
228  AddBoxTohOCR(res_it.get(), RIL_TEXTLINE, hocr_str);
229  }
230 
231  // Now, process the word...
232  std::vector<std::vector<std::pair<const char*, float>>>* choiceMap =
233  nullptr;
235 
236  choiceMap = res_it->GetBestLSTMSymbolChoices();
237  }
238  hocr_str << "\n <span class='ocrx_word'"
239  << " id='"
240  << "word_" << page_id << "_" << wcnt << "'";
241  int left, top, right, bottom;
242  bool bold, italic, underlined, monospace, serif, smallcaps;
243  int pointsize, font_id;
244  const char* font_name;
245  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
246  font_name =
247  res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
248  &serif, &smallcaps, &pointsize, &font_id);
249  hocr_str << " title='bbox " << left << " " << top << " " << right << " "
250  << bottom << "; x_wconf "
251  << static_cast<int>(res_it->Confidence(RIL_WORD));
252  if (font_info) {
253  if (font_name) {
254  hocr_str << "; x_font " << HOcrEscape(font_name).c_str();
255  }
256  hocr_str << "; x_fsize " << pointsize;
257  }
258  hocr_str << "'";
259  const char* lang = res_it->WordRecognitionLanguage();
260  if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
261  hocr_str << " lang='" << lang << "'";
262  }
263  switch (res_it->WordDirection()) {
264  // Only emit direction if different from current paragraph direction
265  case DIR_LEFT_TO_RIGHT:
266  if (!para_is_ltr) hocr_str << " dir='ltr'";
267  break;
268  case DIR_RIGHT_TO_LEFT:
269  if (para_is_ltr) hocr_str << " dir='rtl'";
270  break;
271  case DIR_MIX:
272  case DIR_NEUTRAL:
273  default: // Do nothing.
274  break;
275  }
276  hocr_str << ">";
277  bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
278  bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
279  bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
280  if (bold) hocr_str << "<strong>";
281  if (italic) hocr_str << "<em>";
282  do {
283  const std::unique_ptr<const char[]> grapheme(
284  res_it->GetUTF8Text(RIL_SYMBOL));
285  if (grapheme && grapheme[0] != 0) {
286  if (hocr_boxes) {
287  res_it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom);
288  hocr_str << "\n <span class='ocrx_cinfo' title='x_bboxes "
289  << left << " " << top << " " << right << " " << bottom
290  << "; x_conf " << res_it->Confidence(RIL_SYMBOL) << "'>";
291  }
292  hocr_str << HOcrEscape(grapheme.get()).c_str();
293  if (hocr_boxes) {
294  hocr_str << "</span>";
295  }
296  }
297  res_it->Next(RIL_SYMBOL);
298  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
299  if (italic) hocr_str << "</em>";
300  if (bold) hocr_str << "</strong>";
301  // If the lstm choice mode is required it is added here
302  if (tesseract_->lstm_choice_mode == 1 && choiceMap != nullptr) {
303  for (auto timestep : *choiceMap) {
304  hocr_str << "\n <span class='ocrx_cinfo'"
305  << " id='"
306  << "timestep_" << page_id << "_" << wcnt << "_" << tcnt << "'"
307  << ">";
308  for (std::pair<const char*, float> conf : timestep) {
309  hocr_str << "<span class='ocr_glyph'"
310  << " id='"
311  << "choice_" << page_id << "_" << wcnt << "_" << gcnt << "'"
312  << " title='x_confs " << int(conf.second * 100) << "'>"
313  << conf.first << "</span>";
314  gcnt++;
315  }
316  hocr_str << "</span>";
317  tcnt++;
318  }
319  } else if (tesseract_->lstm_choice_mode == 2 && choiceMap != nullptr) {
320  for (auto timestep : *choiceMap) {
321  if (timestep.size() > 0) {
322  hocr_str << "\n <span class='ocrx_cinfo'"
323  << " id='"
324  << "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
325  << "'>";
326  for (auto & j : timestep) {
327  hocr_str << "<span class='ocr_glyph'"
328  << " id='"
329  << "choice_" << page_id << "_" << wcnt << "_" << gcnt
330  << "'"
331  << " title='x_confs " << int(j.second * 100)
332  << "'>" << j.first << "</span>";
333  gcnt++;
334  }
335  hocr_str << "</span>";
336  tcnt++;
337  }
338  }
339  }
340  // Close ocrx_word.
341  if (hocr_boxes || tesseract_->lstm_choice_mode > 0) {
342  hocr_str << "\n ";
343  }
344  hocr_str << "</span>";
345  tcnt = 1;
346  gcnt = 1;
347  wcnt++;
348  // Close any ending block/paragraph/textline.
349  if (last_word_in_line) {
350  hocr_str << "\n </span>";
351  lcnt++;
352  }
353  if (last_word_in_para) {
354  hocr_str << "\n </p>\n";
355  pcnt++;
356  para_is_ltr = true; // back to default direction
357  }
358  if (last_word_in_block) {
359  hocr_str << " </div>\n";
360  bcnt++;
361  }
362  }
363  hocr_str << " </div>\n";
364 
365  const std::string& text = hocr_str.str();
366  char* result = new char[text.length() + 1];
367  strcpy(result, text.c_str());
368  return result;
369 }
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:306
const char * c_str() const
Definition: strngs.cpp:205
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
const char * string() const
Definition: strngs.cpp:194
ResultIterator * GetIterator()
Definition: baseapi.cpp:1323
STRING * input_file_
Name used by training code.
Definition: baseapi.h:896
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
void SetInputName(const char *name)
Definition: baseapi.cpp:271
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2309

◆ GetHOCRText() [2/2]

char * tesseract::TessBaseAPI::GetHOCRText ( int  page_number)

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Returned string must be freed with the delete [] operator.

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Image name/input_file_ can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays. Returned string must be freed with the delete [] operator.

Definition at line 119 of file hocrrenderer.cpp.

119  {
120  return GetHOCRText(nullptr, page_number);
121 }
char * GetHOCRText(ETEXT_DESC *monitor, int page_number)

◆ GetIterator()

ResultIterator * tesseract::TessBaseAPI::GetIterator ( )

Get a reading-order iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1323 of file baseapi.cpp.

1323  {
1324  if (tesseract_ == nullptr || page_res_ == nullptr)
1325  return nullptr;
1326  return ResultIterator::StartOfParagraph(LTRResultIterator(
1330 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
int GetScaledYResolution() const
Definition: thresholder.h:92
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895

◆ GetLSTMBoxText()

char * tesseract::TessBaseAPI::GetLSTMBoxText ( int  page_number = 0)

Make a box file for LSTM training from the internal data structures. Constructs coordinates in the original image - not just the rectangle. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Definition at line 38 of file lstmboxrenderer.cpp.

38  {
39  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
40  return nullptr;
41 
42  STRING lstm_box_str("");
43  bool first_word = true;
44  int left = 0, top = 0, right = 0, bottom = 0;
45 
46  LTRResultIterator* res_it = GetLTRIterator();
47  while (!res_it->Empty(RIL_BLOCK)) {
48  if (res_it->Empty(RIL_SYMBOL)) {
49  res_it->Next(RIL_SYMBOL);
50  continue;
51  }
52  if (!first_word) {
53  if (!(res_it->IsAtBeginningOf(RIL_TEXTLINE))) {
54  if (res_it->IsAtBeginningOf(RIL_WORD)) {
55  lstm_box_str.add_str_int(" ", left);
56  AddBoxToLSTM(right, bottom, top, image_height_, page_number,
57  &lstm_box_str);
58  lstm_box_str += "\n"; // end of row for word
59  } // word
60  } else {
61  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
62  lstm_box_str.add_str_int("\t ", left);
63  AddBoxToLSTM(right, bottom, top, image_height_, page_number,
64  &lstm_box_str);
65  lstm_box_str += "\n"; // end of row for line
66  } // line
67  }
68  } // not first word
69  first_word = false;
70  // Use bounding box for whole line for everything
71  res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
72  do {
73  lstm_box_str +=
74  std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
75  res_it->Next(RIL_SYMBOL);
76  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_SYMBOL));
77  lstm_box_str.add_str_int(" ", left);
78  AddBoxToLSTM(right, bottom, top, image_height_, page_number, &lstm_box_str);
79  lstm_box_str += "\n"; // end of row for symbol
80  }
81  if (!first_word) { // if first_word is true => empty page
82  lstm_box_str.add_str_int("\t ", left);
83  AddBoxToLSTM(right, bottom, top, image_height_, page_number, &lstm_box_str);
84  lstm_box_str += "\n"; // end of PAGE
85  }
86  char* ret = new char[lstm_box_str.length() + 1];
87  strcpy(ret, lstm_box_str.string());
88  delete res_it;
89  return ret;
90 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
Definition: strngs.h:45
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1306

◆ GetMutableIterator()

MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ( )

Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1340 of file baseapi.cpp.

1340  {
1341  if (tesseract_ == nullptr || page_res_ == nullptr)
1342  return nullptr;
1343  return new MutableIterator(page_res_, tesseract_,
1347 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
int GetScaledYResolution() const
Definition: thresholder.h:92
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895

◆ GetOsdText()

char * tesseract::TessBaseAPI::GetOsdText ( int  page_number)

The orientation and script detection (OSD) result is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator. page_number is a 0-based page index that will appear in the osd file.

Definition at line 1716 of file baseapi.cpp.

1716  {
1717  int orient_deg;
1718  float orient_conf;
1719  const char* script_name;
1720  float script_conf;
1721 
1722  if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name,
1723  &script_conf))
1724  return nullptr;
1725 
1726  // clockwise rotation needed to make the page upright
1727  int rotate = OrientationIdToValue(orient_deg / 90);
1728 
1729  std::stringstream stream;
1730  // Use "C" locale (needed for float values orient_conf and script_conf).
1731  stream.imbue(std::locale::classic());
1732  // Use fixed notation with 2 digits after the decimal point for float values.
1733  stream.precision(2);
1734  stream
1735  << std::fixed
1736  << "Page number: " << page_number << "\n"
1737  << "Orientation in degrees: " << orient_deg << "\n"
1738  << "Rotate: " << rotate << "\n"
1739  << "Orientation confidence: " << orient_conf << "\n"
1740  << "Script: " << script_name << "\n"
1741  << "Script confidence: " << script_conf << "\n";
1742  const std::string& text = stream.str();
1743  char* result = new char[text.length() + 1];
1744  strcpy(result, text.c_str());
1745  return result;
1746 }
int OrientationIdToValue(const int &id)
Definition: osdetect.cpp:566
bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
Definition: baseapi.cpp:1685

◆ GetRegions()

Boxa * tesseract::TessBaseAPI::GetRegions ( Pixa **  pixa)

Get the result of page layout analysis as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 649 of file baseapi.cpp.

649  {
650  return GetComponentImages(RIL_BLOCK, false, pixa, nullptr);
651 }
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:706

◆ GetStrips()

Boxa * tesseract::TessBaseAPI::GetStrips ( Pixa **  pixa,
int **  blockids 
)

Get textlines and strips of image regions as a leptonica-style Boxa, Pixa pair, in reading order. Enables downstream handling of non-rectangular regions. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line. delete [] after use.

Definition at line 675 of file baseapi.cpp.

675  {
676  return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
677 }
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:706

◆ GetTextDirection()

bool tesseract::TessBaseAPI::GetTextDirection ( int *  out_offset,
float *  out_slope 
)

Definition at line 1928 of file baseapi.cpp.

1928  {
1929  PageIterator* it = AnalyseLayout();
1930  if (it == nullptr) {
1931  return false;
1932  }
1933  int x1, x2, y1, y2;
1934  it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
1935  // Calculate offset and slope (NOTE: Kind of ugly)
1936  if (x2 <= x1) x2 = x1 + 1;
1937  // Convert the point pair to slope/offset of the baseline (in image coords.)
1938  *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
1939  *out_offset = static_cast<int>(y1 - *out_slope * x1);
1940  // Get the y-coord of the baseline at the left and right edges of the
1941  // textline's bounding box.
1942  int left, top, right, bottom;
1943  if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
1944  delete it;
1945  return false;
1946  }
1947  int left_y = IntCastRounded(*out_slope * left + *out_offset);
1948  int right_y = IntCastRounded(*out_slope * right + *out_offset);
1949  // Shift the baseline down so it passes through the nearest bottom-corner
1950  // of the textline's bounding box. This is the difference between the y
1951  // at the lowest (max) edge of the box and the actual box bottom.
1952  *out_offset += bottom - std::max(left_y, right_y);
1953  // Switch back to bottom-up tesseract coordinates. Requires negation of
1954  // the slope and height - offset for the offset.
1955  *out_slope = -*out_slope;
1956  *out_offset = rect_height_ - *out_offset;
1957  delete it;
1958 
1959  return true;
1960 }
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:810
int IntCastRounded(double x)
Definition: helpers.h:175

◆ GetTextlines() [1/2]

Boxa * tesseract::TessBaseAPI::GetTextlines ( bool  raw_image,
int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If raw_image is true, then extract from the original image instead of the thresholded image and pad by raw_padding pixels. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not nullptr, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not nullptr, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Definition at line 661 of file baseapi.cpp.

662  {
663  return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
664  pixa, blockids, paraids);
665 }
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:706

◆ GetTextlines() [2/2]

Boxa* tesseract::TessBaseAPI::GetTextlines ( Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 401 of file baseapi.h.

401  {
402  return GetTextlines(false, 0, pixa, blockids, nullptr);
403  }
Boxa * GetTextlines(bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:661

◆ GetThresholdedImage()

Pix * tesseract::TessBaseAPI::GetThresholdedImage ( )

Get a copy of the internal thresholded image from Tesseract. Caller takes ownership of the Pix and must pixDestroy it. May be called any time after SetImage, or after TesseractRect.

ONLY available after SetImage if you have Leptonica installed. Get a copy of the internal thresholded image from Tesseract.

Definition at line 635 of file baseapi.cpp.

635  {
636  if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr;
637  if (tesseract_->pix_binary() == nullptr &&
639  return nullptr;
640  }
641  return pixClone(tesseract_->pix_binary());
642 }
Pix * pix_binary() const
virtual TESS_LOCAL bool Threshold(Pix **pix)
Definition: baseapi.cpp:2013
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892

◆ GetThresholdedImageScaleFactor()

int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor ( ) const

Returns the scale factor of the thresholded image that would be returned by GetThresholdedImage() and the various GetX() methods that call GetComponentImages(). Returns 0 if no thresholder has been set.

Definition at line 788 of file baseapi.cpp.

788  {
789  if (thresholder_ == nullptr) {
790  return 0;
791  }
792  return thresholder_->GetScaleFactor();
793 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892

◆ GetTSVText()

char * tesseract::TessBaseAPI::GetTSVText ( int  page_number)

Make a TSV-formatted string from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Returned string must be freed with the delete [] operator.

Definition at line 1382 of file baseapi.cpp.

1382  {
1383  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
1384  return nullptr;
1385 
1386  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1387  int page_id = page_number + 1; // we use 1-based page numbers.
1388 
1389  STRING tsv_str("");
1390 
1391  int page_num = page_id;
1392  int block_num = 0;
1393  int par_num = 0;
1394  int line_num = 0;
1395  int word_num = 0;
1396 
1397  tsv_str.add_str_int("1\t", page_num); // level 1 - page
1398  tsv_str.add_str_int("\t", block_num);
1399  tsv_str.add_str_int("\t", par_num);
1400  tsv_str.add_str_int("\t", line_num);
1401  tsv_str.add_str_int("\t", word_num);
1402  tsv_str.add_str_int("\t", rect_left_);
1403  tsv_str.add_str_int("\t", rect_top_);
1404  tsv_str.add_str_int("\t", rect_width_);
1405  tsv_str.add_str_int("\t", rect_height_);
1406  tsv_str += "\t-1\t\n";
1407 
1408  ResultIterator* res_it = GetIterator();
1409  while (!res_it->Empty(RIL_BLOCK)) {
1410  if (res_it->Empty(RIL_WORD)) {
1411  res_it->Next(RIL_WORD);
1412  continue;
1413  }
1414 
1415  // Add rows for any new block/paragraph/textline.
1416  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1417  block_num++;
1418  par_num = 0;
1419  line_num = 0;
1420  word_num = 0;
1421  tsv_str.add_str_int("2\t", page_num); // level 2 - block
1422  tsv_str.add_str_int("\t", block_num);
1423  tsv_str.add_str_int("\t", par_num);
1424  tsv_str.add_str_int("\t", line_num);
1425  tsv_str.add_str_int("\t", word_num);
1426  AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
1427  tsv_str += "\t-1\t\n"; // end of row for block
1428  }
1429  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1430  par_num++;
1431  line_num = 0;
1432  word_num = 0;
1433  tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph
1434  tsv_str.add_str_int("\t", block_num);
1435  tsv_str.add_str_int("\t", par_num);
1436  tsv_str.add_str_int("\t", line_num);
1437  tsv_str.add_str_int("\t", word_num);
1438  AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
1439  tsv_str += "\t-1\t\n"; // end of row for para
1440  }
1441  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1442  line_num++;
1443  word_num = 0;
1444  tsv_str.add_str_int("4\t", page_num); // level 4 - line
1445  tsv_str.add_str_int("\t", block_num);
1446  tsv_str.add_str_int("\t", par_num);
1447  tsv_str.add_str_int("\t", line_num);
1448  tsv_str.add_str_int("\t", word_num);
1449  AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
1450  tsv_str += "\t-1\t\n"; // end of row for line
1451  }
1452 
1453  // Now, process the word...
1454  int left, top, right, bottom;
1455  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1456  word_num++;
1457  tsv_str.add_str_int("5\t", page_num); // level 5 - word
1458  tsv_str.add_str_int("\t", block_num);
1459  tsv_str.add_str_int("\t", par_num);
1460  tsv_str.add_str_int("\t", line_num);
1461  tsv_str.add_str_int("\t", word_num);
1462  tsv_str.add_str_int("\t", left);
1463  tsv_str.add_str_int("\t", top);
1464  tsv_str.add_str_int("\t", right - left);
1465  tsv_str.add_str_int("\t", bottom - top);
1466  tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD));
1467  tsv_str += "\t";
1468 
1469  // Increment counts if at end of block/paragraph/textline.
1470  if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++;
1471  if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++;
1472  if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
1473 
1474  do {
1475  tsv_str +=
1476  std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
1477  res_it->Next(RIL_SYMBOL);
1478  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1479  tsv_str += "\n"; // end of row
1480  wcnt++;
1481  }
1482 
1483  char* ret = new char[tsv_str.length() + 1];
1484  strcpy(ret, tsv_str.string());
1485  delete res_it;
1486  return ret;
1487 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
ResultIterator * GetIterator()
Definition: baseapi.cpp:1323
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
Definition: strngs.h:45
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830

◆ GetUnichar()

const char * tesseract::TessBaseAPI::GetUnichar ( int  unichar_id)

This method returns the string form of the specified unichar.

Definition at line 2293 of file baseapi.cpp.

2293  {
2294  return tesseract_->unicharset.id_to_unichar(unichar_id);
2295 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
UNICHARSET unicharset
Definition: ccutil.h:73

◆ GetUNLVText()

char * tesseract::TessBaseAPI::GetUNLVText ( )

The recognized text is returned as a char* which is coded as UNLV format Latin-1 with specific reject and suspect codes. Returned string must be freed with the delete [] operator.

Definition at line 1573 of file baseapi.cpp.

1573  {
1574  if (tesseract_ == nullptr ||
1575  (!recognition_done_ && Recognize(nullptr) < 0))
1576  return nullptr;
1577  bool tilde_crunch_written = false;
1578  bool last_char_was_newline = true;
1579  bool last_char_was_tilde = false;
1580 
1581  int total_length = TextLength(nullptr);
1582  PAGE_RES_IT page_res_it(page_res_);
1583  char* result = new char[total_length];
1584  char* ptr = result;
1585  for (page_res_it.restart_page(); page_res_it.word () != nullptr;
1586  page_res_it.forward()) {
1587  WERD_RES *word = page_res_it.word();
1588  // Process the current word.
1589  if (word->unlv_crunch_mode != CR_NONE) {
1590  if (word->unlv_crunch_mode != CR_DELETE &&
1591  (!tilde_crunch_written ||
1592  (word->unlv_crunch_mode == CR_KEEP_SPACE &&
1593  word->word->space() > 0 &&
1594  !word->word->flag(W_FUZZY_NON) &&
1595  !word->word->flag(W_FUZZY_SP)))) {
1596  if (!word->word->flag(W_BOL) &&
1597  word->word->space() > 0 &&
1598  !word->word->flag(W_FUZZY_NON) &&
1599  !word->word->flag(W_FUZZY_SP)) {
1600  /* Write a space to separate from preceding good text */
1601  *ptr++ = ' ';
1602  last_char_was_tilde = false;
1603  }
1604  if (!last_char_was_tilde) {
1605  // Write a reject char.
1606  last_char_was_tilde = true;
1607  *ptr++ = kUNLVReject;
1608  tilde_crunch_written = true;
1609  last_char_was_newline = false;
1610  }
1611  }
1612  } else {
1613  // NORMAL PROCESSING of non tilde crunched words.
1614  tilde_crunch_written = false;
1616  const char* wordstr = word->best_choice->unichar_string().string();
1617  const STRING& lengths = word->best_choice->unichar_lengths();
1618  int length = lengths.length();
1619  int i = 0;
1620  int offset = 0;
1621 
1622  if (last_char_was_tilde &&
1623  word->word->space() == 0 && wordstr[offset] == ' ') {
1624  // Prevent adjacent tilde across words - we know that adjacent tildes
1625  // within words have been removed.
1626  // Skip the first character.
1627  offset = lengths[i++];
1628  }
1629  if (i < length && wordstr[offset] != 0) {
1630  if (!last_char_was_newline)
1631  *ptr++ = ' ';
1632  else
1633  last_char_was_newline = false;
1634  for (; i < length; offset += lengths[i++]) {
1635  if (wordstr[offset] == ' ' ||
1636  wordstr[offset] == kTesseractReject) {
1637  *ptr++ = kUNLVReject;
1638  last_char_was_tilde = true;
1639  } else {
1640  if (word->reject_map[i].rejected())
1641  *ptr++ = kUNLVSuspect;
1642  UNICHAR ch(wordstr + offset, lengths[i]);
1643  int uni_ch = ch.first_uni();
1644  for (int j = 0; kUniChs[j] != 0; ++j) {
1645  if (kUniChs[j] == uni_ch) {
1646  uni_ch = kLatinChs[j];
1647  break;
1648  }
1649  }
1650  if (uni_ch <= 0xff) {
1651  *ptr++ = static_cast<char>(uni_ch);
1652  last_char_was_tilde = false;
1653  } else {
1654  *ptr++ = kUNLVReject;
1655  last_char_was_tilde = true;
1656  }
1657  }
1658  }
1659  }
1660  }
1661  if (word->word->flag(W_EOL) && !last_char_was_newline) {
1662  /* Add a new line output */
1663  *ptr++ = '\n';
1664  tilde_crunch_written = false;
1665  last_char_was_newline = true;
1666  last_char_was_tilde = false;
1667  }
1668  }
1669  *ptr++ = '\n';
1670  *ptr = '\0';
1671  return result;
1672 }
const char kUNLVReject
Definition: baseapi.cpp:108
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117
const int kUniChs[]
Definition: baseapi.cpp:1560
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:315
REJMAP reject_map
Definition: pageres.h:294
const char kUNLVSuspect
Definition: baseapi.cpp:110
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2168
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
const STRING & unichar_lengths() const
Definition: ratngs.h:538
const char * string() const
Definition: strngs.cpp:194
end of line
Definition: werd.h:33
fuzzy space
Definition: werd.h:39
int32_t length() const
Definition: strngs.cpp:189
uint8_t space()
Definition: werd.h:99
const char kTesseractReject
Definition: baseapi.cpp:106
start of line
Definition: werd.h:32
const STRING & unichar_string() const
Definition: ratngs.h:531
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
Definition: strngs.h:45
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
WERD * word
Definition: pageres.h:186
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:273
const int kLatinChs[]
Definition: baseapi.cpp:1564
WERD_CHOICE * best_choice
Definition: pageres.h:241
fuzzy nonspace
Definition: werd.h:40
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:901

◆ GetUTF8Text()

char * tesseract::TessBaseAPI::GetUTF8Text ( )

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Make a text string from the internal data structures.

Definition at line 1350 of file baseapi.cpp.

1350  {
1351  if (tesseract_ == nullptr ||
1352  (!recognition_done_ && Recognize(nullptr) < 0))
1353  return nullptr;
1354  STRING text("");
1355  ResultIterator *it = GetIterator();
1356  do {
1357  if (it->Empty(RIL_PARA)) continue;
1358  const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
1359  text += para_text.get();
1360  } while (it->Next(RIL_PARA));
1361  char* result = new char[text.length() + 1];
1362  strncpy(result, text.string(), text.length() + 1);
1363  delete it;
1364  return result;
1365 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
ResultIterator * GetIterator()
Definition: baseapi.cpp:1323
Definition: strngs.h:45
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:901

◆ GetWords()

Boxa * tesseract::TessBaseAPI::GetWords ( Pixa **  pixa)

Get the words as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 684 of file baseapi.cpp.

684  {
685  return GetComponentImages(RIL_WORD, true, pixa, nullptr);
686 }
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:706

◆ GetWordStrBoxText()

char * tesseract::TessBaseAPI::GetWordStrBoxText ( int  page_number = 0)

The recognized text is returned as a char* which is coded in the same format as a WordStr box file used in training. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Create a UTF8 box file with WordStr strings from the internal data structures. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Definition at line 31 of file wordstrboxrenderer.cpp.

31  {
32  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
33  return nullptr;
34 
35  STRING wordstr_box_str("");
36  int left = 0, top = 0, right = 0, bottom = 0;
37 
38  bool first_line = true;
39 
40  LTRResultIterator* res_it = GetLTRIterator();
41  while (!res_it->Empty(RIL_BLOCK)) {
42  if (res_it->Empty(RIL_WORD)) {
43  res_it->Next(RIL_WORD);
44  continue;
45  }
46 
47  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
48  if (!first_line) {
49  wordstr_box_str.add_str_int("\n\t ", right + 1);
50  wordstr_box_str.add_str_int(" ", image_height_ - bottom);
51  wordstr_box_str.add_str_int(" ", right + 5);
52  wordstr_box_str.add_str_int(" ", image_height_ - top);
53  wordstr_box_str.add_str_int(" ", page_number); // row for tab for EOL
54  wordstr_box_str += "\n";
55  } else {
56  first_line = false;
57  }
58  // Use bounding box for whole line for WordStr
59  res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
60  wordstr_box_str.add_str_int("WordStr ", left);
61  wordstr_box_str.add_str_int(" ", image_height_ - bottom);
62  wordstr_box_str.add_str_int(" ", right);
63  wordstr_box_str.add_str_int(" ", image_height_ - top);
64  wordstr_box_str.add_str_int(" ", page_number); // word
65  wordstr_box_str += " #";
66  }
67  do {
68  wordstr_box_str +=
69  std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get();
70  wordstr_box_str += " ";
71  res_it->Next(RIL_WORD);
72  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
73  }
74 
75  if (left != 0 && top != 0 && right != 0 && bottom != 0) {
76  wordstr_box_str.add_str_int("\n\t ", right + 1);
77  wordstr_box_str.add_str_int(" ", image_height_ - bottom);
78  wordstr_box_str.add_str_int(" ", right + 5);
79  wordstr_box_str.add_str_int(" ", image_height_ - top);
80  wordstr_box_str.add_str_int(" ", page_number); // row for tab for EOL
81  wordstr_box_str += "\n";
82  }
83  char* ret = new char[wordstr_box_str.length() + 1];
84  strcpy(ret, wordstr_box_str.string());
85  delete res_it;
86  return ret;
87 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
Definition: strngs.h:45
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1306

◆ InitTruthCallback()

void tesseract::TessBaseAPI::InitTruthCallback ( TruthCallback *  cb)
inline

Definition at line 805 of file baseapi.h.

805 { truth_cb_ = cb; }
TruthCallback * truth_cb_
Definition: baseapi.h:902

◆ IsValidCharacter()

bool tesseract::TessBaseAPI::IsValidCharacter ( const char *  utf8_character)

Definition at line 1921 of file baseapi.cpp.

1921  {
1922  return tesseract_->unicharset.contains_unichar(utf8_character);
1923 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:671
UNICHARSET unicharset
Definition: ccutil.h:73

◆ IsValidWord()

int tesseract::TessBaseAPI::IsValidWord ( const char *  word)

Check whether a word is valid according to Tesseract's language model.

Returns
0 if the word is invalid, non-zero if valid.
Warning
temporary! This function will be removed from here and placed in a separate API at some future time.

Check whether a word is valid according to Tesseract's language model. Returns 0 if the word is invalid, non-zero if valid.

Definition at line 1917 of file baseapi.cpp.

1917  {
1918  return tesseract_->getDict().valid_word(word);
1919 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:778
Dict & getDict() override

◆ MakeTBLOB()

TBLOB * tesseract::TessBaseAPI::MakeTBLOB ( Pix *  pix)
static

Returns a TBLOB corresponding to the entire input image.

Creates a TBLOB* from the whole pix.

Definition at line 2367 of file baseapi.cpp.

2367  {
2368  int width = pixGetWidth(pix);
2369  int height = pixGetHeight(pix);
2370  BLOCK block("a character", true, 0, 0, 0, 0, width, height);
2371 
2372  // Create C_BLOBs from the page
2373  extract_edges(pix, &block);
2374 
2375  // Merge all C_BLOBs
2376  C_BLOB_LIST *list = block.blob_list();
2377  C_BLOB_IT c_blob_it(list);
2378  if (c_blob_it.empty())
2379  return nullptr;
2380  // Move all the outlines to the first blob.
2381  C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
2382  for (c_blob_it.forward();
2383  !c_blob_it.at_first();
2384  c_blob_it.forward()) {
2385  C_BLOB *c_blob = c_blob_it.data();
2386  ol_it.add_list_after(c_blob->out_list());
2387  }
2388  // Convert the first blob to the output TBLOB.
2389  return TBLOB::PolygonalCopy(false, c_blob_it.data());
2390 }
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
Definition: blobs.cpp:327
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
Definition: ocrblock.h:29
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:329

◆ MakeTessOCRRow()

ROW * tesseract::TessBaseAPI::MakeTessOCRRow ( float  baseline,
float  xheight,
float  descender,
float  ascender 
)
static

Returns a ROW object created from the input row specification.

Definition at line 2350 of file baseapi.cpp.

2353  {
2354  int32_t xstarts[] = {-32000};
2355  double quad_coeffs[] = {0, 0, baseline};
2356  return new ROW(1,
2357  xstarts,
2358  quad_coeffs,
2359  xheight,
2360  ascender - (baseline + xheight),
2361  descender - baseline,
2362  0,
2363  0);
2364 }
Definition: ocrrow.h:36

◆ MeanTextConf()

int tesseract::TessBaseAPI::MeanTextConf ( )

Returns the (average) confidence value between 0 and 100.

Returns the average word confidence for Tesseract page result.

Definition at line 1751 of file baseapi.cpp.

1751  {
1752  int* conf = AllWordConfidences();
1753  if (!conf) return 0;
1754  int sum = 0;
1755  int *pt = conf;
1756  while (*pt >= 0) sum += *pt++;
1757  if (pt != conf) sum /= pt - conf;
1758  delete [] conf;
1759  return sum;
1760 }

◆ NormalizeTBLOB()

void tesseract::TessBaseAPI::NormalizeTBLOB ( TBLOB *  tblob,
ROW *  row,
bool  numeric_mode 
)
static

This method baseline normalizes a TBLOB in-place. The input row is used for normalization. The denorm is an optional parameter in which the normalization-antidote is returned.

Definition at line 2397 of file baseapi.cpp.

2397  {
2398  TBOX box = tblob->bounding_box();
2399  float x_center = (box.left() + box.right()) / 2.0f;
2400  float baseline = row->base_line(x_center);
2401  float scale = kBlnXHeight / row->x_height();
2402  tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale,
2403  0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);
2404 }
const int kBlnBaselineOffset
Definition: normalis.h:25
const int kBlnXHeight
Definition: normalis.h:24
int16_t left() const
Definition: rect.h:72
float base_line(float xpos) const
Definition: ocrrow.h:59
float x_height() const
Definition: ocrrow.h:64
Definition: rect.h:34
TBOX bounding_box() const
Definition: blobs.cpp:468
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
Definition: blobs.cpp:397
int16_t right() const
Definition: rect.h:79

◆ NumDawgs()

int tesseract::TessBaseAPI::NumDawgs ( ) const

Return the number of dawgs loaded into tesseract_ object.

Definition at line 2304 of file baseapi.cpp.

2304  {
2305  return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs();
2306 }
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
Definition: dict.h:430
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
Dict & getDict() override

◆ oem()

OcrEngineMode tesseract::TessBaseAPI::oem ( ) const
inline

Definition at line 803 of file baseapi.h.

803 { return last_oem_requested_; }
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:900

◆ ProcessPage()

bool tesseract::TessBaseAPI::ProcessPage ( Pix *  pix,
int  page_index,
const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer *  renderer 
)

Turn a single image into symbolic text.

The pix is the image processed. filename and page_index are metadata used by side-effect processes, such as reading a box file or formatting as hOCR.

See ProcessPages for descriptions of other parameters.

Definition at line 1239 of file baseapi.cpp.

1241  {
1242  SetInputName(filename);
1243  SetImage(pix);
1244  bool failed = false;
1245 
1247  // Disabled character recognition
1248  PageIterator* it = AnalyseLayout();
1249 
1250  if (it == nullptr) {
1251  failed = true;
1252  } else {
1253  delete it;
1254  }
1256  failed = FindLines() != 0;
1257  } else if (timeout_millisec > 0) {
1258  // Running with a timeout.
1259  ETEXT_DESC monitor;
1260  monitor.cancel = nullptr;
1261  monitor.cancel_this = nullptr;
1262  monitor.set_deadline_msecs(timeout_millisec);
1263 
1264  // Now run the main recognition.
1265  failed = Recognize(&monitor) < 0;
1266  } else {
1267  // Normal layout and character recognition with no timeout.
1268  failed = Recognize(nullptr) < 0;
1269  }
1270 
1272 #ifndef ANDROID_BUILD
1273  Pix* page_pix = GetThresholdedImage();
1274  pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
1275 #endif // ANDROID_BUILD
1276  }
1277 
1278  if (failed && retry_config != nullptr && retry_config[0] != '\0') {
1279  // Save current config variables before switching modes.
1280  FILE* fp = fopen(kOldVarsFile, "wb");
1281  if (fp == nullptr) {
1282  tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile);
1283  } else {
1284  PrintVariables(fp);
1285  fclose(fp);
1286  }
1287  // Switch to alternate mode for retry.
1288  ReadConfigFile(retry_config);
1289  SetImage(pix);
1290  Recognize(nullptr);
1291  // Restore saved config variables.
1292  ReadConfigFile(kOldVarsFile);
1293  }
1294 
1295  if (renderer && !failed) {
1296  failed = !renderer->AddImage(this);
1297  }
1298 
1299  return !failed;
1300 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
Pix * GetThresholdedImage()
Definition: baseapi.cpp:635
CANCEL_FUNC cancel
for errcode use
Definition: ocrclass.h:112
Orientation and script detection only.
Definition: publictypes.h:164
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:810
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:167
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2067
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:580
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
void set_deadline_msecs(int32_t deadline_msecs)
Definition: ocrclass.h:129
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:334
void * cancel_this
monitor-aware progress callback
Definition: ocrclass.h:116
void SetInputName(const char *name)
Definition: baseapi.cpp:271
void ReadConfigFile(const char *filename)
Definition: baseapi.cpp:501

◆ ProcessPages()

bool tesseract::TessBaseAPI::ProcessPages ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer *  renderer 
)

Turns images into symbolic text.

filename can point to a single image, a multi-page TIFF, or a plain text list of image filenames.

retry_config is useful for debugging. If not nullptr, you can fall back to an alternate configuration if a page fails for some reason.

timeout_millisec terminates processing if any single page takes too long. Set to 0 for unlimited time.

renderer is responsible for creating the output. For example, use the TessTextRenderer if you want plaintext output, or the TessPDFRenderer to produce searchable PDF.

If tessedit_page_number is non-negative, will only process that single page. Works for multi-page tiff file, or filelist.

Returns true if successful, false on error.

Definition at line 1076 of file baseapi.cpp.

1078  {
1079  bool result =
1080  ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
1081  #ifndef DISABLED_LEGACY_ENGINE
1082  if (result) {
1085  tprintf("Write of TR file failed: %s\n", output_file_->string());
1086  return false;
1087  }
1088  }
1089  #endif // ndef DISABLED_LEGACY_ENGINE
1090  return result;
1091 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
bool WriteTRFile(const STRING &filename)
Definition: blobclass.cpp:98
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
const char * string() const
Definition: strngs.cpp:194
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:897
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1113

◆ ProcessPagesInternal()

bool tesseract::TessBaseAPI::ProcessPagesInternal ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer *  renderer 
)

Definition at line 1113 of file baseapi.cpp.

1116  {
1117  bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
1118  if (stdInput) {
1119 #ifdef WIN32
1120  if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1121  tprintf("ERROR: cin to binary: %s", strerror(errno));
1122 #endif // WIN32
1123  }
1124 
1125  if (stream_filelist) {
1126  return ProcessPagesFileList(stdin, nullptr, retry_config,
1127  timeout_millisec, renderer,
1129  }
1130 
1131  // At this point we are officially in autodection territory.
1132  // That means any data in stdin must be buffered, to make it
1133  // seekable.
1134  std::string buf;
1135  const l_uint8 *data = nullptr;
1136  if (stdInput) {
1137  buf.assign((std::istreambuf_iterator<char>(std::cin)),
1138  (std::istreambuf_iterator<char>()));
1139  data = reinterpret_cast<const l_uint8 *>(buf.data());
1140  } else if (strstr(filename, "://") != nullptr) {
1141  // Get image or image list by URL.
1142 #ifdef HAVE_LIBCURL
1143  CURL* curl = curl_easy_init();
1144  if (curl == nullptr) {
1145  fprintf(stderr, "Error, curl_easy_init failed\n");
1146  return false;
1147  } else {
1148  CURLcode curlcode;
1149  curlcode = curl_easy_setopt(curl, CURLOPT_URL, filename);
1150  ASSERT_HOST(curlcode == CURLE_OK);
1151  curlcode = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
1152  ASSERT_HOST(curlcode == CURLE_OK);
1153  curlcode = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buf);
1154  ASSERT_HOST(curlcode == CURLE_OK);
1155  curlcode = curl_easy_perform(curl);
1156  ASSERT_HOST(curlcode == CURLE_OK);
1157  curl_easy_cleanup(curl);
1158  data = reinterpret_cast<const l_uint8 *>(buf.data());
1159  }
1160 #else
1161  fprintf(stderr, "Error, this tesseract has no URL support\n");
1162  return false;
1163 #endif
1164  } else {
1165  // Check whether the input file can be read.
1166  if (FILE* file = fopen(filename, "rb")) {
1167  fclose(file);
1168  } else {
1169  fprintf(stderr, "Error, cannot read input file %s: %s\n",
1170  filename, strerror(errno));
1171  return false;
1172  }
1173  }
1174 
1175  // Here is our autodetection
1176  int format;
1177  int r = (data != nullptr) ?
1178  findFileFormatBuffer(data, &format) :
1179  findFileFormat(filename, &format);
1180 
1181  // Maybe we have a filelist
1182  if (r != 0 || format == IFF_UNKNOWN) {
1183  STRING s;
1184  if (data != nullptr) {
1185  s = buf.c_str();
1186  } else {
1187  std::ifstream t(filename);
1188  std::string u((std::istreambuf_iterator<char>(t)),
1189  std::istreambuf_iterator<char>());
1190  s = u.c_str();
1191  }
1192  return ProcessPagesFileList(nullptr, &s, retry_config,
1193  timeout_millisec, renderer,
1195  }
1196 
1197  // Maybe we have a TIFF which is potentially multipage
1198  bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
1199  format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
1200  format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1201 #if LIBLEPT_MAJOR_VERSION > 1 || LIBLEPT_MINOR_VERSION > 76
1202  format == IFF_TIFF_JPEG ||
1203 #endif
1204  format == IFF_TIFF_ZIP);
1205 
1206  // Fail early if we can, before producing any output
1207  Pix *pix = nullptr;
1208  if (!tiff) {
1209  pix = (data != nullptr) ? pixReadMem(data, buf.size()) : pixRead(filename);
1210  if (pix == nullptr) {
1211  return false;
1212  }
1213  }
1214 
1215  // Begin the output
1216  if (renderer && !renderer->BeginDocument(document_title.c_str())) {
1217  pixDestroy(&pix);
1218  return false;
1219  }
1220 
1221  // Produce output
1222  r = (tiff) ?
1223  ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
1224  timeout_millisec, renderer,
1226  ProcessPage(pix, 0, filename, retry_config,
1227  timeout_millisec, renderer);
1228 
1229  // Clean up memory as needed
1230  pixDestroy(&pix);
1231 
1232  // End the output
1233  if (!r || (renderer && !renderer->EndDocument())) {
1234  return false;
1235  }
1236  return true;
1237 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1239
const char * c_str() const
Definition: strngs.cpp:205
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
Definition: strngs.h:45
#define ASSERT_HOST(x)
Definition: errcode.h:88

◆ Recognize()

int tesseract::TessBaseAPI::Recognize ( ETEXT_DESC *  monitor)

Recognize the image from SetAndThresholdImage, generating Tesseract internal structures. Returns 0 on success. Optional. The Get*Text functions below will call Recognize if needed. After Recognize, the output is kept internally until the next SetImage.

Recognize the tesseract global image and return the result as Tesseract internal structures.

Definition at line 830 of file baseapi.cpp.

830  {
831  if (tesseract_ == nullptr)
832  return -1;
833  if (FindLines() != 0)
834  return -1;
835  delete page_res_;
836  if (block_list_->empty()) {
837  page_res_ = new PAGE_RES(false, block_list_,
839  return 0; // Empty page.
840  }
841 
843  recognition_done_ = true;
844 #ifndef DISABLED_LEGACY_ENGINE
849  } else
850 #endif // ndef DISABLED_LEGACY_ENGINE
851  {
854  }
855 
856  if (page_res_ == nullptr) {
857  return -1;
858  }
859 
862  return -1;
863  }
865  return 0;
866  }
867 #ifndef DISABLED_LEGACY_ENGINE
870  return 0;
871  }
872 #endif // ndef DISABLED_LEGACY_ENGINE
873 
874  if (truth_cb_ != nullptr) {
875  tesseract_->wordrec_run_blamer.set_value(true);
876  auto *page_it = new PageIterator(
881  image_height_, page_it, this->tesseract()->pix_grey());
882  delete page_it;
883  }
884 
885  int result = 0;
887  #ifndef GRAPHICS_DISABLED
889  #endif // GRAPHICS_DISABLED
890  // The page_res is invalid after an interactive session, so cleanup
891  // in a way that lets us continue to the next page without crashing.
892  delete page_res_;
893  page_res_ = nullptr;
894  return -1;
895  #ifndef DISABLED_LEGACY_ENGINE
897  STRING fontname;
898  ExtractFontName(*output_file_, &fontname);
900  } else if (tesseract_->tessedit_ambigs_training) {
901  FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
902  // OCR the page segmented into words by tesseract.
904  *input_file_, page_res_, monitor, training_output_file);
905  fclose(training_output_file);
906  #endif // ndef DISABLED_LEGACY_ENGINE
907  } else {
908  // Now run the main recognition.
909  bool wait_for_text = true;
910  GetBoolVariable("paragraph_text_based", &wait_for_text);
911  if (!wait_for_text) DetectParagraphs(false);
912  if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) {
913  if (wait_for_text) DetectParagraphs(true);
914  } else {
915  result = -1;
916  }
917  }
918  return result;
919 }
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:306
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)
Tesseract * tesseract() const
Definition: baseapi.h:801
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:302
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2277
bool wordrec_run_blamer
Definition: wordrec.h:232
void CorrectClassifyWords(PAGE_RES *page_res)
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
Dict & getDict() override
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:894
virtual void Run(A1, A2, A3, A4)=0
void ExtractFontName(const STRING &filename, STRING *fontname)
Definition: blobclass.cpp:45
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2067
STRING * input_file_
Name used by training code.
Definition: baseapi.h:896
int GetScaledYResolution() const
Definition: thresholder.h:92
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:476
const UNICHARSET & getUnicharset() const
Definition: dict.h:101
bool AnyLSTMLang() const
Definition: strngs.h:45
bool TrainLineRecognizer(const STRING &input_imagename, const STRING &output_basename, BLOCK_LIST *block_list)
Definition: linerec.cpp:44
TruthCallback * truth_cb_
Definition: baseapi.h:902
bool tessedit_resegment_from_line_boxes
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:897
Pix * pix_grey() const
FILE * init_recog_training(const STRING &fname)
void pgeditor_main(int width, int height, PAGE_RES *page_res)
Definition: pgedit.cpp:378
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:901

◆ RecognizeForChopTest()

int tesseract::TessBaseAPI::RecognizeForChopTest ( ETEXT_DESC * monitor)

Methods to retrieve information after SetAndThresholdImage(), Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) Variant on Recognize used for testing the chopper.

Tests the chopper by exhaustively running chop_one_blob.

Definition at line 923 of file baseapi.cpp.

923  {
924  if (tesseract_ == nullptr)
925  return -1;
926  if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
927  tprintf("Please call SetImage before attempting recognition.\n");
928  return -1;
929  }
930  if (page_res_ != nullptr)
931  ClearResults();
932  if (FindLines() != 0)
933  return -1;
934  // Additional conditions under which chopper test cannot be run
935  if (tesseract_->interactive_display_mode) return -1;
936 
937  recognition_done_ = true;
938 
939  page_res_ = new PAGE_RES(false, block_list_,
941 
942  PAGE_RES_IT page_res_it(page_res_);
943 
944  while (page_res_it.word() != nullptr) {
945  WERD_RES *word_res = page_res_it.word();
946  GenericVector<TBOX> boxes;
947  tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
948  page_res_it.row()->row, word_res);
949  page_res_it.forward();
950  }
951  return 0;
952 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:894
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2067
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:476
WERD * word
Definition: pageres.h:186
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:53
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:901

◆ RunAdaptiveClassifier()

void tesseract::TessBaseAPI::RunAdaptiveClassifier ( TBLOB * blob,
int  num_max_matches,
int *  unichar_ids,
float *  ratings,
int *  num_matches_returned 
)

Method to run the adaptive classifier on a blob. It returns at most num_max_matches results.

Method to run adaptive classifier on a blob.

Definition at line 2659 of file baseapi.cpp.

2663  {
2664  auto* choices = new BLOB_CHOICE_LIST;
2665  tesseract_->AdaptiveClassifier(blob, choices);
2666  BLOB_CHOICE_IT choices_it(choices);
2667  int& index = *num_matches_returned;
2668  index = 0;
2669  for (choices_it.mark_cycle_pt();
2670  !choices_it.cycled_list() && index < num_max_matches;
2671  choices_it.forward()) {
2672  BLOB_CHOICE* choice = choices_it.data();
2673  unichar_ids[index] = choice->unichar_id();
2674  ratings[index] = choice->rating();
2675  ++index;
2676  }
2677  *num_matches_returned = index;
2678  delete choices;
2679 }
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:191
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77
float rating() const
Definition: ratngs.h:80

◆ set_min_orientation_margin()

void tesseract::TessBaseAPI::set_min_orientation_margin ( double  margin)

Definition at line 2214 of file baseapi.cpp.

2214  {
2215  tesseract_->min_orientation_margin.set_value(margin);
2216 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888

◆ SetDictFunc()

void tesseract::TessBaseAPI::SetDictFunc ( DictFunc  f)

Sets Dict::letter_is_okay_ function to point to the given function.

Definition at line 1963 of file baseapi.cpp.

1963  {
1964  if (tesseract_ != nullptr) {
1966  }
1967 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
Dict & getDict() override
int(Dict::* letter_is_okay_)(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.h:372

◆ SetFillLatticeFunc()

void tesseract::TessBaseAPI::SetFillLatticeFunc ( FillLatticeFunc  f)

Sets Wordrec::fill_lattice_ function to point to the given function.

Definition at line 1990 of file baseapi.cpp.

1990  {
1991  if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f;
1992 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:480

◆ SetImage() [1/2]

void tesseract::TessBaseAPI::SetImage ( const unsigned char *  imagedata,
int  width,
int  height,
int  bytes_per_pixel,
int  bytes_per_line 
)

Provide an image for Tesseract to recognize. Format is as TesseractRect above. Copies the image buffer and converts to Pix. SetImage clears all recognition results, and sets the rectangle to the full image, so it may be followed immediately by a GetUTF8Text, and it will automatically perform recognition.

Definition at line 580 of file baseapi.cpp.

582  {
583  if (InternalSetImage()) {
584  thresholder_->SetImage(imagedata, width, height,
585  bytes_per_pixel, bytes_per_line);
587  }
588 }
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:1996
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:956
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:65

◆ SetImage() [2/2]

void tesseract::TessBaseAPI::SetImage ( Pix *  pix)

Provide an image for Tesseract to recognize. As with SetImage above, Tesseract takes its own copy of the image, so it need not persist until after Recognize. Pix vs raw, which to use? Use Pix where possible. Tesseract uses Pix as its internal representation and it is therefore more efficient to provide a Pix directly.

Definition at line 605 of file baseapi.cpp.

605  {
606  if (InternalSetImage()) {
607  if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
608  // remove alpha channel from png
609  Pix* p1 = pixRemoveAlpha(pix);
610  pixSetSpp(p1, 3);
611  (void)pixCopy(pix, p1);
612  pixDestroy(&p1);
613  }
614  thresholder_->SetImage(pix);
616  }
617 }
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:1996
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:956
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:65

◆ SetProbabilityInContextFunc()

void tesseract::TessBaseAPI::SetProbabilityInContextFunc ( ProbabilityInContextFunc  f)

Sets Dict::probability_in_context_ function to point to the given function.

Sets Dict::probability_in_context_ function to point to the given function.

Parameters
f: A single function that returns the probability of the current "character" (in general a utf-8 string), given the context of a previous utf-8 string.

Definition at line 1977 of file baseapi.cpp.

1977  {
1978  if (tesseract_ != nullptr) {
1980  // Set it for the sublangs too.
1981  int num_subs = tesseract_->num_sub_langs();
1982  for (int i = 0; i < num_subs; ++i) {
1984  }
1985  }
1986 }
Tesseract * get_sub_lang(int index) const
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
int num_sub_langs() const
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
Definition: dict.h:384
Dict & getDict() override

◆ SetRectangle()

void tesseract::TessBaseAPI::SetRectangle ( int  left,
int  top,
int  width,
int  height 
)

Restrict recognition to a sub-rectangle of the image. Call after SetImage. Each SetRectangle clears the recognition results, so multiple rectangles can be recognized with the same image.

Definition at line 624 of file baseapi.cpp.

624  {
625  if (thresholder_ == nullptr)
626  return;
627  thresholder_->SetRectangle(left, top, width, height);
628  ClearResults();
629 }
void SetRectangle(int left, int top, int width, int height)
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892

◆ SetSourceResolution()

void tesseract::TessBaseAPI::SetSourceResolution ( int  ppi)

Set the resolution of the source image in pixels per inch so font size information can be calculated in results. Call this after SetImage().

Definition at line 590 of file baseapi.cpp.

590  {
591  if (thresholder_)
593  else
594  tprintf("Please call SetImage before SetSourceResolution.\n");
595 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
void SetSourceYResolution(int ppi)
Definition: thresholder.h:85
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892

◆ SetThresholder()

void tesseract::TessBaseAPI::SetThresholder ( ImageThresholder * thresholder)
inline

In extreme cases only, usually with a subclass of Thresholder, it is possible to provide a different Thresholder. The Thresholder may be preloaded with an image, settings etc, or they may be set after. Note that Tesseract takes ownership of the Thresholder and will delete it when it is replaced or the API is destructed.

Definition at line 365 of file baseapi.h.

365  {
366  delete thresholder_;
367  thresholder_ = thresholder;
368  ClearResults();
369  }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892

◆ tesseract()

Tesseract* tesseract::TessBaseAPI::tesseract ( ) const
inline

Definition at line 801 of file baseapi.h.

801 { return tesseract_; }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888