tesseract  3.04.01
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
baseapi.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: baseapi.cpp
3  * Description: Simple API for calling tesseract.
4  * Author: Ray Smith
5  * Created: Fri Oct 06 15:35:01 PDT 2006
6  *
7  * (C) Copyright 2006, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #ifdef __linux__
26 #include <signal.h>
27 #endif
28 
29 #if defined(_WIN32)
30 #ifdef _MSC_VER
31 #include "vcsversion.h"
32 #include "mathfix.h"
33 #elif MINGW
34 // workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME
35 #undef __STRICT_ANSI__
36 #endif // _MSC_VER
37 #include <stdlib.h>
38 #include <windows.h>
39 #include <fcntl.h>
40 #include <io.h>
41 #else
42 #include <dirent.h>
43 #include <libgen.h>
44 #include <string.h>
45 #endif // _WIN32
46 
47 #include <iostream>
48 #include <string>
49 #include <iterator>
50 #include <fstream>
51 
52 #include "allheaders.h"
53 
54 #include "baseapi.h"
55 #include "blobclass.h"
56 #include "resultiterator.h"
57 #include "mutableiterator.h"
58 #include "thresholder.h"
59 #include "tesseractclass.h"
60 #include "pageres.h"
61 #include "paragraphs.h"
62 #include "tessvars.h"
63 #include "control.h"
64 #include "dict.h"
65 #include "pgedit.h"
66 #include "paramsd.h"
67 #include "output.h"
68 #include "globaloc.h"
69 #include "globals.h"
70 #include "edgblob.h"
71 #include "equationdetect.h"
72 #include "tessbox.h"
73 #include "makerow.h"
74 #include "otsuthr.h"
75 #include "osdetect.h"
76 #include "params.h"
77 #include "renderer.h"
78 #include "strngs.h"
79 #include "openclwrapper.h"
80 
81 BOOL_VAR(stream_filelist, FALSE, "Stream a filelist from stdin");
82 
83 namespace tesseract {
84 
/**
 * Smallest rectangle width/height that TesseractRect will process; requests
 * with a smaller width or height return NULL without doing any work.
 */
86 const int kMinRectSize = 10;
/** Reject marker character. NOTE(review): usage is not visible in this chunk. */
88 const char kTesseractReject = '~';
/** Presumably the reject marker for UNLV-format output — TODO confirm. */
90 const char kUNLVReject = '~';
/** Presumably the suspect marker for UNLV-format output — TODO confirm. */
92 const char kUNLVSuspect = '^';
/** Presumably a placeholder input file name — usage not visible here. */
97 const char* kInputFile = "noname.tif";
/**
 * File to which ProcessPage writes the current variables (via PrintVariables)
 * before switching to the retry configuration.
 */
101 const char* kOldVarsFile = "failed_vars.txt";
/** Size of the char buffers used when formatting an int as text. */
103 const int kMaxIntSize = 22;
/** Presumably the lowest resolution treated as credible — units not shown here. */
108 const int kMinCredibleResolution = 70;
/** Presumably the highest resolution treated as credible — units not shown here. */
110 const int kMaxCredibleResolution = 2400;
111 
113  : tesseract_(NULL),
114  osd_tesseract_(NULL),
115  equ_detect_(NULL),
116  // Thresholder is initialized to NULL here, but will be set before use by:
117  // A constructor of a derived API, SetThresholder(), or
118  // created implicitly when used in InternalSetImage.
119  thresholder_(NULL),
120  paragraph_models_(NULL),
121  block_list_(NULL),
122  page_res_(NULL),
123  input_file_(NULL),
124  input_image_(NULL),
125  output_file_(NULL),
126  datapath_(NULL),
127  language_(NULL),
128  last_oem_requested_(OEM_DEFAULT),
129  recognition_done_(false),
130  truth_cb_(NULL),
131  rect_left_(0), rect_top_(0), rect_width_(0), rect_height_(0),
132  image_width_(0), image_height_(0) {
133 }
134 
136  End();
137 }
138 
142 const char* TessBaseAPI::Version() {
143 #if defined(GIT_REV) && (defined(DEBUG) || defined(_DEBUG))
144  return GIT_REV;
145 #else
146  return TESSERACT_VERSION_STR;
147 #endif
148 }
149 
157 #ifdef USE_OPENCL
158 #if USE_DEVICE_SELECTION
159 #include "opencl_device_selection.h"
160 #endif
161 #endif
162 size_t TessBaseAPI::getOpenCLDevice(void **data) {
163 #ifdef USE_OPENCL
164 #if USE_DEVICE_SELECTION
165  ds_device device = OpenclDevice::getDeviceSelection();
166  if (device.type == DS_DEVICE_OPENCL_DEVICE) {
167  *data = reinterpret_cast<void*>(new cl_device_id);
168  memcpy(*data, &device.oclDeviceID, sizeof(cl_device_id));
169  return sizeof(cl_device_id);
170  }
171 #endif
172 #endif
173 
174  *data = NULL;
175  return 0;
176 }
177 
183 #ifdef __linux__
184  struct sigaction action;
185  memset(&action, 0, sizeof(action));
186  action.sa_handler = &signal_exit;
187  action.sa_flags = SA_RESETHAND;
188  sigaction(SIGSEGV, &action, NULL);
189  sigaction(SIGFPE, &action, NULL);
190  sigaction(SIGBUS, &action, NULL);
191 #else
192  // Warn API users that an implementation is needed.
193  tprintf("CatchSignals has no non-linux implementation!\n");
194 #endif
195 }
196 
201 void TessBaseAPI::SetInputName(const char* name) {
202  if (input_file_ == NULL)
203  input_file_ = new STRING(name);
204  else
205  *input_file_ = name;
206 }
207 
209 void TessBaseAPI::SetOutputName(const char* name) {
210  if (output_file_ == NULL)
211  output_file_ = new STRING(name);
212  else
213  *output_file_ = name;
214 }
215 
216 bool TessBaseAPI::SetVariable(const char* name, const char* value) {
217  if (tesseract_ == NULL) tesseract_ = new Tesseract;
219  tesseract_->params());
220 }
221 
222 bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) {
223  if (tesseract_ == NULL) tesseract_ = new Tesseract;
225  tesseract_->params());
226 }
227 
228 bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
229  IntParam *p = ParamUtils::FindParam<IntParam>(
231  if (p == NULL) return false;
232  *value = (inT32)(*p);
233  return true;
234 }
235 
236 bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const {
237  BoolParam *p = ParamUtils::FindParam<BoolParam>(
239  if (p == NULL) return false;
240  *value = (BOOL8)(*p);
241  return true;
242 }
243 
244 const char *TessBaseAPI::GetStringVariable(const char *name) const {
245  StringParam *p = ParamUtils::FindParam<StringParam>(
247  return (p != NULL) ? p->string() : NULL;
248 }
249 
250 bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const {
251  DoubleParam *p = ParamUtils::FindParam<DoubleParam>(
253  if (p == NULL) return false;
254  *value = (double)(*p);
255  return true;
256 }
257 
260  return ParamUtils::GetParamAsString(name, tesseract_->params(), val);
261 }
262 
264 void TessBaseAPI::PrintVariables(FILE *fp) const {
266 }
267 
276 int TessBaseAPI::Init(const char* datapath, const char* language,
277  OcrEngineMode oem, char **configs, int configs_size,
278  const GenericVector<STRING> *vars_vec,
279  const GenericVector<STRING> *vars_values,
280  bool set_only_non_debug_params) {
281  PERF_COUNT_START("TessBaseAPI::Init")
282  // Default language is "eng".
283  if (language == NULL) language = "eng";
284  // If the datapath, OcrEngineMode or the language have changed - start again.
285  // Note that the language_ field stores the last requested language that was
286  // initialized successfully, while tesseract_->lang stores the language
287  // actually used. They differ only if the requested language was NULL, in
288  // which case tesseract_->lang is set to the Tesseract default ("eng").
289  if (tesseract_ != NULL &&
290  (datapath_ == NULL || language_ == NULL ||
291  *datapath_ != datapath || last_oem_requested_ != oem ||
292  (*language_ != language && tesseract_->lang != language))) {
293  delete tesseract_;
294  tesseract_ = NULL;
295  }
296  // PERF_COUNT_SUB("delete tesseract_")
297 #ifdef USE_OPENCL
298  OpenclDevice od;
299  od.InitEnv();
300 #endif
301  PERF_COUNT_SUB("OD::InitEnv()")
302  bool reset_classifier = true;
303  if (tesseract_ == NULL) {
304  reset_classifier = false;
305  tesseract_ = new Tesseract;
307  datapath, output_file_ != NULL ? output_file_->string() : NULL,
308  language, oem, configs, configs_size, vars_vec, vars_values,
309  set_only_non_debug_params) != 0) {
310  return -1;
311  }
312  }
313  PERF_COUNT_SUB("update tesseract_")
314  // Update datapath and language requested for the last valid initialization.
315  if (datapath_ == NULL)
316  datapath_ = new STRING(datapath);
317  else
318  *datapath_ = datapath;
319  if ((strcmp(datapath_->string(), "") == 0) &&
320  (strcmp(tesseract_->datadir.string(), "") != 0))
322 
323  if (language_ == NULL)
324  language_ = new STRING(language);
325  else
326  *language_ = language;
328  // PERF_COUNT_SUB("update last_oem_requested_")
329  // For same language and datapath, just reset the adaptive classifier.
330  if (reset_classifier) {
332  PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()")
333  }
335  return 0;
336 }
337 
347  return (language_ == NULL || language_->string() == NULL) ?
348  "" : language_->string();
349 }
350 
357  GenericVector<STRING>* langs) const {
358  langs->clear();
359  if (tesseract_ != NULL) {
360  langs->push_back(tesseract_->lang);
361  int num_subs = tesseract_->num_sub_langs();
362  for (int i = 0; i < num_subs; ++i)
363  langs->push_back(tesseract_->get_sub_lang(i)->lang);
364  }
365 }
366 
371  GenericVector<STRING>* langs) const {
372  langs->clear();
373  if (tesseract_ != NULL) {
374 #ifdef _WIN32
375  STRING pattern = tesseract_->datadir + "/*." + kTrainedDataSuffix;
376  char fname[_MAX_FNAME];
377  WIN32_FIND_DATA data;
378  BOOL result = TRUE;
379  HANDLE handle = FindFirstFile(pattern.string(), &data);
380  if (handle != INVALID_HANDLE_VALUE) {
381  for (; result; result = FindNextFile(handle, &data)) {
382  _splitpath(data.cFileName, NULL, NULL, fname, NULL);
383  langs->push_back(STRING(fname));
384  }
385  FindClose(handle);
386  }
387 #else // _WIN32
388  DIR *dir;
389  struct dirent *dirent;
390  char *dot;
391 
392  STRING extension = STRING(".") + kTrainedDataSuffix;
393 
394  dir = opendir(tesseract_->datadir.string());
395  if (dir != NULL) {
396  while ((dirent = readdir(dir))) {
397  // Skip '.', '..', and hidden files
398  if (dirent->d_name[0] != '.') {
399  if (strstr(dirent->d_name, extension.string()) != NULL) {
400  dot = strrchr(dirent->d_name, '.');
401  // This ensures that .traineddata is at the end of the file name
402  if (strncmp(dot, extension.string(),
403  strlen(extension.string())) == 0) {
404  *dot = '\0';
405  langs->push_back(STRING(dirent->d_name));
406  }
407  }
408  }
409  }
410  closedir(dir);
411  }
412 #endif
413  }
414 }
415 
422 int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
423  if (tesseract_ == NULL)
424  tesseract_ = new Tesseract;
425  else
427  return tesseract_->init_tesseract_lm(datapath, NULL, language);
428 }
429 
435  if (tesseract_ == NULL) {
436  tesseract_ = new Tesseract;
438  }
439 }
440 
448 }
449 
453 }
454 
461  if (tesseract_ == NULL)
462  tesseract_ = new Tesseract;
463  tesseract_->tessedit_pageseg_mode.set_value(mode);
464 }
465 
468  if (tesseract_ == NULL)
469  return PSM_SINGLE_BLOCK;
470  return static_cast<PageSegMode>(
471  static_cast<int>(tesseract_->tessedit_pageseg_mode));
472 }
473 
487 char* TessBaseAPI::TesseractRect(const unsigned char* imagedata,
488  int bytes_per_pixel,
489  int bytes_per_line,
490  int left, int top,
491  int width, int height) {
492  if (tesseract_ == NULL || width < kMinRectSize || height < kMinRectSize)
493  return NULL; // Nothing worth doing.
494 
495  // Since this original api didn't give the exact size of the image,
496  // we have to invent a reasonable value.
497  int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
498  SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
499  bytes_per_pixel, bytes_per_line);
500  SetRectangle(left, top, width, height);
501 
502  return GetUTF8Text();
503 }
504 
510  if (tesseract_ == NULL)
511  return;
514 }
515 
525 void TessBaseAPI::SetImage(const unsigned char* imagedata,
526  int width, int height,
527  int bytes_per_pixel, int bytes_per_line) {
528  if (InternalSetImage())
529  thresholder_->SetImage(imagedata, width, height,
530  bytes_per_pixel, bytes_per_line);
531 }
532 
534  if (thresholder_)
536  else
537  tprintf("Please call SetImage before SetSourceResolution.\n");
538 }
539 
550 void TessBaseAPI::SetImage(Pix* pix) {
551  if (InternalSetImage())
552  thresholder_->SetImage(pix);
553  SetInputImage(pix);
554 }
555 
561 void TessBaseAPI::SetRectangle(int left, int top, int width, int height) {
562  if (thresholder_ == NULL)
563  return;
564  thresholder_->SetRectangle(left, top, width, height);
565  ClearResults();
566 }
567 
573  if (tesseract_ == NULL || thresholder_ == NULL)
574  return NULL;
575  if (tesseract_->pix_binary() == NULL)
577  return pixClone(tesseract_->pix_binary());
578 }
579 
585 Boxa* TessBaseAPI::GetRegions(Pixa** pixa) {
586  return GetComponentImages(RIL_BLOCK, false, pixa, NULL);
587 }
588 
597 Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding,
598  Pixa** pixa, int** blockids, int** paraids) {
599  return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
600  pixa, blockids, paraids);
601 }
602 
611 Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) {
612  return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
613 }
614 
620 Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
621  return GetComponentImages(RIL_WORD, true, pixa, NULL);
622 }
623 
631  return GetComponentImages(RIL_SYMBOL, true, pixa, NULL);
632 }
633 
643  bool text_only, bool raw_image,
644  const int raw_padding,
645  Pixa** pixa, int** blockids,
646  int** paraids) {
647  PageIterator* page_it = GetIterator();
648  if (page_it == NULL)
649  page_it = AnalyseLayout();
650  if (page_it == NULL)
651  return NULL; // Failed.
652 
653  // Count the components to get a size for the arrays.
654  int component_count = 0;
655  int left, top, right, bottom;
656 
657  TessResultCallback<bool>* get_bbox = NULL;
658  if (raw_image) {
659  // Get bounding box in original raw image with padding.
661  level, raw_padding,
662  &left, &top, &right, &bottom);
663  } else {
664  // Get bounding box from binarized imaged. Note that this could be
665  // differently scaled from the original image.
666  get_bbox = NewPermanentTessCallback(page_it,
668  level, &left, &top, &right, &bottom);
669  }
670  do {
671  if (get_bbox->Run() &&
672  (!text_only || PTIsTextType(page_it->BlockType())))
673  ++component_count;
674  } while (page_it->Next(level));
675 
676  Boxa* boxa = boxaCreate(component_count);
677  if (pixa != NULL)
678  *pixa = pixaCreate(component_count);
679  if (blockids != NULL)
680  *blockids = new int[component_count];
681  if (paraids != NULL)
682  *paraids = new int[component_count];
683 
684  int blockid = 0;
685  int paraid = 0;
686  int component_index = 0;
687  page_it->Begin();
688  do {
689  if (get_bbox->Run() &&
690  (!text_only || PTIsTextType(page_it->BlockType()))) {
691  Box* lbox = boxCreate(left, top, right - left, bottom - top);
692  boxaAddBox(boxa, lbox, L_INSERT);
693  if (pixa != NULL) {
694  Pix* pix = NULL;
695  if (raw_image) {
696  pix = page_it->GetImage(level, raw_padding, input_image_,
697  &left, &top);
698  } else {
699  pix = page_it->GetBinaryImage(level);
700  }
701  pixaAddPix(*pixa, pix, L_INSERT);
702  pixaAddBox(*pixa, lbox, L_CLONE);
703  }
704  if (paraids != NULL) {
705  (*paraids)[component_index] = paraid;
706  if (page_it->IsAtFinalElement(RIL_PARA, level))
707  ++paraid;
708  }
709  if (blockids != NULL) {
710  (*blockids)[component_index] = blockid;
711  if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
712  ++blockid;
713  paraid = 0;
714  }
715  }
716  ++component_index;
717  }
718  } while (page_it->Next(level));
719  delete page_it;
720  delete get_bbox;
721  return boxa;
722 }
723 
725  if (thresholder_ == NULL) {
726  return 0;
727  }
728  return thresholder_->GetScaleFactor();
729 }
730 
732 void TessBaseAPI::DumpPGM(const char* filename) {
733  if (tesseract_ == NULL)
734  return;
735  FILE *fp = fopen(filename, "wb");
736  Pix* pix = tesseract_->pix_binary();
737  int width = pixGetWidth(pix);
738  int height = pixGetHeight(pix);
739  l_uint32* data = pixGetData(pix);
740  fprintf(fp, "P5 %d %d 255\n", width, height);
741  for (int y = 0; y < height; ++y, data += pixGetWpl(pix)) {
742  for (int x = 0; x < width; ++x) {
743  uinT8 b = GET_DATA_BIT(data, x) ? 0 : 255;
744  fwrite(&b, 1, 1, fp);
745  }
746  }
747  fclose(fp);
748 }
749 
750 #ifndef NO_CUBE_BUILD
751 
757 int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks,
758  Boxa* boxa_words, Pixa* pixa_words,
759  const FCOORD& reskew, Pix* page_pix,
760  PAGE_RES* page_res) {
761  int block_count = boxaGetCount(boxa_blocks);
762  ASSERT_HOST(block_count == pixaGetCount(pixa_blocks));
763  // Write each block to the current directory as junk_write_display.nnn.png.
764  for (int i = 0; i < block_count; ++i) {
765  Pix* pix = pixaGetPix(pixa_blocks, i, L_CLONE);
766  pixDisplayWrite(pix, 1);
767  }
768  int word_count = boxaGetCount(boxa_words);
769  ASSERT_HOST(word_count == pixaGetCount(pixa_words));
770  int pr_word = 0;
771  PAGE_RES_IT page_res_it(page_res);
772  for (page_res_it.restart_page(); page_res_it.word () != NULL;
773  page_res_it.forward(), ++pr_word) {
774  WERD_RES *word = page_res_it.word();
775  WERD_CHOICE* choice = word->best_choice;
776  // Write the first 100 words to files names wordims/<wordstring>.tif.
777  if (pr_word < 100) {
778  STRING filename("wordims/");
779  if (choice != NULL) {
780  filename += choice->unichar_string();
781  } else {
782  char numbuf[32];
783  filename += "unclassified";
784  snprintf(numbuf, 32, "%03d", pr_word);
785  filename += numbuf;
786  }
787  filename += ".tif";
788  Pix* pix = pixaGetPix(pixa_words, pr_word, L_CLONE);
789  pixWrite(filename.string(), pix, IFF_TIFF_G4);
790  }
791  }
792  ASSERT_HOST(pr_word == word_count);
793  return 0;
794 }
795 #endif // NO_CUBE_BUILD
796 
812 PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
813  if (FindLines() == 0) {
814  if (block_list_->empty())
815  return NULL; // The page was empty.
816  page_res_ = new PAGE_RES(merge_similar_words, block_list_, NULL);
817  DetectParagraphs(false);
818  return new PageIterator(
822  }
823  return NULL;
824 }
825 
831  if (tesseract_ == NULL)
832  return -1;
833  if (FindLines() != 0)
834  return -1;
835  if (page_res_ != NULL)
836  delete page_res_;
837  if (block_list_->empty()) {
838  page_res_ = new PAGE_RES(false, block_list_,
840  return 0; // Empty page.
841  }
842 
844  recognition_done_ = true;
849  } else {
850  // TODO(rays) LSTM here.
851  page_res_ = new PAGE_RES(false,
853  }
856  return 0;
857  }
858 
859  if (truth_cb_ != NULL) {
860  tesseract_->wordrec_run_blamer.set_value(true);
861  PageIterator *page_it = new PageIterator(
866  image_height_, page_it, this->tesseract()->pix_grey());
867  delete page_it;
868  }
869 
870  int result = 0;
872  #ifndef GRAPHICS_DISABLED
874  #endif // GRAPHICS_DISABLED
875  // The page_res is invalid after an interactive session, so cleanup
876  // in a way that lets us continue to the next page without crashing.
877  delete page_res_;
878  page_res_ = NULL;
879  return -1;
881  STRING fontname;
882  ExtractFontName(*output_file_, &fontname);
884  } else if (tesseract_->tessedit_ambigs_training) {
885  FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
886  // OCR the page segmented into words by tesseract.
888  *input_file_, page_res_, monitor, training_output_file);
889  fclose(training_output_file);
890  } else {
891  // Now run the main recognition.
892  bool wait_for_text = true;
893  GetBoolVariable("paragraph_text_based", &wait_for_text);
894  if (!wait_for_text) DetectParagraphs(false);
895  if (tesseract_->recog_all_words(page_res_, monitor, NULL, NULL, 0)) {
896  if (wait_for_text) DetectParagraphs(true);
897  } else {
898  result = -1;
899  }
900  }
901  return result;
902 }
903 
906  if (tesseract_ == NULL)
907  return -1;
908  if (thresholder_ == NULL || thresholder_->IsEmpty()) {
909  tprintf("Please call SetImage before attempting recognition.");
910  return -1;
911  }
912  if (page_res_ != NULL)
913  ClearResults();
914  if (FindLines() != 0)
915  return -1;
916  // Additional conditions under which chopper test cannot be run
917  if (tesseract_->interactive_display_mode) return -1;
918 
919  recognition_done_ = true;
920 
921  page_res_ = new PAGE_RES(false, block_list_,
923 
924  PAGE_RES_IT page_res_it(page_res_);
925 
926  while (page_res_it.word() != NULL) {
927  WERD_RES *word_res = page_res_it.word();
928  GenericVector<TBOX> boxes;
929  tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
930  page_res_it.row()->row, word_res);
931  page_res_it.forward();
932  }
933  return 0;
934 }
935 
937  if (input_image_)
938  pixDestroy(&input_image_);
939  input_image_ = NULL;
940  if (pix)
941  input_image_ = pixCopy(NULL, pix);
942 }
943 
945  return input_image_;
946 }
947 
949  if (input_file_)
950  return input_file_->c_str();
951  return NULL;
952 }
953 
954 const char * TessBaseAPI::GetDatapath() {
955  return tesseract_->datadir.c_str();
956 }
957 
960 }
961 
962 // If flist exists, get data from there. Otherwise get data from buf.
963 // Seems convoluted, but is the easiest way I know of to meet multiple
964 // goals. Support streaming from stdin, and also work on platforms
965 // lacking fmemopen.
966 bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
967  STRING *buf,
968  const char* retry_config,
969  int timeout_millisec,
970  TessResultRenderer* renderer,
971  int tessedit_page_number) {
972  if (!flist && !buf) return false;
973  int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
974  char pagename[MAX_PATH];
975 
976  GenericVector<STRING> lines;
977  if (!flist) {
978  buf->split('\n', &lines);
979  if (lines.empty()) return false;
980  }
981 
982  // Skip to the requested page number.
983  for (int i = 0; i < page; i++) {
984  if (flist) {
985  if (fgets(pagename, sizeof(pagename), flist) == NULL) break;
986  }
987  }
988 
989  // Begin producing output
990  const char* kUnknownTitle = "";
991  if (renderer && !renderer->BeginDocument(kUnknownTitle)) {
992  return false;
993  }
994 
995  // Loop over all pages - or just the requested one
996  while (true) {
997  if (flist) {
998  if (fgets(pagename, sizeof(pagename), flist) == NULL) break;
999  } else {
1000  if (page >= lines.size()) break;
1001  snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str());
1002  }
1003  chomp_string(pagename);
1004  Pix *pix = pixRead(pagename);
1005  if (pix == NULL) {
1006  tprintf("Image file %s cannot be read!\n", pagename);
1007  return false;
1008  }
1009  tprintf("Page %d : %s\n", page, pagename);
1010  bool r = ProcessPage(pix, page, pagename, retry_config,
1011  timeout_millisec, renderer);
1012  pixDestroy(&pix);
1013  if (!r) return false;
1014  if (tessedit_page_number >= 0) break;
1015  ++page;
1016  }
1017 
1018  // Finish producing output
1019  if (renderer && !renderer->EndDocument()) {
1020  return false;
1021  }
1022  return true;
1023 }
1024 
1025 bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
1026  size_t size,
1027  const char* filename,
1028  const char* retry_config,
1029  int timeout_millisec,
1030  TessResultRenderer* renderer,
1031  int tessedit_page_number) {
1032 #ifndef ANDROID_BUILD
1033  Pix *pix = NULL;
1034 #ifdef USE_OPENCL
1035  OpenclDevice od;
1036 #endif // USE_OPENCL
1037  int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
1038  for (; ; ++page) {
1039  if (tessedit_page_number >= 0)
1040  page = tessedit_page_number;
1041 #ifdef USE_OPENCL
1042  if ( od.selectedDeviceIsOpenCL() ) {
1043  // FIXME(jbreiden) Not implemented.
1044  pix = od.pixReadMemTiffCl(data, size, page);
1045  } else {
1046 #endif // USE_OPENCL
1047  pix = pixReadMemTiff(data, size, page);
1048 #ifdef USE_OPENCL
1049  }
1050 #endif // USE_OPENCL
1051  if (pix == NULL) break;
1052  tprintf("Page %d\n", page + 1);
1053  char page_str[kMaxIntSize];
1054  snprintf(page_str, kMaxIntSize - 1, "%d", page);
1055  SetVariable("applybox_page", page_str);
1056  bool r = ProcessPage(pix, page, filename, retry_config,
1057  timeout_millisec, renderer);
1058  pixDestroy(&pix);
1059  if (!r) return false;
1060  if (tessedit_page_number >= 0) break;
1061  }
1062  return true;
1063 #else
1064  return false;
1065 #endif
1066 }
1067 
1068 // Master ProcessPages calls ProcessPagesInternal and then does any post-
1069 // processing required due to being in a training mode.
1070 bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config,
1071  int timeout_millisec,
1072  TessResultRenderer* renderer) {
1073  bool result =
1074  ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
1075  if (result) {
1078  tprintf("Write of TR file failed: %s\n", output_file_->string());
1079  return false;
1080  }
1081  }
1082  return result;
1083 }
1084 
1085 // In the ideal scenario, Tesseract will start working on data as soon
1086 // as it can. For example, if you stream a filelist through stdin, we
1087 // should start the OCR process as soon as the first filename is
1088 // available. This is particularly useful when hooking Tesseract up to
1089 // slow hardware such as a book scanning machine.
1090 //
1091 // Unfortunately there are tradeoffs. You can't seek on stdin. That
1092 // makes automatic detection of datatype (TIFF? filelist? PNG?)
1093 // impractical. So we support a command line flag to explicitly
1094 // identify the scenario that really matters: filelists on
1095 // stdin. We'll still do our best if the user likes pipes. That means
1096 // piling up any data coming into stdin into a memory buffer.
1097 bool TessBaseAPI::ProcessPagesInternal(const char* filename,
1098  const char* retry_config,
1099  int timeout_millisec,
1100  TessResultRenderer* renderer) {
1101 #ifndef ANDROID_BUILD
1102  PERF_COUNT_START("ProcessPages")
1103  bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
1104  if (stdInput) {
1105 #ifdef WIN32
1106  if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1107  tprintf("ERROR: cin to binary: %s", strerror(errno));
1108 #endif // WIN32
1109  }
1110 
1111  if (stream_filelist) {
1112  return ProcessPagesFileList(stdin, NULL, retry_config,
1113  timeout_millisec, renderer,
1115  }
1116 
1117  // At this point we are officially in autodection territory.
1118  // That means we are going to buffer stdin so that it is
1119  // seekable. To keep code simple we will also buffer data
1120  // coming from a file.
1121  std::string buf;
1122  if (stdInput) {
1123  buf.assign((std::istreambuf_iterator<char>(std::cin)),
1124  (std::istreambuf_iterator<char>()));
1125  } else {
1126  std::ifstream ifs(filename, std::ios::binary);
1127  if (ifs) {
1128  buf.assign((std::istreambuf_iterator<char>(ifs)),
1129  (std::istreambuf_iterator<char>()));
1130  } else {
1131  tprintf("ERROR: Can not open input file %s\n", filename);
1132  return false;
1133  }
1134  }
1135 
1136  // Here is our autodetection
1137  int format;
1138  const l_uint8 * data = reinterpret_cast<const l_uint8 *>(buf.c_str());
1139  findFileFormatBuffer(data, &format);
1140 
1141  // Maybe we have a filelist
1142  if (format == IFF_UNKNOWN) {
1143  STRING s(buf.c_str());
1144  return ProcessPagesFileList(NULL, &s, retry_config,
1145  timeout_millisec, renderer,
1147  }
1148 
1149  // Maybe we have a TIFF which is potentially multipage
1150  bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
1151  format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
1152  format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1153  format == IFF_TIFF_ZIP);
1154 
1155  // Fail early if we can, before producing any output
1156  Pix *pix = NULL;
1157  if (!tiff) {
1158  pix = pixReadMem(data, buf.size());
1159  if (pix == NULL) {
1160  return false;
1161  }
1162  }
1163 
1164  // Begin the output
1165  const char* kUnknownTitle = "";
1166  if (renderer && !renderer->BeginDocument(kUnknownTitle)) {
1167  pixDestroy(&pix);
1168  return false;
1169  }
1170 
1171  // Produce output
1172  bool r = false;
1173  if (tiff) {
1174  r = ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
1175  timeout_millisec, renderer,
1177  } else {
1178  r = ProcessPage(pix, 0, filename, retry_config,
1179  timeout_millisec, renderer);
1180  pixDestroy(&pix);
1181  }
1182 
1183  // End the output
1184  if (!r || (renderer && !renderer->EndDocument())) {
1185  return false;
1186  }
1188  return true;
1189 #else
1190  return false;
1191 #endif
1192 }
1193 
1194 bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
1195  const char* retry_config, int timeout_millisec,
1196  TessResultRenderer* renderer) {
1197  PERF_COUNT_START("ProcessPage")
1198  SetInputName(filename);
1199  SetImage(pix);
1200  bool failed = false;
1201 
1203  // Disabled character recognition
1204  PageIterator* it = AnalyseLayout();
1205 
1206  if (it == NULL) {
1207  failed = true;
1208  } else {
1209  delete it;
1210  }
1212  failed = FindLines() != 0;
1213  } else if (timeout_millisec > 0) {
1214  // Running with a timeout.
1215  ETEXT_DESC monitor;
1216  monitor.cancel = NULL;
1217  monitor.cancel_this = NULL;
1218  monitor.set_deadline_msecs(timeout_millisec);
1219 
1220  // Now run the main recognition.
1221  failed = Recognize(&monitor) < 0;
1222  } else {
1223  // Normal layout and character recognition with no timeout.
1224  failed = Recognize(NULL) < 0;
1225  }
1226 
1228 #ifndef ANDROID_BUILD
1229  Pix* page_pix = GetThresholdedImage();
1230  pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
1231 #endif // ANDROID_BUILD
1232  }
1233 
1234  if (failed && retry_config != NULL && retry_config[0] != '\0') {
1235  // Save current config variables before switching modes.
1236  FILE* fp = fopen(kOldVarsFile, "wb");
1237  PrintVariables(fp);
1238  fclose(fp);
1239  // Switch to alternate mode for retry.
1240  ReadConfigFile(retry_config);
1241  SetImage(pix);
1242  Recognize(NULL);
1243  // Restore saved config variables.
1245  }
1246 
1247  if (renderer && !failed) {
1248  failed = !renderer->AddImage(this);
1249  }
1250 
1252  return !failed;
1253 }
1254 
1260  if (tesseract_ == NULL || page_res_ == NULL)
1261  return NULL;
1262  return new LTRResultIterator(
1266 }
1267 
1277  if (tesseract_ == NULL || page_res_ == NULL)
1278  return NULL;
1283 }
1284 
1294  if (tesseract_ == NULL || page_res_ == NULL)
1295  return NULL;
1296  return new MutableIterator(page_res_, tesseract_,
1300 }
1301 
1304  if (tesseract_ == NULL ||
1305  (!recognition_done_ && Recognize(NULL) < 0))
1306  return NULL;
1307  STRING text("");
1308  ResultIterator *it = GetIterator();
1309  do {
1310  if (it->Empty(RIL_PARA)) continue;
1311  char *para_text = it->GetUTF8Text(RIL_PARA);
1312  text += para_text;
1313  delete []para_text;
1314  } while (it->Next(RIL_PARA));
1315  char* result = new char[text.length() + 1];
1316  strncpy(result, text.string(), text.length() + 1);
1317  delete it;
1318  return result;
1319 }
1320 
1324 static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
1325  tesseract::Orientation orientation;
1326  tesseract::WritingDirection writing_direction;
1327  tesseract::TextlineOrder textline_order;
1328  float deskew_angle;
1329  it->Orientation(&orientation, &writing_direction, &textline_order,
1330  &deskew_angle);
1331  return orientation;
1332 }
1333 
1342 static void AddBaselineCoordsTohOCR(const PageIterator *it,
1343  PageIteratorLevel level,
1344  STRING* hocr_str) {
1345  tesseract::Orientation orientation = GetBlockTextOrientation(it);
1346  if (orientation != ORIENTATION_PAGE_UP) {
1347  hocr_str->add_str_int("; textangle ", 360 - orientation * 90);
1348  return;
1349  }
1350 
1351  int left, top, right, bottom;
1352  it->BoundingBox(level, &left, &top, &right, &bottom);
1353 
1354  // Try to get the baseline coordinates at this level.
1355  int x1, y1, x2, y2;
1356  if (!it->Baseline(level, &x1, &y1, &x2, &y2))
1357  return;
1358  // Following the description of this field of the hOCR spec, we convert the
1359  // baseline coordinates so that "the bottom left of the bounding box is the
1360  // origin".
1361  x1 -= left;
1362  x2 -= left;
1363  y1 -= bottom;
1364  y2 -= bottom;
1365 
1366  // Now fit a line through the points so we can extract coefficients for the
1367  // equation: y = p1 x + p0
1368  double p1 = 0;
1369  double p0 = 0;
1370  if (x1 == x2) {
1371  // Problem computing the polynomial coefficients.
1372  return;
1373  }
1374  p1 = (y2 - y1) / static_cast<double>(x2 - x1);
1375  p0 = y1 - static_cast<double>(p1 * x1);
1376 
1377  hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0);
1378  hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0);
1379 }
1380 
1381 static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, int num2) {
1382  unsigned long bufsize = base.length() + 2 * kMaxIntSize;
1383  char id_buffer[bufsize];
1384  if (num2 >= 0) {
1385  snprintf(id_buffer, bufsize - 1, "%s_%d_%d", base.c_str(), num1, num2);
1386  } else {
1387  snprintf(id_buffer, bufsize - 1, "%s_%d", base.c_str(), num1);
1388  }
1389  id_buffer[bufsize - 1] = '\0';
1390  *hocr_str += " id='";
1391  *hocr_str += id_buffer;
1392  *hocr_str += "'";
1393 }
1394 
1395 static void AddBoxTohOCR(const ResultIterator *it,
1396  PageIteratorLevel level,
1397  STRING* hocr_str) {
1398  int left, top, right, bottom;
1399  it->BoundingBox(level, &left, &top, &right, &bottom);
1400  // This is the only place we use double quotes instead of single quotes,
1401  // but it may too late to change for consistency
1402  hocr_str->add_str_int(" title=\"bbox ", left);
1403  hocr_str->add_str_int(" ", top);
1404  hocr_str->add_str_int(" ", right);
1405  hocr_str->add_str_int(" ", bottom);
1406  // Add baseline coordinates & heights for textlines only.
1407  if (level == RIL_TEXTLINE) {
1408  AddBaselineCoordsTohOCR(it, level, hocr_str);
1409  // add custom height measures
1410  float row_height, descenders, ascenders; // row attributes
1411  it->RowAttributes(&row_height, &descenders, &ascenders);
1412  // TODO: Do we want to limit these to a single decimal place?
1413  hocr_str->add_str_double("; x_size ", row_height);
1414  hocr_str->add_str_double("; x_descenders ", descenders * -1);
1415  hocr_str->add_str_double("; x_ascenders ", ascenders);
1416  }
1417  *hocr_str += "\">";
1418 }
1419 
1428 char* TessBaseAPI::GetHOCRText(int page_number) {
1429  if (tesseract_ == NULL ||
1430  (page_res_ == NULL && Recognize(NULL) < 0))
1431  return NULL;
1432 
1433  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1434  int page_id = page_number + 1; // hOCR uses 1-based page numbers.
1435  bool font_info = false;
1436  GetBoolVariable("hocr_font_info", &font_info);
1437 
1438  STRING hocr_str("");
1439 
1440  if (input_file_ == NULL)
1441  SetInputName(NULL);
1442 
1443 #ifdef _WIN32
1444  // convert input name from ANSI encoding to utf-8
1445  int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
1446  NULL, 0);
1447  wchar_t *uni16_str = new WCHAR[str16_len];
1448  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
1449  uni16_str, str16_len);
1450  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL,
1451  0, NULL, NULL);
1452  char *utf8_str = new char[utf8_len];
1453  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
1454  utf8_len, NULL, NULL);
1455  *input_file_ = utf8_str;
1456  delete[] uni16_str;
1457  delete[] utf8_str;
1458 #endif
1459 
1460  hocr_str += " <div class='ocr_page'";
1461  AddIdTohOCR(&hocr_str, "page", page_id, -1);
1462  hocr_str += " title='image \"";
1463  if (input_file_) {
1464  hocr_str += HOcrEscape(input_file_->string());
1465  } else {
1466  hocr_str += "unknown";
1467  }
1468  hocr_str.add_str_int("\"; bbox ", rect_left_);
1469  hocr_str.add_str_int(" ", rect_top_);
1470  hocr_str.add_str_int(" ", rect_width_);
1471  hocr_str.add_str_int(" ", rect_height_);
1472  hocr_str.add_str_int("; ppageno ", page_number);
1473  hocr_str += "'>\n";
1474 
1475  ResultIterator *res_it = GetIterator();
1476  while (!res_it->Empty(RIL_BLOCK)) {
1477  if (res_it->Empty(RIL_WORD)) {
1478  res_it->Next(RIL_WORD);
1479  continue;
1480  }
1481 
1482  // Open any new block/paragraph/textline.
1483  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1484  hocr_str += " <div class='ocr_carea'";
1485  AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
1486  AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
1487  }
1488  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1489  hocr_str += "\n <p class='ocr_par'";
1490  if (res_it->ParagraphIsLtr()) {
1491  hocr_str += " dir='ltr'";
1492  } else {
1493  hocr_str += " dir='rtl'";
1494  }
1495  AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
1496  AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
1497  }
1498  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1499  hocr_str += "\n <span class='ocr_line'";
1500  AddIdTohOCR(&hocr_str, "line", page_id, lcnt);
1501  AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
1502  }
1503 
1504  // Now, process the word...
1505  hocr_str += "<span class='ocrx_word'";
1506  AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
1507  int left, top, right, bottom;
1508  bool bold, italic, underlined, monospace, serif, smallcaps;
1509  int pointsize, font_id;
1510  const char *font_name;
1511  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1512  font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
1513  &monospace, &serif, &smallcaps,
1514  &pointsize, &font_id);
1515  hocr_str.add_str_int(" title='bbox ", left);
1516  hocr_str.add_str_int(" ", top);
1517  hocr_str.add_str_int(" ", right);
1518  hocr_str.add_str_int(" ", bottom);
1519  hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
1520  if (font_info) {
1521  if (font_name) {
1522  hocr_str += "; x_font ";
1523  hocr_str += HOcrEscape(font_name);
1524  }
1525  hocr_str.add_str_int("; x_fsize ", pointsize);
1526  }
1527  hocr_str += "'";
1528  if (res_it->WordRecognitionLanguage()) {
1529  hocr_str += " lang='";
1530  hocr_str += res_it->WordRecognitionLanguage();
1531  hocr_str += "'";
1532  }
1533  switch (res_it->WordDirection()) {
1534  case DIR_LEFT_TO_RIGHT: hocr_str += " dir='ltr'"; break;
1535  case DIR_RIGHT_TO_LEFT: hocr_str += " dir='rtl'"; break;
1536  default: // Do nothing.
1537  break;
1538  }
1539  hocr_str += ">";
1540  bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
1541  bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
1542  bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
1543  if (bold) hocr_str += "<strong>";
1544  if (italic) hocr_str += "<em>";
1545  do {
1546  const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
1547  if (grapheme && grapheme[0] != 0) {
1548  hocr_str += HOcrEscape(grapheme);
1549  }
1550  delete []grapheme;
1551  res_it->Next(RIL_SYMBOL);
1552  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1553  if (italic) hocr_str += "</em>";
1554  if (bold) hocr_str += "</strong>";
1555  hocr_str += "</span> ";
1556  wcnt++;
1557  // Close any ending block/paragraph/textline.
1558  if (last_word_in_line) {
1559  hocr_str += "\n </span>";
1560  lcnt++;
1561  }
1562  if (last_word_in_para) {
1563  hocr_str += "\n </p>\n";
1564  pcnt++;
1565  }
1566  if (last_word_in_block) {
1567  hocr_str += " </div>\n";
1568  bcnt++;
1569  }
1570  }
1571  hocr_str += " </div>\n";
1572 
1573  char *ret = new char[hocr_str.length() + 1];
1574  strcpy(ret, hocr_str.string());
1575  delete res_it;
1576  return ret;
1577 }
1578 
1580 const int kNumbersPerBlob = 5;
1585 const int kBytesPerNumber = 5;
1594 const int kBytesPer64BitNumber = 20;
1602  UNICHAR_LEN;
1603 
1609 char* TessBaseAPI::GetBoxText(int page_number) {
1610  if (tesseract_ == NULL ||
1611  (!recognition_done_ && Recognize(NULL) < 0))
1612  return NULL;
1613  int blob_count;
1614  int utf8_length = TextLength(&blob_count);
1615  int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
1617  char* result = new char[total_length];
1618  strcpy(result, "\0");
1619  int output_length = 0;
1621  do {
1622  int left, top, right, bottom;
1623  if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
1624  char* text = it->GetUTF8Text(RIL_SYMBOL);
1625  // Tesseract uses space for recognition failure. Fix to a reject
1626  // character, kTesseractReject so we don't create illegal box files.
1627  for (int i = 0; text[i] != '\0'; ++i) {
1628  if (text[i] == ' ')
1629  text[i] = kTesseractReject;
1630  }
1631  snprintf(result + output_length, total_length - output_length,
1632  "%s %d %d %d %d %d\n",
1633  text, left, image_height_ - bottom,
1634  right, image_height_ - top, page_number);
1635  output_length += strlen(result + output_length);
1636  delete [] text;
1637  // Just in case...
1638  if (output_length + kMaxBytesPerLine > total_length)
1639  break;
1640  }
1641  } while (it->Next(RIL_SYMBOL));
1642  delete it;
1643  return result;
1644 }
1645 
// Parallel lookup tables, both terminated by 0: a code point found at index j
// of kUniChs is replaced by kLatinChs[j] by the lookup loop later in this
// file.
// Source code points: euro sign, left/right double quotation marks,
// left/right single quotation marks, bullet, em dash.
1651 const int kUniChs[] = {
1652  0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
1653 };
// Replacements, index for index: cent sign, '"', '"', '\'', '\'',
// middle dot, hyphen-minus. All are <= 0xff.
1655 const int kLatinChs[] = {
1656  0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
1657 };
1658 
1665  if (tesseract_ == NULL ||
1666  (!recognition_done_ && Recognize(NULL) < 0))
1667  return NULL;
1668  bool tilde_crunch_written = false;
1669  bool last_char_was_newline = true;
1670  bool last_char_was_tilde = false;
1671 
1672  int total_length = TextLength(NULL);
1673  PAGE_RES_IT page_res_it(page_res_);
1674  char* result = new char[total_length];
1675  char* ptr = result;
1676  for (page_res_it.restart_page(); page_res_it.word () != NULL;
1677  page_res_it.forward()) {
1678  WERD_RES *word = page_res_it.word();
1679  // Process the current word.
1680  if (word->unlv_crunch_mode != CR_NONE) {
1681  if (word->unlv_crunch_mode != CR_DELETE &&
1682  (!tilde_crunch_written ||
1683  (word->unlv_crunch_mode == CR_KEEP_SPACE &&
1684  word->word->space() > 0 &&
1685  !word->word->flag(W_FUZZY_NON) &&
1686  !word->word->flag(W_FUZZY_SP)))) {
1687  if (!word->word->flag(W_BOL) &&
1688  word->word->space() > 0 &&
1689  !word->word->flag(W_FUZZY_NON) &&
1690  !word->word->flag(W_FUZZY_SP)) {
1691  /* Write a space to separate from preceding good text */
1692  *ptr++ = ' ';
1693  last_char_was_tilde = false;
1694  }
1695  if (!last_char_was_tilde) {
1696  // Write a reject char.
1697  last_char_was_tilde = true;
1698  *ptr++ = kUNLVReject;
1699  tilde_crunch_written = true;
1700  last_char_was_newline = false;
1701  }
1702  }
1703  } else {
1704  // NORMAL PROCESSING of non tilde crunched words.
1705  tilde_crunch_written = false;
1707  const char* wordstr = word->best_choice->unichar_string().string();
1708  const STRING& lengths = word->best_choice->unichar_lengths();
1709  int length = lengths.length();
1710  int i = 0;
1711  int offset = 0;
1712 
1713  if (last_char_was_tilde &&
1714  word->word->space() == 0 && wordstr[offset] == ' ') {
1715  // Prevent adjacent tilde across words - we know that adjacent tildes
1716  // within words have been removed.
1717  // Skip the first character.
1718  offset = lengths[i++];
1719  }
1720  if (i < length && wordstr[offset] != 0) {
1721  if (!last_char_was_newline)
1722  *ptr++ = ' ';
1723  else
1724  last_char_was_newline = false;
1725  for (; i < length; offset += lengths[i++]) {
1726  if (wordstr[offset] == ' ' ||
1727  wordstr[offset] == kTesseractReject) {
1728  *ptr++ = kUNLVReject;
1729  last_char_was_tilde = true;
1730  } else {
1731  if (word->reject_map[i].rejected())
1732  *ptr++ = kUNLVSuspect;
1733  UNICHAR ch(wordstr + offset, lengths[i]);
1734  int uni_ch = ch.first_uni();
1735  for (int j = 0; kUniChs[j] != 0; ++j) {
1736  if (kUniChs[j] == uni_ch) {
1737  uni_ch = kLatinChs[j];
1738  break;
1739  }
1740  }
1741  if (uni_ch <= 0xff) {
1742  *ptr++ = static_cast<char>(uni_ch);
1743  last_char_was_tilde = false;
1744  } else {
1745  *ptr++ = kUNLVReject;
1746  last_char_was_tilde = true;
1747  }
1748  }
1749  }
1750  }
1751  }
1752  if (word->word->flag(W_EOL) && !last_char_was_newline) {
1753  /* Add a new line output */
1754  *ptr++ = '\n';
1755  tilde_crunch_written = false;
1756  last_char_was_newline = true;
1757  last_char_was_tilde = false;
1758  }
1759  }
1760  *ptr++ = '\n';
1761  *ptr = '\0';
1762  return result;
1763 }
1764 
1770 char* TessBaseAPI::GetOsdText(int page_number) {
1771  OSResults osr;
1772 
1773  bool osd = DetectOS(&osr);
1774  if (!osd) {
1775  return NULL;
1776  }
1777 
1778  int orient_id = osr.best_result.orientation_id;
1779  int script_id = osr.get_best_script(orient_id);
1780  float orient_conf = osr.best_result.oconfidence;
1781  float script_conf = osr.best_result.sconfidence;
1782  const char* script_name =
1783  osr.unicharset->get_script_from_script_id(script_id);
1784 
1785  // clockwise orientation of the input image, in degrees
1786  int orient_deg = orient_id * 90;
1787 
1788  // clockwise rotation needed to make the page upright
1789  int rotate = OrientationIdToValue(orient_id);
1790 
1791  char* osd_buf = new char[255];
1792  snprintf(osd_buf, 255,
1793  "Page number: %d\n"
1794  "Orientation in degrees: %d\n"
1795  "Rotate: %d\n"
1796  "Orientation confidence: %.2f\n"
1797  "Script: %s\n"
1798  "Script confidence: %.2f\n",
1799  page_number,
1800  orient_deg, rotate, orient_conf,
1801  script_name, script_conf);
1802 
1803  return osd_buf;
1804 }
1805 
1808  int* conf = AllWordConfidences();
1809  if (!conf) return 0;
1810  int sum = 0;
1811  int *pt = conf;
1812  while (*pt >= 0) sum += *pt++;
1813  if (pt != conf) sum /= pt - conf;
1814  delete [] conf;
1815  return sum;
1816 }
1817 
1820  if (tesseract_ == NULL ||
1821  (!recognition_done_ && Recognize(NULL) < 0))
1822  return NULL;
1823  int n_word = 0;
1824  PAGE_RES_IT res_it(page_res_);
1825  for (res_it.restart_page(); res_it.word() != NULL; res_it.forward())
1826  n_word++;
1827 
1828  int* conf = new int[n_word+1];
1829  n_word = 0;
1830  for (res_it.restart_page(); res_it.word() != NULL; res_it.forward()) {
1831  WERD_RES *word = res_it.word();
1832  WERD_CHOICE* choice = word->best_choice;
1833  int w_conf = static_cast<int>(100 + 5 * choice->certainty());
1834  // This is the eq for converting Tesseract confidence to 1..100
1835  if (w_conf < 0) w_conf = 0;
1836  if (w_conf > 100) w_conf = 100;
1837  conf[n_word++] = w_conf;
1838  }
1839  conf[n_word] = -1;
1840  return conf;
1841 }
1842 
1853 bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
1854  int debug = 0;
1855  GetIntVariable("applybox_debug", &debug);
1856  bool success = true;
1857  PageSegMode current_psm = GetPageSegMode();
1858  SetPageSegMode(mode);
1859  SetVariable("classify_enable_learning", "0");
1860  char* text = GetUTF8Text();
1861  if (debug) {
1862  tprintf("Trying to adapt \"%s\" to \"%s\"\n", text, wordstr);
1863  }
1864  if (text != NULL) {
1865  PAGE_RES_IT it(page_res_);
1866  WERD_RES* word_res = it.word();
1867  if (word_res != NULL) {
1868  word_res->word->set_text(wordstr);
1869  } else {
1870  success = false;
1871  }
1872  // Check to see if text matches wordstr.
1873  int w = 0;
1874  int t = 0;
1875  for (t = 0; text[t] != '\0'; ++t) {
1876  if (text[t] == '\n' || text[t] == ' ')
1877  continue;
1878  while (wordstr[w] != '\0' && wordstr[w] == ' ')
1879  ++w;
1880  if (text[t] != wordstr[w])
1881  break;
1882  ++w;
1883  }
1884  if (text[t] != '\0' || wordstr[w] != '\0') {
1885  // No match.
1886  delete page_res_;
1887  GenericVector<TBOX> boxes;
1891  PAGE_RES_IT pr_it(page_res_);
1892  if (pr_it.word() == NULL)
1893  success = false;
1894  else
1895  word_res = pr_it.word();
1896  } else {
1897  word_res->BestChoiceToCorrectText();
1898  }
1899  if (success) {
1900  tesseract_->EnableLearning = true;
1901  tesseract_->LearnWord(NULL, word_res);
1902  }
1903  delete [] text;
1904  } else {
1905  success = false;
1906  }
1907  SetPageSegMode(current_psm);
1908  return success;
1909 }
1910 
1918  if (thresholder_ != NULL)
1919  thresholder_->Clear();
1920  ClearResults();
1921  SetInputImage(NULL);
1922 }
1923 
1931  if (thresholder_ != NULL) {
1932  delete thresholder_;
1933  thresholder_ = NULL;
1934  }
1935  if (page_res_ != NULL) {
1936  delete page_res_;
1937  page_res_ = NULL;
1938  }
1939  if (block_list_ != NULL) {
1940  delete block_list_;
1941  block_list_ = NULL;
1942  }
1943  if (paragraph_models_ != NULL) {
1945  delete paragraph_models_;
1946  paragraph_models_ = NULL;
1947  }
1948  if (tesseract_ != NULL) {
1949  delete tesseract_;
1950  if (osd_tesseract_ == tesseract_)
1951  osd_tesseract_ = NULL;
1952  tesseract_ = NULL;
1953  }
1954  if (osd_tesseract_ != NULL) {
1955  delete osd_tesseract_;
1956  osd_tesseract_ = NULL;
1957  }
1958  if (equ_detect_ != NULL) {
1959  delete equ_detect_;
1960  equ_detect_ = NULL;
1961  }
1962  if (input_file_ != NULL) {
1963  delete input_file_;
1964  input_file_ = NULL;
1965  }
1966  if (input_image_ != NULL) {
1967  pixDestroy(&input_image_);
1968  input_image_ = NULL;
1969  }
1970  if (output_file_ != NULL) {
1971  delete output_file_;
1972  output_file_ = NULL;
1973  }
1974  if (datapath_ != NULL) {
1975  delete datapath_;
1976  datapath_ = NULL;
1977  }
1978  if (language_ != NULL) {
1979  delete language_;
1980  language_ = NULL;
1981  }
1982 }
1983 
1984 // Clear any library-level memory caches.
1985 // There are a variety of expensive-to-load constant data structures (mostly
1986 // language dictionaries) that are cached globally -- surviving the Init()
1987 // and End() of individual TessBaseAPI's. This function allows the clearing
1988 // of these caches.
1991 }
1992 
1997 int TessBaseAPI::IsValidWord(const char *word) {
1998  return tesseract_->getDict().valid_word(word);
1999 }
2000 // Returns true if utf8_character is defined in the UniCharset.
2001 bool TessBaseAPI::IsValidCharacter(const char *utf8_character) {
2002  return tesseract_->unicharset.contains_unichar(utf8_character);
2003 }
2004 
2005 
2006 // TODO(rays) Obsolete this function and replace with a more aptly named
2007 // function that returns image coordinates rather than tesseract coordinates.
2008 bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) {
2009  PageIterator* it = AnalyseLayout();
2010  if (it == NULL) {
2011  return false;
2012  }
2013  int x1, x2, y1, y2;
2014  it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
2015  // Calculate offset and slope (NOTE: Kind of ugly)
2016  if (x2 <= x1) x2 = x1 + 1;
2017  // Convert the point pair to slope/offset of the baseline (in image coords.)
2018  *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
2019  *out_offset = static_cast<int>(y1 - *out_slope * x1);
2020  // Get the y-coord of the baseline at the left and right edges of the
2021  // textline's bounding box.
2022  int left, top, right, bottom;
2023  if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
2024  delete it;
2025  return false;
2026  }
2027  int left_y = IntCastRounded(*out_slope * left + *out_offset);
2028  int right_y = IntCastRounded(*out_slope * right + *out_offset);
2029  // Shift the baseline down so it passes through the nearest bottom-corner
2030  // of the textline's bounding box. This is the difference between the y
2031  // at the lowest (max) edge of the box and the actual box bottom.
2032  *out_offset += bottom - MAX(left_y, right_y);
2033  // Switch back to bottom-up tesseract coordinates. Requires negation of
2034  // the slope and height - offset for the offset.
2035  *out_slope = -*out_slope;
2036  *out_offset = rect_height_ - *out_offset;
2037  delete it;
2038 
2039  return true;
2040 }
2041 
2044  if (tesseract_ != NULL) {
2046  }
2047 }
2048 
2058  if (tesseract_ != NULL) {
2060  // Set it for the sublangs too.
2061  int num_subs = tesseract_->num_sub_langs();
2062  for (int i = 0; i < num_subs; ++i) {
2064  }
2065  }
2066 }
2067 
2070  if (tesseract_ != NULL) tesseract_->fill_lattice_ = f;
2071 }
2072 
2075  if (tesseract_ == NULL) {
2076  tprintf("Please call Init before attempting to set an image.");
2077  return false;
2078  }
2079  if (thresholder_ == NULL)
2081  ClearResults();
2082  return true;
2083 }
2084 
2091 void TessBaseAPI::Threshold(Pix** pix) {
2092  ASSERT_HOST(pix != NULL);
2093  if (*pix != NULL)
2094  pixDestroy(pix);
2095  // Zero resolution messes up the algorithms, so make sure it is credible.
2096  int y_res = thresholder_->GetScaledYResolution();
2097  if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) {
2098  // Use the minimum default resolution, as it is safer to under-estimate
2099  // than over-estimate resolution.
2101  }
2102  PageSegMode pageseg_mode =
2103  static_cast<PageSegMode>(
2104  static_cast<int>(tesseract_->tessedit_pageseg_mode));
2105  thresholder_->ThresholdToPix(pageseg_mode, pix);
2109  if (!thresholder_->IsBinary()) {
2112  } else {
2114  tesseract_->set_pix_grey(NULL);
2115  }
2116  // Set the internal resolution that is used for layout parameters from the
2117  // estimated resolution, rather than the image resolution, which may be
2118  // fabricated, but we will use the image resolution, if there is one, to
2119  // report output point sizes.
2120  int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
2123  if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
2124  tprintf("Estimated resolution %d out of range! Corrected to %d\n",
2125  thresholder_->GetScaledEstimatedResolution(), estimated_res);
2126  }
2127  tesseract_->set_source_resolution(estimated_res);
2128  SavePixForCrash(estimated_res, *pix);
2129 }
2130 
2133  if (thresholder_ == NULL || thresholder_->IsEmpty()) {
2134  tprintf("Please call SetImage before attempting recognition.");
2135  return -1;
2136  }
2137  if (recognition_done_)
2138  ClearResults();
2139  if (!block_list_->empty()) {
2140  return 0;
2141  }
2142  if (tesseract_ == NULL) {
2143  tesseract_ = new Tesseract;
2145  }
2146  if (tesseract_->pix_binary() == NULL)
2148  if (tesseract_->ImageWidth() > MAX_INT16 ||
2150  tprintf("Image too large: (%d, %d)\n",
2152  return -1;
2153  }
2154 
2156 
2158  if (equ_detect_ == NULL && datapath_ != NULL) {
2159  equ_detect_ = new EquationDetect(datapath_->string(), NULL);
2160  }
2162  }
2163 
2164  Tesseract* osd_tess = osd_tesseract_;
2165  OSResults osr;
2166  if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == NULL) {
2167  if (strcmp(language_->string(), "osd") == 0) {
2168  osd_tess = tesseract_;
2169  } else {
2170  osd_tesseract_ = new Tesseract;
2172  datapath_->string(), NULL, "osd", OEM_TESSERACT_ONLY,
2173  NULL, 0, NULL, NULL, false) == 0) {
2174  osd_tess = osd_tesseract_;
2177  } else {
2178  tprintf("Warning: Auto orientation and script detection requested,"
2179  " but osd language failed to load\n");
2180  delete osd_tesseract_;
2181  osd_tesseract_ = NULL;
2182  }
2183  }
2184  }
2185 
2186  if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
2187  return -1;
2188  // If Devanagari is being recognized, we use different images for page seg
2189  // and for OCR.
2190  tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
2191  return 0;
2192 }
2193 
2196  if (tesseract_ != NULL) {
2197  tesseract_->Clear();
2198  }
2199  if (page_res_ != NULL) {
2200  delete page_res_;
2201  page_res_ = NULL;
2202  }
2203  recognition_done_ = false;
2204  if (block_list_ == NULL)
2205  block_list_ = new BLOCK_LIST;
2206  else
2207  block_list_->clear();
2208  if (paragraph_models_ != NULL) {
2210  delete paragraph_models_;
2211  paragraph_models_ = NULL;
2212  }
2213  SavePixForCrash(0, NULL);
2214 }
2215 
2223 int TessBaseAPI::TextLength(int* blob_count) {
2224  if (tesseract_ == NULL || page_res_ == NULL)
2225  return 0;
2226 
2227  PAGE_RES_IT page_res_it(page_res_);
2228  int total_length = 2;
2229  int total_blobs = 0;
2230  // Iterate over the data structures to extract the recognition result.
2231  for (page_res_it.restart_page(); page_res_it.word () != NULL;
2232  page_res_it.forward()) {
2233  WERD_RES *word = page_res_it.word();
2234  WERD_CHOICE* choice = word->best_choice;
2235  if (choice != NULL) {
2236  total_blobs += choice->length() + 2;
2237  total_length += choice->unichar_string().length() + 2;
2238  for (int i = 0; i < word->reject_map.length(); ++i) {
2239  if (word->reject_map[i].rejected())
2240  ++total_length;
2241  }
2242  }
2243  }
2244  if (blob_count != NULL)
2245  *blob_count = total_blobs;
2246  return total_length;
2247 }
2248 
2254  if (tesseract_ == NULL)
2255  return false;
2256  ClearResults();
2257  if (tesseract_->pix_binary() == NULL)
2259  if (input_file_ == NULL)
2260  input_file_ = new STRING(kInputFile);
2262 }
2263 
2265  tesseract_->min_orientation_margin.set_value(margin);
2266 }
2267 
2282 void TessBaseAPI::GetBlockTextOrientations(int** block_orientation,
2283  bool** vertical_writing) {
2284  delete[] *block_orientation;
2285  *block_orientation = NULL;
2286  delete[] *vertical_writing;
2287  *vertical_writing = NULL;
2288  BLOCK_IT block_it(block_list_);
2289 
2290  block_it.move_to_first();
2291  int num_blocks = 0;
2292  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2293  if (!block_it.data()->poly_block()->IsText()) {
2294  continue;
2295  }
2296  ++num_blocks;
2297  }
2298  if (!num_blocks) {
2299  tprintf("WARNING: Found no blocks\n");
2300  return;
2301  }
2302  *block_orientation = new int[num_blocks];
2303  *vertical_writing = new bool[num_blocks];
2304  block_it.move_to_first();
2305  int i = 0;
2306  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
2307  block_it.forward()) {
2308  if (!block_it.data()->poly_block()->IsText()) {
2309  continue;
2310  }
2311  FCOORD re_rotation = block_it.data()->re_rotation();
2312  float re_theta = re_rotation.angle();
2313  FCOORD classify_rotation = block_it.data()->classify_rotation();
2314  float classify_theta = classify_rotation.angle();
2315  double rot_theta = - (re_theta - classify_theta) * 2.0 / PI;
2316  if (rot_theta < 0) rot_theta += 4;
2317  int num_rotations = static_cast<int>(rot_theta + 0.5);
2318  (*block_orientation)[i] = num_rotations;
2319  // The classify_rotation is non-zero only if the text has vertical
2320  // writing direction.
2321  (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
2322  ++i;
2323  }
2324 }
2325 
2326 // ____________________________________________________________________________
2327 // Ocropus add-ons.
2328 
2331  FindLines();
2332  BLOCK_LIST* result = block_list_;
2333  block_list_ = NULL;
2334  return result;
2335 }
2336 
2342 void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) {
2343  delete block_list;
2344 }
2345 
2346 
2348  float xheight,
2349  float descender,
2350  float ascender) {
2351  inT32 xstarts[] = {-32000};
2352  double quad_coeffs[] = {0, 0, baseline};
2353  return new ROW(1,
2354  xstarts,
2355  quad_coeffs,
2356  xheight,
2357  ascender - (baseline + xheight),
2358  descender - baseline,
2359  0,
2360  0);
2361 }
2362 
2365  int width = pixGetWidth(pix);
2366  int height = pixGetHeight(pix);
2367  BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height);
2368 
2369  // Create C_BLOBs from the page
2370  extract_edges(pix, &block);
2371 
2372  // Merge all C_BLOBs
2373  C_BLOB_LIST *list = block.blob_list();
2374  C_BLOB_IT c_blob_it(list);
2375  if (c_blob_it.empty())
2376  return NULL;
2377  // Move all the outlines to the first blob.
2378  C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
2379  for (c_blob_it.forward();
2380  !c_blob_it.at_first();
2381  c_blob_it.forward()) {
2382  C_BLOB *c_blob = c_blob_it.data();
2383  ol_it.add_list_after(c_blob->out_list());
2384  }
2385  // Convert the first blob to the output TBLOB.
2386  return TBLOB::PolygonalCopy(false, c_blob_it.data());
2387 }
2388 
2394 void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) {
2395  TBOX box = tblob->bounding_box();
2396  float x_center = (box.left() + box.right()) / 2.0f;
2397  float baseline = row->base_line(x_center);
2398  float scale = kBlnXHeight / row->x_height();
2399  tblob->Normalize(NULL, NULL, NULL, x_center, baseline, scale, scale,
2400  0.0f, static_cast<float>(kBlnBaselineOffset), false, NULL);
2401 }
2402 
2407 TBLOB *make_tesseract_blob(float baseline, float xheight,
2408  float descender, float ascender,
2409  bool numeric_mode, Pix* pix) {
2410  TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix);
2411 
2412  // Normalize TBLOB
2413  ROW *row =
2414  TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
2415  TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode);
2416  delete row;
2417  return tblob;
2418 }
2419 
2425 void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
2426  int length,
2427  float baseline,
2428  float xheight,
2429  float descender,
2430  float ascender) {
2431  UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
2432  TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
2434  tesseract_->pix_binary());
2435  float threshold;
2436  float best_rating = -100;
2437 
2438 
2439  // Classify to get a raw choice.
2440  BLOB_CHOICE_LIST choices;
2441  tesseract_->AdaptiveClassifier(blob, &choices);
2442  BLOB_CHOICE_IT choice_it;
2443  choice_it.set_to_list(&choices);
2444  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
2445  choice_it.forward()) {
2446  if (choice_it.data()->rating() > best_rating) {
2447  best_rating = choice_it.data()->rating();
2448  }
2449  }
2450 
2451  threshold = tesseract_->matcher_good_threshold;
2452 
2453  if (blob->outlines)
2454  tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold,
2456  delete blob;
2457 }
2458 
2459 
2460 PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
2461  PAGE_RES *page_res = new PAGE_RES(false, block_list,
2463  tesseract_->recog_all_words(page_res, NULL, NULL, NULL, 1);
2464  return page_res;
2465 }
2466 
2467 PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
2468  PAGE_RES* pass1_result) {
2469  if (!pass1_result)
2470  pass1_result = new PAGE_RES(false, block_list,
2472  tesseract_->recog_all_words(pass1_result, NULL, NULL, NULL, 2);
2473  return pass1_result;
2474 }
2475 
2476 void TessBaseAPI::DetectParagraphs(bool after_text_recognition) {
2477  int debug_level = 0;
2478  GetIntVariable("paragraph_debug_level", &debug_level);
2479  if (paragraph_models_ == NULL)
2481  MutableIterator *result_it = GetMutableIterator();
2482  do { // Detect paragraphs for this block
2484  ::tesseract::DetectParagraphs(debug_level, after_text_recognition,
2485  result_it, &models);
2486  *paragraph_models_ += models;
2487  } while (result_it->Next(RIL_BLOCK));
2488  delete result_it;
2489 }
2490 
2493  int length; // of unicode_repr
2494  float cost;
2496 
2497  TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) {
2498  length = (len == -1 ? strlen(repr) : len);
2499  unicode_repr = new char[length + 1];
2500  strncpy(unicode_repr, repr, length);
2501  }
2502 
2503  TESS_CHAR() { // Satisfies ELISTIZE.
2504  }
2506  delete [] unicode_repr;
2507  }
2508 };
2509 
2510 ELISTIZEH(TESS_CHAR)
2511 ELISTIZE(TESS_CHAR)
2512 
2513 static void add_space(TESS_CHAR_IT* it) {
2514  TESS_CHAR *t = new TESS_CHAR(0, " ");
2515  it->add_after_then_move(t);
2516 }
2517 
2518 
/**
 * Converts a rating to a non-negative cost: 100 + rating, with negative
 * results clamped to 0.
 */
static float rating_to_cost(float rating) {
  const float cost = 100 + rating;
  // Clamp kept on one line to save from the coverage profiler. Ratings worse
  // than -100 have never been seen, but the check won't hurt.
  return cost < 0 ? 0 : cost;
}
2527 
2532 static void extract_result(TESS_CHAR_IT* out,
2533  PAGE_RES* page_res) {
2534  PAGE_RES_IT page_res_it(page_res);
2535  int word_count = 0;
2536  while (page_res_it.word() != NULL) {
2537  WERD_RES *word = page_res_it.word();
2538  const char *str = word->best_choice->unichar_string().string();
2539  const char *len = word->best_choice->unichar_lengths().string();
2540  TBOX real_rect = word->word->bounding_box();
2541 
2542  if (word_count)
2543  add_space(out);
2544  int n = strlen(len);
2545  for (int i = 0; i < n; i++) {
2546  TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
2547  str, *len);
2548  tc->box = real_rect.intersection(word->box_word->BlobBox(i));
2549  out->add_after_then_move(tc);
2550  str += *len;
2551  len++;
2552  }
2553  page_res_it.forward();
2554  word_count++;
2555  }
2556 }
2557 
2563  int** lengths,
2564  float** costs,
2565  int** x0,
2566  int** y0,
2567  int** x1,
2568  int** y1,
2569  PAGE_RES* page_res) {
2570  TESS_CHAR_LIST tess_chars;
2571  TESS_CHAR_IT tess_chars_it(&tess_chars);
2572  extract_result(&tess_chars_it, page_res);
2573  tess_chars_it.move_to_first();
2574  int n = tess_chars.length();
2575  int text_len = 0;
2576  *lengths = new int[n];
2577  *costs = new float[n];
2578  *x0 = new int[n];
2579  *y0 = new int[n];
2580  *x1 = new int[n];
2581  *y1 = new int[n];
2582  int i = 0;
2583  for (tess_chars_it.mark_cycle_pt();
2584  !tess_chars_it.cycled_list();
2585  tess_chars_it.forward(), i++) {
2586  TESS_CHAR *tc = tess_chars_it.data();
2587  text_len += (*lengths)[i] = tc->length;
2588  (*costs)[i] = tc->cost;
2589  (*x0)[i] = tc->box.left();
2590  (*y0)[i] = tc->box.bottom();
2591  (*x1)[i] = tc->box.right();
2592  (*y1)[i] = tc->box.top();
2593  }
2594  char *p = *text = new char[text_len];
2595 
2596  tess_chars_it.move_to_first();
2597  for (tess_chars_it.mark_cycle_pt();
2598  !tess_chars_it.cycled_list();
2599  tess_chars_it.forward()) {
2600  TESS_CHAR *tc = tess_chars_it.data();
2601  strncpy(p, tc->unicode_repr, tc->length);
2602  p += tc->length;
2603  }
2604  return n;
2605 }
2606 
2608 // The resulting features are returned in int_features, which must be
2609 // of size MAX_NUM_INT_FEATURES. The number of features is returned in
2610 // num_features (or 0 if there was a failure).
2611 // On return feature_outline_index is filled with an index of the outline
2612 // corresponding to each feature in int_features.
2613 // TODO(rays) Fix the caller to use outline_counts instead.
2615  INT_FEATURE_STRUCT* int_features,
2616  int* num_features,
2617  int* feature_outline_index) {
2618  GenericVector<int> outline_counts;
2621  INT_FX_RESULT_STRUCT fx_info;
2622  tesseract_->ExtractFeatures(*blob, false, &bl_features,
2623  &cn_features, &fx_info, &outline_counts);
2624  if (cn_features.size() == 0 || cn_features.size() > MAX_NUM_INT_FEATURES) {
2625  *num_features = 0;
2626  return; // Feature extraction failed.
2627  }
2628  *num_features = cn_features.size();
2629  memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
2630  // TODO(rays) Pass outline_counts back and simplify the calling code.
2631  if (feature_outline_index != NULL) {
2632  int f = 0;
2633  for (int i = 0; i < outline_counts.size(); ++i) {
2634  while (f < outline_counts[i])
2635  feature_outline_index[f++] = i;
2636  }
2637  }
2638 }
2639 
2640 // This method returns the row to which a box of specified dimensions would
2641 // belong. If no good match is found, it returns NULL.
2642 ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks,
2643  int left, int top, int right, int bottom) {
2644  TBOX box(left, bottom, right, top);
2645  BLOCK_IT b_it(blocks);
2646  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
2647  BLOCK* block = b_it.data();
2648  if (!box.major_overlap(block->bounding_box()))
2649  continue;
2650  ROW_IT r_it(block->row_list());
2651  for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
2652  ROW* row = r_it.data();
2653  if (!box.major_overlap(row->bounding_box()))
2654  continue;
2655  WERD_IT w_it(row->word_list());
2656  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
2657  WERD* word = w_it.data();
2658  if (box.major_overlap(word->bounding_box()))
2659  return row;
2660  }
2661  }
2662  }
2663  return NULL;
2664 }
2665 
2668  int num_max_matches,
2669  int* unichar_ids,
2670  float* ratings,
2671  int* num_matches_returned) {
2672  BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
2673  tesseract_->AdaptiveClassifier(blob, choices);
2674  BLOB_CHOICE_IT choices_it(choices);
2675  int& index = *num_matches_returned;
2676  index = 0;
2677  for (choices_it.mark_cycle_pt();
2678  !choices_it.cycled_list() && index < num_max_matches;
2679  choices_it.forward()) {
2680  BLOB_CHOICE* choice = choices_it.data();
2681  unichar_ids[index] = choice->unichar_id();
2682  ratings[index] = choice->rating();
2683  ++index;
2684  }
2685  *num_matches_returned = index;
2686  delete choices;
2687 }
2688 
2690 const char* TessBaseAPI::GetUnichar(int unichar_id) {
2691  return tesseract_->unicharset.id_to_unichar(unichar_id);
2692 }
2693 
2695 const Dawg *TessBaseAPI::GetDawg(int i) const {
2696  if (tesseract_ == NULL || i >= NumDawgs()) return NULL;
2697  return tesseract_->getDict().GetDawg(i);
2698 }
2699 
2702  return tesseract_ == NULL ? 0 : tesseract_->getDict().NumDawgs();
2703 }
2704 
2705 #ifndef NO_CUBE_BUILD
2706 
2708  return (tesseract_ == NULL) ? NULL : tesseract_->GetCubeRecoContext();
2709 }
2710 #endif // NO_CUBE_BUILD
2711 
2713 STRING HOcrEscape(const char* text) {
2714  STRING ret;
2715  const char *ptr;
2716  for (ptr = text; *ptr; ptr++) {
2717  switch (*ptr) {
2718  case '<': ret += "&lt;"; break;
2719  case '>': ret += "&gt;"; break;
2720  case '&': ret += "&amp;"; break;
2721  case '"': ret += "&quot;"; break;
2722  case '\'': ret += "&#39;"; break;
2723  default: ret += *ptr;
2724  }
2725  }
2726  return ret;
2727 }
2728 
2729 } // namespace tesseract.
int orientation_and_script_detection(STRING &filename, OSResults *osr, tesseract::Tesseract *tess)
Definition: osdetect.cpp:189
#define PI
Definition: const.h:19
Definition: blobs.h:261
C_BLOB_LIST * blob_list()
get blobs
Definition: ocrblock.h:132
void SavePixForCrash(int resolution, Pix *pix)
Definition: globaloc.cpp:34
static void ResetToDefaults(ParamsVectors *member_params)
Definition: params.cpp:205
const char * kInputFile
Definition: baseapi.cpp:97
int NumDawgs() const
Definition: baseapi.cpp:2701
int inT32
Definition: host.h:102
Definition: werd.h:35
TBOX bounding_box() const
Definition: werd.cpp:160
#define MAX_INT16
Definition: host.h:119
static const char * Version()
Definition: baseapi.cpp:142
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
Definition: dict.h:357
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:228
int size() const
Definition: genericvector.h:72
void CorrectClassifyWords(PAGE_RES *page_res)
Definition: applybox.cpp:772
static size_t getOpenCLDevice(void **device)
Definition: baseapi.cpp:162
const char * WordFontAttributes(bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128
bool classify_bln_numeric_mode
Definition: classify.h:500
bool wordrec_run_blamer
Definition: wordrec.h:168
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:70
BLOCK_LIST * FindLinesCreateBlockList()
Definition: baseapi.cpp:2330
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:853
static void DeleteBlockList(BLOCK_LIST *block_list)
Definition: baseapi.cpp:2342
inT32 length() const
Definition: rejctmap.h:237
int IntCastRounded(double x)
Definition: helpers.h:172
void set_deadline_msecs(inT32 deadline_msecs)
Definition: ocrclass.h:132
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
void SetDictFunc(DictFunc f)
Definition: baseapi.cpp:2043
bool GetTextDirection(int *out_offset, float *out_slope)
Definition: baseapi.cpp:2008
void GetAvailableLanguagesAsVector(GenericVector< STRING > *langs) const
Definition: baseapi.cpp:370
bool PSM_OSD_ENABLED(int pageseg_mode)
Definition: publictypes.h:179
#define TESSERACT_VERSION_STR
Definition: baseapi.h:23
void set_text(const char *new_text)
Definition: werd.h:126
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:863
void set_pix_thresholds(Pix *thresholds)
virtual Pix * GetPixRectThresholds()
int push_back(T object)
TruthCallback * truth_cb_
Definition: baseapi.h:865
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:705
CubeRecoContext * GetCubeRecoContext() const
Definition: baseapi.cpp:2707
void SetFillLatticeFunc(FillLatticeFunc f)
Definition: baseapi.cpp:2069
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
#define tprintf(...)
Definition: tprintf.h:31
int ImageHeight() const
const char * WordRecognitionLanguage() const
const TBOX & BlobBox(int index) const
Definition: boxword.h:88
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
const int kBlnBaselineOffset
Definition: normalis.h:29
double matcher_good_threshold
Definition: classify.h:420
UNICHARSET unicharset
Definition: ccutil.h:72
void * cancel_this
Definition: ocrclass.h:120
bool Empty(PageIteratorLevel level) const
PageIterator * AnalyseLayout()
Definition: baseapi.h:500
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
Definition: werd.h:36
const int kMinCredibleResolution
Minimum believable resolution.
Definition: baseapi.cpp:108
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
Definition: params.cpp:180
void split(const char c, GenericVector< STRING > *splited)
Definition: strngs.cpp:281
#define BOOL
Definition: capi.h:27
Boxa * GetWords(Pixa **pixa)
Definition: baseapi.cpp:620
const int kBlnXHeight
Definition: normalis.h:28
virtual bool Next(PageIteratorLevel level)
void ReadConfigFile(const char *filename)
Definition: baseapi.cpp:446
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:855
void chomp_string(char *str)
Definition: helpers.h:75
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
float x_height() const
Definition: ocrrow.h:61
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
Definition: blobs.cpp:413
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1097
inT32 length() const
Definition: strngs.cpp:188
static bool GetParamAsString(const char *name, const ParamsVectors *member_params, STRING *value)
Definition: params.cpp:142
virtual char * GetUTF8Text(PageIteratorLevel level) const
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:216
float certainty() const
Definition: ratngs.h:327
tesseract::ParamsVectors * GlobalParams()
Definition: params.cpp:33
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2223
void set_pix_grey(Pix *grey_pix)
void set_source_resolution(int ppi)
TESS_CHAR(float _cost, const char *repr, int len=-1)
Definition: baseapi.cpp:2497
Pix * input_image_
Image used for searchable PDF.
Definition: baseapi.h:859
STRING * language_
Last initialized language.
Definition: baseapi.h:862
Boxa * GetStrips(Pixa **pixa, int **blockids)
Definition: baseapi.cpp:611
Definition: werd.h:60
static ROW * MakeTessOCRRow(float baseline, float xheight, float descender, float ascender)
Definition: baseapi.cpp:2347
char * GetOsdText(int page_number)
Definition: baseapi.cpp:1770
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:62
bool GetDoubleVariable(const char *name, double *value) const
Definition: baseapi.cpp:250
Tesseract * get_sub_lang(int index) const
static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode)
Definition: baseapi.cpp:2394
bool stream_filelist
Definition: baseapi.cpp:81
FILE * init_recog_training(const STRING &fname)
inT16 right() const
Definition: rect.h:75
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
Definition: applybox.cpp:117
#define FALSE
Definition: capi.h:29
MutableIterator * GetMutableIterator()
Definition: baseapi.cpp:1293
Pix * GetBinaryImage(PageIteratorLevel level) const
const char * GetInitLanguagesAsString() const
Definition: baseapi.cpp:346
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:287
int SegmentPage(const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
OcrEngineMode oem() const
Definition: baseapi.h:743
BLOCK * block
Definition: pageres.h:99
Tesseract * tesseract() const
Definition: baseapi.h:739
const int kUniChs[]
Definition: baseapi.cpp:1651
const char * GetInputName()
Definition: baseapi.cpp:948
static TBLOB * MakeTBLOB(Pix *pix)
Definition: baseapi.cpp:2364
void GetFeaturesForBlob(TBLOB *blob, INT_FEATURE_STRUCT *int_features, int *num_features, int *feature_outline_index)
Definition: baseapi.cpp:2614
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:2074
void SetRectangle(int left, int top, int width, int height)
Definition: baseapi.cpp:561
Definition: ocrrow.h:32
bool IsBinary() const
Returns true if the source image is binary.
Definition: thresholder.h:75
double(Dict::* ProbabilityInContextFunc)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Definition: baseapi.h:85
int GetScaledYResolution() const
Definition: thresholder.h:93
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
Definition: blobs.cpp:344
const char kUNLVSuspect
Definition: baseapi.cpp:92
const char kUNLVReject
Definition: baseapi.cpp:90
static void ClearPersistentCache()
Definition: baseapi.cpp:1989
bool GetVariableAsString(const char *name, STRING *val)
Definition: baseapi.cpp:259
float sconfidence
Definition: osdetect.h:43
float base_line(float xpos) const
Definition: ocrrow.h:56
TESS_LOCAL PAGE_RES * RecognitionPass2(BLOCK_LIST *block_list, PAGE_RES *pass1_result)
Definition: baseapi.cpp:2467
const char * GetDatapath()
Definition: baseapi.cpp:954
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1194
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:467
int OrientationIdToValue(const int &id)
Definition: osdetect.cpp:563
const char * GetUnichar(int unichar_id)
Definition: baseapi.cpp:2690
PolyBlockType BlockType() const
int first_uni() const
Definition: unichar.cpp:97
int ImageWidth() const
float angle() const
find angle
Definition: points.h:249
BLOCK_RES * block() const
Definition: pageres.h:739
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:860
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
int orientation_id
Definition: osdetect.h:41
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
Definition: dict.h:404
StrongScriptDirection WordDirection() const
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
WERD_RES * forward()
Definition: pageres.h:713
void SetRectangle(int left, int top, int width, int height)
void ClearAdaptiveClassifier()
Definition: baseapi.cpp:509
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:236
float rating() const
Definition: ratngs.h:79
ELISTIZE(AmbigSpec)
#define DIR
Definition: polyaprx.cpp:39
void TidyUp(PAGE_RES *page_res)
Definition: applybox.cpp:706
float oconfidence
Definition: osdetect.h:44
CANCEL_FUNC cancel
Definition: ocrclass.h:119
Pix * GetThresholdedImage()
Definition: baseapi.cpp:572
WERD_RES * restart_page()
Definition: pageres.h:680
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:186
virtual Pix * GetPixRectGrey()
const char * kOldVarsFile
Definition: baseapi.cpp:101
CubeRecoContext * GetCubeRecoContext()
#define BOOL_VAR(name, val, comment)
Definition: params.h:280
float rating() const
Definition: ratngs.h:324
const char * get_script_from_script_id(int id) const
Definition: unicharset.h:802
void SetInputName(const char *name)
Definition: baseapi.cpp:201
virtual bool IsAtBeginningOf(PageIteratorLevel level) const
static ROW * FindRowForBox(BLOCK_LIST *blocks, int left, int top, int right, int bottom)
Definition: baseapi.cpp:2642
char * GetHOCRText(int page_number)
Definition: baseapi.cpp:1428
STRING datadir
Definition: ccutil.h:67
inT16 left() const
Definition: rect.h:68
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
#define MAX(x, y)
Definition: ndminx.h:24
int num_sub_langs() const
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:185
bool tessedit_resegment_from_line_boxes
void delete_data_pointers()
int GetScaledEstimatedResolution() const
Definition: thresholder.h:106
const STRING & unichar_string() const
Definition: ratngs.h:524
Orientation and script detection only.
Definition: publictypes.h:152
GenericVector< IntParam * > int_params
Definition: params.h:44
Definition: ocrblock.h:30
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:857
int GetSourceYResolution() const
Definition: thresholder.h:90
#define PERF_COUNT_END
virtual ~TessBaseAPI()
Definition: baseapi.cpp:135
ROW_RES * row() const
Definition: pageres.h:736
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:155
virtual TESS_LOCAL void Threshold(Pix **pix)
Definition: baseapi.cpp:2091
void read_config_file(const char *filename, SetParamConstraint constraint)
Definition: tessedit.cpp:57
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:861
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
Pix * GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const
TBOX bounding_box() const
Definition: ocrrow.h:85
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:98
void DeleteUnusedDawgs()
Definition: dawg_cache.h:46
int(Dict::* DictFunc)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
Definition: baseapi.h:83
bool DetectOS(OSResults *)
Definition: baseapi.cpp:2253
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
void pgeditor_main(int width, int height, PAGE_RES *page_res)
Definition: pgedit.cpp:337
TBLOB * make_tesseract_blob(float baseline, float xheight, float descender, float ascender, bool numeric_mode, Pix *pix)
Definition: baseapi.cpp:2407
Pix * pix_grey() const
UNICHARSET * unicharset
Definition: osdetect.h:78
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates)
Definition: adaptmatch.cpp:886
Dict & getDict()
Definition: classify.h:65
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:132
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2132
#define PERF_COUNT_SUB(SUB)
struct TessResultRenderer TessResultRenderer
Definition: capi.h:63
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
int RecognizeForChopTest(ETEXT_DESC *monitor)
Definition: baseapi.cpp:905
virtual void ThresholdToPix(PageSegMode pageseg_mode, Pix **pix)
GenericVector< BoolParam * > bool_params
Definition: params.h:45
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:936
int IsValidWord(const char *word)
Definition: baseapi.cpp:1997
int CubeAPITest(Boxa *boxa_blocks, Pixa *pixa_blocks, Boxa *boxa_words, Pixa *pixa_words, const FCOORD &reskew, Pix *page_pix, PAGE_RES *page_res)
Definition: baseapi.cpp:757
int UNICHAR_ID
Definition: unichar.h:33
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:294
void SetSourceYResolution(int ppi)
Definition: thresholder.h:86
void SetSourceResolution(int ppi)
Definition: baseapi.cpp:533
TESS_LOCAL PAGE_RES * RecognitionPass1(BLOCK_LIST *block_list)
Definition: baseapi.cpp:2460
const Dawg * GetDawg(int i) const
Definition: baseapi.cpp:2695
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:276
void DumpPGM(const char *filename)
Definition: baseapi.cpp:732
Boxa * GetTextlines(const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:597
const int kBytesPerNumber
Definition: baseapi.cpp:1585
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:160
char * GetBoxText(int page_number)
Definition: baseapi.cpp:1609
inT16 bottom() const
Definition: rect.h:61
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:307
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:416
bool major_overlap(const TBOX &box) const
Definition: rect.h:358
void ReadDebugConfigFile(const char *filename)
Definition: baseapi.cpp:451
void SetOutputName(const char *name)
Definition: baseapi.cpp:209
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1070
virtual void Run(A1, A2, A3, A4)=0
bool SetDebugVariable(const char *name, const char *value)
Definition: baseapi.cpp:222
const char * GetStringVariable(const char *name) const
Definition: baseapi.cpp:244
void RunAdaptiveClassifier(TBLOB *blob, int num_max_matches, int *unichar_ids, float *ratings, int *num_matches_returned)
Definition: baseapi.cpp:2667
bool empty() const
Definition: genericvector.h:84
ParamsVectors * params()
Definition: ccutil.h:65
void set_min_orientation_margin(double margin)
Definition: baseapi.cpp:2264
CMD_EVENTS mode
Definition: pgedit.cpp:116
void add_str_int(const char *str, int number)
Definition: strngs.cpp:376
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:50
void BestChoiceToCorrectText()
Definition: pageres.cpp:917
void assign(const char *cstr, int len)
Definition: strngs.cpp:417
const int kMaxIntSize
Definition: baseapi.cpp:103
void(Wordrec::* FillLatticeFunc)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: baseapi.h:92
TBOX intersection(const TBOX &box) const
Definition: rect.cpp:87
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)
Definition: applybox.cpp:796
int GetThresholdedImageScaleFactor() const
Definition: baseapi.cpp:724
WERD * word
Definition: pageres.h:175
unsigned char BOOL8
Definition: host.h:113
#define TRUE
Definition: capi.h:28
int(Dict::* letter_is_okay_)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.h:347
void ExtractFontName(const STRING &filename, STRING *fontname)
Definition: blobclass.cpp:46
ResultIterator * GetIterator()
Definition: baseapi.cpp:1276
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params)
Definition: tessedit.cpp:290
const UNICHARSET & getUnicharset() const
Definition: dict.h:96
bool AdaptToWordStr(PageSegMode mode, const char *wordstr)
Definition: baseapi.cpp:1853
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:852
GenericVector< DoubleParam * > double_params
Definition: params.h:47
TESS_API int get_best_script(int orientation_id) const
Definition: osdetect.cpp:117
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
Definition: intfx.cpp:445
void SetEquationDetect(EquationDetect *detector)
ROW * row
Definition: pageres.h:127
uinT8 space()
Definition: werd.h:104
void SetProbabilityInContextFunc(ProbabilityInContextFunc f)
Definition: baseapi.cpp:2057
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:856
const char kTesseractReject
Definition: baseapi.cpp:88
Definition: rect.h:30
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
const int kMaxCredibleResolution
Definition: baseapi.cpp:110
const int kBytesPer64BitNumber
Definition: baseapi.cpp:1594
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:525
#define PERF_COUNT_START(FUNCT_NAME)
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
Definition: applybox.cpp:253
float y() const
Definition: points.h:212
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:45
void GetBlockTextOrientations(int **block_orientation, bool **vertical_writing)
Definition: baseapi.cpp:2282
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2713
STRING lang
Definition: ccutil.h:69
static void CatchSignals()
Definition: baseapi.cpp:182
tesseract::BoxWord * box_word
Definition: pageres.h:250
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
Definition: dict.h:406
int InitLangMod(const char *datapath, const char *language)
Definition: baseapi.cpp:422
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:334
const int kBytesPerBlob
Definition: baseapi.cpp:1591
const STRING & unichar_lengths() const
Definition: ratngs.h:531
bool WriteTRFile(const STRING &filename)
Definition: blobclass.cpp:97
name_table name
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:420
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:115
Definition: strngs.h:44
void add_str_double(const char *str, double number)
Definition: strngs.cpp:386
float Confidence(PageIteratorLevel level) const
unsigned char uinT8
Definition: host.h:99
char * GetUTF8Text(PageIteratorLevel level) const
static TESS_LOCAL int TesseractExtractResult(char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res)
Definition: baseapi.cpp:2562
void GetLoadedLanguagesAsVector(GenericVector< STRING > *langs) const
Definition: baseapi.cpp:356
void signal_exit(int signal_code)
Definition: globaloc.cpp:52
void InitAdaptiveClassifier(bool load_pre_trained_templates)
Definition: adaptmatch.cpp:527
virtual bool Next(PageIteratorLevel level)
const int kMinRectSize
Definition: baseapi.cpp:86
Pix * pix_binary() const
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:244
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:642
#define UNICHAR_LEN
Definition: unichar.h:30
TBOX bounding_box() const
Definition: blobs.cpp:482
TESSLINE * outlines
Definition: blobs.h:377
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2476
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
Definition: applybox.cpp:217
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
Boxa * GetConnectedComponents(Pixa **cc)
Definition: baseapi.cpp:630
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
bool IsValidCharacter(const char *utf8_character)
Definition: baseapi.cpp:2001
const int kMaxBytesPerLine
Definition: baseapi.cpp:1601
const char * string() const
Definition: strngs.cpp:193
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
bool BeginDocument(const char *title)
Definition: renderer.cpp:53
const int kLatinChs[]
Definition: baseapi.cpp:1655
char * TesseractRect(const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height)
Definition: baseapi.cpp:487
inT16 top() const
Definition: rect.h:54
REJMAP reject_map
Definition: pageres.h:271
virtual R Run()=0
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:264
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:460
STRING * input_file_
Name used by training code.
Definition: baseapi.h:858
#define ASSERT_HOST(x)
Definition: errcode.h:84
GenericVector< StringParam * > string_params
Definition: params.h:46
Boxa * GetRegions(Pixa **pixa)
Definition: baseapi.cpp:585
UNICHAR_ID unichar_id() const
Definition: ratngs.h:76
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
Definition: points.h:189
bool AddImage(TessBaseAPI *api)
Definition: renderer.cpp:64
ELISTIZEH(AmbigSpec)
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:864
const char * string() const
Definition: params.h:203
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language)
Definition: tessedit.cpp:465
const int kNumbersPerBlob
Definition: baseapi.cpp:1580
WERD_LIST * word_list()
Definition: ocrrow.h:52
WERD_RES * word() const
Definition: pageres.h:733
void ReSegmentByClassification(PAGE_RES *page_res)
Definition: applybox.cpp:509
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1259
TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender)
Definition: baseapi.cpp:2425
const int kBytesPerBoxFileLine
Definition: baseapi.cpp:1592
OSBestResult best_result
Definition: osdetect.h:79
#define MAX_PATH
Definition: platform.h:43
const char * c_str() const
Definition: strngs.cpp:204