This program reads in a text file consisting of feature samples from a training page in the following format:
The result of this program is a binary inttemp file used by the OCR engine.
389 const char*
lang =
"eng";
390 const char* image = NULL;
391 const char* outputbase = NULL;
392 const char* datapath = NULL;
393 bool list_langs =
false;
394 bool print_parameters =
false;
406 setMsgSeverity(L_SEVERITY_ERROR);
409 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
411 TIFFSetWarningHandler(Win32WarningHandler);
414 ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs,
415 &print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode,
419 if (outputbase != NULL && strcmp(outputbase,
"-") &&
420 strcmp(outputbase,
"stdout")) {
434 api.SetOutputName(outputbase);
436 int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
437 argc - arg_i, &vars_vec, &vars_values, false);
439 fprintf(stderr,
"Could not initialize tesseract.\n");
450 if (print_parameters) {
452 fprintf(stdout,
"Tesseract parameters:\n");
453 api.PrintVariables(fout);
461 int ret_val = EXIT_SUCCESS;
463 Pix* pixs = pixRead(image);
465 fprintf(stderr,
"Cannot open input file: %s\n", image);
478 it->
Orientation(&orientation, &direction, &order, &deskew_angle);
480 "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
481 "Deskew angle: %.4f\n",
482 orientation, direction, order, deskew_angle);
484 ret_val = EXIT_FAILURE;
496 bool in_training_mode =
497 (
api.GetBoolVariable(
"tessedit_ambigs_training", &b) && b) ||
498 (
api.GetBoolVariable(
"tessedit_resegment_from_boxes", &b) && b) ||
499 (
api.GetBoolVariable(
"tessedit_make_boxes_from_boxes", &b) && b);
504 if (in_training_mode) {
510 if (!renderers.
empty()) {
512 bool succeed =
api.ProcessPages(image, NULL, 0, renderers[0]);
514 fprintf(stderr,
"Error during processing.\n");
void PrintLangsList(tesseract::TessBaseAPI *api)
void SetVariablesFromCLArgs(tesseract::TessBaseAPI *api, int argc, char **argv)
struct TessBaseAPI TessBaseAPI
void PreloadRenderers(tesseract::TessBaseAPI *api, tesseract::PointerVector< tesseract::TessResultRenderer > *renderers, tesseract::PageSegMode pagesegmode, const char *outputbase)
void ParseArgs(const int argc, char **argv, const char **lang, const char **image, const char **outputbase, const char **datapath, bool *list_langs, bool *print_parameters, GenericVector< STRING > *vars_vec, GenericVector< STRING > *vars_values, int *arg_i, tesseract::PageSegMode *pagesegmode, tesseract::OcrEngineMode *enginemode)
int direction(EDGEPT *point)
#define PERF_COUNT_START(FUNCT_NAME)
Automatic page segmentation, but no OSD, or OCR.
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
void FixPageSegMode(tesseract::TessBaseAPI *api, tesseract::PageSegMode pagesegmode)
Fully automatic page segmentation, but no OSD.