27 #include "allheaders.h"
// Excerpt of a version/diagnostics printer; the enclosing function header and
// several declarations (info, num_devices, i) are not visible in this span.
// Everything here is written to stderr.
//
// Leptonica version string: fetched, printed, then released with lept_free().
41 versionStrP = getLeptonicaVersion();
42 fprintf(stderr,
" %s\n", versionStrP);
43 lept_free(versionStrP);
// Image library versions: same fetch / print / lept_free() sequence.
45 versionStrP = getImagelibVersions();
46 fprintf(stderr,
" %s\n", versionStrP);
47 lept_free(versionStrP);
// OpenCL diagnostics: one platform and at most two devices are requested.
50 cl_platform_id platform;
51 cl_uint num_platforms;
// Capacity 2 matches the num_entries argument passed to clGetDeviceIDs below.
52 cl_device_id devices[2];
57 fprintf(stderr,
" OpenCL info:\n");
// NOTE(review): none of the cl* return codes are checked in the visible lines;
// if no platform is reported, `platform` would be used without having been
// written - confirm against the full function.
58 clGetPlatformIDs(1, &platform, &num_platforms);
59 fprintf(stderr,
" Found %d platforms.\n", num_platforms);
// 256 is passed as the byte size of `info`; its declaration is not visible
// here - confirm the buffer is at least that large.
60 clGetPlatformInfo(platform, CL_PLATFORM_NAME, 256, info, 0);
61 fprintf(stderr,
" Platform name: %s.\n", info);
62 clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 256, info, 0);
63 fprintf(stderr,
" Version: %s.\n", info);
// NOTE(review): per the OpenCL API reference, the last argument receives the
// number of matching devices available, which may be larger than the 2 entries
// requested. The loop below is bounded by num_devices, so with more than two
// devices it would index past devices[1] - confirm and clamp the bound.
64 clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, devices, &num_devices);
65 fprintf(stderr,
" Found %d devices.\n", num_devices);
// One line per device; the printed index is 1-based (i+1).
66 for (i = 0; i < num_devices; ++i) {
67 clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0);
68 fprintf(stderr,
" Device %d name: %s.\n", i+1, info);
// Tail of the usage message (the call head is not visible in this span).
// The format lists four invocation forms, each starting with %s, which is why
// `program` is supplied four times as the argument list.
76 " %s --help | --help-psm | --version\n"
77 " %s --list-langs [--tessdata-dir PATH]\n"
78 " %s --print-parameters [options...] [configfile...]\n"
79 " %s imagename|stdin outputbase|stdout [options...] [configfile...]\n",
80 program, program, program, program);
// Page segmentation mode table, built from adjacent string literals (one per
// mode number 0-13). The declaration that receives this text is not visible in
// this span; presumably it is the `msg` printed at the end - confirm.
85 "Page segmentation modes:\n"
86 " 0 Orientation and script detection (OSD) only.\n"
87 " 1 Automatic page segmentation with OSD.\n"
88 " 2 Automatic page segmentation, but no OSD, or OCR.\n"
89 " 3 Fully automatic page segmentation, but no OSD. (Default)\n"
90 " 4 Assume a single column of text of variable sizes.\n"
91 " 5 Assume a single uniform block of vertically aligned text.\n"
92 " 6 Assume a single uniform block of text.\n"
93 " 7 Treat the image as a single text line.\n"
94 " 8 Treat the image as a single word.\n"
95 " 9 Treat the image as a single word in a circle.\n"
96 " 10 Treat the image as a single character.\n"
// The entry for mode 11 is split across two literals that concatenate into a
// single output line (only the second one ends in a newline).
100 " 11 Sparse text. Find as much text as possible in no"
101 " particular order.\n"
102 " 12 Sparse text with OSD.\n"
// Mode 13 spans two output lines; the continuation is indented with tabs.
103 " 13 Raw line. Treat the image as a single text line,\n"
104 "\t\t\tbypassing hacks that are Tesseract-specific.\n"
// Emitted through "%s" so that the help text is never interpreted as a format.
108 fprintf(stderr,
"%s", msg);
// Two help sections, each held in a local string and printed to stderr.
// First section: options that affect OCR itself. The opening literal and the
// terminating semicolon of this initializer are not visible in this span.
114 const char* ocr_options =
116 " --tessdata-dir PATH Specify the location of tessdata path.\n"
117 " --user-words PATH Specify the location of user words file.\n"
118 " --user-patterns PATH Specify the location of user patterns file.\n"
119 " -l LANG[+LANG] Specify language(s) used for OCR.\n"
120 " -c VAR=VALUE Set value for config variables.\n"
121 " Multiple -c arguments are allowed.\n"
122 " -psm NUM Specify page segmentation mode.\n"
123 "NOTE: These options must occur before any configfile.\n"
// Printed with a blank line before and an extra newline after the block.
126 fprintf(stderr,
"\n%s\n", ocr_options);
// Second section: flags that are used on their own.
129 const char *single_options =
131 " -h, --help Show this help message.\n"
132 " --help-psm Show page segmentation modes.\n"
133 " -v, --version Show version information.\n"
134 " --list-langs List available languages for tesseract engine.\n"
135 " --print-parameters Print tesseract parameters to stdout.\n"
// Printed with a leading blank line only (no extra trailing newline).
138 fprintf(stderr,
"\n%s", single_options);
// Excerpt of the "-c VAR=VALUE" handler: scans argv for "-c" and splits the
// following argument into a name (opt1) and a value (opt2).
// Note the two buffers differ in size: opt1 holds 256 bytes, opt2 holds 255.
142 char opt1[256], opt2[255];
143 for (
int i = 0; i < argc; i++) {
// Only a "-c" that is followed by another argument is considered.
144 if (strcmp(argv[i],
"-c") == 0 && i + 1 < argc) {
// NOTE(review): strncpy does not append a terminator when the source has 255
// or more characters; the line that would terminate opt1 is not visible in
// this excerpt - confirm it exists before opt1 is searched or printed.
145 strncpy(opt1, argv[i + 1], 255);
// Locate the separator inside the (possibly truncated) copy.
147 char *p = strchr(opt1,
'=');
// Diagnostic for an argument without '='; the surrounding condition and any
// exit path are on lines not visible here.
149 fprintf(stderr,
"Missing = in configvar assignment\n");
// The value is taken from the original argument, not from opt1, starting just
// past the first '='. NOTE(review): this strchr result is used unchecked; it
// relies on the earlier check having rejected arguments without '=' - confirm.
// The same termination caveat as above applies to opt2 (255-byte buffer,
// count of 255).
153 strncpy(opt2, strchr(argv[i + 1],
'=') + 1, 255);
// Reported when the assignment is rejected (the call that does the assignment
// is not visible here; presumably SetVariable - confirm).
158 fprintf(stderr,
"Could not set option: %s=%s\n", opt1, opt2);
// Excerpt of the language lister: prints a header containing a count, then one
// language per line, all to stderr. The declaration of `languages` and the
// count argument of the header are on lines not visible in this span.
167 fprintf(stderr,
"List of available languages (%d):\n",
// Index-based walk over the whole container.
169 for (
int index = 0; index < languages.
size(); ++index) {
// `string` here is a local reference to the current element, not a type name.
170 STRING&
string = languages[index];
// .string() supplies the value for %s - presumably a NUL-terminated C string;
// confirm against the STRING class.
171 fprintf(stderr,
"%s\n",
string.
string());
// Excerpt of the command-line parser. Only part of the parameter list is
// visible; results are reported through the pointer parameters.
200 const char** outputbase,
201 const char** datapath,
203 bool* print_parameters,
// Informational flags are matched against argv[1] only.
// NOTE(review): the guard that guarantees argc > 1 before argv[1] is read is
// not visible in this excerpt - confirm it exists.
214 if ((strcmp(argv[1],
"-h") == 0) ||
215 (strcmp(argv[1],
"--help") == 0)) {
219 if ((strcmp(argv[1],
"--help-psm") == 0)) {
223 if ((strcmp(argv[1],
"-v") == 0) ||
224 (strcmp(argv[1],
"--version") == 0)) {
// Main option loop: continues while no output base has been recorded yet, or
// while the current argument starts with '-'.
232 while (i < argc && (*outputbase == NULL || argv[i][0] ==
'-')) {
// Options that take a value also require a following argument (i + 1 < argc);
// when that test fails the chain falls through to the later branches.
233 if (strcmp(argv[i],
"-l") == 0 && i + 1 < argc) {
236 }
else if (strcmp(argv[i],
"--tessdata-dir") == 0 && i + 1 < argc) {
// The path is stored by pointer into argv, not copied.
237 *datapath = argv[i + 1];
239 }
else if (strcmp(argv[i],
"--user-words") == 0 && i + 1 < argc) {
243 }
else if (strcmp(argv[i],
"--user-patterns") == 0 && i + 1 < argc) {
// Records the variable name; the matching value is presumably pushed onto
// vars_values on a line not visible here - confirm.
244 vars_vec->
push_back(
"user_patterns_file");
247 }
else if (strcmp(argv[i],
"--list-langs") == 0) {
250 }
else if (strcmp(argv[i],
"-psm") == 0 && i + 1 < argc) {
253 }
else if (strcmp(argv[i],
"--print-parameters") == 0) {
255 *print_parameters =
true;
256 }
else if (strcmp(argv[i],
"-c") == 0 && i + 1 < argc) {
259 }
// Anything that matched no option: the first such argument is handled by the
// *image == NULL branch (body not visible), the next becomes the output base.
else if (*image == NULL) {
261 }
else if (*outputbase == NULL) {
262 *outputbase = argv[i];
// Special case: "--list-langs" given as the only argument.
269 if (argc == 2 && strcmp(argv[1],
"--list-langs") == 0) {
// A missing output base is only a problem when OCR is actually requested.
274 if (*outputbase == NULL && noocr ==
false) {
// strcmp is non-zero on mismatch, so this holds only when the output base is
// neither "-" nor "stdout"; the banner is printed in that case.
279 if (*outputbase != NULL && strcmp(*outputbase,
"-") &&
280 strcmp(*outputbase,
"stdout")) {
281 tprintf(
"Tesseract Open Source OCR Engine v%s with Leptonica\n",
// Excerpt of the renderer setup; only the last parameter and the tail of the
// body are visible in this span.
289 const char* outputbase) {
// Taken when flag `b` is set or when no renderer has been added so far; the
// body of this branch is on lines not visible here.
319 if (b || renderers->
empty()) {
// With at least one renderer present, every later renderer is attached to the
// first one via insert(), and its slot in the vector is then cleared.
324 if (!renderers->
empty()) {
327 for (
int r = 1; r < renderers->
size(); ++r) {
328 (*renderers)[0]->insert((*renderers)[r]);
// NOTE(review): presumably insert() takes over ownership, and nulling the slot
// keeps the vector from releasing the same renderer again - confirm.
329 (*renderers)[r] = NULL;
// Program entry point (excerpt; many intermediate lines are not visible).
338 int main(
int argc,
char **argv) {
// Defaults used when the command line does not override them: language "eng",
// no image / output base / data path, and both mode flags off.
339 const char*
lang =
"eng";
340 const char* image = NULL;
341 const char* outputbase = NULL;
342 const char* datapath = NULL;
343 bool list_langs =
false;
344 bool print_parameters =
false;
// Addresses of the locals above, passed as out-parameters; the head of this
// call is not visible here (presumably the argument parser - confirm).
350 &lang, &image, &outputbase, &datapath,
351 &list_langs, &print_parameters,
352 &vars_vec, &vars_values, &arg_i, &pagesegmode);
// Tail of an initialization call: the arguments from arg_i onward and their
// count are forwarded together with the collected variable names and values.
360 &(argv[arg_i]), argc - arg_i, &vars_vec, &vars_values,
false);
362 fprintf(stderr,
"Could not initialize tesseract.\n");
// The parameter dump header goes to stdout, unlike the diagnostics in this
// function, which go to stderr.
373 if (print_parameters) {
375 fprintf(stdout,
"Tesseract parameters:\n");
// Load the input image with pixRead.
// NOTE(review): the failure check and the release of pixs are on lines not
// visible here - confirm pixs is destroyed on every path.
386 Pix* pixs = pixRead(image);
388 fprintf(stderr,
"Cannot open input file: %s\n", image);
// Layout-analysis report: the four values filled in by Orientation() are
// printed with tprintf.
401 it->
Orientation(&orientation, &direction, &order, &deskew_angle);
402 tprintf(
"Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \
403 "Deskew angle: %.4f\n",
404 orientation, direction, order, deskew_angle);
418 bool in_training_mode =
425 if (in_training_mode) {
// Process the input through the first renderer, passing NULL and 0 for the
// second and third arguments.
431 if (!renderers.
empty()) {
432 bool succeed = api.
ProcessPages(image, NULL, 0, renderers[0]);
434 fprintf(stderr,
"Error during processing.\n");
static const char * Version()
void GetAvailableLanguagesAsVector(GenericVector< STRING > *langs) const
void PreloadRenderers(tesseract::TessBaseAPI *api, tesseract::PointerVector< tesseract::TessResultRenderer > *renderers, tesseract::PageSegMode pagesegmode, const char *outputbase)
PageIterator * AnalyseLayout()
bool SetVariable(const char *name, const char *value)
const char * GetDatapath()
PageSegMode GetPageSegMode() const
bool GetBoolVariable(const char *name, bool *value) const
void FixPageSegMode(tesseract::TessBaseAPI *api, tesseract::PageSegMode pagesegmode)
void PrintHelpMessage(const char *program)
Orientation and script detection only.
Automatic page segmentation, but no OSD, or OCR.
int direction(EDGEPT *point)
int main(int argc, char **argv)
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
void PrintUsage(const char *program)
Assume a single uniform block of text. (Default.)
void SetOutputName(const char *name)
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
#define PERF_COUNT_START(FUNCT_NAME)
Fully automatic page segmentation, but no OSD.
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
void PrintLangsList(tesseract::TessBaseAPI *api)
void PrintVariables(FILE *fp) const
void SetPageSegMode(PageSegMode mode)
void ParseArgs(const int argc, char **argv, const char **lang, const char **image, const char **outputbase, const char **datapath, bool *list_langs, bool *print_parameters, GenericVector< STRING > *vars_vec, GenericVector< STRING > *vars_values, int *arg_i, tesseract::PageSegMode *pagesegmode)
void SetVariablesFromCLArgs(tesseract::TessBaseAPI *api, int argc, char **argv)