// tesseract 3.04.01
00001 /********************************************************************** 00002 * File: tessedit.cpp (Formerly tessedit.c) 00003 * Description: Main program for merge of tess and editor. 00004 * Author: Ray Smith 00005 * Created: Tue Jan 07 15:21:46 GMT 1992 00006 * 00007 * (C) Copyright 1992, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 // Include automatically generated configuration file if running autoconf 00021 #ifdef HAVE_CONFIG_H 00022 #include "config_auto.h" 00023 #endif 00024 00025 #include <iostream> 00026 00027 #include "allheaders.h" 00028 #include "baseapi.h" 00029 #include "basedir.h" 00030 #include "renderer.h" 00031 #include "strngs.h" 00032 #include "tprintf.h" 00033 #include "openclwrapper.h" 00034 #include "osdetect.h" 00035 00036 void PrintVersionInfo() { 00037 char *versionStrP; 00038 00039 fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version()); 00040 00041 versionStrP = getLeptonicaVersion(); 00042 fprintf(stderr, " %s\n", versionStrP); 00043 lept_free(versionStrP); 00044 00045 versionStrP = getImagelibVersions(); 00046 fprintf(stderr, " %s\n", versionStrP); 00047 lept_free(versionStrP); 00048 00049 #ifdef USE_OPENCL 00050 cl_platform_id platform; 00051 cl_uint num_platforms; 00052 cl_device_id devices[2]; 00053 cl_uint num_devices; 00054 char info[256]; 00055 int 
i; 00056 00057 fprintf(stderr, " OpenCL info:\n"); 00058 clGetPlatformIDs(1, &platform, &num_platforms); 00059 fprintf(stderr, " Found %d platforms.\n", num_platforms); 00060 clGetPlatformInfo(platform, CL_PLATFORM_NAME, 256, info, 0); 00061 fprintf(stderr, " Platform name: %s.\n", info); 00062 clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 256, info, 0); 00063 fprintf(stderr, " Version: %s.\n", info); 00064 clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, devices, &num_devices); 00065 fprintf(stderr, " Found %d devices.\n", num_devices); 00066 for (i = 0; i < num_devices; ++i) { 00067 clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0); 00068 fprintf(stderr, " Device %d name: %s.\n", i+1, info); 00069 } 00070 #endif 00071 } 00072 00073 void PrintUsage(const char* program) { 00074 fprintf(stderr, 00075 "Usage:\n" 00076 " %s --help | --help-psm | --version\n" 00077 " %s --list-langs [--tessdata-dir PATH]\n" 00078 " %s --print-parameters [options...] [configfile...]\n" 00079 " %s imagename|stdin outputbase|stdout [options...] [configfile...]\n", 00080 program, program, program, program); 00081 } 00082 00083 void PrintHelpForPSM() { 00084 const char* msg = 00085 "Page segmentation modes:\n" 00086 " 0 Orientation and script detection (OSD) only.\n" 00087 " 1 Automatic page segmentation with OSD.\n" 00088 " 2 Automatic page segmentation, but no OSD, or OCR.\n" 00089 " 3 Fully automatic page segmentation, but no OSD. (Default)\n" 00090 " 4 Assume a single column of text of variable sizes.\n" 00091 " 5 Assume a single uniform block of vertically aligned text.\n" 00092 " 6 Assume a single uniform block of text.\n" 00093 " 7 Treat the image as a single text line.\n" 00094 " 8 Treat the image as a single word.\n" 00095 " 9 Treat the image as a single word in a circle.\n" 00096 " 10 Treat the image as a single character.\n" 00097 00098 //TODO: Consider publishing these modes. 00099 #if 0 00100 " 11 Sparse text. 
Find as much text as possible in no" 00101 " particular order.\n" 00102 " 12 Sparse text with OSD.\n" 00103 " 13 Raw line. Treat the image as a single text line,\n" 00104 "\t\t\tbypassing hacks that are Tesseract-specific.\n" 00105 #endif 00106 ; 00107 00108 fprintf(stderr, "%s", msg); 00109 } 00110 00111 void PrintHelpMessage(const char* program) { 00112 PrintUsage(program); 00113 00114 const char* ocr_options = 00115 "OCR options:\n" 00116 " --tessdata-dir PATH Specify the location of tessdata path.\n" 00117 " --user-words PATH Specify the location of user words file.\n" 00118 " --user-patterns PATH Specify the location of user patterns file.\n" 00119 " -l LANG[+LANG] Specify language(s) used for OCR.\n" 00120 " -c VAR=VALUE Set value for config variables.\n" 00121 " Multiple -c arguments are allowed.\n" 00122 " -psm NUM Specify page segmentation mode.\n" 00123 "NOTE: These options must occur before any configfile.\n" 00124 ; 00125 00126 fprintf(stderr, "\n%s\n", ocr_options); 00127 PrintHelpForPSM(); 00128 00129 const char *single_options = 00130 "Single options:\n" 00131 " -h, --help Show this help message.\n" 00132 " --help-psm Show page segmentation modes.\n" 00133 " -v, --version Show version information.\n" 00134 " --list-langs List available languages for tesseract engine.\n" 00135 " --print-parameters Print tesseract parameters to stdout.\n" 00136 ; 00137 00138 fprintf(stderr, "\n%s", single_options); 00139 } 00140 00141 void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, char** argv) { 00142 char opt1[256], opt2[255]; 00143 for (int i = 0; i < argc; i++) { 00144 if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { 00145 strncpy(opt1, argv[i + 1], 255); 00146 opt1[255] = '\0'; 00147 char *p = strchr(opt1, '='); 00148 if (!p) { 00149 fprintf(stderr, "Missing = in configvar assignment\n"); 00150 exit(1); 00151 } 00152 *p = 0; 00153 strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255); 00154 opt2[254] = 0; 00155 ++i; 00156 00157 if 
(!api->SetVariable(opt1, opt2)) { 00158 fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2); 00159 } 00160 } 00161 } 00162 } 00163 00164 void PrintLangsList(tesseract::TessBaseAPI* api) { 00165 GenericVector<STRING> languages; 00166 api->GetAvailableLanguagesAsVector(&languages); 00167 fprintf(stderr, "List of available languages (%d):\n", 00168 languages.size()); 00169 for (int index = 0; index < languages.size(); ++index) { 00170 STRING& string = languages[index]; 00171 fprintf(stderr, "%s\n", string.string()); 00172 } 00173 api->End(); 00174 } 00175 00190 void FixPageSegMode(tesseract::TessBaseAPI* api, 00191 tesseract::PageSegMode pagesegmode) { 00192 if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) 00193 api->SetPageSegMode(pagesegmode); 00194 } 00195 00196 // NOTE: arg_i is used here to avoid ugly *i so many times in this function 00197 void ParseArgs(const int argc, char** argv, 00198 const char** lang, 00199 const char** image, 00200 const char** outputbase, 00201 const char** datapath, 00202 bool* list_langs, 00203 bool* print_parameters, 00204 GenericVector<STRING>* vars_vec, 00205 GenericVector<STRING>* vars_values, 00206 int* arg_i, 00207 tesseract::PageSegMode* pagesegmode) { 00208 if (argc == 1) { 00209 PrintHelpMessage(argv[0]); 00210 exit(0); 00211 } 00212 00213 if (argc == 2) { 00214 if ((strcmp(argv[1], "-h") == 0) || 00215 (strcmp(argv[1], "--help") == 0)) { 00216 PrintHelpMessage(argv[0]); 00217 exit(0); 00218 } 00219 if ((strcmp(argv[1], "--help-psm") == 0)) { 00220 PrintHelpForPSM(); 00221 exit(0); 00222 } 00223 if ((strcmp(argv[1], "-v") == 0) || 00224 (strcmp(argv[1], "--version") == 0)) { 00225 PrintVersionInfo(); 00226 exit(0); 00227 } 00228 } 00229 00230 bool noocr = false; 00231 int i = 1; 00232 while (i < argc && (*outputbase == NULL || argv[i][0] == '-')) { 00233 if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) { 00234 *lang = argv[i + 1]; 00235 ++i; 00236 } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < 
argc) { 00237 *datapath = argv[i + 1]; 00238 ++i; 00239 } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) { 00240 vars_vec->push_back("user_words_file"); 00241 vars_values->push_back(argv[i + 1]); 00242 ++i; 00243 } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) { 00244 vars_vec->push_back("user_patterns_file"); 00245 vars_values->push_back(argv[i + 1]); 00246 ++i; 00247 } else if (strcmp(argv[i], "--list-langs") == 0) { 00248 noocr = true; 00249 *list_langs = true; 00250 } else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) { 00251 *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1])); 00252 ++i; 00253 } else if (strcmp(argv[i], "--print-parameters") == 0) { 00254 noocr = true; 00255 *print_parameters = true; 00256 } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { 00257 // handled properly after api init 00258 ++i; 00259 } else if (*image == NULL) { 00260 *image = argv[i]; 00261 } else if (*outputbase == NULL) { 00262 *outputbase = argv[i]; 00263 } 00264 ++i; 00265 } 00266 00267 *arg_i = i; 00268 00269 if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) { 00270 *list_langs = true; 00271 noocr = true; 00272 } 00273 00274 if (*outputbase == NULL && noocr == false) { 00275 PrintHelpMessage(argv[0]); 00276 exit(1); 00277 } 00278 00279 if (*outputbase != NULL && strcmp(*outputbase, "-") && 00280 strcmp(*outputbase, "stdout")) { 00281 tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", 00282 tesseract::TessBaseAPI::Version()); 00283 } 00284 } 00285 00286 void PreloadRenderers(tesseract::TessBaseAPI* api, 00287 tesseract::PointerVector<tesseract::TessResultRenderer>* renderers, 00288 tesseract::PageSegMode pagesegmode, 00289 const char* outputbase) { 00290 if (pagesegmode == tesseract::PSM_OSD_ONLY) { 00291 renderers->push_back(new tesseract::TessOsdRenderer(outputbase)); 00292 } else { 00293 bool b; 00294 api->GetBoolVariable("tessedit_create_hocr", &b); 00295 if (b) { 00296 bool font_info; 
00297 api->GetBoolVariable("hocr_font_info", &font_info); 00298 renderers->push_back( 00299 new tesseract::TessHOcrRenderer(outputbase, font_info)); 00300 } 00301 00302 api->GetBoolVariable("tessedit_create_pdf", &b); 00303 if (b) { 00304 renderers->push_back(new tesseract::TessPDFRenderer(outputbase, 00305 api->GetDatapath())); 00306 } 00307 00308 api->GetBoolVariable("tessedit_write_unlv", &b); 00309 if (b) { 00310 renderers->push_back(new tesseract::TessUnlvRenderer(outputbase)); 00311 } 00312 00313 api->GetBoolVariable("tessedit_create_boxfile", &b); 00314 if (b) { 00315 renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase)); 00316 } 00317 00318 api->GetBoolVariable("tessedit_create_txt", &b); 00319 if (b || renderers->empty()) { 00320 renderers->push_back(new tesseract::TessTextRenderer(outputbase)); 00321 } 00322 } 00323 00324 if (!renderers->empty()) { 00325 // Since the PointerVector auto-deletes, null-out the renderers that are 00326 // added to the root, and leave the root in the vector. 
00327 for (int r = 1; r < renderers->size(); ++r) { 00328 (*renderers)[0]->insert((*renderers)[r]); 00329 (*renderers)[r] = NULL; 00330 } 00331 } 00332 } 00333 00334 /********************************************************************** 00335 * main() 00336 * 00337 **********************************************************************/ 00338 int main(int argc, char **argv) { 00339 const char* lang = "eng"; 00340 const char* image = NULL; 00341 const char* outputbase = NULL; 00342 const char* datapath = NULL; 00343 bool list_langs = false; 00344 bool print_parameters = false; 00345 GenericVector<STRING> vars_vec, vars_values; 00346 int arg_i = 1; 00347 tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO; 00348 00349 ParseArgs(argc, argv, 00350 &lang, &image, &outputbase, &datapath, 00351 &list_langs, &print_parameters, 00352 &vars_vec, &vars_values, &arg_i, &pagesegmode); 00353 00354 PERF_COUNT_START("Tesseract:main") 00355 tesseract::TessBaseAPI api; 00356 00357 api.SetOutputName(outputbase); 00358 00359 int init_failed = api.Init(datapath, lang, tesseract::OEM_DEFAULT, 00360 &(argv[arg_i]), argc - arg_i, &vars_vec, &vars_values, false); 00361 if (init_failed) { 00362 fprintf(stderr, "Could not initialize tesseract.\n"); 00363 exit(1); 00364 } 00365 00366 SetVariablesFromCLArgs(&api, argc, argv); 00367 00368 if (list_langs) { 00369 PrintLangsList(&api); 00370 exit(0); 00371 } 00372 00373 if (print_parameters) { 00374 FILE* fout = stdout; 00375 fprintf(stdout, "Tesseract parameters:\n"); 00376 api.PrintVariables(fout); 00377 api.End(); 00378 exit(0); 00379 } 00380 00381 FixPageSegMode(&api, pagesegmode); 00382 00383 if (pagesegmode == tesseract::PSM_AUTO_ONLY) { 00384 int ret_val = 0; 00385 00386 Pix* pixs = pixRead(image); 00387 if (!pixs) { 00388 fprintf(stderr, "Cannot open input file: %s\n", image); 00389 exit(2); 00390 } 00391 00392 api.SetImage(pixs); 00393 00394 tesseract::Orientation orientation; 00395 tesseract::WritingDirection direction; 00396 
tesseract::TextlineOrder order; 00397 float deskew_angle; 00398 00399 tesseract::PageIterator* it = api.AnalyseLayout(); 00400 if (it) { 00401 it->Orientation(&orientation, &direction, &order, &deskew_angle); 00402 tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \ 00403 "Deskew angle: %.4f\n", 00404 orientation, direction, order, deskew_angle); 00405 } else { 00406 ret_val = 1; 00407 } 00408 00409 delete it; 00410 00411 pixDestroy(&pixs); 00412 exit(ret_val); 00413 } 00414 00415 // set in_training_mode to true when using one of these configs: 00416 // ambigs.train, box.train, box.train.stderr, linebox, rebox 00417 bool b = false; 00418 bool in_training_mode = 00419 (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) || 00420 (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) || 00421 (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b); 00422 00423 tesseract::PointerVector<tesseract::TessResultRenderer> renderers; 00424 00425 if (in_training_mode) { 00426 renderers.push_back(NULL); 00427 } else { 00428 PreloadRenderers(&api, &renderers, pagesegmode, outputbase); 00429 } 00430 00431 if (!renderers.empty()) { 00432 bool succeed = api.ProcessPages(image, NULL, 0, renderers[0]); 00433 if (!succeed) { 00434 fprintf(stderr, "Error during processing.\n"); 00435 exit(1); 00436 } 00437 } 00438 00439 PERF_COUNT_END 00440 return 0; // Normal exit 00441 }