tesseract 3.04.01

api/tesseractmain.cpp

Go to the documentation of this file.
00001 /**********************************************************************
00002 * File:        tesseractmain.cpp  (Formerly tessedit.cpp)
00003 * Description: Main program for merge of tess and editor.
00004 * Author:                  Ray Smith
00005 * Created:                 Tue Jan 07 15:21:46 GMT 1992
00006 *
00007 * (C) Copyright 1992, Hewlett-Packard Ltd.
00008 ** Licensed under the Apache License, Version 2.0 (the "License");
00009 ** you may not use this file except in compliance with the License.
00010 ** You may obtain a copy of the License at
00011 ** http://www.apache.org/licenses/LICENSE-2.0
00012 ** Unless required by applicable law or agreed to in writing, software
00013 ** distributed under the License is distributed on an "AS IS" BASIS,
00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 ** See the License for the specific language governing permissions and
00016 ** limitations under the License.
00017 *
00018 **********************************************************************/
00019 
00020 // Include automatically generated configuration file if running autoconf
00021 #ifdef HAVE_CONFIG_H
00022 #include "config_auto.h"
00023 #endif
00024 
00025 #include <iostream>
00026 
00027 #include "allheaders.h"
00028 #include "baseapi.h"
00029 #include "basedir.h"
00030 #include "renderer.h"
00031 #include "strngs.h"
00032 #include "tprintf.h"
00033 #include "openclwrapper.h"
00034 #include "osdetect.h"
00035 
00036 void PrintVersionInfo() {
00037     char *versionStrP;
00038 
00039     fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version());
00040 
00041     versionStrP = getLeptonicaVersion();
00042     fprintf(stderr, " %s\n", versionStrP);
00043     lept_free(versionStrP);
00044 
00045     versionStrP = getImagelibVersions();
00046     fprintf(stderr, "  %s\n", versionStrP);
00047     lept_free(versionStrP);
00048 
00049 #ifdef USE_OPENCL
00050     cl_platform_id platform;
00051     cl_uint num_platforms;
00052     cl_device_id devices[2];
00053     cl_uint num_devices;
00054     char info[256];
00055     int i;
00056 
00057     fprintf(stderr, " OpenCL info:\n");
00058     clGetPlatformIDs(1, &platform, &num_platforms);
00059     fprintf(stderr, "  Found %d platforms.\n", num_platforms);
00060     clGetPlatformInfo(platform, CL_PLATFORM_NAME, 256, info, 0);
00061     fprintf(stderr, "  Platform name: %s.\n", info);
00062     clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 256, info, 0);
00063     fprintf(stderr, "  Version: %s.\n", info);
00064     clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, devices, &num_devices);
00065     fprintf(stderr, "  Found %d devices.\n", num_devices);
00066     for (i = 0; i < num_devices; ++i) {
00067       clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0);
00068       fprintf(stderr, "    Device %d name: %s.\n", i+1, info);
00069     }
00070 #endif
00071 }
00072 
// Writes the four-line command synopsis to stderr, substituting |program|
// (the executable name) into each usage line.
void PrintUsage(const char* program) {
  static const char kUsageFormat[] =
      "Usage:\n"
      "  %s --help | --help-psm | --version\n"
      "  %s --list-langs [--tessdata-dir PATH]\n"
      "  %s --print-parameters [options...] [configfile...]\n"
      "  %s imagename|stdin outputbase|stdout [options...] [configfile...]\n";

  fprintf(stderr, kUsageFormat, program, program, program, program);
}
00082 
// Writes the table of page segmentation modes (0-10) to stderr.
// Modes 11-13 are compiled out of the text by the #if 0 section below.
void PrintHelpForPSM() {
  static const char kPsmHelp[] =
      "Page segmentation modes:\n"
      "  0    Orientation and script detection (OSD) only.\n"
      "  1    Automatic page segmentation with OSD.\n"
      "  2    Automatic page segmentation, but no OSD, or OCR.\n"
      "  3    Fully automatic page segmentation, but no OSD. (Default)\n"
      "  4    Assume a single column of text of variable sizes.\n"
      "  5    Assume a single uniform block of vertically aligned text.\n"
      "  6    Assume a single uniform block of text.\n"
      "  7    Treat the image as a single text line.\n"
      "  8    Treat the image as a single word.\n"
      "  9    Treat the image as a single word in a circle.\n"
      " 10    Treat the image as a single character.\n"

      // TODO: Consider publishing these modes.
#if 0
      " 11    Sparse text. Find as much text as possible in no"
        " particular order.\n"
      " 12    Sparse text with OSD.\n"
      " 13    Raw line. Treat the image as a single text line,\n"
        "\t\t\tbypassing hacks that are Tesseract-specific.\n"
#endif
      ;

  fputs(kPsmHelp, stderr);
}
00110 
00111 void PrintHelpMessage(const char* program) {
00112   PrintUsage(program);
00113 
00114   const char* ocr_options =
00115       "OCR options:\n"
00116       "  --tessdata-dir PATH   Specify the location of tessdata path.\n"
00117       "  --user-words PATH     Specify the location of user words file.\n"
00118       "  --user-patterns PATH  Specify the location of user patterns file.\n"
00119       "  -l LANG[+LANG]        Specify language(s) used for OCR.\n"
00120       "  -c VAR=VALUE          Set value for config variables.\n"
00121       "                        Multiple -c arguments are allowed.\n"
00122       "  -psm NUM              Specify page segmentation mode.\n"
00123       "NOTE: These options must occur before any configfile.\n"
00124      ;
00125 
00126   fprintf(stderr, "\n%s\n", ocr_options);
00127   PrintHelpForPSM();
00128 
00129   const char *single_options =
00130       "Single options:\n"
00131       "  -h, --help            Show this help message.\n"
00132       "  --help-psm            Show page segmentation modes.\n"
00133       "  -v, --version         Show version information.\n"
00134       "  --list-langs          List available languages for tesseract engine.\n"
00135       "  --print-parameters    Print tesseract parameters to stdout.\n"
00136       ;
00137 
00138   fprintf(stderr, "\n%s", single_options);
00139 }
00140 
00141 void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, char** argv) {
00142   char opt1[256], opt2[255];
00143   for (int i = 0; i < argc; i++) {
00144     if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
00145       strncpy(opt1, argv[i + 1], 255);
00146       opt1[255] = '\0';
00147       char *p = strchr(opt1, '=');
00148       if (!p) {
00149         fprintf(stderr, "Missing = in configvar assignment\n");
00150         exit(1);
00151       }
00152       *p = 0;
00153       strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255);
00154       opt2[254] = 0;
00155       ++i;
00156 
00157       if (!api->SetVariable(opt1, opt2)) {
00158         fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
00159       }
00160     }
00161   }
00162 }
00163 
00164 void PrintLangsList(tesseract::TessBaseAPI* api) {
00165   GenericVector<STRING> languages;
00166   api->GetAvailableLanguagesAsVector(&languages);
00167   fprintf(stderr, "List of available languages (%d):\n",
00168           languages.size());
00169   for (int index = 0; index < languages.size(); ++index) {
00170     STRING& string = languages[index];
00171     fprintf(stderr, "%s\n", string.string());
00172   }
00173   api->End();
00174 }
00175 
00190 void FixPageSegMode(tesseract::TessBaseAPI* api,
00191               tesseract::PageSegMode pagesegmode) {
00192   if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
00193      api->SetPageSegMode(pagesegmode);
00194 }
00195 
00196 // NOTE: arg_i is used here to avoid ugly *i so many times in this function
00197 void ParseArgs(const int argc, char** argv,
00198                   const char** lang,
00199                   const char** image,
00200                   const char** outputbase,
00201                   const char** datapath,
00202                   bool* list_langs,
00203                   bool* print_parameters,
00204                   GenericVector<STRING>* vars_vec,
00205                   GenericVector<STRING>* vars_values,
00206                   int* arg_i,
00207                   tesseract::PageSegMode* pagesegmode) {
00208   if (argc == 1) {
00209     PrintHelpMessage(argv[0]);
00210     exit(0);
00211   }
00212 
00213   if (argc == 2) {
00214     if ((strcmp(argv[1], "-h") == 0) ||
00215          (strcmp(argv[1], "--help") == 0)) {
00216       PrintHelpMessage(argv[0]);
00217       exit(0);
00218     }
00219     if ((strcmp(argv[1], "--help-psm") == 0)) {
00220       PrintHelpForPSM();
00221       exit(0);
00222     }
00223     if ((strcmp(argv[1], "-v") == 0) ||
00224          (strcmp(argv[1], "--version") == 0)) {
00225       PrintVersionInfo();
00226       exit(0);
00227     }
00228   }
00229 
00230   bool noocr = false;
00231   int i = 1;
00232   while (i < argc && (*outputbase == NULL || argv[i][0] == '-')) {
00233     if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
00234       *lang = argv[i + 1];
00235       ++i;
00236     } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
00237       *datapath = argv[i + 1];
00238       ++i;
00239     } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
00240       vars_vec->push_back("user_words_file");
00241       vars_values->push_back(argv[i + 1]);
00242       ++i;
00243     } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
00244       vars_vec->push_back("user_patterns_file");
00245       vars_values->push_back(argv[i + 1]);
00246       ++i;
00247     } else if (strcmp(argv[i], "--list-langs") == 0) {
00248       noocr = true;
00249       *list_langs = true;
00250     } else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) {
00251       *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
00252       ++i;
00253     } else if (strcmp(argv[i], "--print-parameters") == 0) {
00254       noocr = true;
00255       *print_parameters = true;
00256     } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
00257       // handled properly after api init
00258       ++i;
00259     } else if (*image == NULL) {
00260       *image = argv[i];
00261     } else if (*outputbase == NULL) {
00262       *outputbase = argv[i];
00263     }
00264     ++i;
00265   }
00266 
00267   *arg_i = i;
00268 
00269   if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) {
00270     *list_langs = true;
00271     noocr = true;
00272   }
00273 
00274   if (*outputbase == NULL && noocr == false) {
00275     PrintHelpMessage(argv[0]);
00276     exit(1);
00277   }
00278 
00279   if (*outputbase != NULL && strcmp(*outputbase, "-") &&
00280       strcmp(*outputbase, "stdout")) {
00281     tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
00282            tesseract::TessBaseAPI::Version());
00283   }
00284 }
00285 
00286 void PreloadRenderers(tesseract::TessBaseAPI* api,
00287           tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
00288           tesseract::PageSegMode pagesegmode,
00289           const char* outputbase) {
00290   if (pagesegmode == tesseract::PSM_OSD_ONLY) {
00291     renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
00292   } else {
00293     bool b;
00294     api->GetBoolVariable("tessedit_create_hocr", &b);
00295     if (b) {
00296       bool font_info;
00297       api->GetBoolVariable("hocr_font_info", &font_info);
00298       renderers->push_back(
00299                      new tesseract::TessHOcrRenderer(outputbase, font_info));
00300     }
00301 
00302     api->GetBoolVariable("tessedit_create_pdf", &b);
00303     if (b) {
00304       renderers->push_back(new tesseract::TessPDFRenderer(outputbase,
00305                                                         api->GetDatapath()));
00306     }
00307 
00308     api->GetBoolVariable("tessedit_write_unlv", &b);
00309     if (b) {
00310       renderers->push_back(new tesseract::TessUnlvRenderer(outputbase));
00311     }
00312 
00313     api->GetBoolVariable("tessedit_create_boxfile", &b);
00314     if (b) {
00315       renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase));
00316     }
00317 
00318     api->GetBoolVariable("tessedit_create_txt", &b);
00319     if (b || renderers->empty()) {
00320       renderers->push_back(new tesseract::TessTextRenderer(outputbase));
00321     }
00322   }
00323 
00324   if (!renderers->empty()) {
00325     // Since the PointerVector auto-deletes, null-out the renderers that are
00326     // added to the root, and leave the root in the vector.
00327     for (int r = 1; r < renderers->size(); ++r) {
00328       (*renderers)[0]->insert((*renderers)[r]);
00329       (*renderers)[r] = NULL;
00330     }
00331   }
00332 }
00333 
00334 /**********************************************************************
00335  *  main()
00336  *
00337  **********************************************************************/
00338 int main(int argc, char **argv) {
00339   const char* lang = "eng";
00340   const char* image = NULL;
00341   const char* outputbase = NULL;
00342   const char* datapath = NULL;
00343   bool list_langs = false;
00344   bool print_parameters = false;
00345   GenericVector<STRING> vars_vec, vars_values;
00346   int arg_i = 1;
00347   tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
00348 
00349   ParseArgs(argc, argv,
00350           &lang, &image, &outputbase, &datapath,
00351           &list_langs, &print_parameters,
00352           &vars_vec, &vars_values, &arg_i, &pagesegmode);
00353 
00354   PERF_COUNT_START("Tesseract:main")
00355   tesseract::TessBaseAPI api;
00356 
00357   api.SetOutputName(outputbase);
00358 
00359   int init_failed = api.Init(datapath, lang, tesseract::OEM_DEFAULT,
00360                 &(argv[arg_i]), argc - arg_i, &vars_vec, &vars_values, false);
00361   if (init_failed) {
00362     fprintf(stderr, "Could not initialize tesseract.\n");
00363     exit(1);
00364   }
00365 
00366   SetVariablesFromCLArgs(&api, argc, argv);
00367 
00368   if (list_langs) {
00369      PrintLangsList(&api);
00370      exit(0);
00371   }
00372 
00373   if (print_parameters) {
00374      FILE* fout = stdout;
00375      fprintf(stdout, "Tesseract parameters:\n");
00376      api.PrintVariables(fout);
00377      api.End();
00378      exit(0);
00379   }
00380 
00381   FixPageSegMode(&api, pagesegmode);
00382 
00383   if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
00384     int ret_val = 0;
00385 
00386     Pix* pixs = pixRead(image);
00387     if (!pixs) {
00388       fprintf(stderr, "Cannot open input file: %s\n", image);
00389       exit(2);
00390     }
00391 
00392     api.SetImage(pixs);
00393 
00394     tesseract::Orientation orientation;
00395     tesseract::WritingDirection direction;
00396     tesseract::TextlineOrder order;
00397     float deskew_angle;
00398 
00399     tesseract::PageIterator* it =  api.AnalyseLayout();
00400     if (it) {
00401       it->Orientation(&orientation, &direction, &order, &deskew_angle);
00402       tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \
00403              "Deskew angle: %.4f\n",
00404               orientation, direction, order, deskew_angle);
00405     } else {
00406       ret_val = 1;
00407     }
00408 
00409     delete it;
00410 
00411     pixDestroy(&pixs);
00412     exit(ret_val);
00413   }
00414 
00415   // set in_training_mode to true when using one of these configs:
00416   // ambigs.train, box.train, box.train.stderr, linebox, rebox
00417   bool b = false;
00418   bool in_training_mode =
00419         (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
00420         (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
00421         (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
00422 
00423   tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
00424 
00425   if (in_training_mode) {
00426     renderers.push_back(NULL);
00427   } else {
00428     PreloadRenderers(&api, &renderers, pagesegmode, outputbase);
00429   }
00430 
00431   if (!renderers.empty()) {
00432     bool succeed = api.ProcessPages(image, NULL, 0, renderers[0]);
00433     if (!succeed) {
00434       fprintf(stderr, "Error during processing.\n");
00435       exit(1);
00436     }
00437   }
00438 
00439   PERF_COUNT_END
00440   return 0;                      // Normal exit
00441 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines