tesseract 3.04.01

training/classifier_tester.cpp File Reference

#include <stdio.h>
#include "base/commandlineflags.h"
#include "baseapi.h"
#include "commontraining.h"
#include "cubeclassifier.h"
#include "mastertrainer.h"
#include "params.h"
#include "strngs.h"
#include "tessclassifier.h"

Go to the source code of this file.

Enumerations

enum  ClassifierName {
  CN_PRUNER, CN_FULL, CN_CUBE, CN_CUBETESS,
  CN_COUNT
}

Functions

 STRING_PARAM_FLAG (classifier,"","Classifier to test")
 STRING_PARAM_FLAG (lang,"eng","Language to test")
 STRING_PARAM_FLAG (tessdata_dir,"","Directory of traineddata files")
 DECLARE_INT_PARAM_FLAG (debug_level)
 DECLARE_STRING_PARAM_FLAG (T)
int main (int argc, char **argv)

Variables

const char * names []

Enumeration Type Documentation

Enumerator:
CN_PRUNER 
CN_FULL 
CN_CUBE 
CN_CUBETESS 
CN_COUNT 

Definition at line 39 of file classifier_tester.cpp.

                    {
  CN_PRUNER,
  CN_FULL,
#ifndef NO_CUBE_BUILD
  CN_CUBE,
  CN_CUBETESS,
#endif  // NO_CUBE_BUILD
  CN_COUNT
};

Function Documentation

DECLARE_INT_PARAM_FLAG ( debug_level  )
DECLARE_STRING_PARAM_FLAG ( T  )
int main ( int  argc,
char **  argv 
)

This program reads in a text file consisting of feature samples from a training page in the following format:

      FontName UTF8-char-str xmin ymin xmax ymax page-number
       NumberOfFeatureTypes(N)
         FeatureTypeName1 NumberOfFeatures(M)
            Feature1
            ...
            FeatureM
         FeatureTypeName2 NumberOfFeatures(M)
            Feature1
            ...
            FeatureM
         ...
         FeatureTypeNameN NumberOfFeatures(M)
            Feature1
            ...
            FeatureM
      FontName CharName ...
    

The result of this program is a binary inttemp file used by the OCR engine.

Parameters:
argc: number of command line arguments
argv: array of command line arguments
Returns:
0 on success; 1 if the classifier could not be initialized
Note:
Exceptions: none
History: Fri Aug 18 08:56:17 1989, DSJ, Created.
History: Mon May 18 1998, Christy Russson, Revision started.

Definition at line 149 of file classifier_tester.cpp.

                                {
  ParseArguments(&argc, &argv);
  STRING file_prefix;
  tesseract::MasterTrainer* trainer = tesseract::LoadTrainingData(
      argc, argv, false, NULL, &file_prefix);
  tesseract::TessBaseAPI* api;
  // Decode the classifier string.
  tesseract::ShapeClassifier* shape_classifier = InitializeClassifier(
      FLAGS_classifier.c_str(), trainer->unicharset(), argc, argv, &api);
  if (shape_classifier == NULL) {
    fprintf(stderr, "Classifier init failed!:%s\n", FLAGS_classifier.c_str());
    return 1;
  }

  // We want to test junk as well if it is available.
  // trainer->IncludeJunk();
  // We want to test with replicated samples too.
  trainer->ReplicateAndRandomizeSamplesIfRequired();

  trainer->TestClassifierOnSamples(tesseract:: CT_UNICHAR_TOP1_ERR,
                                   MAX(3, FLAGS_debug_level), false,
                                   shape_classifier, NULL);
  delete shape_classifier;
  delete api;
  delete trainer;

  return 0;
} /* main */
STRING_PARAM_FLAG ( lang  ,
"eng"  ,
"Language to test"   
)
STRING_PARAM_FLAG ( tessdata_dir  ,
""  ,
"Directory of traineddata files"   
)
STRING_PARAM_FLAG ( classifier  ,
""  ,
"Classifier to test"   
)

Variable Documentation

const char* names[]
Initial value:
 {"pruner", "full",

  "cube", "cubetess",

  NULL }

Definition at line 49 of file classifier_tester.cpp.

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines