tesseract 3.04.01

training/unicharset_training_utils.h

Go to the documentation of this file.
00001 
00002 // File:        unicharset_training_utils.h
00003 // Description: Training utilities for UNICHARSET.
00004 // Author:      Ray Smith
00005 // Created:     Fri Oct 17 17:14:01 PDT 2014
00006 //
00007 // (C) Copyright 2014, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
00021 #define TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
00022 
00023 #include <string>
00024 
00025 #ifdef USE_STD_NAMESPACE
00026 using std::string;
00027 #endif
00028 
00029 class STATS;
00030 class UNICHARSET;
00031 
00032 namespace tesseract {
00033 
00034 // Helper that sets the character attribute properties of the given unicharset
00035 // and sets up the script table. Does not set tops and bottoms.
00036 void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset);
00037 
00038 // Helper that sets the properties for the unicharset in input_unicharset_file
00039 // and writes the result to output_unicharset_file. If an appropriate script
00040 // unicharset can be found in the script_dir directory, then the tops and
00041 // bottoms are expanded using that script unicharset.
00042 // If output_xheights_file is non-empty, font xheight data is written to it.
00043 void SetPropertiesForInputFile(const string& script_dir,
00044                                const string& input_unicharset_file,
00045                                const string& output_unicharset_file,
00046                                const string& output_xheights_file);
00047 
00048 }  // namespace tesseract.
00049 
00050 #endif  // TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines