|
tesseract 3.04.01
|
// File:        unicharset_training_utils.h
// Description: Training utilities for UNICHARSET.
// Author:      Ray Smith
// Created:     Fri Oct 17 17:14:01 PDT 2014
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#ifndef TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
#define TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_

#include <string>

// Kept for backward compatibility: code that includes this header may rely on
// an unqualified `string` being visible when USE_STD_NAMESPACE is defined.
// The declarations below no longer depend on it.
#ifdef USE_STD_NAMESPACE
using std::string;
#endif

class STATS;
class UNICHARSET;

namespace tesseract {

// Helper sets the character attribute properties and sets up the script table
// of the given unicharset. Does not set tops and bottoms.
//   report_errors: presumably enables reporting of problems found while
//                  setting the properties -- confirm against the
//                  implementation.
//   unicharset:    the UNICHARSET whose properties are set.
void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset);

// Helper to set the properties for an input unicharset file; writes the result
// to the output file. If an appropriate script unicharset can be found in the
// script_dir directory, then the tops and bottoms are expanded using the
// script unicharset.
// If output_xheights_file is non-empty, xheight data for the fonts are written
// to that file.
//
// The parameters are spelled std::string explicitly so that this header
// compiles on its own, whether or not USE_STD_NAMESPACE is defined.
void SetPropertiesForInputFile(const std::string& script_dir,
                               const std::string& input_unicharset_file,
                               const std::string& output_unicharset_file,
                               const std::string& output_xheights_file);

}  // namespace tesseract.

#endif  // TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_