#include <tessdatamanager.h>
|
| | TessdataManager () |
| |
| | TessdataManager (FileReader reader) |
| |
| | ~TessdataManager ()=default |
| |
| bool | swap () const |
| |
| bool | is_loaded () const |
| |
| void | LoadFileLater (const char *data_file_name) |
| |
| bool | Init (const char *data_file_name) |
| |
| bool | LoadMemBuffer (const char *name, const char *data, int size) |
| |
| void | OverwriteEntry (TessdataType type, const char *data, int size) |
| |
| bool | SaveFile (const STRING &filename, FileWriter writer) const |
| |
| void | Serialize (GenericVector< char > *data) const |
| |
| void | Clear () |
| |
| void | Directory () const |
| |
| bool | IsComponentAvailable (TessdataType type) const |
| |
| bool | GetComponent (TessdataType type, TFile *fp) |
| |
| bool | GetComponent (TessdataType type, TFile *fp) const |
| |
| std::string | VersionString () const |
| |
| void | SetVersionString (const std::string &v_str) |
| |
| bool | IsBaseAvailable () const |
| |
| bool | IsLSTMAvailable () const |
| |
| const STRING & | GetDataFileName () const |
| |
| bool | CombineDataFiles (const char *language_data_path_prefix, const char *output_filename) |
| |
| bool | OverwriteComponents (const char *new_traineddata_filename, char **component_filenames, int num_new_components) |
| |
| bool | ExtractToFile (const char *filename) |
| |
Definition at line 126 of file tessdatamanager.h.
| tesseract::TessdataManager::TessdataManager |
( |
| ) |
|
Definition at line 42 of file tessdatamanager.cpp.
42 : reader_(
nullptr), is_loaded_(
false), swap_(
false) {
void SetVersionString(const std::string &v_str)
| tesseract::TessdataManager::TessdataManager |
( |
FileReader |
reader | ) |
|
|
explicit |
| tesseract::TessdataManager::~TessdataManager |
( |
| ) |
|
|
default |
| void tesseract::TessdataManager::Clear |
( |
| ) |
|
| bool tesseract::TessdataManager::CombineDataFiles |
( |
const char * |
language_data_path_prefix, |
|
|
const char * |
output_filename |
|
) |
| |
Reads all the standard Tesseract config and data files for a language, locating each one by appending its standard file suffix to language_data_path_prefix, and bundles them into a single binary data file written to output_filename. Returns true if the combined traineddata file was successfully written.
Definition at line 244 of file tessdatamanager.cpp.
248 for (
auto filesuffix : kTessdataFileSuffixes) {
250 ASSERT_HOST(TessdataTypeFromFileSuffix(filesuffix, &type));
251 STRING filename = language_data_path_prefix;
252 filename += filesuffix;
253 FILE *fp = fopen(filename.
string(),
"rb");
267 "Error: traineddata file must contain at least (a unicharset file" 268 "and inttemp) OR an lstm file.\n");
272 return SaveFile(output_filename,
nullptr);
bool SaveFile(const STRING &filename, FileWriter writer) const
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
bool IsLSTMAvailable() const
const char * string() const
DLLSYM void tprintf(const char *format,...)
bool IsBaseAvailable() const
| void tesseract::TessdataManager::Directory |
( |
| ) |
const |
Definition at line 202 of file tessdatamanager.cpp.
206 if (!entries_[i].empty()) {
207 tprintf(
"%d:%s:size=%d, offset=%d\n", i, kTessdataFileSuffixes[i],
208 entries_[i].size(), offset);
209 offset += entries_[i].
size();
std::string VersionString() const
DLLSYM void tprintf(const char *format,...)
| bool tesseract::TessdataManager::ExtractToFile |
( |
const char * |
filename | ) |
|
Extracts the tessdata component implied by the name of the input file from the combined traineddata loaded into TessdataManager, and writes the extracted component to the file indicated by that name. For example, if the filename given is somepath/somelang.unicharset, the unicharset component is extracted from the data loaded into the TessdataManager and written to somepath/somelang.unicharset.
Returns: true if the component was successfully extracted; false if the component was not present in the traineddata loaded into TessdataManager.
Definition at line 295 of file tessdatamanager.cpp.
298 tesseract::TessdataManager::TessdataTypeFromFileName(filename, &type));
299 if (entries_[type].empty())
return false;
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
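| bool tesseract::TessdataManager::GetComponent |
( |
TessdataType |
type, |
|
|
TFile * |
fp |
|
) |
| |
Definition at line 216 of file tessdatamanager.cpp.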
217 if (!is_loaded_ && !
Init(data_file_name_.
string()))
return false;
219 return const_this->GetComponent(
type, fp);
bool Init(const char *data_file_name)
const char * string() const
| bool tesseract::TessdataManager::GetComponent |
( |
TessdataType |
type, |
|
|
TFile * |
fp |
|
) |
| const |
Definition at line 224 of file tessdatamanager.cpp.
226 if (entries_[
type].empty())
return false;
227 fp->Open(&entries_[
type][0], entries_[type].size());
| const STRING& tesseract::TessdataManager::GetDataFileName |
( |
| ) |
const |
|
inline |
| bool tesseract::TessdataManager::Init |
( |
const char * |
data_file_name | ) |
|
Opens and reads the given data file right now.
Returns: true on success.
Definition at line 97 of file tessdatamanager.cpp.
99 if (reader_ ==
nullptr) {
100 #if defined(HAVE_LIBARCHIVE) 101 if (LoadArchiveFile(data_file_name))
return true;
105 if (!(*reader_)(data_file_name, &data))
return false;
bool LoadMemBuffer(const char *name, const char *data, int size)
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
| bool tesseract::TessdataManager::is_loaded |
( |
| ) |
const |
|
inline |
| bool tesseract::TessdataManager::IsBaseAvailable |
( |
| ) |
const |
|
inline |
| bool tesseract::TessdataManager::IsComponentAvailable |
( |
TessdataType |
type | ) |
const |
|
inline |
| bool tesseract::TessdataManager::IsLSTMAvailable |
( |
| ) |
const |
|
inline |
| void tesseract::TessdataManager::LoadFileLater |
( |
const char * |
data_file_name | ) |
|
| bool tesseract::TessdataManager::LoadMemBuffer |
( |
const char * |
name, |
|
|
const char * |
data, |
|
|
int |
size |
|
) |
| |
Definition at line 111 of file tessdatamanager.cpp.
115 data_file_name_ = name;
118 uint32_t num_entries;
119 if (!fp.DeSerialize(&num_entries))
return false;
120 swap_ = num_entries > kMaxNumTessdataEntries;
122 if (swap_)
ReverseN(&num_entries,
sizeof(num_entries));
123 if (num_entries > kMaxNumTessdataEntries)
return false;
126 if (!fp.DeSerialize(&offset_table[0], num_entries))
return false;
128 if (offset_table[i] >= 0) {
129 int64_t entry_size = size - offset_table[i];
131 while (j < num_entries && offset_table[j] == -1) ++j;
132 if (j < num_entries) entry_size = offset_table[j] - offset_table[i];
134 if (!fp.DeSerialize(&entries_[i][0], entry_size))
return false;
void resize_no_init(int size)
void SetVersionString(const std::string &v_str)
void ReverseN(void *ptr, int num_bytes)
| bool tesseract::TessdataManager::OverwriteComponents |
( |
const char * |
new_traineddata_filename, |
|
|
char ** |
component_filenames, |
|
|
int |
num_new_components |
|
) |
| |
Gets the individual components from the data file (data_file_) with which the class was initialized, overwrites the components specified by the num_new_components entries of component_filenames, and writes the updated traineddata file to new_traineddata_filename.
Definition at line 275 of file tessdatamanager.cpp.
281 for (
int i = 0; i < num_new_components; ++i) {
283 if (TessdataTypeFromFileName(component_filenames[i], &type)) {
285 tprintf(
"Failed to read component file:%s\n", component_filenames[i]);
292 return SaveFile(new_traineddata_filename,
nullptr);
bool SaveFile(const STRING &filename, FileWriter writer) const
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
DLLSYM void tprintf(const char *format,...)
| void tesseract::TessdataManager::OverwriteEntry |
( |
TessdataType |
type, |
|
|
const char * |
data, |
|
|
int |
size |
|
) |
| |
| bool tesseract::TessdataManager::SaveFile |
( |
const STRING & |
filename, |
|
|
FileWriter |
writer |
|
) |
| const |
Definition at line 153 of file tessdatamanager.cpp.
159 if (writer ==
nullptr)
162 return (*writer)(data, filename);
void Serialize(GenericVector< char > *data) const
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
| void tesseract::TessdataManager::Serialize |
( |
GenericVector< char > * |
data | ) |
const |
Definition at line 166 of file tessdatamanager.cpp.
171 int64_t offset =
sizeof(int32_t) +
sizeof(offset_table);
173 if (entries_[i].empty()) {
174 offset_table[i] = -1;
176 offset_table[i] = offset;
177 offset += entries_[i].
size();
184 fp.Serialize(&num_entries);
185 fp.Serialize(&offset_table[0],
countof(offset_table));
186 for (
const auto& entry : entries_) {
187 if (!entry.empty()) {
188 fp.Serialize(&entry[0], entry.size());
void init_to_size(int size, const T &t)
constexpr size_t countof(T const (&)[N]) noexcept
| void tesseract::TessdataManager::SetVersionString |
( |
const std::string & |
v_str | ) |
|
| bool tesseract::TessdataManager::swap |
( |
| ) |
const |
|
inline |
| std::string tesseract::TessdataManager::VersionString |
( |
| ) |
const |
The documentation for this class was generated from the following files: