|
tesseract 3.04.01
|
#include <ligature_table.h>
Public Member Functions | |
| string | AddLigatures (const string &str, const PangoFontInfo *font) const |
| string | RemoveLigatures (const string &str) const |
| string | RemoveCustomLigatures (const string &str) const |
| const LigHash & | norm_to_lig_table () const |
| const LigHash & | lig_to_norm_table () const |
Static Public Member Functions | |
| static LigatureTable * | Get () |
Protected Member Functions | |
| LigatureTable () | |
| void | Init () |
Protected Attributes | |
| LigHash | norm_to_lig_table_ |
| LigHash | lig_to_norm_table_ |
| int | min_lig_length_ |
| int | max_lig_length_ |
| int | min_norm_length_ |
| int | max_norm_length_ |
Static Protected Attributes | |
| static SmartPtr< LigatureTable > | instance_ |
Definition at line 37 of file ligature_table.h.
tesseract::LigatureTable::LigatureTable() [protected]
Definition at line 63 of file ligature_table.cpp.
{
if (norm_to_lig_table_.empty()) {
string tesseract::LigatureTable::AddLigatures(const string &str, const PangoFontInfo *font) const
Definition at line 159 of file ligature_table.cpp.
{
step = 0;
for (int liglen = max_norm_length_; liglen >= min_norm_length_; --liglen) {
if (i + liglen <= len) {
string lig_cand = str.substr(i, liglen);
LigHash::const_iterator it = norm_to_lig_table_.find(lig_cand);
if (it != norm_to_lig_table_.end()) {
tlog(3, "Considering %s -> %s\n", lig_cand.c_str(),
it->second.c_str());
if (font) {
// Test for renderability.
if (!font->CanRenderString(it->second.data(), it->second.length()))
continue; // Not renderable
}
// Found a match so convert it.
step = liglen;
result += it->second;
tlog(2, "Substituted %s -> %s\n", lig_cand.c_str(),
it->second.c_str());
break;
}
}
}
if (step == 0) {
result += str[i];
step = 1;
}
}
result += str.substr(i, len - i);
return result;
}
} // namespace tesseract
LigatureTable * tesseract::LigatureTable::Get() [static]
Definition at line 55 of file ligature_table.cpp.
: min_lig_length_(0), max_lig_length_(0), min_norm_length_(0), max_norm_length_(0) {}
void tesseract::LigatureTable::Init() [protected]
Definition at line 66 of file ligature_table.cpp.
{
norm_to_lig_table_[normed8] = lig8;
lig_to_norm_table_[lig8] = normed8;
if (min_lig_length_ == 0 || lig_length < min_lig_length_)
min_lig_length_ = lig_length;
if (lig_length > max_lig_length_)
max_lig_length_ = lig_length;
if (min_norm_length_ == 0 || norm_length < min_norm_length_)
min_norm_length_ = norm_length;
if (norm_length > max_norm_length_)
max_norm_length_ = norm_length;
}
}
// Add custom extra ligatures.
for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != NULL; ++i) {
norm_to_lig_table_[UNICHARSET::kCustomLigatures[i][0]] =
UNICHARSET::kCustomLigatures[i][1];
int norm_length = strlen(UNICHARSET::kCustomLigatures[i][0]);
if (min_norm_length_ == 0 || norm_length < min_norm_length_)
min_norm_length_ = norm_length;
if (norm_length > max_norm_length_)
max_norm_length_ = norm_length;
lig_to_norm_table_[UNICHARSET::kCustomLigatures[i][1]] =
UNICHARSET::kCustomLigatures[i][0];
}
}
}
string LigatureTable::RemoveLigatures(const string& str) const {
string result;
const LigHash& tesseract::LigatureTable::lig_to_norm_table() const [inline]
Definition at line 54 of file ligature_table.h.
{
return lig_to_norm_table_;
}
const LigHash& tesseract::LigatureTable::norm_to_lig_table() const [inline]
Definition at line 51 of file ligature_table.h.
{
return norm_to_lig_table_;
}
string tesseract::LigatureTable::RemoveCustomLigatures(const string &str) const
Definition at line 133 of file ligature_table.cpp.
{
len = it.get_utf8(tmp);
tmp[len] = '\0';
norm_ind = -1;
for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != NULL && norm_ind < 0;
++i) {
if (!strcmp(tmp, UNICHARSET::kCustomLigatures[i][1])) {
norm_ind = i;
}
}
if (norm_ind >= 0) {
result += UNICHARSET::kCustomLigatures[norm_ind][0];
} else {
result += tmp;
}
}
return result;
}
string LigatureTable::AddLigatures(const string& str,
const PangoFontInfo* font) const {
string tesseract::LigatureTable::RemoveLigatures(const string &str) const
Definition at line 114 of file ligature_table.cpp.
{
len = it.get_utf8(tmp);
tmp[len] = '\0';
LigHash::const_iterator lig_it = lig_to_norm_table_.find(tmp);
if (lig_it != lig_to_norm_table_.end()) {
result += lig_it->second;
} else {
result += tmp;
}
}
return result;
}
string LigatureTable::RemoveCustomLigatures(const string& str) const {
string result;
SmartPtr< LigatureTable > tesseract::LigatureTable::instance_ [static, protected]
Definition at line 64 of file ligature_table.h.
LigHash tesseract::LigatureTable::lig_to_norm_table_ [protected]
Definition at line 66 of file ligature_table.h.
int tesseract::LigatureTable::max_lig_length_ [protected]
Definition at line 68 of file ligature_table.h.
int tesseract::LigatureTable::max_norm_length_ [protected]
Definition at line 70 of file ligature_table.h.
int tesseract::LigatureTable::min_lig_length_ [protected]
Definition at line 67 of file ligature_table.h.
int tesseract::LigatureTable::min_norm_length_ [protected]
Definition at line 69 of file ligature_table.h.
LigHash tesseract::LigatureTable::norm_to_lig_table_ [protected]
Definition at line 65 of file ligature_table.h.