tesseract 3.04.01

tesseract::LigatureTable Class Reference

#include <ligature_table.h>

List of all members.

Public Member Functions

string AddLigatures (const string &str, const PangoFontInfo *font) const
string RemoveLigatures (const string &str) const
string RemoveCustomLigatures (const string &str) const
const LigHash & norm_to_lig_table () const
const LigHash & lig_to_norm_table () const

Static Public Member Functions

static LigatureTable * Get ()

Protected Member Functions

 LigatureTable ()
void Init ()

Protected Attributes

LigHash norm_to_lig_table_
LigHash lig_to_norm_table_
int min_lig_length_
int max_lig_length_
int min_norm_length_
int max_norm_length_

Static Protected Attributes

static SmartPtr< LigatureTable > instance_

Detailed Description

Definition at line 37 of file ligature_table.h.


Constructor & Destructor Documentation

tesseract::LigatureTable::LigatureTable ( ) [protected]

Definition at line 63 of file ligature_table.cpp.

                         {
  if (norm_to_lig_table_.empty()) {

Member Function Documentation

string tesseract::LigatureTable::AddLigatures ( const string &  str,
const PangoFontInfo *  font
) const

Definition at line 159 of file ligature_table.cpp.

                                                         {
    step = 0;
    for (int liglen = max_norm_length_; liglen >= min_norm_length_; --liglen) {
      if (i + liglen <= len) {
        string lig_cand = str.substr(i, liglen);
        LigHash::const_iterator it = norm_to_lig_table_.find(lig_cand);
        if (it != norm_to_lig_table_.end()) {
          tlog(3, "Considering %s -> %s\n", lig_cand.c_str(),
               it->second.c_str());
          if (font) {
            // Test for renderability.
            if (!font->CanRenderString(it->second.data(), it->second.length()))
              continue;  // Not renderable
          }
          // Found a match so convert it.
          step = liglen;
          result += it->second;
          tlog(2, "Substituted %s -> %s\n", lig_cand.c_str(),
               it->second.c_str());
          break;
        }
      }
    }
    if (step == 0) {
      result += str[i];
      step = 1;
    }
  }
  result += str.substr(i, len - i);
  return result;
}

}  // namespace tesseract
LigatureTable * tesseract::LigatureTable::Get ( ) [static]

Definition at line 55 of file ligature_table.cpp.

void tesseract::LigatureTable::Init ( ) [protected]

Definition at line 66 of file ligature_table.cpp.

                                                                {
        norm_to_lig_table_[normed8] = lig8;
        lig_to_norm_table_[lig8] = normed8;
        if (min_lig_length_ == 0 || lig_length < min_lig_length_)
          min_lig_length_ = lig_length;
        if (lig_length > max_lig_length_)
          max_lig_length_ = lig_length;
        if (min_norm_length_ == 0 || norm_length < min_norm_length_)
          min_norm_length_ = norm_length;
        if (norm_length > max_norm_length_)
          max_norm_length_ = norm_length;
      }
    }
    // Add custom extra ligatures.
    for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != NULL; ++i) {
      norm_to_lig_table_[UNICHARSET::kCustomLigatures[i][0]] =
          UNICHARSET::kCustomLigatures[i][1];
      int norm_length = strlen(UNICHARSET::kCustomLigatures[i][0]);
      if (min_norm_length_ == 0 || norm_length < min_norm_length_)
        min_norm_length_ = norm_length;
      if (norm_length > max_norm_length_)
        max_norm_length_ = norm_length;

      lig_to_norm_table_[UNICHARSET::kCustomLigatures[i][1]] =
          UNICHARSET::kCustomLigatures[i][0];
    }
  }
}

string LigatureTable::RemoveLigatures(const string& str) const {
  string result;
const LigHash& tesseract::LigatureTable::lig_to_norm_table ( ) const [inline]

Definition at line 54 of file ligature_table.h.

                                           {
    return lig_to_norm_table_;
  }
const LigHash& tesseract::LigatureTable::norm_to_lig_table ( ) const [inline]

Definition at line 51 of file ligature_table.h.

                                           {
    return norm_to_lig_table_;
  }
string tesseract::LigatureTable::RemoveCustomLigatures ( const string &  str) const

Definition at line 133 of file ligature_table.cpp.

                                                                {
    len = it.get_utf8(tmp);
    tmp[len] = '\0';
    norm_ind = -1;
    for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != NULL && norm_ind < 0;
         ++i) {
      if (!strcmp(tmp, UNICHARSET::kCustomLigatures[i][1])) {
        norm_ind = i;
      }
    }
    if (norm_ind >= 0) {
      result += UNICHARSET::kCustomLigatures[norm_ind][0];
    } else {
      result += tmp;
    }
  }
  return result;
}

string LigatureTable::AddLigatures(const string& str,
                                   const PangoFontInfo* font) const {
string tesseract::LigatureTable::RemoveLigatures ( const string &  str) const

Definition at line 114 of file ligature_table.cpp.

                                                                {
    len = it.get_utf8(tmp);
    tmp[len] = '\0';
    LigHash::const_iterator lig_it = lig_to_norm_table_.find(tmp);
    if (lig_it != lig_to_norm_table_.end()) {
      result += lig_it->second;
    } else {
      result += tmp;
    }
  }
  return result;
}

string LigatureTable::RemoveCustomLigatures(const string& str) const {
  string result;

Member Data Documentation

Definition at line 64 of file ligature_table.h.

Definition at line 68 of file ligature_table.h.

Definition at line 70 of file ligature_table.h.

Definition at line 67 of file ligature_table.h.

Definition at line 69 of file ligature_table.h.


The documentation for this class was generated from the following files:
ligature_table.h
ligature_table.cpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines