|
tesseract 3.04.01
|
#include <pango_font_info.h>
Public Types | |
| enum | FontTypeEnum { UNKNOWN, SERIF, SANS_SERIF, DECORATIVE } |
Public Member Functions | |
| PangoFontInfo () | |
| PangoFontInfo (const string &name) | |
| bool | ParseFontDescriptionName (const string &name) |
| bool | CoversUTF8Text (const char *utf8_text, int byte_length) const |
| int | DropUncoveredChars (string *utf8_text) const |
| bool | CanRenderString (const char *utf8_word, int len, vector< string > *graphemes) const |
| bool | CanRenderString (const char *utf8_word, int len) const |
| bool | GetSpacingProperties (const string &utf8_char, int *x_bearing, int *x_advance) const |
| string | DescriptionName () const |
| const string & | family_name () const |
| const int | font_size () const |
| const bool | is_bold () const |
| const bool | is_italic () const |
| const bool | is_smallcaps () const |
| const bool | is_monospace () const |
| const bool | is_fraktur () const |
| const FontTypeEnum | font_type () const |
| const int | resolution () const |
| void | set_resolution (const int resolution) |
Static Public Member Functions | |
| static void | InitFontConfig (bool force_clear, const string &fonts_dir) |
Friends | |
| class | FontUtils |
Definition at line 38 of file pango_font_info.h.
Definition at line 40 of file pango_font_info.h.
{
// Enumerators of FontTypeEnum in declaration order. No explicit values are
// given, so UNKNOWN is 0 and each following enumerator is one greater.
UNKNOWN,
SERIF,
SANS_SERIF,
DECORATIVE,
};
| tesseract::PangoFontInfo::PangoFontInfo | ( | ) |
Definition at line 78 of file pango_font_info.cpp.
// Default constructor: starts with a NULL desc_ and resolution_ set to
// kDefaultResolution, then calls Clear().
: desc_(NULL), resolution_(kDefaultResolution) { Clear(); }
| tesseract::PangoFontInfo::PangoFontInfo | ( | const string & | name | ) | [explicit] |
Definition at line 82 of file pango_font_info.cpp.
: desc_(NULL), resolution_(kDefaultResolution) {
  // Construct from a textual font description. A description that parses
  // successfully needs no further work.
  // NOTE(review): the declaration lists this parameter as `name`, while this
  // definition body refers to it as `desc` -- confirm against the .cpp file.
  if (ParseFontDescriptionName(desc)) {
    return;
  }
  // Parsing failed: report it and call Clear().
  tprintf("ERROR: Could not parse %s\n", desc.c_str());
  Clear();
}
| bool tesseract::PangoFontInfo::CanRenderString | ( | const char * | utf8_word, |
| int | len, | ||
| vector< string > * | graphemes | ||
| ) | const |
Definition at line 377 of file pango_font_info.cpp.
{
// Reports whether this font can render the first len bytes of utf8_word
// without Pango producing an unknown glyph or the dotted-circle glyph.
// If graphemes is non-NULL it is cleared on entry, receives the text of every
// cluster visited (whitespace clusters included), and is cleared again when
// the function returns false because of a bad glyph.
if (graphemes) graphemes->clear();
// We check for font coverage of the text first, as otherwise Pango could
// (undesirably) fall back to another font that does have the required
// coverage.
if (!CoversUTF8Text(utf8_word, len)) {
return false;
}
// U+25CC dotted circle character that often (but not always) gets rendered
// when there is an illegal grapheme sequence.
const char32 kDottedCircleGlyph = 9676;
bool bad_glyph = false;
// Build a throwaway context and layout on the default cairo font map; both
// are unreferenced before returning.
PangoFontMap* font_map = pango_cairo_font_map_get_default();
PangoContext* context = pango_context_new();
pango_context_set_font_map(context, font_map);
PangoLayout* layout;
{
// Pango is not releasing the cached layout.
DISABLE_HEAP_LEAK_CHECK;
layout = pango_layout_new(context);
}
if (desc_) {
pango_layout_set_font_description(layout, desc_);
} else {
// No stored description: build a temporary one from DescriptionName() and
// free it once it has been handed to the layout.
PangoFontDescription *desc = pango_font_description_from_string(
DescriptionName().c_str());
pango_layout_set_font_description(layout, desc);
pango_font_description_free(desc);
}
pango_layout_set_text(layout, utf8_word, len);
PangoLayoutIter* run_iter = NULL;
{ // Fontconfig caches some information here that is not freed before exit.
DISABLE_HEAP_LEAK_CHECK;
run_iter = pango_layout_get_iter(layout);
}
// Walk every run of the layout; stop early as soon as a bad glyph is seen.
do {
PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
if (!run) {
tlog(2, "Found end of line NULL run marker\n");
// In a do/while, continue jumps to the loop condition, which advances the
// iterator to the next run.
continue;
}
PangoGlyph dotted_circle_glyph;
PangoFont* font = run->item->analysis.font;
// Look up which glyph id the run's font uses for the dotted circle, so it
// can be compared against the shaped glyphs below.
dotted_circle_glyph = pango_fc_font_get_glyph(
reinterpret_cast<PangoFcFont*>(font), kDottedCircleGlyph);
if (TLOG_IS_ON(2)) {
PangoFontDescription* desc = pango_font_describe(font);
char* desc_str = pango_font_description_to_string(desc);
tlog(2, "Desc of font in run: %s\n", desc_str);
g_free(desc_str);
pango_font_description_free(desc);
}
// Step through the run one cluster at a time.
PangoGlyphItemIter cluster_iter;
gboolean have_cluster;
for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
run, utf8_word);
have_cluster && !bad_glyph;
have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
const int start_byte_index = cluster_iter.start_index;
const int end_byte_index = cluster_iter.end_index;
int start_glyph_index = cluster_iter.start_glyph;
int end_glyph_index = cluster_iter.end_glyph;
// The cluster's byte range indexes directly into utf8_word.
string cluster_text = string(utf8_word + start_byte_index,
end_byte_index - start_byte_index);
if (graphemes) graphemes->push_back(cluster_text);
if (IsUTF8Whitespace(cluster_text.c_str())) {
tlog(2, "Skipping whitespace\n");
continue;
}
if (TLOG_IS_ON(2)) {
printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ",
start_byte_index, end_byte_index,
start_glyph_index, end_glyph_index);
}
// end_glyph_index may be smaller than start_glyph_index, so step in
// whichever direction reaches it (end index itself is exclusive).
for (int i = start_glyph_index,
step = (end_glyph_index > start_glyph_index) ? 1 : -1;
!bad_glyph && i != end_glyph_index; i+= step) {
const bool unknown_glyph =
(cluster_iter.glyph_item->glyphs->glyphs[i].glyph &
PANGO_GLYPH_UNKNOWN_FLAG);
const bool illegal_glyph =
(cluster_iter.glyph_item->glyphs->glyphs[i].glyph ==
dotted_circle_glyph);
bad_glyph = unknown_glyph || illegal_glyph;
if (TLOG_IS_ON(2)) {
printf("(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph,
bad_glyph ? 1 : 0);
}
}
if (TLOG_IS_ON(2)) {
printf(" '%s'\n", cluster_text.c_str());
}
if (bad_glyph)
tlog(1, "Found illegal glyph!\n");
}
} while (!bad_glyph && pango_layout_iter_next_run(run_iter));
// Release everything created above.
pango_layout_iter_free(run_iter);
g_object_unref(context);
g_object_unref(layout);
// A partially filled grapheme list is not returned on failure.
if (bad_glyph && graphemes) graphemes->clear();
return !bad_glyph;
}
| bool tesseract::PangoFontInfo::CanRenderString | ( | const char * | utf8_word, |
| int | len | ||
| ) | const |
Definition at line 372 of file pango_font_info.cpp.
{
  // Convenience overload for callers that only need the yes/no answer.
  // The three-argument overload guards every use of its graphemes pointer
  // with a NULL check, so passing NULL yields the same return value without
  // filling a vector of cluster strings that would be thrown away.
  return CanRenderString(utf8_word, len, static_cast<vector<string>*>(NULL));
}
| bool tesseract::PangoFontInfo::CoversUTF8Text | ( | const char * | utf8_text, |
| int | byte_length | ||
| ) | const |
Definition at line 252 of file pango_font_info.cpp.
{
  // Reports whether every code point in the first byte_length bytes of
  // utf8_text is either whitespace, zero-width, or covered exactly
  // (PANGO_COVERAGE_EXACT) by this font. Returns false at the first code
  // point that is not.
  PangoFont* font = ToPangoFont();
  // pango_font_get_coverage() returns a coverage object owned by the caller,
  // so it has to be released on every exit path below.
  PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
  bool all_covered = true;
  const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_text, byte_length);
  for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length);
       it != it_end; ++it) {
    if (IsWhitespace(*it) || pango_is_zero_width(*it))
      continue;
    if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
      // Re-encode the offending code point as UTF-8 purely for the log line.
      char tmp[5];
      int len = it.get_utf8(tmp);
      tmp[len] = '\0';
      tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
      all_covered = false;
      break;
    }
  }
  if (coverage != NULL) {
    pango_coverage_unref(coverage);
  }
  // NOTE(review): if ToPangoFont() transfers ownership of the returned font,
  // it should also be released here with g_object_unref(font) -- confirm
  // against ToPangoFont().
  return all_covered;
}
| string tesseract::PangoFontInfo::DescriptionName | ( | ) | const |
Definition at line 104 of file pango_font_info.cpp.
{
  // With no Pango description stored there is nothing to serialize.
  if (desc_ == NULL) {
    return "";
  }
  // Pango allocates the textual form; copy it into a string and then hand
  // the buffer back with g_free().
  char* pango_text = pango_font_description_to_string(desc_);
  string result(pango_text);
  g_free(pango_text);
  return result;
}
| int tesseract::PangoFontInfo::DropUncoveredChars | ( | string * | utf8_text | ) | const |
Definition at line 293 of file pango_font_info.cpp.
{
  // Removes from *utf8_text, in place, every code point that is neither
  // whitespace nor zero-width and that this font does not cover exactly
  // (PANGO_COVERAGE_EXACT). Returns the number of code points removed for
  // lack of coverage. Illegal UTF-8 is also skipped (not copied to the
  // output) but is not included in the returned count.
  PangoFont* font = ToPangoFont();
  // pango_font_get_coverage() returns a coverage object owned by the caller;
  // it is released before returning.
  PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
  int num_dropped_chars = 0;
  // Maintain two iterators that point into the string. For space efficiency, we
  // will repeatedly copy one covered UTF8 character from one to the other, and
  // at the end resize the string to the right length.
  char* out = const_cast<char*>(utf8_text->c_str());
  const UNICHAR::const_iterator it_begin =
      UNICHAR::begin(utf8_text->c_str(), utf8_text->length());
  const UNICHAR::const_iterator it_end =
      UNICHAR::end(utf8_text->c_str(), utf8_text->length());
  for (UNICHAR::const_iterator it = it_begin; it != it_end;) {
    // Skip bad utf-8.
    if (!it.is_legal()) {
      ++it;  // One suitable error message will still be issued.
      continue;
    }
    int unicode = *it;
    int utf8_len = it.utf8_len();
    const char* utf8_char = it.utf8_data();
    // Move it forward before the data gets modified.
    ++it;
    if (!IsWhitespace(unicode) && !pango_is_zero_width(unicode) &&
        pango_coverage_get(coverage, unicode) != PANGO_COVERAGE_EXACT) {
      if (TLOG_IS_ON(2)) {
        UNICHAR unichar(unicode);
        char* str = unichar.utf8_str();
        tlog(2, "'%s' (U+%x) not covered by font\n", str, unicode);
        delete[] str;
      }
      ++num_dropped_chars;
      continue;
    }
    // Keep this character: copy it down to the write position.
    my_strnmove(out, utf8_char, utf8_len);
    out += utf8_len;
  }
  // Trim the string to the bytes that were kept.
  utf8_text->resize(out - utf8_text->c_str());
  if (coverage != NULL) {
    pango_coverage_unref(coverage);
  }
  // NOTE(review): if ToPangoFont() transfers ownership of the returned font,
  // it should also be released here with g_object_unref(font) -- confirm
  // against ToPangoFont().
  return num_dropped_chars;
}
| const string& tesseract::PangoFontInfo::family_name | ( | ) | const [inline] |
Definition at line 94 of file pango_font_info.h.
// Accessor: returns a const reference to the family_name_ member.
{ return family_name_; }
| const int tesseract::PangoFontInfo::font_size | ( | ) | const [inline] |
Definition at line 96 of file pango_font_info.h.
// Accessor: returns the font_size_ member.
{ return font_size_; }
| const FontTypeEnum tesseract::PangoFontInfo::font_type | ( | ) | const [inline] |
Definition at line 102 of file pango_font_info.h.
// Accessor: returns the font_type_ member (a FontTypeEnum value).
{ return font_type_; }
| bool tesseract::PangoFontInfo::GetSpacingProperties | ( | const string & | utf8_char, |
| int * | x_bearing, | ||
| int * | x_advance | ||
| ) | const |
Definition at line 334 of file pango_font_info.cpp.
{
  // Computes horizontal spacing for utf8_char in this font, in pixels.
  // On success writes the left-most ink bearing to *x_bearing and the summed
  // advance to *x_advance and returns true. Returns false, leaving the
  // outputs untouched, if any code point has no glyph in the font.
  PangoFont* pango_font = ToPangoFont();
  PangoFcFont* fc_font = reinterpret_cast<PangoFcFont*>(pango_font);

  // A string of several code points is measured as if they were laid out
  // one after another: track the running advance and the smallest bearing.
  int advance_sum = 0;
  int leftmost_bearing = 0;
  const UNICHAR::const_iterator first =
      UNICHAR::begin(utf8_char.c_str(), utf8_char.length());
  const UNICHAR::const_iterator last =
      UNICHAR::end(utf8_char.c_str(), utf8_char.length());

  UNICHAR::const_iterator cur = first;
  while (cur != last) {
    // Map the code point to its glyph index in the font.
    const PangoGlyph glyph = pango_fc_font_get_glyph(fc_font, *cur);
    if (glyph == 0) {
      // The font has no glyph for this code point.
      return false;
    }

    // Fetch ink and logical extents and convert both to pixels.
    PangoRectangle ink_box, logical_box;
    pango_font_get_glyph_extents(pango_font, glyph, &ink_box, &logical_box);
    pango_extents_to_pixels(&ink_box, NULL);
    pango_extents_to_pixels(&logical_box, NULL);

    // The first code point always seeds the minimum; later ones only lower it.
    const int candidate = advance_sum + PANGO_LBEARING(ink_box);
    const bool is_first = (cur == first);
    if (is_first || candidate < leftmost_bearing) {
      leftmost_bearing = candidate;
    }
    advance_sum += PANGO_RBEARING(logical_box);
    ++cur;
  }

  *x_bearing = leftmost_bearing;
  *x_advance = advance_sum;
  return true;
}
| void tesseract::PangoFontInfo::InitFontConfig | ( | bool | force_clear, |
| const string & | fonts_dir | ||
| ) | [static] |
Definition at line 117 of file pango_font_info.cpp.
{
  // Points fontconfig at fonts_dir by writing a fonts.conf into
  // FLAGS_fontconfig_tmpdir, exporting FONTCONFIG_PATH and LANG, and
  // reinitializing fontconfig.
  //   force_clear: redo the setup even if it already ran, and delete the
  //                cache files in the tmpdir.
  //   fonts_dir:   directory to list in the generated config; when empty,
  //                nothing is done beyond marking fontconfig as initialized.
  if ((fontconfig_initialized_ && !force_clear) || fonts_dir.empty()) {
    fontconfig_initialized_ = true;
    return;
  }
  if (FLAGS_fontconfig_refresh_cache || force_clear) {
    File::DeleteMatchingFiles(File::JoinPath(
        FLAGS_fontconfig_tmpdir.c_str(), "*cache-?").c_str());
  }
  if (FLAGS_fontconfig_refresh_config_file || FLAGS_fontconfig_refresh_cache ||
      force_clear) {
    // Assemble the config in a growable string rather than a fixed-size
    // buffer: long directory paths must not be able to truncate the XML.
    string fonts_conf =
        "<?xml version=\"1.0\"?>\n"
        "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
        "<fontconfig>\n"
        "<dir>";
    fonts_conf += fonts_dir.c_str();
    fonts_conf += "</dir>\n"
                  "<cachedir>";
    fonts_conf += FLAGS_fontconfig_tmpdir.c_str();
    fonts_conf += "</cachedir>\n"
                  "<config></config>\n"
                  "</fontconfig>";
    string fonts_conf_file = File::JoinPath(FLAGS_fontconfig_tmpdir.c_str(),
                                            "fonts.conf");
    File::WriteStringToFileOrDie(fonts_conf.c_str(), fonts_conf_file);
  }
#ifdef _WIN32
  std::string env("FONTCONFIG_PATH=");
  env.append(FLAGS_fontconfig_tmpdir.c_str());
  putenv(env.c_str());
  putenv("LANG=en_US.utf8");
#else
  setenv("FONTCONFIG_PATH", FLAGS_fontconfig_tmpdir.c_str(), true);
  // Fix the locale so that the reported font names are consistent.
  setenv("LANG", "en_US.utf8", true);
#endif  // _WIN32
  if (!fontconfig_initialized_ || force_clear) {
    if (FcInitReinitialize() != FcTrue) {
      tprintf("FcInitiReinitialize failed!!\n");
    }
  }
  fontconfig_initialized_ = true;
  FontUtils::ReInit();
}
| const bool tesseract::PangoFontInfo::is_bold | ( | ) | const [inline] |
Definition at line 97 of file pango_font_info.h.
// Accessor: returns the is_bold_ member.
{ return is_bold_; }
| const bool tesseract::PangoFontInfo::is_fraktur | ( | ) | const [inline] |
Definition at line 101 of file pango_font_info.h.
// Accessor: returns the is_fraktur_ member.
{ return is_fraktur_; }
| const bool tesseract::PangoFontInfo::is_italic | ( | ) | const [inline] |
Definition at line 98 of file pango_font_info.h.
// Accessor: returns the is_italic_ member.
{ return is_italic_; }
| const bool tesseract::PangoFontInfo::is_monospace | ( | ) | const [inline] |
Definition at line 100 of file pango_font_info.h.
// Accessor: returns the is_monospace_ member.
{ return is_monospace_; }
| const bool tesseract::PangoFontInfo::is_smallcaps | ( | ) | const [inline] |
Definition at line 99 of file pango_font_info.h.
// Accessor: returns the is_smallcaps_ member.
{ return is_smallcaps_; }
| bool tesseract::PangoFontInfo::ParseFontDescriptionName | ( | const string & | name | ) |
Definition at line 227 of file pango_font_info.cpp.
{
  // Turn the textual name into a temporary Pango description, let
  // ParseFontDescription() consume it, and free the temporary before
  // reporting whether parsing succeeded.
  PangoFontDescription* parsed =
      pango_font_description_from_string(name.c_str());
  const bool parsed_ok = ParseFontDescription(parsed);
  pango_font_description_free(parsed);
  return parsed_ok;
}
| const int tesseract::PangoFontInfo::resolution | ( | ) | const [inline] |
Definition at line 104 of file pango_font_info.h.
// Accessor: returns the resolution_ member.
{ return resolution_; }
| void tesseract::PangoFontInfo::set_resolution | ( | const int | resolution | ) | [inline] |
Definition at line 105 of file pango_font_info.h.
{
// Stores the given value in resolution_; no validation is performed here.
resolution_ = resolution;
}
friend class FontUtils [friend] |
Definition at line 110 of file pango_font_info.h.