|
tesseract 3.04.01
|
#include <pango_font_info.h>
Static Public Member Functions | |
| static bool | IsAvailableFont (const char *font_desc) |
| static bool | IsAvailableFont (const char *font_desc, string *best_match) |
| static const vector< string > & | ListAvailableFonts () |
| static bool | SelectFont (const char *utf8_word, const int utf8_len, string *font_name, vector< string > *graphemes) |
| static bool | SelectFont (const char *utf8_word, const int utf8_len, const vector< string > &all_fonts, string *font_name, vector< string > *graphemes) |
| static void | GetAllRenderableCharacters (vector< bool > *unichar_bitmap) |
| static void | GetAllRenderableCharacters (const vector< string > &font_names, vector< bool > *unichar_bitmap) |
| static void | GetAllRenderableCharacters (const string &font_name, vector< bool > *unichar_bitmap) |
| static string | BestFonts (const unordered_map< char32, inT64 > &ch_map, vector< std::pair< const char *, vector< bool > > > *font_flag) |
| static int | FontScore (const unordered_map< char32, inT64 > &ch_map, const string &fontname, int *raw_score, vector< bool > *ch_flags) |
| static void | ReInit () |
Definition at line 143 of file pango_font_info.h.
| string tesseract::FontUtils::BestFonts | ( | const unordered_map< char32, inT64 > & | ch_map, |
| vector< std::pair< const char *, vector< bool > > > * | font_flag | ||
| ) | [static] |
Definition at line 702 of file pango_font_info.cpp.
{
  // Picks, out of every font reported by ListAvailableFonts(), the ones whose
  // coverage of ch_map is close enough to the best coverage found.
  // Each chosen font is appended to *fonts (cleared on entry) as a pair of
  // (font name, per-character coverage flags); the name pointer is the
  // c_str() of the entry in the available-font list. The return value is the
  // chosen font names, each followed by a newline.

  // Fraction of the best weighted score and of the best raw score that a font
  // normally has to reach.
  const double kMinOKFraction = 0.99;
  // Weighted fraction of characters that must be renderable in a font to make
  // it OK even if the raw count is not good.
  const double kMinWeightedFraction = 0.99995;

  fonts->clear();
  const vector<string>& candidates = FontUtils::ListAvailableFonts();

  // Pass 1: score every candidate and remember the best of each score.
  vector<vector<bool> > coverage_flags;
  vector<int> weighted_scores;
  vector<int> raw_counts;
  int top_weighted = 0;
  int top_raw = 0;
  for (std::size_t f = 0; f < candidates.size(); ++f) {
    vector<bool> flags;
    int raw = 0;
    const int weighted = FontScore(ch_map, candidates[f], &raw, &flags);
    if (weighted > top_weighted) top_weighted = weighted;
    if (raw > top_raw) top_raw = raw;
    coverage_flags.push_back(flags);
    weighted_scores.push_back(weighted);
    raw_counts.push_back(raw);
  }

  // Pass 2: keep the fonts that clear the thresholds.
  // To prevent bogus fonts being selected for CJK, a font normally needs a
  // high fraction (kMinOKFraction) of BOTH the best weighted and the best raw
  // score. In low character-count scripts the problem is rather finding
  // enough fonts, when only 1 or 2 might have all the rare dingbats etc., so
  // a font with a very high weighted score (kMinWeightedFraction) is accepted
  // even if its raw score is poor.
  const int min_weighted = static_cast<int>(top_weighted * kMinOKFraction);
  const int min_raw = static_cast<int>(top_raw * kMinOKFraction);
  const int min_override =
      static_cast<int>(top_weighted * kMinWeightedFraction);

  string selected_names;
  for (std::size_t f = 0; f < candidates.size(); ++f) {
    const int weighted = weighted_scores[f];
    const int raw = raw_counts[f];
    const bool weighted_ok = weighted >= min_weighted;
    const bool raw_ok = raw >= min_raw;
    if ((weighted_ok && raw_ok) || weighted >= min_override) {
      fonts->push_back(make_pair(candidates[f].c_str(), coverage_flags[f]));
      tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n",
           candidates[f].c_str(),
           100.0 * weighted / top_weighted,
           raw, 100.0 * raw / top_raw);
      selected_names += candidates[f];
      selected_names += "\n";
    } else if (weighted_ok || raw_ok) {
      // Good on one measure only: log it, but do not select it.
      tlog(1, "Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n",
           candidates[f].c_str(),
           100.0 * weighted / top_weighted,
           raw, 100.0 * raw / top_raw);
    }
  }
  return selected_names;
}
| int tesseract::FontUtils::FontScore | ( | const unordered_map< char32, inT64 > & | ch_map, |
| const string & | fontname, | ||
| int * | raw_score, | ||
| vector< bool > * | ch_flags | ||
| ) | [static] |
Definition at line 667 of file pango_font_info.cpp.
{
  // Scores how well the font described by fontname covers the characters that
  // are the keys of ch_map.
  //   *raw_score  is set to the number of ch_map entries that are covered.
  //   *ch_flags   (may be NULL) is cleared and then receives one
  //               covered/not-covered flag per ch_map entry, in ch_map
  //               iteration order.
  // Returns the sum of the ch_map values of the covered entries.
  // A character is covered if IsWhitespace() accepts it, or if Pango reports
  // PANGO_COVERAGE_EXACT for it in this font.
  PangoFontInfo font_info;
  if (!font_info.ParseFontDescriptionName(fontname)) {
    tprintf("ERROR: Could not parse %s\n", fontname.c_str());
  }
  // Never hand a NULL font to Pango: without a font there is no coverage map,
  // and only whitespace can count as covered.
  PangoFont* font = font_info.ToPangoFont();
  PangoCoverage* coverage = NULL;
  if (font != NULL) {
    coverage = pango_font_get_coverage(font, NULL);
  }
  if (ch_flags) {
    ch_flags->clear();
    ch_flags->reserve(ch_map.size());
  }
  *raw_score = 0;
  int ok_chars = 0;
  for (unordered_map<char32, inT64>::const_iterator it = ch_map.begin();
       it != ch_map.end(); ++it) {
    const bool covered =
        IsWhitespace(it->first) ||
        (coverage != NULL &&
         pango_coverage_get(coverage, it->first) == PANGO_COVERAGE_EXACT);
    if (covered) {
      ++(*raw_score);
      ok_chars += it->second;
    }
    if (ch_flags) {
      ch_flags->push_back(covered);
    }
  }
  // pango_font_get_coverage() hands the caller its own reference; release it.
  if (coverage != NULL) {
    pango_coverage_unref(coverage);
  }
  // NOTE(review): assumes ToPangoFont() returns a reference owned by the
  // caller (a freshly loaded font) - confirm against its definition.
  if (font != NULL) {
    g_object_unref(font);
  }
  return ok_chars;
}
| void tesseract::FontUtils::GetAllRenderableCharacters | ( | const string & | font_name, |
| vector< bool > * | unichar_bitmap | ||
| ) | [static] |
Definition at line 638 of file pango_font_info.cpp.
{
  // Converts the coverage map of the single font font_name into
  // *unichar_bitmap via CharCoverageMapToBitmap().
  PangoFontInfo font_info(font_name);
  PangoFont* font = font_info.ToPangoFont();
  PangoCoverage* coverage = NULL;
  if (font != NULL) {
    coverage = pango_font_get_coverage(font, NULL);
  }
  if (coverage == NULL) {
    // The font could not be loaded: use an empty coverage map instead of
    // passing NULL on to Pango.
    coverage = pango_coverage_new();
  }
  CharCoverageMapToBitmap(coverage, unichar_bitmap);
  // Both the map from pango_font_get_coverage() and the one from
  // pango_coverage_new() are owned by this function; release it.
  pango_coverage_unref(coverage);
  // NOTE(review): assumes ToPangoFont() returns a reference owned by the
  // caller - confirm against its definition.
  if (font != NULL) {
    g_object_unref(font);
  }
}
| void tesseract::FontUtils::GetAllRenderableCharacters | ( | vector< bool > * | unichar_bitmap | ) | [static] |
Definition at line 632 of file pango_font_info.cpp.
{
  // No font list given: use every font from ListAvailableFonts() and let the
  // font-list overload do the work.
  GetAllRenderableCharacters(ListAvailableFonts(), unichar_bitmap);
}
| void tesseract::FontUtils::GetAllRenderableCharacters | ( | const vector< string > & | font_names, |
| vector< bool > * | unichar_bitmap | ||
| ) | [static] |
Definition at line 647 of file pango_font_info.cpp.
{
  // Fills *unichar_bitmap from the union of the coverage maps of all the
  // fonts named in `fonts` (the font-name list parameter).
  PangoCoverage* all_coverage = pango_coverage_new();
  // size() is unsigned; convert explicitly to match the %d conversion.
  tlog(1, "Processing %d fonts\n", static_cast<int>(fonts.size()));
  for (std::size_t i = 0; i < fonts.size(); ++i) {
    PangoFontInfo font_info(fonts[i]);
    PangoFont* font = font_info.ToPangoFont();
    if (font == NULL) {
      // Font could not be loaded; it contributes nothing to the union.
      continue;
    }
    PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
    if (coverage != NULL) {
      // Mark off characters that any font can render.
      pango_coverage_max(all_coverage, coverage);
      // The per-font map is owned by this function; release it once merged.
      pango_coverage_unref(coverage);
    }
    // NOTE(review): assumes ToPangoFont() returns a reference owned by the
    // caller - confirm against its definition.
    g_object_unref(font);
  }
  CharCoverageMapToBitmap(all_coverage, unichar_bitmap);
  pango_coverage_unref(all_coverage);
}
| bool tesseract::FontUtils::IsAvailableFont | ( | const char * | font_desc, |
| string * | best_match | ||
| ) | [static] |
Definition at line 497 of file pango_font_info.cpp.
{
  // Loads the font that Pango selects for the description string
  // input_query_desc and reports whether the selected font's description
  // equals the requested one.
  //   Returns true  when the descriptions are equal.
  //   Returns false when no font could be loaded, when the descriptions
  //                 differ, or when input_query_desc is NULL.
  //   *best_match   (may be NULL) is written only when a font was loaded but
  //                 its description differs; it then receives the selected
  //                 font's description string, with a trailing " 0" removed.
  if (input_query_desc == NULL) {
    // A std::string cannot be constructed from NULL.
    return false;
  }
  string query_desc(input_query_desc);
  if (PANGO_VERSION <= 12005) {
    // Strip commas and any ' Medium' substring in the name.
    query_desc.erase(std::remove(query_desc.begin(), query_desc.end(), ','),
                     query_desc.end());
    const string kMediumStr = " Medium";
    std::size_t found = query_desc.find(kMediumStr);
    if (found != std::string::npos) {
      query_desc.erase(found, kMediumStr.length());
    }
  }

  PangoFontDescription* desc = pango_font_description_from_string(
      query_desc.c_str());
  PangoFont* selected_font = NULL;
  {
    PangoFontInfo::InitFontConfig(false, FLAGS_fonts_dir.c_str());
    PangoFontMap* font_map = pango_cairo_font_map_get_default();
    PangoContext* context = pango_context_new();
    pango_context_set_font_map(context, font_map);
    {
      DISABLE_HEAP_LEAK_CHECK;
      selected_font = pango_font_map_load_font(font_map, context, desc);
    }
    g_object_unref(context);
  }
  if (selected_font == NULL) {
    pango_font_description_free(desc);
    return false;
  }
  PangoFontDescription* selected_desc = pango_font_describe(selected_font);

  bool equal = pango_font_description_equal(desc, selected_desc);
  tlog(3, "query weight = %d \t selected weight =%d\n",
       pango_font_description_get_weight(desc),
       pango_font_description_get_weight(selected_desc));

  char* selected_desc_str = pango_font_description_to_string(selected_desc);
  // Both descriptions are printed; the second conversion needs its '%'.
  tlog(2, "query_desc: '%s' Selected: '%s'\n", query_desc.c_str(),
       selected_desc_str);
  if (!equal && best_match != NULL) {
    *best_match = selected_desc_str;
    // Clip the ending ' 0' if there is one. It seems that, if there is no
    // point size on the end of the fontname, then Pango always appends ' 0'.
    int len = best_match->size();
    if (len > 2 && best_match->at(len - 1) == '0' &&
        best_match->at(len - 2) == ' ') {
      *best_match = best_match->substr(0, len - 2);
    }
  }
  g_free(selected_desc_str);
  pango_font_description_free(selected_desc);
  g_object_unref(selected_font);
  pango_font_description_free(desc);
  return equal;
}
| static bool tesseract::FontUtils::IsAvailableFont | ( | const char * | font_desc | ) | [inline, static] |
Definition at line 147 of file pango_font_info.h.
{
  // Availability check only: no best-match name is requested, so the
  // two-argument overload is given a NULL output string.
  string* const no_best_match = NULL;
  return IsAvailableFont(font_desc, no_best_match);
}
| const vector< string > & tesseract::FontUtils::ListAvailableFonts | ( | ) | [static] |
Definition at line 569 of file pango_font_info.cpp.
{
  // Returns available_fonts_, filling it first if it is still empty.
  // Once filled, later calls return the same list without rebuilding it.
  if (!available_fonts_.empty()) {
    return available_fonts_;
  }
#ifndef USE_STD_NAMESPACE
  if (FLAGS_use_only_legacy_fonts) {
    // Restrict view to list of fonts in legacy_fonts.h
    tprintf("Using list of legacy fonts only\n");
    const int kNumFontLists = 4;
    for (int list = 0; list < kNumFontLists; ++list) {
      // Each list is read up to its NULL terminator.
      for (int entry = 0; kFontlists[list][entry] != NULL; ++entry) {
        available_fonts_.push_back(kFontlists[list][entry]);
      }
    }
    return available_fonts_;
  }
#endif

  // Otherwise walk every font family, and every face of each family that is
  // not filtered out by ShouldIgnoreFontFamilyName().
  PangoFontFamily** families = 0;
  int n_families = 0;
  ListFontFamilies(&families, &n_families);
  for (int fam = 0; fam < n_families; ++fam) {
    PangoFontFamily* const family = families[fam];
    const char* family_name = pango_font_family_get_name(family);
    tlog(2, "Listing family %s\n", family_name);
    if (!ShouldIgnoreFontFamilyName(family_name)) {
      int n_faces;
      PangoFontFace** faces = NULL;
      pango_font_family_list_faces(family, &faces, &n_faces);
      for (int face = 0; face < n_faces; ++face) {
        PangoFontDescription* desc = pango_font_face_describe(faces[face]);
        char* desc_str = pango_font_description_to_string(desc);
        // Only the string form is needed from here on.
        pango_font_description_free(desc);
        // Keep the face only if its description passes IsAvailableFont().
        if (IsAvailableFont(desc_str)) {
          available_fonts_.push_back(desc_str);
        }
        g_free(desc_str);
      }
      g_free(faces);
    }
  }
  g_free(families);

  sort(available_fonts_.begin(), available_fonts_.end());
  return available_fonts_;
}
| void tesseract::FontUtils::ReInit | ( | ) | [static] |
Definition at line 793 of file pango_font_info.cpp.
{
  // Empty the cached font list; ListAvailableFonts() refills it the next
  // time it finds the list empty.
  available_fonts_.clear();
}
| bool tesseract::FontUtils::SelectFont | ( | const char * | utf8_word, |
| const int | utf8_len, | ||
| const vector< string > & | all_fonts, | ||
| string * | font_name, | ||
| vector< string > * | graphemes | ||
| ) | [static] |
Definition at line 771 of file pango_font_info.cpp.
{
  // Searches all_fonts in order for the first font whose CanRenderString()
  // accepts utf8_word / utf8_len.
  //   On success: returns true; *font_name (if non-NULL) is the matching
  //               entry of all_fonts and *graphemes (if non-NULL) holds the
  //               graphemes reported by CanRenderString().
  //   On failure: returns false; both outputs (if non-NULL) are left empty.
  if (font_name != NULL) font_name->clear();
  if (graphemes != NULL) graphemes->clear();

  for (std::size_t i = 0; i < all_fonts.size(); ++i) {
    PangoFontInfo font;
    vector<string> rendered_graphemes;
    // The macro arguments are kept verbatim in case the macro stringifies
    // its condition into the failure message.
    ASSERT_HOST_MSG(font.ParseFontDescriptionName(all_fonts[i]),
                    "Could not parse font desc name %s\n",
                    all_fonts[i].c_str());
    if (!font.CanRenderString(utf8_word, utf8_len, &rendered_graphemes)) {
      continue;  // Try the next font.
    }
    if (graphemes != NULL) graphemes->swap(rendered_graphemes);
    if (font_name != NULL) *font_name = all_fonts[i];
    return true;
  }
  return false;
}
| bool tesseract::FontUtils::SelectFont | ( | const char * | utf8_word, |
| const int | utf8_len, | ||
| string * | font_name, | ||
| vector< string > * | graphemes | ||
| ) | [static] |
Definition at line 764 of file pango_font_info.cpp.
{
  // Convenience overload: the candidates are every font returned by
  // ListAvailableFonts().
  const vector<string>& every_font = ListAvailableFonts();
  return SelectFont(utf8_word, utf8_len, every_font, font_name, graphemes);
}