|
tesseract 3.04.01
|
#include <stringrenderer.h>
Public Member Functions | |
| StringRenderer (const string &font_desc, int page_width, int page_height) | |
| ~StringRenderer () | |
| int | RenderToImage (const char *text, int text_length, Pix **pix) |
| int | RenderToGrayscaleImage (const char *text, int text_length, Pix **pix) |
| int | RenderToBinaryImage (const char *text, int text_length, int threshold, Pix **pix) |
| int | RenderAllFontsToImage (double min_coverage, const char *text, int text_length, string *font_used, Pix **pix) |
| bool | set_font (const string &desc) |
| void | set_char_spacing (double char_spacing) |
| void | set_leading (int leading) |
| void | set_resolution (const int resolution) |
| void | set_vertical_text (bool vertical_text) |
| void | set_gravity_hint_strong (bool gravity_hint_strong) |
| void | set_render_fullwidth_latin (bool render_fullwidth_latin) |
| void | set_underline_start_prob (const double frac) |
| void | set_underline_continuation_prob (const double frac) |
| void | set_underline_style (const PangoUnderline style) |
| void | set_page (int page) |
| void | set_box_padding (int val) |
| void | set_drop_uncovered_chars (bool val) |
| void | set_strip_unrenderable_words (bool val) |
| void | set_output_word_boxes (bool val) |
| void | set_add_ligatures (bool add_ligatures) |
| void | set_pen_color (double r, double g, double b) |
| void | set_h_margin (const int h_margin) |
| void | set_v_margin (const int v_margin) |
| const PangoFontInfo & | font () const |
| const int | h_margin () const |
| const int | v_margin () const |
| const vector< BoxChar * > & | GetBoxes () const |
| Boxa * | GetPageBoxes () const |
| void | RotatePageBoxes (float rotation) |
| void | ClearBoxes () |
| void | WriteAllBoxes (const string &filename) |
| int | StripUnrenderableWords (string *utf8_text) const |
Static Public Member Functions | |
| static string | InsertWordJoiners (const string &text) |
| static string | ConvertBasicLatinToFullwidthLatin (const string &text) |
| static string | ConvertFullwidthLatinToBasicLatin (const string &text) |
Protected Member Functions | |
| void | InitPangoCairo () |
| void | FreePangoCairo () |
| void | SetLayoutProperties () |
| void | SetWordUnderlineAttributes (const string &page_text) |
| void | ComputeClusterBoxes () |
| void | CorrectBoxPositionsToLayout (vector< BoxChar * > *boxchars) |
| bool | GetClusterStrings (vector< string > *cluster_text) |
| int | FindFirstPageBreakOffset (const char *text, int text_length) |
Protected Attributes | |
| PangoFontInfo | font_ |
| int | page_width_ |
| int | page_height_ |
| int | h_margin_ |
| int | v_margin_ |
| int | pen_color_ [3] |
| double | char_spacing_ |
| int | leading_ |
| int | resolution_ |
| bool | vertical_text_ |
| bool | gravity_hint_strong_ |
| bool | render_fullwidth_latin_ |
| double | underline_start_prob_ |
| double | underline_continuation_prob_ |
| PangoUnderline | underline_style_ |
| bool | drop_uncovered_chars_ |
| bool | strip_unrenderable_words_ |
| bool | add_ligatures_ |
| bool | output_word_boxes_ |
| cairo_surface_t * | surface_ |
| cairo_t * | cr_ |
| PangoLayout * | layout_ |
| int | start_box_ |
| int | page_ |
| vector< BoxChar * > | boxchars_ |
| int | box_padding_ |
| Boxa * | page_boxes_ |
| hash_map< char32, inT64 > | char_map_ |
| int | total_chars_ |
| int | font_index_ |
| int | last_offset_ |
Definition at line 48 of file stringrenderer.h.
| tesseract::StringRenderer::StringRenderer | ( | const string & | font_desc, |
| int | page_width, | ||
| int | page_height | ||
| ) |
Definition at line 98 of file stringrenderer.cpp.
// Constructor initializer list and body. Page dimensions come from the
// arguments; both margins default to 50, every boolean option except
// drop_uncovered_chars_ starts false, and the Cairo/Pango handles start NULL.
// The body sets the pen color to (0, 0, 0), applies font_desc via set_font(),
// selects kDefaultOutputResolution and leaves page_boxes_ NULL.
// NOTE(review): set_font() is declared as returning bool, and that result is
// not checked here.
: page_width_(page_width), page_height_(page_height), h_margin_(50), v_margin_(50), char_spacing_(0), leading_(0), vertical_text_(false), gravity_hint_strong_(false), render_fullwidth_latin_(false), underline_start_prob_(0), underline_continuation_prob_(0), underline_style_(PANGO_UNDERLINE_SINGLE), drop_uncovered_chars_(true), strip_unrenderable_words_(false), add_ligatures_(false), output_word_boxes_(false), surface_(NULL), cr_(NULL), layout_(NULL), start_box_(0), page_(0), box_padding_(0), total_chars_(0), font_index_(0), last_offset_(0) { pen_color_[0] = 0.0; pen_color_[1] = 0.0; pen_color_[2] = 0.0; set_font(font_desc); set_resolution(kDefaultOutputResolution); page_boxes_ = NULL; }
| tesseract::StringRenderer::~StringRenderer | ( | ) |
Definition at line 152 of file stringrenderer.cpp.
| void tesseract::StringRenderer::ClearBoxes | ( | ) |
Definition at line 335 of file stringrenderer.cpp.
| void tesseract::StringRenderer::ComputeClusterBoxes | ( | ) | [protected] |
Definition at line 455 of file stringrenderer.cpp.
{
cluster_start_indices.push_back(pango_layout_iter_get_index(cluster_iter));
tlog(3, "Added %d\n", cluster_start_indices.back());
} while (pango_layout_iter_next_cluster(cluster_iter));
pango_layout_iter_free(cluster_iter);
cluster_start_indices.push_back(strlen(text));
tlog(3, "Added last index %d\n", cluster_start_indices.back());
// Sort the indices and create a map from start to end indices.
sort(cluster_start_indices.begin(), cluster_start_indices.end());
map<int, int> cluster_start_to_end_index;
for (int i = 0; i < cluster_start_indices.size() - 1; ++i) {
cluster_start_to_end_index[cluster_start_indices[i]]
= cluster_start_indices[i + 1];
}
// Iterate again to compute cluster boxes and their text with the obtained
// cluster extent information.
cluster_iter = pango_layout_get_iter(layout_);
// Store BoxChars* sorted by their byte start positions
map<int, BoxChar*> start_byte_to_box;
do {
PangoRectangle cluster_rect;
pango_layout_iter_get_cluster_extents(cluster_iter, &cluster_rect,
NULL);
pango_extents_to_pixels(&cluster_rect, NULL);
const int start_byte_index = pango_layout_iter_get_index(cluster_iter);
const int end_byte_index = cluster_start_to_end_index[start_byte_index];
string cluster_text = string(text + start_byte_index,
end_byte_index - start_byte_index);
if (cluster_text.size() && cluster_text[0] == '\n') {
tlog(2, "Skipping newlines at start of text.\n");
continue;
}
if (!cluster_rect.width || !cluster_rect.height ||
IsUTF8Whitespace(cluster_text.c_str())) {
tlog(2, "Skipping whitespace with boxdim (%d,%d) '%s'\n",
cluster_rect.width, cluster_rect.height, cluster_text.c_str());
BoxChar* boxchar = new BoxChar(" ", 1);
boxchar->set_page(page_);
start_byte_to_box[start_byte_index] = boxchar;
continue;
}
// Prepare a boxchar for addition at this byte position.
tlog(2, "[%d %d], %d, %d : start_byte=%d end_byte=%d : '%s'\n",
cluster_rect.x, cluster_rect.y,
cluster_rect.width, cluster_rect.height,
start_byte_index, end_byte_index,
cluster_text.c_str());
ASSERT_HOST_MSG(cluster_rect.width,
"cluster_text:%s start_byte_index:%d\n",
cluster_text.c_str(), start_byte_index);
ASSERT_HOST_MSG(cluster_rect.height,
"cluster_text:%s start_byte_index:%d\n",
cluster_text.c_str(), start_byte_index);
if (box_padding_) {
cluster_rect.x = max(0, cluster_rect.x - box_padding_);
cluster_rect.width += 2 * box_padding_;
cluster_rect.y = max(0, cluster_rect.y - box_padding_);
cluster_rect.height += 2 * box_padding_;
}
if (add_ligatures_) {
// Make sure the output box files have ligatured text in case the font
// decided to use an unmapped glyph.
cluster_text = LigatureTable::Get()->AddLigatures(cluster_text, NULL);
}
BoxChar* boxchar = new BoxChar(cluster_text.c_str(), cluster_text.size());
boxchar->set_page(page_);
boxchar->AddBox(cluster_rect.x, cluster_rect.y,
cluster_rect.width, cluster_rect.height);
start_byte_to_box[start_byte_index] = boxchar;
} while (pango_layout_iter_next_cluster(cluster_iter));
pango_layout_iter_free(cluster_iter);
// There is a subtle bug in the cluster text reported by the PangoLayoutIter
// on ligatured characters (eg. The word "Lam-Aliph" in arabic). To work
// around this, we use text reported using the PangoGlyphIter which is
// accurate.
// TODO(ranjith): Revisit whether this is still needed in newer versions of
// pango.
vector<string> cluster_text;
if (GetClusterStrings(&cluster_text)) {
ASSERT_HOST(cluster_text.size() == start_byte_to_box.size());
int ind = 0;
for (map<int, BoxChar*>::iterator it = start_byte_to_box.begin();
it != start_byte_to_box.end(); ++it, ++ind) {
it->second->mutable_ch()->swap(cluster_text[ind]);
}
}
// Append to the boxchars list in byte order.
vector<BoxChar*> page_boxchars;
page_boxchars.reserve(start_byte_to_box.size());
string last_ch;
for (map<int, BoxChar*>::const_iterator it = start_byte_to_box.begin();
it != start_byte_to_box.end(); ++it) {
if (it->second->ch() == kWordJoinerUTF8) {
// Skip zero-width joiner characters (ZWJs) here.
delete it->second;
} else {
page_boxchars.push_back(it->second);
}
}
CorrectBoxPositionsToLayout(&page_boxchars);
if (render_fullwidth_latin_) {
for (map<int, BoxChar*>::iterator it = start_byte_to_box.begin();
it != start_byte_to_box.end(); ++it) {
// Convert fullwidth Latin characters to their halfwidth forms.
string half(ConvertFullwidthLatinToBasicLatin(it->second->ch()));
it->second->mutable_ch()->swap(half);
}
}
// Merge the character boxes into word boxes if we are rendering n-grams.
if (output_word_boxes_) {
MergeBoxCharsToWords(&page_boxchars);
}
boxchars_.insert(boxchars_.end(), page_boxchars.begin(), page_boxchars.end());
// Compute the page bounding box
Box* page_box = NULL;
Boxa* all_boxes = NULL;
for (int i = 0; i < page_boxchars.size(); ++i) {
if (page_boxchars[i]->box() == NULL) continue;
if (all_boxes == NULL)
all_boxes = boxaCreate(0);
boxaAddBox(all_boxes, page_boxchars[i]->mutable_box(), L_CLONE);
}
boxaGetExtent(all_boxes, NULL, NULL, &page_box);
boxaDestroy(&all_boxes);
if (page_boxes_ == NULL)
page_boxes_ = boxaCreate(0);
boxaAddBox(page_boxes_, page_box, L_INSERT);
}
| string tesseract::StringRenderer::ConvertBasicLatinToFullwidthLatin | ( | const string & | text | ) | [static] |
Definition at line 689 of file stringrenderer.cpp.
{
// Convert printable and non-space 7-bit ASCII characters to
// their fullwidth forms.
if (IsInterchangeValid7BitAscii(*it) && isprint(*it) && !isspace(*it)) {
// Convert by adding 0xFEE0 to the codepoint of 7-bit ASCII.
char32 full_char = *it + 0xFEE0;
full_str.append(EncodeAsUTF8(full_char));
} else {
full_str.append(it.utf8_data(), it.utf8_len());
}
}
return full_str;
}
| string tesseract::StringRenderer::ConvertFullwidthLatinToBasicLatin | ( | const string & | text | ) | [static] |
Definition at line 709 of file stringrenderer.cpp.
{
char32 half_char = FullwidthToHalfwidth(*it);
// Convert fullwidth Latin characters to their halfwidth forms
// only if halfwidth forms are printable and non-space 7-bit ASCII.
if (IsInterchangeValid7BitAscii(half_char) &&
isprint(half_char) && !isspace(half_char)) {
half_str.append(EncodeAsUTF8(half_char));
} else {
half_str.append(it.utf8_data(), it.utf8_len());
}
}
return half_str;
}
| void tesseract::StringRenderer::CorrectBoxPositionsToLayout | ( | vector< BoxChar * > * | boxchars | ) | [protected] |
Definition at line 598 of file stringrenderer.cpp.
{
const double rotation = - pango_gravity_to_rotation(
pango_context_get_base_gravity(pango_layout_get_context(layout_)));
BoxChar::TranslateBoxes(page_width_ - h_margin_, v_margin_, boxchars);
BoxChar::RotateBoxes(rotation, page_width_ - h_margin_, v_margin_,
0, boxchars->size(), boxchars);
} else {
BoxChar::TranslateBoxes(h_margin_, v_margin_, boxchars);
}
}
| int tesseract::StringRenderer::FindFirstPageBreakOffset | ( | const char * | text, |
| int | text_length | ||
| ) | [protected] |
Definition at line 277 of file stringrenderer.cpp.
{
// Lays out a prefix of `text` and returns the offset at which the first page
// must end: the start index of the first line whose bottom edge falls outside
// the usable layout height, or the length of the laid-out prefix when every
// line fits. Returns 0 for empty input.
if (!text_length) return 0;
// Usable area is the page minus a margin on each side. For vertical text the
// usable width is used as the layout height limit instead.
const int max_height = (page_height_ - 2 * v_margin_);
const int max_width = (page_width_ - 2 * h_margin_);
const int max_layout_height = vertical_text_ ? max_width : max_height;
UNICHAR::const_iterator it = UNICHAR::begin(text, text_length);
const UNICHAR::const_iterator it_end = UNICHAR::end(text, text_length);
// Only the first kMaxUnicodeBufLength characters are handed to Pango; the
// empty-bodied loop just advances `it` to that cut-off (or to the end).
const int kMaxUnicodeBufLength = 15000;
for (int i = 0; i < kMaxUnicodeBufLength && it != it_end; ++it, ++i);
// Length in bytes of the prefix that `it` now points past.
int buf_length = it.utf8_data() - text;
tlog(1, "len = %d buf_len = %d\n", text_length, buf_length);
pango_layout_set_text(layout_, text, buf_length);
PangoLayoutIter* line_iter = NULL;
{ // Fontconfig caches some info here that is not freed before exit.
DISABLE_HEAP_LEAK_CHECK;
line_iter = pango_layout_get_iter(layout_);
}
// Despite its name, first_page is true only for the first line visited; that
// line's top becomes the reference point for measuring page height.
bool first_page = true;
int page_top = 0;
// Default result: the whole laid-out prefix fits on the page.
int offset = buf_length;
do {
// Get bounding box of the current line
PangoRectangle line_ink_rect;
pango_layout_iter_get_line_extents(line_iter, &line_ink_rect, NULL);
pango_extents_to_pixels(&line_ink_rect, NULL);
PangoLayoutLine* line = pango_layout_iter_get_line_readonly(line_iter);
if (first_page) {
page_top = line_ink_rect.y;
first_page = false;
}
int line_bottom = line_ink_rect.y + line_ink_rect.height;
// The first line that overflows marks the page break.
// NOTE(review): if the very first line already overflows, start_index is
// presumably 0, and RenderToImage treats a 0 result as "nothing to render".
if (line_bottom - page_top > max_layout_height) {
offset = line->start_index;
tlog(1, "Found offset = %d\n", offset);
break;
}
} while (pango_layout_iter_next_line(line_iter));
pango_layout_iter_free(line_iter);
return offset;
}
| const PangoFontInfo& tesseract::StringRenderer::font | ( | ) | const [inline] |
Definition at line 126 of file stringrenderer.h.
{
// Read-only access to the font description currently held in font_.
return font_;
}
| void tesseract::StringRenderer::FreePangoCairo | ( | ) | [protected] |
| const vector< BoxChar * > & tesseract::StringRenderer::GetBoxes | ( | ) | const |
Definition at line 321 of file stringrenderer.cpp.
| bool tesseract::StringRenderer::GetClusterStrings | ( | vector< string > * | cluster_text | ) | [protected] |
Definition at line 348 of file stringrenderer.cpp.
{
PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
if (!run) {
// End of line NULL run marker
tlog(2, "Found end of line marker\n");
continue;
}
PangoGlyphItemIter cluster_iter;
gboolean have_cluster;
for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
run, full_text);
have_cluster;
have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
const int start_byte_index = cluster_iter.start_index;
const int end_byte_index = cluster_iter.end_index;
string text = string(full_text + start_byte_index,
end_byte_index - start_byte_index);
if (IsUTF8Whitespace(text.c_str())) {
tlog(2, "Found whitespace\n");
text = " ";
}
tlog(2, "start_byte=%d end_byte=%d : '%s'\n", start_byte_index,
end_byte_index, text.c_str());
if (add_ligatures_) {
// Make sure the output box files have ligatured text in case the font
// decided to use an unmapped glyph.
text = LigatureTable::Get()->AddLigatures(text, NULL);
}
start_byte_to_text[start_byte_index] = text;
}
} while (pango_layout_iter_next_run(run_iter));
pango_layout_iter_free(run_iter);
cluster_text->clear();
for (map<int, string>::const_iterator it = start_byte_to_text.begin();
it != start_byte_to_text.end(); ++it) {
cluster_text->push_back(it->second);
}
return cluster_text->size();
}
| Boxa * tesseract::StringRenderer::GetPageBoxes | ( | ) | const |
Definition at line 325 of file stringrenderer.cpp.
| const int tesseract::StringRenderer::h_margin | ( | ) | const [inline] |
Definition at line 129 of file stringrenderer.h.
{
// Current horizontal margin (the constructor initializes it to 50).
return h_margin_;
}
| void tesseract::StringRenderer::InitPangoCairo | ( | ) | [protected] |
Definition at line 157 of file stringrenderer.cpp.
{
DISABLE_HEAP_LEAK_CHECK;
layout_ = pango_cairo_create_layout(cr_);
}
if (vertical_text_) {
PangoContext* context = pango_layout_get_context(layout_);
pango_context_set_base_gravity(context, PANGO_GRAVITY_EAST);
if (gravity_hint_strong_) {
pango_context_set_gravity_hint(context, PANGO_GRAVITY_HINT_STRONG);
}
pango_layout_context_changed(layout_);
}
SetLayoutProperties();
}
| string tesseract::StringRenderer::InsertWordJoiners | ( | const string & | text | ) | [static] |
Definition at line 666 of file stringrenderer.cpp.
{
// Add the symbol to the output string.
out_str.append(it.utf8_data(), it.utf8_len());
// Check the next symbol.
UNICHAR::const_iterator next_it = it;
++next_it;
bool next_char_is_boundary = (next_it == it_end || *next_it == ' ');
bool next_char_is_combiner = (next_it == it_end) ?
false : IsCombiner(*next_it);
if (*it != ' ' && *it != '\n' && !next_char_is_boundary &&
!next_char_is_combiner) {
out_str += kWordJoinerUTF8;
}
}
return out_str;
}
| int tesseract::StringRenderer::RenderAllFontsToImage | ( | double | min_coverage, |
| const char * | text, | ||
| int | text_length, | ||
| string * | font_used, | ||
| Pix ** | pix | ||
| ) |
Definition at line 828 of file stringrenderer.cpp.
{
  // Renders `text` with the next available font whose coverage of the text
  // reaches `min_coverage`, and ORs a small title describing that font into
  // the output image. The output parameter is named `image` in this
  // definition.
  //
  // Intended to be called repeatedly: font_index_ remembers where the previous
  // call stopped. Every call that renders a font returns 0. Once the font list
  // is exhausted, the cached character map and font index are reset and the
  // offset of the last successful render is returned, or -1 if no render
  // produced a non-zero offset. `font_used`, when non-NULL, is cleared on entry
  // and receives the font name when the render yields a non-zero offset.
  *image = NULL;
  // Select a suitable font to render the title with.
  const char kTitleTemplate[] = "%s : %d hits = %.2f%%, raw = %d = %.2f%%";
  string title_font;
  if (!FontUtils::SelectFont(kTitleTemplate, strlen(kTitleTemplate),
                             &title_font, NULL)) {
    tprintf("WARNING: Could not find a font to render image title with!\n");
    title_font = "Arial";
  }
  title_font += " 8";
  tlog(1, "Selected title font: %s\n", title_font.c_str());
  if (font_used) font_used->clear();
  // Remember the caller's font so it can be reinstated after the title is drawn.
  string orig_font = font_.DescriptionName();
  if (char_map_.empty()) {
    total_chars_ = 0;
    // Fill the hash table and use that for computing which fonts to use.
    for (UNICHAR::const_iterator it = UNICHAR::begin(text, text_length);
         it != UNICHAR::end(text, text_length); ++it) {
      ++total_chars_;
      ++char_map_[*it];
    }
    tprintf("Total chars = %d\n", total_chars_);
  }
  const vector<string>& all_fonts = FontUtils::ListAvailableFonts();
  for (int i = font_index_; i < all_fonts.size(); ++i) {
    // Advance the persistent cursor first so the next call resumes after this
    // font whether or not it is accepted.
    ++font_index_;
    int raw_score = 0;
    int ok_chars = FontUtils::FontScore(char_map_, all_fonts[i], &raw_score,
                                        NULL);
    if (ok_chars > 0 && ok_chars >= total_chars_ * min_coverage) {
      set_font(all_fonts[i]);
      int offset = RenderToBinaryImage(text, text_length, 128, image);
      ClearBoxes();  // Get rid of them as they are garbage.
      const int kMaxTitleLength = 1024;
      char title[kMaxTitleLength];
      snprintf(title, kMaxTitleLength, kTitleTemplate,
               all_fonts[i].c_str(), ok_chars,
               100.0 * ok_chars / total_chars_, raw_score,
               100.0 * raw_score / char_map_.size());
      tprintf("%s\n", title);
      // This is a good font! Store the offset to return once we've tried all
      // the fonts.
      if (offset) {
        last_offset_ = offset;
        if (font_used) *font_used = all_fonts[i];
      }
      // Add the font to the image.
      set_font(title_font);
      // Render the title with a reduced vertical margin. The exact margin is
      // saved and restored afterwards: dividing by 8 and multiplying back
      // truncates under integer arithmetic (e.g. 50 -> 6 -> 48) and would
      // permanently alter v_margin_ for later renders.
      const int saved_v_margin = v_margin_;
      v_margin_ /= 8;
      Pix* title_image = NULL;
      RenderToBinaryImage(title, strlen(title), 128, &title_image);
      pixOr(*image, *image, title_image);
      pixDestroy(&title_image);
      v_margin_ = saved_v_margin;
      set_font(orig_font);
      // We return the real offset only after cycling through the list of fonts.
      return 0;
    } else {
      tprintf("Font %s failed with %d hits = %.2f%%\n",
              all_fonts[i].c_str(), ok_chars, 100.0 * ok_chars / total_chars_);
    }
  }
  // Every font has been tried: reset the iteration state for the next text.
  font_index_ = 0;
  char_map_.clear();
  return last_offset_ == 0 ? -1 : last_offset_;
}
| int tesseract::StringRenderer::RenderToBinaryImage | ( | const char * | text, |
| int | text_length, | ||
| int | threshold, | ||
| Pix ** | pix | ||
| ) |
Definition at line 648 of file stringrenderer.cpp.
{
  // Renders a page through RenderToImage, then reduces the result to a binary
  // image using `threshold`. Returns whatever offset RenderToImage reported.
  Pix* rendered = NULL;
  const int page_offset = RenderToImage(text, text_length, &rendered);
  if (rendered == NULL) {
    // No image was produced: report that to the caller as a NULL pix.
    *pix = NULL;
    return page_offset;
  }
  // Go through an 8-bit intermediate, releasing each temporary as soon as it
  // has been consumed.
  Pix* gray = pixConvertTo8(rendered, false);
  pixDestroy(&rendered);
  *pix = pixThresholdToBinary(gray, threshold);
  pixDestroy(&gray);
  return page_offset;
}
| int tesseract::StringRenderer::RenderToGrayscaleImage | ( | const char * | text, |
| int | text_length, | ||
| Pix ** | pix | ||
| ) |
Definition at line 637 of file stringrenderer.cpp.
{
  // Renders a page through RenderToImage and stores its 8-bit conversion in
  // *pix. Returns the offset reported by RenderToImage.
  Pix* orig_pix = NULL;
  int offset = RenderToImage(text, text_length, &orig_pix);
  if (orig_pix) {
    *pix = pixConvertTo8(orig_pix, false);
    pixDestroy(&orig_pix);
  } else {
    // Nothing was rendered. Store NULL, as RenderToBinaryImage does, instead
    // of leaving *pix with whatever (possibly uninitialized) value the caller
    // passed in.
    *pix = NULL;
  }
  return offset;
}
| int tesseract::StringRenderer::RenderToImage | ( | const char * | text, |
| int | text_length, | ||
| Pix ** | pix | ||
| ) |
Definition at line 728 of file stringrenderer.cpp.
{
// Renders the first page of `text` and records the bounding boxes of its
// clusters. Returns the page offset computed by FindFirstPageBreakOffset, or
// 0 when that function returns 0. When `pix` is non-NULL, any image already
// held in *pix is destroyed and *pix receives the rendered page; when `pix`
// is NULL no image is produced but the boxes are still computed.
if (pix && *pix) pixDestroy(pix);
InitPangoCairo();
const int page_offset = FindFirstPageBreakOffset(text, text_length);
// NOTE(review): this early return skips FreePangoCairo(); whether the objects
// created by InitPangoCairo() are released elsewhere is not visible here.
if (!page_offset) {
return 0;
}
// Index into boxchars_ at which this page's boxes will start.
start_box_ = boxchars_.size();
if (!vertical_text_) {
// Translate by the specified margin
cairo_translate(cr_, h_margin_, v_margin_);
} else {
// Vertical text rendering is achieved by a two-step process of first
// performing regular horizontal layout with character orientation set to
// EAST, and then translating and rotating the layout before rendering onto
// the desired image surface. The settings required for the former step are
// done within InitPangoCairo().
//
// Translate to the top-right margin of page
cairo_translate(cr_, page_width_ - h_margin_, v_margin_);
// Rotate the layout
double rotation = - pango_gravity_to_rotation(
pango_context_get_base_gravity(pango_layout_get_context(layout_)));
tlog(2, "Rotating by %f radians\n", rotation);
cairo_rotate(cr_, rotation);
pango_cairo_update_layout(cr_, layout_);
}
// Work on a private copy of the page's text; the steps below may rewrite it.
string page_text(text, page_offset);
if (render_fullwidth_latin_) {
// Convert Basic Latin to their fullwidth forms.
page_text = ConvertBasicLatinToFullwidthLatin(page_text);
}
if (strip_unrenderable_words_) {
StripUnrenderableWords(&page_text);
}
// Only pay for dropping characters when the font does not already cover the
// whole page text.
if (drop_uncovered_chars_ &&
!font_.CoversUTF8Text(page_text.c_str(), page_text.length())) {
int num_dropped = font_.DropUncoveredChars(&page_text);
if (num_dropped) {
tprintf("WARNING: Dropped %d uncovered characters\n", num_dropped);
}
}
if (add_ligatures_) {
// Add ligatures wherever possible, including custom ligatures.
page_text = LigatureTable::Get()->AddLigatures(page_text, &font_);
}
if (underline_start_prob_ > 0) {
SetWordUnderlineAttributes(page_text);
}
// Replace the layout text with the processed page text.
pango_layout_set_text(layout_, page_text.c_str(), page_text.length());
if (pix) {
// Set a white background for the target image surface.
cairo_set_source_rgb(cr_, 1.0, 1.0, 1.0); // sets drawing colour to white
// Fill the surface with the active colour (if you don't do this, you will
// be given a surface with a transparent background to draw on)
cairo_paint(cr_);
// Set the ink color to the configured pen color (the constructor
// initializes it to 0, 0, 0, i.e. black).
cairo_set_source_rgb(cr_, pen_color_[0], pen_color_[1], pen_color_[2]);
// If the target surface or transformation properties of the cairo instance
// have changed, update the pango layout to reflect this
pango_cairo_update_layout(cr_, layout_);
{
DISABLE_HEAP_LEAK_CHECK; // for Fontconfig
// Draw the pango layout onto the cairo surface
pango_cairo_show_layout(cr_, layout_);
}
*pix = CairoARGB32ToPixFormat(surface_);
}
// Boxes are computed from the layout regardless of whether an image was
// requested.
ComputeClusterBoxes();
FreePangoCairo();
// Update internal state variables.
++page_;
return page_offset;
}
| void tesseract::StringRenderer::RotatePageBoxes | ( | float | rotation | ) |
Definition at line 329 of file stringrenderer.cpp.
| void tesseract::StringRenderer::set_add_ligatures | ( | bool | add_ligatures | ) | [inline] |
Definition at line 111 of file stringrenderer.h.
{
// Stores the flag; when set, RenderToImage runs the page text through
// LigatureTable::AddLigatures before laying it out.
add_ligatures_ = add_ligatures;
}
| void tesseract::StringRenderer::set_box_padding | ( | int | val | ) | [inline] |
Definition at line 96 of file stringrenderer.h.
{
// Stores the padding; when non-zero, ComputeClusterBoxes moves each cluster
// box's origin back by this amount (clamped at 0) and adds twice this amount
// to its width and height.
box_padding_ = val;
}
| void tesseract::StringRenderer::set_char_spacing | ( | double | char_spacing | ) | [inline] |
Definition at line 67 of file stringrenderer.h.
{
// Stores the spacing; when non-zero, SetLayoutProperties applies it as a Pango
// letter-spacing attribute (scaled by PANGO_SCALE).
char_spacing_ = char_spacing;
}
| void tesseract::StringRenderer::set_drop_uncovered_chars | ( | bool | val | ) | [inline] |
Definition at line 99 of file stringrenderer.h.
{
// Stores the flag; when set, RenderToImage removes characters that font_ does
// not cover from the page text.
drop_uncovered_chars_ = val;
}
| bool tesseract::StringRenderer::set_font | ( | const string & | desc | ) |
Definition at line 133 of file stringrenderer.cpp.
| void tesseract::StringRenderer::set_gravity_hint_strong | ( | bool | gravity_hint_strong | ) | [inline] |
Definition at line 77 of file stringrenderer.h.
{
// Stores the flag; when set and vertical text is enabled, InitPangoCairo sets
// PANGO_GRAVITY_HINT_STRONG on the layout's context.
gravity_hint_strong_ = gravity_hint_strong;
}
| void tesseract::StringRenderer::set_h_margin | ( | const int | h_margin | ) | [inline] |
Definition at line 120 of file stringrenderer.h.
| void tesseract::StringRenderer::set_leading | ( | int | leading | ) | [inline] |
Definition at line 70 of file stringrenderer.h.
{
// Stores the leading; when non-zero, SetLayoutProperties passes it (scaled by
// PANGO_SCALE) to pango_layout_set_spacing.
leading_ = leading;
}
| void tesseract::StringRenderer::set_output_word_boxes | ( | bool | val | ) | [inline] |
Definition at line 105 of file stringrenderer.h.
{
// Stores the flag; when set, ComputeClusterBoxes merges the page's character
// boxes into word boxes via MergeBoxCharsToWords.
output_word_boxes_ = val;
}
| void tesseract::StringRenderer::set_page | ( | int | page | ) | [inline] |
Definition at line 93 of file stringrenderer.h.
{
// Sets the page number stamped on each BoxChar created by
// ComputeClusterBoxes; RenderToImage increments it after every page it renders.
page_ = page;
}
| void tesseract::StringRenderer::set_pen_color | ( | double | r, |
| double | g, | ||
| double | b | ||
| ) | [inline] |
Definition at line 115 of file stringrenderer.h.
{
// Stores the RGB pen color that RenderToImage passes to cairo_set_source_rgb
// before drawing the layout.
// NOTE(review): the attribute list on this page declares pen_color_ as
// int[3]; if that is accurate, fractional components are truncated on
// assignment here. Confirm against stringrenderer.h.
pen_color_[0] = r;
pen_color_[1] = g;
pen_color_[2] = b;
}
| void tesseract::StringRenderer::set_render_fullwidth_latin | ( | bool | render_fullwidth_latin | ) | [inline] |
Definition at line 80 of file stringrenderer.h.
{
// Stores the flag; when set, RenderToImage converts the page text with
// ConvertBasicLatinToFullwidthLatin before rendering.
render_fullwidth_latin_ = render_fullwidth_latin;
}
| void tesseract::StringRenderer::set_resolution | ( | const int | resolution | ) |
Definition at line 139 of file stringrenderer.cpp.
| void tesseract::StringRenderer::set_strip_unrenderable_words | ( | bool | val | ) | [inline] |
Definition at line 102 of file stringrenderer.h.
{
// Stores the flag; when set, RenderToImage calls StripUnrenderableWords on the
// page text before rendering.
strip_unrenderable_words_ = val;
}
| void tesseract::StringRenderer::set_underline_continuation_prob | ( | const double | frac | ) |
Definition at line 148 of file stringrenderer.cpp.
| void tesseract::StringRenderer::set_underline_start_prob | ( | const double | frac | ) |
Definition at line 144 of file stringrenderer.cpp.
| void tesseract::StringRenderer::set_underline_style | ( | const PangoUnderline | style | ) | [inline] |
Definition at line 90 of file stringrenderer.h.
{
// Stores the style that SetWordUnderlineAttributes passes to
// pango_attr_underline_new when it starts an underline.
underline_style_ = style;
}
| void tesseract::StringRenderer::set_v_margin | ( | const int | v_margin | ) | [inline] |
Definition at line 123 of file stringrenderer.h.
| void tesseract::StringRenderer::set_vertical_text | ( | bool | vertical_text | ) | [inline] |
Definition at line 74 of file stringrenderer.h.
{
// Stores the flag; when set, InitPangoCairo switches the layout's base gravity
// to PANGO_GRAVITY_EAST and RenderToImage translates and rotates the layout
// from the page's top-right margin.
vertical_text_ = vertical_text;
}
| void tesseract::StringRenderer::SetLayoutProperties | ( | ) | [protected] |
Definition at line 179 of file stringrenderer.cpp.
{
swap(max_width, max_height);
}
pango_layout_set_width(layout_, max_width * PANGO_SCALE);
pango_layout_set_wrap(layout_, PANGO_WRAP_WORD);
// Adjust character spacing
PangoAttrList* attr_list = pango_attr_list_new();
if (char_spacing_) {
PangoAttribute* spacing_attr = pango_attr_letter_spacing_new(
static_cast<int>(char_spacing_ * PANGO_SCALE + 0.5));
spacing_attr->start_index = 0;
spacing_attr->end_index = static_cast<guint>(-1);
pango_attr_list_change(attr_list, spacing_attr);
}
pango_layout_set_attributes(layout_, attr_list);
pango_attr_list_unref(attr_list);
// Adjust line spacing
if (leading_) {
pango_layout_set_spacing(layout_, leading_ * PANGO_SCALE);
}
}
| void tesseract::StringRenderer::SetWordUnderlineAttributes | ( | const string & | page_text | ) | [protected] |
Definition at line 231 of file stringrenderer.cpp.
{
offset += SpanUTF8Whitespace(text + offset);
if (offset == page_text.length()) break;
int word_start = offset;
int word_len = SpanUTF8NotWhitespace(text + offset);
offset += word_len;
if (started_underline) {
// Should we continue the underline to the next word?
if (RandBool(underline_continuation_prob_, &rand)) {
// Continue the current underline to this word.
und_attr->end_index = word_start + word_len;
} else {
// Otherwise end the current underline attribute at the end of the
// previous word.
pango_attr_list_insert(attr_list, und_attr);
started_underline = false;
und_attr = nullptr;
}
}
if (!started_underline && RandBool(underline_start_prob_, &rand)) {
// Start a new underline attribute
und_attr = pango_attr_underline_new(underline_style_);
und_attr->start_index = word_start;
und_attr->end_index = word_start + word_len;
started_underline = true;
}
}
// Finish the current underline attribute at the end of the page.
if (started_underline) {
und_attr->end_index = page_text.length();
pango_attr_list_insert(attr_list, und_attr);
}
}
| int tesseract::StringRenderer::StripUnrenderableWords | ( | string * | utf8_text | ) | const |
Definition at line 610 of file stringrenderer.cpp.
{
int space_len = SpanUTF8Whitespace(text + offset);
output_text.append(text + offset, space_len);
offset += space_len;
if (offset == utf8_text->length()) break;
int word_len = SpanUTF8NotWhitespace(text + offset);
if (font_.CanRenderString(text + offset, word_len)) {
output_text.append(text + offset, word_len);
} else {
++num_dropped;
}
offset += word_len;
}
utf8_text->swap(output_text);
if (num_dropped > 0) {
tprintf("Stripped %d unrenderable words\n", num_dropped);
}
return num_dropped;
}
| const int tesseract::StringRenderer::v_margin | ( | ) | const [inline] |
Definition at line 132 of file stringrenderer.h.
{
// Current vertical margin (the constructor initializes it to 50).
return v_margin_;
}
| void tesseract::StringRenderer::WriteAllBoxes | ( | const string & | filename | ) |
Definition at line 342 of file stringrenderer.cpp.
bool tesseract::StringRenderer::add_ligatures_ [protected] |
Definition at line 191 of file stringrenderer.h.
int tesseract::StringRenderer::box_padding_ [protected] |
Definition at line 204 of file stringrenderer.h.
vector<BoxChar*> tesseract::StringRenderer::boxchars_ [protected] |
Definition at line 203 of file stringrenderer.h.
hash_map<char32, inT64> tesseract::StringRenderer::char_map_ [protected] |
Definition at line 209 of file stringrenderer.h.
double tesseract::StringRenderer::char_spacing_ [protected] |
Definition at line 180 of file stringrenderer.h.
cairo_t* tesseract::StringRenderer::cr_ [protected] |
Definition at line 195 of file stringrenderer.h.
bool tesseract::StringRenderer::drop_uncovered_chars_ [protected] |
Definition at line 189 of file stringrenderer.h.
PangoFontInfo tesseract::StringRenderer::font_ [protected] |
Definition at line 175 of file stringrenderer.h.
int tesseract::StringRenderer::font_index_ [protected] |
Definition at line 211 of file stringrenderer.h.
bool tesseract::StringRenderer::gravity_hint_strong_ [protected] |
Definition at line 183 of file stringrenderer.h.
int tesseract::StringRenderer::h_margin_ [protected] |
Definition at line 177 of file stringrenderer.h.
int tesseract::StringRenderer::last_offset_ [protected] |
Definition at line 212 of file stringrenderer.h.
PangoLayout* tesseract::StringRenderer::layout_ [protected] |
Definition at line 196 of file stringrenderer.h.
int tesseract::StringRenderer::leading_ [protected] |
Definition at line 181 of file stringrenderer.h.
bool tesseract::StringRenderer::output_word_boxes_ [protected] |
Definition at line 192 of file stringrenderer.h.
int tesseract::StringRenderer::page_ [protected] |
Definition at line 200 of file stringrenderer.h.
Boxa* tesseract::StringRenderer::page_boxes_ [protected] |
Definition at line 206 of file stringrenderer.h.
int tesseract::StringRenderer::page_height_ [protected] |
Definition at line 177 of file stringrenderer.h.
int tesseract::StringRenderer::page_width_ [protected] |
Definition at line 177 of file stringrenderer.h.
int tesseract::StringRenderer::pen_color_[3] [protected] |
Definition at line 179 of file stringrenderer.h.
bool tesseract::StringRenderer::render_fullwidth_latin_ [protected] |
Definition at line 184 of file stringrenderer.h.
int tesseract::StringRenderer::resolution_ [protected] |
Definition at line 181 of file stringrenderer.h.
int tesseract::StringRenderer::start_box_ [protected] |
Definition at line 199 of file stringrenderer.h.
bool tesseract::StringRenderer::strip_unrenderable_words_ [protected] |
Definition at line 190 of file stringrenderer.h.
cairo_surface_t* tesseract::StringRenderer::surface_ [protected] |
Definition at line 194 of file stringrenderer.h.
int tesseract::StringRenderer::total_chars_ [protected] |
Definition at line 210 of file stringrenderer.h.
double tesseract::StringRenderer::underline_continuation_prob_ [protected] |
Definition at line 186 of file stringrenderer.h.
double tesseract::StringRenderer::underline_start_prob_ [protected] |
Definition at line 185 of file stringrenderer.h.
PangoUnderline tesseract::StringRenderer::underline_style_ [protected] |
Definition at line 187 of file stringrenderer.h.
int tesseract::StringRenderer::v_margin_ [protected] |
Definition at line 177 of file stringrenderer.h.
bool tesseract::StringRenderer::vertical_text_ [protected] |
Definition at line 182 of file stringrenderer.h.