tesseract 3.04.01

tesseract::StringRenderer Class Reference

#include <stringrenderer.h>

List of all members.

Public Member Functions

 StringRenderer (const string &font_desc, int page_width, int page_height)
 ~StringRenderer ()
int RenderToImage (const char *text, int text_length, Pix **pix)
int RenderToGrayscaleImage (const char *text, int text_length, Pix **pix)
int RenderToBinaryImage (const char *text, int text_length, int threshold, Pix **pix)
int RenderAllFontsToImage (double min_coverage, const char *text, int text_length, string *font_used, Pix **pix)
bool set_font (const string &desc)
void set_char_spacing (double char_spacing)
void set_leading (int leading)
void set_resolution (const int resolution)
void set_vertical_text (bool vertical_text)
void set_gravity_hint_strong (bool gravity_hint_strong)
void set_render_fullwidth_latin (bool render_fullwidth_latin)
void set_underline_start_prob (const double frac)
void set_underline_continuation_prob (const double frac)
void set_underline_style (const PangoUnderline style)
void set_page (int page)
void set_box_padding (int val)
void set_drop_uncovered_chars (bool val)
void set_strip_unrenderable_words (bool val)
void set_output_word_boxes (bool val)
void set_add_ligatures (bool add_ligatures)
void set_pen_color (double r, double g, double b)
void set_h_margin (const int h_margin)
void set_v_margin (const int v_margin)
const PangoFontInfo & font () const
const int h_margin () const
const int v_margin () const
const vector< BoxChar * > & GetBoxes () const
Boxa * GetPageBoxes () const
void RotatePageBoxes (float rotation)
void ClearBoxes ()
void WriteAllBoxes (const string &filename)
int StripUnrenderableWords (string *utf8_text) const

Static Public Member Functions

static string InsertWordJoiners (const string &text)
static string ConvertBasicLatinToFullwidthLatin (const string &text)
static string ConvertFullwidthLatinToBasicLatin (const string &text)

Protected Member Functions

void InitPangoCairo ()
void FreePangoCairo ()
void SetLayoutProperties ()
void SetWordUnderlineAttributes (const string &page_text)
void ComputeClusterBoxes ()
void CorrectBoxPositionsToLayout (vector< BoxChar * > *boxchars)
bool GetClusterStrings (vector< string > *cluster_text)
int FindFirstPageBreakOffset (const char *text, int text_length)

Protected Attributes

PangoFontInfo font_
int page_width_
int page_height_
int h_margin_
int v_margin_
int pen_color_ [3]
double char_spacing_
int leading_
int resolution_
bool vertical_text_
bool gravity_hint_strong_
bool render_fullwidth_latin_
double underline_start_prob_
double underline_continuation_prob_
PangoUnderline underline_style_
bool drop_uncovered_chars_
bool strip_unrenderable_words_
bool add_ligatures_
bool output_word_boxes_
cairo_surface_t * surface_
cairo_t * cr_
PangoLayout * layout_
int start_box_
int page_
vector< BoxChar * > boxchars_
int box_padding_
Boxa * page_boxes_
hash_map< char32, inT64 > char_map_
int total_chars_
int font_index_
int last_offset_

Detailed Description

Definition at line 48 of file stringrenderer.h.


Constructor & Destructor Documentation

tesseract::StringRenderer::StringRenderer ( const string &  font_desc,
int  page_width,
int  page_height 
)

Definition at line 98 of file stringrenderer.cpp.

    // Page geometry comes from the caller; both margins start at 50.
    : page_width_(page_width),
      page_height_(page_height),
      h_margin_(50),
      v_margin_(50),
      // Text layout options all start switched off / zeroed.
      char_spacing_(0),
      leading_(0),
      vertical_text_(false),
      gravity_hint_strong_(false),
      render_fullwidth_latin_(false),
      underline_start_prob_(0),
      underline_continuation_prob_(0),
      underline_style_(PANGO_UNDERLINE_SINGLE),
      // Of the text clean-up options only drop_uncovered_chars_ defaults to
      // true.
      drop_uncovered_chars_(true),
      strip_unrenderable_words_(false),
      add_ligatures_(false),
      output_word_boxes_(false),
      // The Cairo/Pango handles are not created here; they start as NULL.
      surface_(NULL),
      cr_(NULL),
      layout_(NULL),
      // Box and page bookkeeping.
      start_box_(0),
      page_(0),
      box_padding_(0),
      // State used by RenderAllFontsToImage() across successive calls.
      total_chars_(0),
      font_index_(0),
      last_offset_(0) {
  // All three pen color components start at zero.
  pen_color_[0] = 0.0;
  pen_color_[1] = 0.0;
  pen_color_[2] = 0.0;
  // font_ and resolution_ are not in the initializer list; they are set up
  // through their setters using the requested description and the default
  // output resolution constant.
  // NOTE(review): the bool returned by set_font() is ignored here, so this
  // constructor does not report a font description that could not be set.
  set_font(font_desc);
  set_resolution(kDefaultOutputResolution);
  // page_boxes_ starts out NULL; ComputeClusterBoxes() creates it on demand.
  page_boxes_ = NULL;
}

tesseract::StringRenderer::~StringRenderer ( )

Definition at line 152 of file stringrenderer.cpp.


Member Function Documentation

void tesseract::StringRenderer::ClearBoxes ( )

Definition at line 335 of file stringrenderer.cpp.

void tesseract::StringRenderer::ComputeClusterBoxes ( ) [protected]

Definition at line 455 of file stringrenderer.cpp.

     {
    cluster_start_indices.push_back(pango_layout_iter_get_index(cluster_iter));
    tlog(3, "Added %d\n", cluster_start_indices.back());
  } while (pango_layout_iter_next_cluster(cluster_iter));
  pango_layout_iter_free(cluster_iter);
  cluster_start_indices.push_back(strlen(text));
  tlog(3, "Added last index %d\n", cluster_start_indices.back());
  // Sort the indices and create a map from start to end indices.
  sort(cluster_start_indices.begin(), cluster_start_indices.end());
  map<int, int> cluster_start_to_end_index;
  for (int i = 0; i < cluster_start_indices.size() - 1; ++i) {
    cluster_start_to_end_index[cluster_start_indices[i]]
        = cluster_start_indices[i + 1];
  }

  // Iterate again to compute cluster boxes and their text with the obtained
  // cluster extent information.
  cluster_iter = pango_layout_get_iter(layout_);
  // Store BoxChars* sorted by their byte start positions
  map<int, BoxChar*> start_byte_to_box;
  do {
    PangoRectangle cluster_rect;
    pango_layout_iter_get_cluster_extents(cluster_iter, &cluster_rect,
                                          NULL);
    pango_extents_to_pixels(&cluster_rect, NULL);
    const int start_byte_index = pango_layout_iter_get_index(cluster_iter);
    const int end_byte_index = cluster_start_to_end_index[start_byte_index];
    string cluster_text = string(text + start_byte_index,
                                 end_byte_index - start_byte_index);
    if (cluster_text.size() && cluster_text[0] == '\n') {
      tlog(2, "Skipping newlines at start of text.\n");
      continue;
    }
    if (!cluster_rect.width || !cluster_rect.height ||
        IsUTF8Whitespace(cluster_text.c_str())) {
      tlog(2, "Skipping whitespace with boxdim (%d,%d) '%s'\n",
           cluster_rect.width, cluster_rect.height, cluster_text.c_str());
      BoxChar* boxchar = new BoxChar(" ", 1);
      boxchar->set_page(page_);
      start_byte_to_box[start_byte_index] = boxchar;
      continue;
    }
    // Prepare a boxchar for addition at this byte position.
    tlog(2, "[%d %d], %d, %d : start_byte=%d end_byte=%d : '%s'\n",
         cluster_rect.x, cluster_rect.y,
         cluster_rect.width, cluster_rect.height,
         start_byte_index, end_byte_index,
         cluster_text.c_str());
    ASSERT_HOST_MSG(cluster_rect.width,
                    "cluster_text:%s  start_byte_index:%d\n",
                    cluster_text.c_str(), start_byte_index);
    ASSERT_HOST_MSG(cluster_rect.height,
                    "cluster_text:%s  start_byte_index:%d\n",
                    cluster_text.c_str(), start_byte_index);
    if (box_padding_) {
      cluster_rect.x = max(0, cluster_rect.x - box_padding_);
      cluster_rect.width += 2 * box_padding_;
      cluster_rect.y = max(0, cluster_rect.y - box_padding_);
      cluster_rect.height += 2 * box_padding_;
    }
    if (add_ligatures_) {
      // Make sure the output box files have ligatured text in case the font
      // decided to use an unmapped glyph.
      cluster_text = LigatureTable::Get()->AddLigatures(cluster_text, NULL);
    }
    BoxChar* boxchar = new BoxChar(cluster_text.c_str(), cluster_text.size());
    boxchar->set_page(page_);
    boxchar->AddBox(cluster_rect.x, cluster_rect.y,
                    cluster_rect.width, cluster_rect.height);
    start_byte_to_box[start_byte_index] = boxchar;
  } while (pango_layout_iter_next_cluster(cluster_iter));
  pango_layout_iter_free(cluster_iter);

  // There is a subtle bug in the cluster text reported by the PangoLayoutIter
  // on ligatured characters (eg. The word "Lam-Aliph" in arabic). To work
  // around this, we use text reported using the PangoGlyphIter which is
  // accurate.
  // TODO(ranjith): Revisit whether this is still needed in newer versions of
  // pango.
  vector<string> cluster_text;
  if (GetClusterStrings(&cluster_text)) {
    ASSERT_HOST(cluster_text.size() == start_byte_to_box.size());
    int ind = 0;
    for (map<int, BoxChar*>::iterator it = start_byte_to_box.begin();
         it != start_byte_to_box.end(); ++it, ++ind) {
      it->second->mutable_ch()->swap(cluster_text[ind]);
    }
  }

  // Append to the boxchars list in byte order.
  vector<BoxChar*> page_boxchars;
  page_boxchars.reserve(start_byte_to_box.size());
  string last_ch;
  for (map<int, BoxChar*>::const_iterator it = start_byte_to_box.begin();
       it != start_byte_to_box.end(); ++it) {
    if (it->second->ch() == kWordJoinerUTF8) {
      // Skip zero-width joiner characters (ZWJs) here.
      delete it->second;
    } else {
      page_boxchars.push_back(it->second);
    }
  }
  CorrectBoxPositionsToLayout(&page_boxchars);

  if (render_fullwidth_latin_) {
    for (map<int, BoxChar*>::iterator it = start_byte_to_box.begin();
         it != start_byte_to_box.end(); ++it) {
      // Convert fullwidth Latin characters to their halfwidth forms.
      string half(ConvertFullwidthLatinToBasicLatin(it->second->ch()));
      it->second->mutable_ch()->swap(half);
    }
  }

  // Merge the character boxes into word boxes if we are rendering n-grams.
  if (output_word_boxes_) {
    MergeBoxCharsToWords(&page_boxchars);
  }

  boxchars_.insert(boxchars_.end(), page_boxchars.begin(), page_boxchars.end());

  // Compute the page bounding box
  Box* page_box = NULL;
  Boxa* all_boxes = NULL;
  for (int i = 0; i < page_boxchars.size(); ++i) {
    if (page_boxchars[i]->box() == NULL) continue;
    if (all_boxes == NULL)
      all_boxes = boxaCreate(0);
    boxaAddBox(all_boxes, page_boxchars[i]->mutable_box(), L_CLONE);
  }
  boxaGetExtent(all_boxes, NULL, NULL, &page_box);
  boxaDestroy(&all_boxes);
  if (page_boxes_ == NULL)
    page_boxes_ = boxaCreate(0);
  boxaAddBox(page_boxes_, page_box, L_INSERT);
}

string tesseract::StringRenderer::ConvertBasicLatinToFullwidthLatin ( const string &  text) [static]

Definition at line 689 of file stringrenderer.cpp.

                           {
    // Convert printable and non-space 7-bit ASCII characters to
    // their fullwidth forms.
    if (IsInterchangeValid7BitAscii(*it) && isprint(*it) && !isspace(*it)) {
      // Convert by adding 0xFEE0 to the codepoint of 7-bit ASCII.
      char32 full_char = *it + 0xFEE0;
      full_str.append(EncodeAsUTF8(full_char));
    } else {
      full_str.append(it.utf8_data(), it.utf8_len());
    }
  }
  return full_str;
}

string tesseract::StringRenderer::ConvertFullwidthLatinToBasicLatin ( const string &  text) [static]

Definition at line 709 of file stringrenderer.cpp.

                           {
    char32 half_char = FullwidthToHalfwidth(*it);
    // Convert fullwidth Latin characters to their halfwidth forms
    // only if halfwidth forms are printable and non-space 7-bit ASCII.
    if (IsInterchangeValid7BitAscii(half_char) &&
        isprint(half_char) && !isspace(half_char)) {
      half_str.append(EncodeAsUTF8(half_char));
    } else {
      half_str.append(it.utf8_data(), it.utf8_len());
    }
  }
  return half_str;
}

void tesseract::StringRenderer::CorrectBoxPositionsToLayout ( vector< BoxChar * > *  boxchars) [protected]

Definition at line 598 of file stringrenderer.cpp.

                      {
    const double rotation = - pango_gravity_to_rotation(
        pango_context_get_base_gravity(pango_layout_get_context(layout_)));
    BoxChar::TranslateBoxes(page_width_ - h_margin_, v_margin_, boxchars);
    BoxChar::RotateBoxes(rotation, page_width_ - h_margin_, v_margin_,
                         0, boxchars->size(), boxchars);
  } else {
    BoxChar::TranslateBoxes(h_margin_, v_margin_, boxchars);
  }
}

int tesseract::StringRenderer::FindFirstPageBreakOffset ( const char *  text,
int  text_length 
) [protected]

Definition at line 277 of file stringrenderer.cpp.

                                                              {
  // Lays out a bounded prefix of `text` in layout_ and returns the byte
  // offset of the first line that no longer fits on the page. Returns 0 for
  // empty input, and the length of the laid-out prefix when every line fits.
  if (text_length == 0) return 0;

  // Space left on the page once the margins on both sides are removed. For
  // vertical text the usable width is what limits the layout height.
  const int usable_height = page_height_ - 2 * v_margin_;
  const int usable_width = page_width_ - 2 * h_margin_;
  const int layout_height_limit =
      vertical_text_ ? usable_width : usable_height;

  // Step over at most kMaxUnicodeBufLength characters so that only a bounded
  // prefix of the input is handed to Pango.
  const int kMaxUnicodeBufLength = 15000;
  const UNICHAR::const_iterator text_end = UNICHAR::end(text, text_length);
  UNICHAR::const_iterator cursor = UNICHAR::begin(text, text_length);
  int chars_seen = 0;
  while (chars_seen < kMaxUnicodeBufLength && cursor != text_end) {
    ++cursor;
    ++chars_seen;
  }
  int buf_length = cursor.utf8_data() - text;
  tlog(1, "len = %d  buf_len = %d\n", text_length, buf_length);
  pango_layout_set_text(layout_, text, buf_length);

  PangoLayoutIter* line_iter = NULL;
  { // Fontconfig caches some info here that is not freed before exit.
    DISABLE_HEAP_LEAK_CHECK;
    line_iter = pango_layout_get_iter(layout_);
  }

  // Walk the laid-out lines until one extends past the height limit, measured
  // from the top of the first line.
  bool have_page_top = false;
  int page_top = 0;
  int break_offset = buf_length;
  do {
    // Extents of the current line, converted from Pango units to pixels.
    PangoRectangle line_rect;
    pango_layout_iter_get_line_extents(line_iter, &line_rect, NULL);
    pango_extents_to_pixels(&line_rect, NULL);
    PangoLayoutLine* current_line =
        pango_layout_iter_get_line_readonly(line_iter);
    if (!have_page_top) {
      page_top = line_rect.y;
      have_page_top = true;
    }
    const int line_bottom = line_rect.y + line_rect.height;
    if (line_bottom - page_top > layout_height_limit) {
      // This line overflows the page: the break goes at its first byte.
      break_offset = current_line->start_index;
      tlog(1, "Found offset = %d\n", break_offset);
      break;
    }
  } while (pango_layout_iter_next_line(line_iter));
  pango_layout_iter_free(line_iter);
  return break_offset;
}

const PangoFontInfo& tesseract::StringRenderer::font ( ) const [inline]

Definition at line 126 of file stringrenderer.h.

                                    {
    // Read-only access to the font_ member; no copy is made.
    return font_;
  }
void tesseract::StringRenderer::FreePangoCairo ( ) [protected]

Definition at line 216 of file stringrenderer.cpp.

               {
    g_object_unref(layout_);
    layout_ = NULL;
  }
  if (cr_) {
    cairo_destroy(cr_);
    cr_ = NULL;
  }
  if (surface_) {
    cairo_surface_destroy(surface_);
    surface_ = NULL;
  }
}

const vector< BoxChar * > & tesseract::StringRenderer::GetBoxes ( ) const

Definition at line 321 of file stringrenderer.cpp.

bool tesseract::StringRenderer::GetClusterStrings ( vector< string > *  cluster_text) [protected]

Definition at line 348 of file stringrenderer.cpp.

     {
    PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
    if (!run) {
      // End of line NULL run marker
      tlog(2, "Found end of line marker\n");
      continue;
    }
    PangoGlyphItemIter cluster_iter;
    gboolean have_cluster;
    for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
                                                          run, full_text);
         have_cluster;
         have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
      const int start_byte_index = cluster_iter.start_index;
      const int end_byte_index = cluster_iter.end_index;
      string text = string(full_text + start_byte_index,
                           end_byte_index - start_byte_index);
      if (IsUTF8Whitespace(text.c_str())) {
        tlog(2, "Found whitespace\n");
        text = " ";
      }
      tlog(2, "start_byte=%d end_byte=%d : '%s'\n", start_byte_index,
           end_byte_index, text.c_str());
      if (add_ligatures_) {
        // Make sure the output box files have ligatured text in case the font
        // decided to use an unmapped glyph.
        text = LigatureTable::Get()->AddLigatures(text, NULL);
      }
      start_byte_to_text[start_byte_index] = text;
    }
  } while (pango_layout_iter_next_run(run_iter));
  pango_layout_iter_free(run_iter);

  cluster_text->clear();
  for (map<int, string>::const_iterator it = start_byte_to_text.begin();
       it != start_byte_to_text.end(); ++it) {
    cluster_text->push_back(it->second);
  }
  return cluster_text->size();
}

Boxa * tesseract::StringRenderer::GetPageBoxes ( ) const

Definition at line 325 of file stringrenderer.cpp.

const int tesseract::StringRenderer::h_margin ( ) const [inline]

Definition at line 129 of file stringrenderer.h.

                             {
    // Current value of h_margin_ (the constructor sets it to 50).
    return h_margin_;
  }
void tesseract::StringRenderer::InitPangoCairo ( ) [protected]

Definition at line 157 of file stringrenderer.cpp.

  {
    DISABLE_HEAP_LEAK_CHECK;
    layout_ = pango_cairo_create_layout(cr_);
  }

  if (vertical_text_) {
    PangoContext* context = pango_layout_get_context(layout_);
    pango_context_set_base_gravity(context, PANGO_GRAVITY_EAST);
    if (gravity_hint_strong_) {
      pango_context_set_gravity_hint(context, PANGO_GRAVITY_HINT_STRONG);
    }
    pango_layout_context_changed(layout_);
  }

  SetLayoutProperties();
}

string tesseract::StringRenderer::InsertWordJoiners ( const string &  text) [static]

Definition at line 666 of file stringrenderer.cpp.

                           {
    // Add the symbol to the output string.
    out_str.append(it.utf8_data(), it.utf8_len());
    // Check the next symbol.
    UNICHAR::const_iterator next_it = it;
    ++next_it;
    bool next_char_is_boundary = (next_it == it_end || *next_it == ' ');
    bool next_char_is_combiner = (next_it == it_end) ?
        false : IsCombiner(*next_it);
    if (*it != ' ' && *it != '\n' && !next_char_is_boundary &&
        !next_char_is_combiner) {
      out_str += kWordJoinerUTF8;
    }
  }
  return out_str;
}

int tesseract::StringRenderer::RenderAllFontsToImage ( double  min_coverage,
const char *  text,
int  text_length,
string *  font_used,
Pix **  pix 
)

Definition at line 828 of file stringrenderer.cpp.

                                                                          {
  // Renders `text` with the next available font that covers enough of it.
  //
  // Each call resumes the scan of FontUtils::ListAvailableFonts() at
  // font_index_. The first font whose score is positive and at least
  // total_chars_ * min_coverage is used to render the text into *image, a
  // small title line describing the font is OR-ed onto the image, and 0 is
  // returned. Once the list is exhausted the scan state is reset and the
  // last non-zero render offset is returned (-1 if there was none).
  // *font_used, when non-NULL, receives the name of a font that rendered
  // with a non-zero offset.
  *image = NULL;
  // Select a suitable font to render the title with.
  const char kTitleTemplate[] = "%s : %d hits = %.2f%%, raw = %d = %.2f%%";
  string title_font;
  if (!FontUtils::SelectFont(kTitleTemplate, strlen(kTitleTemplate),
                             &title_font, NULL)) {
    tprintf("WARNING: Could not find a font to render image title with!\n");
    title_font = "Arial";
  }
  title_font += " 8";
  tlog(1, "Selected title font: %s\n", title_font.c_str());
  if (font_used) font_used->clear();

  string orig_font = font_.DescriptionName();
  if (char_map_.empty()) {
    total_chars_ = 0;
    // Fill the hash table and use that for computing which fonts to use.
    for (UNICHAR::const_iterator it = UNICHAR::begin(text, text_length);
         it != UNICHAR::end(text, text_length); ++it) {
      ++total_chars_;
      ++char_map_[*it];
    }
    tprintf("Total chars = %d\n", total_chars_);
  }
  const vector<string>& all_fonts = FontUtils::ListAvailableFonts();
  for (int i = font_index_; i < all_fonts.size(); ++i) {
    // Advance the persistent cursor first so the next call starts after this
    // font whether or not it is accepted.
    ++font_index_;
    int raw_score = 0;
    int ok_chars = FontUtils::FontScore(char_map_, all_fonts[i], &raw_score,
                                        NULL);
    if (ok_chars > 0 && ok_chars >= total_chars_ * min_coverage) {
      set_font(all_fonts[i]);
      int offset = RenderToBinaryImage(text, text_length, 128, image);
      ClearBoxes();  // Get rid of them as they are garbage.
      const int kMaxTitleLength = 1024;
      char title[kMaxTitleLength];
      snprintf(title, kMaxTitleLength, kTitleTemplate,
               all_fonts[i].c_str(), ok_chars,
               100.0 * ok_chars / total_chars_, raw_score,
               100.0 * raw_score / char_map_.size());
      tprintf("%s\n", title);
      // This is a good font! Store the offset to return once we've tried all
      // the fonts.
      if (offset) {
        last_offset_ = offset;
        if (font_used) *font_used = all_fonts[i];
      }
      // Add the font to the image. The title is drawn with a vertical margin
      // one eighth of the normal one. The original margin is saved and put
      // back exactly: dividing and then multiplying by 8 drops the remainder
      // of the integer division (e.g. 50 -> 6 -> 48), which would shrink the
      // margin for every later render.
      set_font(title_font);
      const int saved_v_margin = v_margin_;
      v_margin_ = saved_v_margin / 8;
      Pix* title_image = NULL;
      RenderToBinaryImage(title, strlen(title), 128, &title_image);
      pixOr(*image, *image, title_image);
      pixDestroy(&title_image);

      v_margin_ = saved_v_margin;
      set_font(orig_font);
      // We return the real offset only after cycling through the list of fonts.
      return 0;
    } else {
      tprintf("Font %s failed with %d hits = %.2f%%\n",
              all_fonts[i].c_str(), ok_chars, 100.0 * ok_chars / total_chars_);
    }
  }
  // Every font has been tried: reset the scan state so a later call starts
  // over with a fresh character map.
  font_index_ = 0;
  char_map_.clear();
  return last_offset_ == 0 ? -1 : last_offset_;
}

int tesseract::StringRenderer::RenderToBinaryImage ( const char *  text,
int  text_length,
int  threshold,
Pix **  pix 
)

Definition at line 648 of file stringrenderer.cpp.

                                                                  {
  // Renders `text` through RenderToImage() and stores a thresholded binary
  // version of the result in *pix. The value returned by RenderToImage() is
  // passed straight back to the caller.
  Pix* rendered = NULL;
  const int page_offset = RenderToImage(text, text_length, &rendered);
  if (rendered == NULL) {
    // No image was produced, so the caller gets a NULL image as well.
    *pix = NULL;
    return page_offset;
  }
  // Convert to 8 bits first, then threshold; each intermediate image is
  // released as soon as the next stage has been produced from it.
  Pix* gray = pixConvertTo8(rendered, false);
  pixDestroy(&rendered);
  *pix = pixThresholdToBinary(gray, threshold);
  pixDestroy(&gray);
  return page_offset;
}

int tesseract::StringRenderer::RenderToGrayscaleImage ( const char *  text,
int  text_length,
Pix **  pix 
)

Definition at line 637 of file stringrenderer.cpp.

                                                      {
  // Renders `text` through RenderToImage() and stores an 8-bit conversion of
  // the result in *pix. The value returned by RenderToImage() is passed
  // straight back to the caller.
  Pix *orig_pix = NULL;
  int offset = RenderToImage(text, text_length, &orig_pix);
  if (orig_pix) {
    *pix = pixConvertTo8(orig_pix, false);
    pixDestroy(&orig_pix);
  } else {
    // Nothing was rendered. Store NULL, as RenderToBinaryImage() does, so the
    // caller never reads an output pointer this call did not assign.
    *pix = NULL;
  }
  return offset;
}

int tesseract::StringRenderer::RenderToImage ( const char *  text,
int  text_length,
Pix **  pix 
)

Definition at line 728 of file stringrenderer.cpp.

                                             {
  // Renders one page worth of `text` with the current font and settings.
  //
  // The page is the prefix of `text` that ends at the offset computed by
  // FindFirstPageBreakOffset(). That offset is also the return value; 0 is
  // returned without rendering when the offset is 0. When `pix` is non-NULL
  // any image it already points to is destroyed and the rendered page is
  // stored in *pix; when `pix` is NULL no image is produced. The boxes are
  // computed through ComputeClusterBoxes() in either case.
  if (pix && *pix) pixDestroy(pix);
  InitPangoCairo();

  const int page_offset = FindFirstPageBreakOffset(text, text_length);
  if (!page_offset) {
    // NOTE(review): this early return happens after InitPangoCairo() but
    // skips the FreePangoCairo() call at the end of the function - confirm
    // the objects created above are released elsewhere.
    return 0;
  }
  // Remember where this page's boxes will begin in boxchars_.
  start_box_ = boxchars_.size();

  if (!vertical_text_) {
    // Translate by the specified margin
    cairo_translate(cr_, h_margin_, v_margin_);
  } else {
    // Vertical text rendering is achieved by a two-step process of first
    // performing regular horizontal layout with character orientation set to
    // EAST, and then translating and rotating the layout before rendering onto
    // the desired image surface. The settings required for the former step are
    // done within InitPangoCairo().
    //
    // Translate to the top-right margin of page
    cairo_translate(cr_, page_width_ - h_margin_, v_margin_);
    // Rotate the layout
    double rotation = - pango_gravity_to_rotation(
        pango_context_get_base_gravity(pango_layout_get_context(layout_)));
    tlog(2, "Rotating by %f radians\n", rotation);
    cairo_rotate(cr_, rotation);
    pango_cairo_update_layout(cr_, layout_);
  }
  // Work on a private copy of this page's text; the steps below may rewrite
  // it before it is handed to the layout.
  string page_text(text, page_offset);
  if (render_fullwidth_latin_) {
    // Convert Basic Latin to their fullwidth forms.
    page_text = ConvertBasicLatinToFullwidthLatin(page_text);
  }
  if (strip_unrenderable_words_) {
    StripUnrenderableWords(&page_text);
  }
  // Only call DropUncoveredChars() when the font does not already cover the
  // whole page text.
  if (drop_uncovered_chars_ &&
      !font_.CoversUTF8Text(page_text.c_str(), page_text.length())) {
    int num_dropped = font_.DropUncoveredChars(&page_text);
    if (num_dropped) {
      tprintf("WARNING: Dropped %d uncovered characters\n", num_dropped);
    }
  }
  if (add_ligatures_) {
    // Add ligatures wherever possible, including custom ligatures.
    page_text = LigatureTable::Get()->AddLigatures(page_text, &font_);
  }
  if (underline_start_prob_ > 0) {
    SetWordUnderlineAttributes(page_text);
  }

  // Replace the text set by FindFirstPageBreakOffset() with the processed
  // page text.
  pango_layout_set_text(layout_, page_text.c_str(), page_text.length());

  if (pix) {
    // Set a white background for the target image surface.
    cairo_set_source_rgb(cr_, 1.0, 1.0, 1.0);  // sets drawing colour to white
    // Fill the surface with the active colour (if you don't do this, you will
    // be given a surface with a transparent background to draw on)
    cairo_paint(cr_);
    // Set the ink color to the configured pen color (pen_color_), which the
    // constructor initialises to all zeros.
    cairo_set_source_rgb(cr_, pen_color_[0], pen_color_[1], pen_color_[2]);
    // If the target surface or transformation properties of the cairo instance
    // have changed, update the pango layout to reflect this
    pango_cairo_update_layout(cr_, layout_);
    {
      DISABLE_HEAP_LEAK_CHECK;  // for Fontconfig
      // Draw the pango layout onto the cairo surface
      pango_cairo_show_layout(cr_, layout_);
    }
    *pix = CairoARGB32ToPixFormat(surface_);
  }
  // Boxes are computed from layout_, so this must run before the layout is
  // released by FreePangoCairo().
  ComputeClusterBoxes();
  FreePangoCairo();
  // Update internal state variables.
  ++page_;
  return page_offset;
}

void tesseract::StringRenderer::RotatePageBoxes ( float  rotation)

Definition at line 329 of file stringrenderer.cpp.

void tesseract::StringRenderer::set_add_ligatures ( bool  add_ligatures) [inline]

Definition at line 111 of file stringrenderer.h.

                                             {
    // Stores the flag that RenderToImage(), ComputeClusterBoxes() and
    // GetClusterStrings() check before calling LigatureTable::AddLigatures().
    add_ligatures_ = add_ligatures;
  }
void tesseract::StringRenderer::set_box_padding ( int  val) [inline]

Definition at line 96 of file stringrenderer.h.

                                {
    // Stores the padding that ComputeClusterBoxes() applies on every side of
    // each cluster rectangle (x and y are clamped at 0).
    box_padding_ = val;
  }
void tesseract::StringRenderer::set_char_spacing ( double  char_spacing) [inline]

Definition at line 67 of file stringrenderer.h.

                                             {
    // Stores the spacing that SetLayoutProperties() scales by PANGO_SCALE and
    // installs as a letter-spacing attribute when it is non-zero.
    char_spacing_ = char_spacing;
  }
void tesseract::StringRenderer::set_drop_uncovered_chars ( bool  val) [inline]

Definition at line 99 of file stringrenderer.h.

bool tesseract::StringRenderer::set_font ( const string &  desc)

Definition at line 133 of file stringrenderer.cpp.

void tesseract::StringRenderer::set_gravity_hint_strong ( bool  gravity_hint_strong) [inline]

Definition at line 77 of file stringrenderer.h.

                                                         {
    // Stores the flag that makes InitPangoCairo() request
    // PANGO_GRAVITY_HINT_STRONG; it is only consulted for vertical text.
    gravity_hint_strong_ = gravity_hint_strong;
  }
void tesseract::StringRenderer::set_h_margin ( const int  h_margin) [inline]

Definition at line 120 of file stringrenderer.h.

void tesseract::StringRenderer::set_leading ( int  leading) [inline]

Definition at line 70 of file stringrenderer.h.

                                {
    // Stores the line spacing that SetLayoutProperties() scales by
    // PANGO_SCALE and passes to pango_layout_set_spacing() when non-zero.
    leading_ = leading;
  }
void tesseract::StringRenderer::set_output_word_boxes ( bool  val) [inline]

Definition at line 105 of file stringrenderer.h.

                                       {
    // Stores the flag that makes ComputeClusterBoxes() merge the character
    // boxes into word boxes.
    output_word_boxes_ = val;
  }
void tesseract::StringRenderer::set_page ( int  page) [inline]

Definition at line 93 of file stringrenderer.h.

                          {
    // Stores the page number that ComputeClusterBoxes() stamps on each box;
    // RenderToImage() increments it after every rendered page.
    page_ = page;
  }
void tesseract::StringRenderer::set_pen_color ( double  r,
double  g,
double  b 
) [inline]

Definition at line 115 of file stringrenderer.h.

                                                   {
    // Stores the three pen color components that RenderToImage() passes to
    // cairo_set_source_rgb() before drawing the text.
    // NOTE(review): the class reference lists the member as
    // `int pen_color_ [3]`, so these double arguments are converted to int on
    // assignment - confirm whether fractional components are meant to be
    // kept.
    pen_color_[0] = r;
    pen_color_[1] = g;
    pen_color_[2] = b;
  }
void tesseract::StringRenderer::set_render_fullwidth_latin ( bool  render_fullwidth_latin) [inline]

Definition at line 80 of file stringrenderer.h.

                                                               {
    // Stores the flag that makes RenderToImage() convert the page text with
    // ConvertBasicLatinToFullwidthLatin() before layout.
    render_fullwidth_latin_ = render_fullwidth_latin;
  }
void tesseract::StringRenderer::set_resolution ( const int  resolution)

Definition at line 139 of file stringrenderer.cpp.

void tesseract::StringRenderer::set_strip_unrenderable_words ( bool  val) [inline]

Definition at line 102 of file stringrenderer.h.

void tesseract::StringRenderer::set_underline_continuation_prob ( const double  frac)

Definition at line 148 of file stringrenderer.cpp.

void tesseract::StringRenderer::set_underline_start_prob ( const double  frac)

Definition at line 144 of file stringrenderer.cpp.

void tesseract::StringRenderer::set_underline_style ( const PangoUnderline  style) [inline]

Definition at line 90 of file stringrenderer.h.

                                                       {
    // Stores the style that SetWordUnderlineAttributes() passes to
    // pango_attr_underline_new() (the constructor default is
    // PANGO_UNDERLINE_SINGLE).
    underline_style_ = style;
  }
void tesseract::StringRenderer::set_v_margin ( const int  v_margin) [inline]

Definition at line 123 of file stringrenderer.h.

void tesseract::StringRenderer::set_vertical_text ( bool  vertical_text) [inline]

Definition at line 74 of file stringrenderer.h.

                                             {
    // Stores the flag that switches InitPangoCairo() to EAST base gravity and
    // RenderToImage() to the translate-and-rotate rendering path.
    vertical_text_ = vertical_text;
  }
void tesseract::StringRenderer::SetLayoutProperties ( ) [protected]

Definition at line 179 of file stringrenderer.cpp.

                      {
    swap(max_width, max_height);
  }
  pango_layout_set_width(layout_, max_width * PANGO_SCALE);
  pango_layout_set_wrap(layout_, PANGO_WRAP_WORD);

  // Adjust character spacing
  PangoAttrList* attr_list = pango_attr_list_new();
  if (char_spacing_) {
    PangoAttribute* spacing_attr = pango_attr_letter_spacing_new(
        static_cast<int>(char_spacing_ * PANGO_SCALE + 0.5));
    spacing_attr->start_index = 0;
    spacing_attr->end_index = static_cast<guint>(-1);
    pango_attr_list_change(attr_list, spacing_attr);
  }
  pango_layout_set_attributes(layout_, attr_list);
  pango_attr_list_unref(attr_list);
  // Adjust line spacing
  if (leading_) {
    pango_layout_set_spacing(layout_, leading_ * PANGO_SCALE);
  }
}

void tesseract::StringRenderer::SetWordUnderlineAttributes ( const string &  page_text) [protected]

Definition at line 231 of file stringrenderer.cpp.

                                      {
    offset += SpanUTF8Whitespace(text + offset);
    if (offset == page_text.length()) break;

    int word_start = offset;
    int word_len = SpanUTF8NotWhitespace(text + offset);
    offset += word_len;
    if (started_underline) {
      // Should we continue the underline to the next word?
      if (RandBool(underline_continuation_prob_, &rand)) {
        // Continue the current underline to this word.
        und_attr->end_index = word_start + word_len;
      } else {
        // Otherwise end the current underline attribute at the end of the
        // previous word.
        pango_attr_list_insert(attr_list, und_attr);
        started_underline = false;
        und_attr = nullptr;
      }
    }
    if (!started_underline && RandBool(underline_start_prob_, &rand)) {
      // Start a new underline attribute
      und_attr = pango_attr_underline_new(underline_style_);
      und_attr->start_index = word_start;
      und_attr->end_index = word_start + word_len;
      started_underline = true;
    }
  }
  // Finish the current underline attribute at the end of the page.
  if (started_underline) {
    und_attr->end_index = page_text.length();
    pango_attr_list_insert(attr_list, und_attr);
  }
}

int tesseract::StringRenderer::StripUnrenderableWords ( string *  utf8_text) const

Definition at line 610 of file stringrenderer.cpp.

                                       {
    int space_len = SpanUTF8Whitespace(text + offset);
    output_text.append(text + offset, space_len);
    offset += space_len;
    if (offset == utf8_text->length()) break;

    int word_len = SpanUTF8NotWhitespace(text + offset);
    if (font_.CanRenderString(text + offset, word_len)) {
      output_text.append(text + offset, word_len);
    } else {
      ++num_dropped;
    }
    offset += word_len;
  }
  utf8_text->swap(output_text);

  if (num_dropped > 0) {
    tprintf("Stripped %d unrenderable words\n", num_dropped);
  }
  return num_dropped;
}

const int tesseract::StringRenderer::v_margin ( ) const [inline]

Definition at line 132 of file stringrenderer.h.

                             {
    // Current value of v_margin_ (the constructor sets it to 50).
    return v_margin_;
  }
void tesseract::StringRenderer::WriteAllBoxes ( const string &  filename)

Definition at line 342 of file stringrenderer.cpp.


Member Data Documentation

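bool tesseract::StringRenderer::add_ligatures_ [protected]

Definition at line 191 of file stringrenderer.h.

int tesseract::StringRenderer::box_padding_ [protected]

Definition at line 204 of file stringrenderer.h.

vector<BoxChar*> tesseract::StringRenderer::boxchars_ [protected]

Definition at line 203 of file stringrenderer.h.

hash_map<char32, inT64> tesseract::StringRenderer::char_map_ [protected]

Definition at line 209 of file stringrenderer.h.

double tesseract::StringRenderer::char_spacing_ [protected]

Definition at line 180 of file stringrenderer.h.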

cairo_t* tesseract::StringRenderer::cr_ [protected]

Definition at line 195 of file stringrenderer.h.

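bool tesseract::StringRenderer::drop_uncovered_chars_ [protected]

Definition at line 189 of file stringrenderer.h.

PangoFontInfo tesseract::StringRenderer::font_ [protected]

Definition at line 175 of file stringrenderer.h.

int tesseract::StringRenderer::font_index_ [protected]

Definition at line 211 of file stringrenderer.h.

bool tesseract::StringRenderer::gravity_hint_strong_ [protected]

Definition at line 183 of file stringrenderer.h.

int tesseract::StringRenderer::h_margin_ [protected]

Definition at line 177 of file stringrenderer.h.

int tesseract::StringRenderer::last_offset_ [protected]

Definition at line 212 of file stringrenderer.h.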

PangoLayout* tesseract::StringRenderer::layout_ [protected]

Definition at line 196 of file stringrenderer.h.

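int tesseract::StringRenderer::leading_ [protected]

Definition at line 181 of file stringrenderer.h.

bool tesseract::StringRenderer::output_word_boxes_ [protected]

Definition at line 192 of file stringrenderer.h.

int tesseract::StringRenderer::page_ [protected]

Definition at line 200 of file stringrenderer.h.

Boxa* tesseract::StringRenderer::page_boxes_ [protected]

Definition at line 206 of file stringrenderer.h.

int tesseract::StringRenderer::page_height_ [protected]

Definition at line 177 of file stringrenderer.h.

int tesseract::StringRenderer::page_width_ [protected]

Definition at line 177 of file stringrenderer.h.

int tesseract::StringRenderer::pen_color_[3] [protected]

Definition at line 179 of file stringrenderer.h.

bool tesseract::StringRenderer::render_fullwidth_latin_ [protected]

Definition at line 184 of file stringrenderer.h.

int tesseract::StringRenderer::resolution_ [protected]

Definition at line 181 of file stringrenderer.h.

int tesseract::StringRenderer::start_box_ [protected]

Definition at line 199 of file stringrenderer.h.

bool tesseract::StringRenderer::strip_unrenderable_words_ [protected]

Definition at line 190 of file stringrenderer.h.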

cairo_surface_t* tesseract::StringRenderer::surface_ [protected]

Definition at line 194 of file stringrenderer.h.

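int tesseract::StringRenderer::total_chars_ [protected]

Definition at line 210 of file stringrenderer.h.

double tesseract::StringRenderer::underline_continuation_prob_ [protected]

Definition at line 186 of file stringrenderer.h.

double tesseract::StringRenderer::underline_start_prob_ [protected]

Definition at line 185 of file stringrenderer.h.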

PangoUnderline tesseract::StringRenderer::underline_style_ [protected]

Definition at line 187 of file stringrenderer.h.

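int tesseract::StringRenderer::v_margin_ [protected]

Definition at line 177 of file stringrenderer.h.

bool tesseract::StringRenderer::vertical_text_ [protected]

Definition at line 182 of file stringrenderer.h.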


The documentation for this class was generated from the following files:

stringrenderer.h
stringrenderer.cpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines