tesseract 3.04.01

training/stringrenderer.h

Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        stringrenderer.h
00003  * Description: Class for rendering UTF-8 text to an image, and retrieving
00004  *              bounding boxes around each grapheme cluster.
00005  *
00006  *              Instances are created using a font description string
00007  *              (eg. "Arial Italic 12"; see pango_font_info.h for the format)
00008  *              and the page dimensions. Other renderer properties such as
00009  *              spacing, ligaturization, as well as preprocessing behavior such
00010  *              as removal of unrenderable words and a special n-gram mode may
00011  *              be set using respective set_* methods.
00012  *
00013  * Author:      Ranjith Unnikrishnan
00014  * Created:     Mon Nov 18 2013
00015  *
00016  * (C) Copyright 2013, Google Inc.
00017  * Licensed under the Apache License, Version 2.0 (the "License");
00018  * you may not use this file except in compliance with the License.
00019  * You may obtain a copy of the License at
00020  * http://www.apache.org/licenses/LICENSE-2.0
00021  * Unless required by applicable law or agreed to in writing, software
00022  * distributed under the License is distributed on an "AS IS" BASIS,
00023  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00024  * See the License for the specific language governing permissions and
00025  * limitations under the License.
00026  *
00027  **********************************************************************/
00028 
00029 #ifndef TESSERACT_TRAINING_STRINGRENDERER_H_
00030 #define TESSERACT_TRAINING_STRINGRENDERER_H_
00031 
00032 #include <string>
00033 #include <vector>
00034 
00035 #include "hashfn.h"
00036 #include "host.h"
00037 #include "pango_font_info.h"
00038 #include "pango/pango-layout.h"
00039 #include "pango/pangocairo.h"
00040 
00041 struct Boxa;
00042 struct Pix;
00043 
00044 namespace tesseract {
00045 
00046 class BoxChar;
00047 
00048 class StringRenderer {
00049  public:
00050   StringRenderer(const string& font_desc, int page_width, int page_height);
00051   ~StringRenderer();
00052 
00053   // Renders the text with the chosen font and returns the byte offset up to
00054   // which the text could be rendered so as to fit the specified page
00055   // dimensions.
00056   int RenderToImage(const char* text, int text_length, Pix** pix);
00057   int RenderToGrayscaleImage(const char* text, int text_length, Pix** pix);
00058   int RenderToBinaryImage(const char* text, int text_length, int threshold,
00059                           Pix** pix);
00060   // Renders a line of text with all available fonts that were able to render
00061   // at least min_coverage fraction of the input text. Use 1.0 to require that
00062   // a font be able to render all the text.
00063   int RenderAllFontsToImage(double min_coverage, const char* text,
00064                             int text_length, string* font_used, Pix** pix);
00065 
00066   bool set_font(const string& desc);
00067   void set_char_spacing(double char_spacing) {
00068     char_spacing_ = char_spacing;
00069   }
00070   void set_leading(int leading) {
00071     leading_ = leading;
00072   }
00073   void set_resolution(const int resolution);
00074   void set_vertical_text(bool vertical_text) {
00075     vertical_text_ = vertical_text;
00076   }
00077   void set_gravity_hint_strong(bool gravity_hint_strong) {
00078     gravity_hint_strong_ = gravity_hint_strong;
00079   }
00080   void set_render_fullwidth_latin(bool render_fullwidth_latin) {
00081     render_fullwidth_latin_ = render_fullwidth_latin;
00082   }
00083   // Sets the probability (value in [0, 1]) of starting to render a word with an
00084   // underline. This implementation consider words to be space-delimited
00085   // sequences of characters.
00086   void set_underline_start_prob(const double frac);
00087   // Set the probability (value in [0, 1]) of continuing a started underline to
00088   // the next word.
00089   void set_underline_continuation_prob(const double frac);
00090   void set_underline_style(const PangoUnderline style) {
00091     underline_style_ = style;
00092   }
00093   void set_page(int page) {
00094     page_ = page;
00095   }
00096   void set_box_padding(int val) {
00097     box_padding_ = val;
00098   }
00099   void set_drop_uncovered_chars(bool val) {
00100     drop_uncovered_chars_ = val;
00101   }
00102   void set_strip_unrenderable_words(bool val) {
00103     strip_unrenderable_words_ = val;
00104   }
00105   void set_output_word_boxes(bool val) {
00106     output_word_boxes_ = val;
00107   }
00108   // Before rendering the string, replace latin characters with their optional
00109   // ligatured forms (such as "fi", "ffi" etc.) if the font_ covers those
00110   // unicodes.
00111   void set_add_ligatures(bool add_ligatures) {
00112     add_ligatures_ = add_ligatures;
00113   }
00114   // Set the rgb value of the text ink. Values range in [0, 1.0]
00115   void set_pen_color(double r, double g, double b) {
00116     pen_color_[0] = r;
00117     pen_color_[1] = g;
00118     pen_color_[2] = b;
00119   }
00120   void set_h_margin(const int h_margin) {
00121     h_margin_ = h_margin;
00122   }
00123   void set_v_margin(const int v_margin) {
00124     v_margin_ = v_margin;
00125   }
00126   const PangoFontInfo& font() const {
00127     return font_;
00128   }
00129   const int h_margin() const {
00130     return h_margin_;
00131   }
00132   const int v_margin() const {
00133     return v_margin_;
00134   }
00135 
00136   // Get the boxchars of all clusters rendered thus far (or since the last call
00137   // to ClearBoxes()).
00138   const vector<BoxChar*>& GetBoxes() const;
00139   // Get the rendered page bounding boxes of all pages created thus far (or
00140   // since last call to ClearBoxes()).
00141   Boxa* GetPageBoxes() const;
00142 
00143   // Rotate the boxes on the most recent page by the given rotation.
00144   void RotatePageBoxes(float rotation);
00145   // Delete all boxes.
00146   void ClearBoxes();
00147   void WriteAllBoxes(const string& filename);
00148   // Removes space-delimited words from the string that are not renderable by
00149   // the current font and returns the count of such words.
00150   int StripUnrenderableWords(string* utf8_text) const;
00151 
00152   // Insert a Word Joiner symbol (U+2060) between adjacent characters, excluding
00153   // spaces and combining types, in each word before rendering to ensure words
00154   // are not broken across lines. The output boxchars will not contain the
00155   // joiner.
00156   static string InsertWordJoiners(const string& text);
00157 
00158   // Helper functions to convert fullwidth Latin and halfwidth Basic Latin.
00159   static string ConvertBasicLatinToFullwidthLatin(const string& text);
00160   static string ConvertFullwidthLatinToBasicLatin(const string& text);
00161 
00162  protected:
00163   // Init and free local renderer objects.
00164   void InitPangoCairo();
00165   void FreePangoCairo();
00166   // Set rendering properties.
00167   void SetLayoutProperties();
00168   void SetWordUnderlineAttributes(const string& page_text);
00169   // Compute bounding boxes around grapheme clusters.
00170   void ComputeClusterBoxes();
00171   void CorrectBoxPositionsToLayout(vector<BoxChar*>* boxchars);
00172   bool GetClusterStrings(vector<string>* cluster_text);
00173   int FindFirstPageBreakOffset(const char* text, int text_length);
00174 
00175   PangoFontInfo font_;
00176   // Page properties
00177   int page_width_, page_height_, h_margin_, v_margin_;
00178   // Text rendering properties
00179   int pen_color_[3];
00180   double char_spacing_;
00181   int leading_, resolution_;
00182   bool vertical_text_;
00183   bool gravity_hint_strong_;
00184   bool render_fullwidth_latin_;
00185   double underline_start_prob_;
00186   double underline_continuation_prob_;
00187   PangoUnderline underline_style_;
00188   // Text filtering options
00189   bool drop_uncovered_chars_;
00190   bool strip_unrenderable_words_;
00191   bool add_ligatures_;
00192   bool output_word_boxes_;
00193   // Pango and cairo specific objects
00194   cairo_surface_t* surface_;
00195   cairo_t* cr_;
00196   PangoLayout* layout_;
00197   // Internal state of current page number, updated on successive calls to
00198   // RenderToImage()
00199   int start_box_;
00200   int page_;
00201   // Boxes and associated text for all pages rendered with RenderToImage() since
00202   // the last call to ClearBoxes().
00203   vector<BoxChar*> boxchars_;
00204   int box_padding_;
00205   // Bounding boxes for pages since the last call to ClearBoxes().
00206   Boxa* page_boxes_;
00207 
00208   // Objects cached for subsequent calls to RenderAllFontsToImage()
00209   hash_map<char32, inT64> char_map_;  // Time-saving char histogram.
00210   int total_chars_;   // Number in the string to be rendered.
00211   int font_index_;    // Index of next font to use in font list.
00212   int last_offset_;   // Offset returned from last successful rendering
00213 
00214  private:
00215   StringRenderer(const StringRenderer&);
00216   void operator=(const StringRenderer&);
00217 };
00218 }  // namespace tesseract
00219 
00220 #endif  // TESSERACT_TRAINING_STRINGRENDERER_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines