|
tesseract 3.04.01
|
00001 /********************************************************************** 00002 * File: stringrenderer.h 00003 * Description: Class for rendering UTF-8 text to an image, and retrieving 00004 * bounding boxes around each grapheme cluster. 00005 * 00006 * Instances are created using a font description string 00007 * (eg. "Arial Italic 12"; see pango_font_info.h for the format) 00008 * and the page dimensions. Other renderer properties such as 00009 * spacing, ligaturization, as well a preprocessing behavior such 00010 * as removal of unrenderable words and a special n-gram mode may 00011 * be set using respective set_* methods. 00012 * 00013 * Author: Ranjith Unnikrishnan 00014 * Created: Mon Nov 18 2013 00015 * 00016 * (C) Copyright 2013, Google Inc. 00017 * Licensed under the Apache License, Version 2.0 (the "License"); 00018 * you may not use this file except in compliance with the License. 00019 * You may obtain a copy of the License at 00020 * http://www.apache.org/licenses/LICENSE-2.0 00021 * Unless required by applicable law or agreed to in writing, software 00022 * distributed under the License is distributed on an "AS IS" BASIS, 00023 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00024 * See the License for the specific language governing permissions and 00025 * limitations under the License. 00026 * 00027 **********************************************************************/ 00028 00029 #ifndef TESSERACT_TRAINING_STRINGRENDERER_H_ 00030 #define TESSERACT_TRAINING_STRINGRENDERER_H_ 00031 00032 #include <string> 00033 #include <vector> 00034 00035 #include "hashfn.h" 00036 #include "host.h" 00037 #include "pango_font_info.h" 00038 #include "pango/pango-layout.h" 00039 #include "pango/pangocairo.h" 00040 00041 struct Boxa; 00042 struct Pix; 00043 00044 namespace tesseract { 00045 00046 class BoxChar; 00047 00048 class StringRenderer { 00049 public: 00050 StringRenderer(const string& font_desc, int page_width, int page_height); 00051 ~StringRenderer(); 00052 00053 // Renders the text with the chosen font and returns the byte offset up to 00054 // which the text could be rendered so as to fit the specified page 00055 // dimensions. 00056 int RenderToImage(const char* text, int text_length, Pix** pix); 00057 int RenderToGrayscaleImage(const char* text, int text_length, Pix** pix); 00058 int RenderToBinaryImage(const char* text, int text_length, int threshold, 00059 Pix** pix); 00060 // Renders a line of text with all available fonts that were able to render 00061 // at least min_coverage fraction of the input text. Use 1.0 to require that 00062 // a font be able to render all the text. 00063 int RenderAllFontsToImage(double min_coverage, const char* text, 00064 int text_length, string* font_used, Pix** pix); 00065 00066 bool set_font(const string& desc); 00067 void set_char_spacing(double char_spacing) { 00068 char_spacing_ = char_spacing; 00069 } 00070 void set_leading(int leading) { 00071 leading_ = leading; 00072 } 00073 void set_resolution(const int resolution); 00074 void set_vertical_text(bool vertical_text) { 00075 vertical_text_ = vertical_text; 00076 } 00077 void set_gravity_hint_strong(bool gravity_hint_strong) { 00078 gravity_hint_strong_ = gravity_hint_strong; 00079 } 00080 void set_render_fullwidth_latin(bool render_fullwidth_latin) { 00081 render_fullwidth_latin_ = render_fullwidth_latin; 00082 } 00083 // Sets the probability (value in [0, 1]) of starting to render a word with an 00084 // underline. This implementation consider words to be space-delimited 00085 // sequences of characters. 00086 void set_underline_start_prob(const double frac); 00087 // Set the probability (value in [0, 1]) of continuing a started underline to 00088 // the next word. 00089 void set_underline_continuation_prob(const double frac); 00090 void set_underline_style(const PangoUnderline style) { 00091 underline_style_ = style; 00092 } 00093 void set_page(int page) { 00094 page_ = page; 00095 } 00096 void set_box_padding(int val) { 00097 box_padding_ = val; 00098 } 00099 void set_drop_uncovered_chars(bool val) { 00100 drop_uncovered_chars_ = val; 00101 } 00102 void set_strip_unrenderable_words(bool val) { 00103 strip_unrenderable_words_ = val; 00104 } 00105 void set_output_word_boxes(bool val) { 00106 output_word_boxes_ = val; 00107 } 00108 // Before rendering the string, replace latin characters with their optional 00109 // ligatured forms (such as "fi", "ffi" etc.) if the font_ covers those 00110 // unicodes. 00111 void set_add_ligatures(bool add_ligatures) { 00112 add_ligatures_ = add_ligatures; 00113 } 00114 // Set the rgb value of the text ink. Values range in [0, 1.0] 00115 void set_pen_color(double r, double g, double b) { 00116 pen_color_[0] = r; 00117 pen_color_[1] = g; 00118 pen_color_[2] = b; 00119 } 00120 void set_h_margin(const int h_margin) { 00121 h_margin_ = h_margin; 00122 } 00123 void set_v_margin(const int v_margin) { 00124 v_margin_ = v_margin; 00125 } 00126 const PangoFontInfo& font() const { 00127 return font_; 00128 } 00129 const int h_margin() const { 00130 return h_margin_; 00131 } 00132 const int v_margin() const { 00133 return v_margin_; 00134 } 00135 00136 // Get the boxchars of all clusters rendered thus far (or since the last call 00137 // to ClearBoxes()). 00138 const vector<BoxChar*>& GetBoxes() const; 00139 // Get the rendered page bounding boxes of all pages created thus far (or 00140 // since last call to ClearBoxes()). 00141 Boxa* GetPageBoxes() const; 00142 00143 // Rotate the boxes on the most recent page by the given rotation. 00144 void RotatePageBoxes(float rotation); 00145 // Delete all boxes. 00146 void ClearBoxes(); 00147 void WriteAllBoxes(const string& filename); 00148 // Removes space-delimited words from the string that are not renderable by 00149 // the current font and returns the count of such words. 00150 int StripUnrenderableWords(string* utf8_text) const; 00151 00152 // Insert a Word Joiner symbol (U+2060) between adjacent characters, excluding 00153 // spaces and combining types, in each word before rendering to ensure words 00154 // are not broken across lines. The output boxchars will not contain the 00155 // joiner. 00156 static string InsertWordJoiners(const string& text); 00157 00158 // Helper functions to convert fullwidth Latin and halfwidth Basic Latin. 00159 static string ConvertBasicLatinToFullwidthLatin(const string& text); 00160 static string ConvertFullwidthLatinToBasicLatin(const string& text); 00161 00162 protected: 00163 // Init and free local renderer objects. 00164 void InitPangoCairo(); 00165 void FreePangoCairo(); 00166 // Set rendering properties. 00167 void SetLayoutProperties(); 00168 void SetWordUnderlineAttributes(const string& page_text); 00169 // Compute bounding boxes around grapheme clusters. 00170 void ComputeClusterBoxes(); 00171 void CorrectBoxPositionsToLayout(vector<BoxChar*>* boxchars); 00172 bool GetClusterStrings(vector<string>* cluster_text); 00173 int FindFirstPageBreakOffset(const char* text, int text_length); 00174 00175 PangoFontInfo font_; 00176 // Page properties 00177 int page_width_, page_height_, h_margin_, v_margin_; 00178 // Text rendering properties 00179 int pen_color_[3]; 00180 double char_spacing_; 00181 int leading_, resolution_; 00182 bool vertical_text_; 00183 bool gravity_hint_strong_; 00184 bool render_fullwidth_latin_; 00185 double underline_start_prob_; 00186 double underline_continuation_prob_; 00187 PangoUnderline underline_style_; 00188 // Text filtering options 00189 bool drop_uncovered_chars_; 00190 bool strip_unrenderable_words_; 00191 bool add_ligatures_; 00192 bool output_word_boxes_; 00193 // Pango and cairo specific objects 00194 cairo_surface_t* surface_; 00195 cairo_t* cr_; 00196 PangoLayout* layout_; 00197 // Internal state of current page number, updated on successive calls to 00198 // RenderToImage() 00199 int start_box_; 00200 int page_; 00201 // Boxes and associated text for all pages rendered with RenderToImage() since 00202 // the last call to ClearBoxes(). 00203 vector<BoxChar*> boxchars_; 00204 int box_padding_; 00205 // Bounding boxes for pages since the last call to ClearBoxes(). 00206 Boxa* page_boxes_; 00207 00208 // Objects cached for subsequent calls to RenderAllFontsToImage() 00209 hash_map<char32, inT64> char_map_; // Time-saving char histogram. 00210 int total_chars_; // Number in the string to be rendered. 00211 int font_index_; // Index of next font to use in font list. 00212 int last_offset_; // Offset returned from last successful rendering 00213 00214 private: 00215 StringRenderer(const StringRenderer&); 00216 void operator=(const StringRenderer&); 00217 }; 00218 } // namespace tesseract 00219 00220 #endif // THIRD_PARTY_TESSERACT_TRAINING_STRINGRENDERER_H_