tesseract 3.04.01

training/text2image.cpp

Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        text2image.cpp
00003  * Description: Program to generate OCR training pages. Given a text file it
00004  *              outputs an image with a given font and degradation.
00005  *
00006  *              Note that since the results depend on the fonts available on
00007  *              your system, running the code on a different machine, or
00008  *              different OS, or even at a different time on the same machine,
00009  *              may produce different fonts even if --font is given explicitly.
00010  *              To see names of available fonts, use --list_available_fonts with
00011  *              the appropriate --fonts_dir path.
00012  *              Specifying --use_only_legacy_fonts will restrict the available
00013  *              fonts to those listed in legacy_fonts.h
00014  *
00015  * Authors:     Ranjith Unnikrishnan, Ray Smith
00016  * Created:     Tue Nov 19 2013
00017  *
00018  * (C) Copyright 2013, Google Inc.
00019  * Licensed under the Apache License, Version 2.0 (the "License");
00020  * you may not use this file except in compliance with the License.
00021  * You may obtain a copy of the License at
00022  * http://www.apache.org/licenses/LICENSE-2.0
00023  * Unless required by applicable law or agreed to in writing, software
00024  * distributed under the License is distributed on an "AS IS" BASIS,
00025  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00026  * See the License for the specific language governing permissions and
00027  * limitations under the License.
00028  *
00029  **********************************************************************/
00030 
00031 #include <stdlib.h>
00032 #include <string.h>
00033 #include <algorithm>
00034 #include <iostream>
00035 #include <map>
00036 #include <string>
00037 #include <utility>
00038 #include <vector>
00039 
00040 #include "allheaders.h"  // from leptonica
00041 #include "boxchar.h"
00042 #include "commandlineflags.h"
00043 #include "degradeimage.h"
00044 #include "errcode.h"
00045 #include "fileio.h"
00046 #include "helpers.h"
00047 #include "normstrngs.h"
00048 #include "stringrenderer.h"
00049 #include "tlog.h"
00050 #include "unicharset.h"
00051 #include "util.h"
00052 
00053 #ifdef USE_STD_NAMESPACE
00054 using std::make_pair;
00055 using std::map;
00056 using std::pair;
00057 #endif
00058 
00059 // A number with which to initialize the random number generator.
00060 const int kRandomSeed = 0x18273645;
00061 
00062 // The text input file.
00063 STRING_PARAM_FLAG(text, "", "File name of text input to process");
00064 
00065 // The text output file.
00066 STRING_PARAM_FLAG(outputbase, "", "Basename for output image/box file");
00067 
00068 // Degrade the rendered image to mimic scanner quality.
00069 BOOL_PARAM_FLAG(degrade_image, true,
00070                 "Degrade rendered image with speckle noise, dilation/erosion "
00071                 "and rotation");
00072 
00073 // Degradation to apply to the image.
00074 INT_PARAM_FLAG(exposure, 0, "Exposure level in photocopier");
00075 
00076 // Output image resolution.
00077 INT_PARAM_FLAG(resolution, 300, "Pixels per inch");
00078 
00079 // Width of output image (in pixels).
00080 INT_PARAM_FLAG(xsize, 3600, "Width of output image");
00081 
00082 // Max height of output image (in pixels).
00083 INT_PARAM_FLAG(ysize, 4800, "Height of output image");
00084 
00085 // Margin around text (in pixels).
00086 INT_PARAM_FLAG(margin, 100, "Margin round edges of image");
00087 
00088 // Size of text (in points).
00089 INT_PARAM_FLAG(ptsize, 12, "Size of printed text");
00090 
00091 // Inter-character space (in ems).
00092 DOUBLE_PARAM_FLAG(char_spacing, 0, "Inter-character space in ems");
00093 
00094 // Sets the probability (value in [0, 1]) of starting to render a word with an
00095 // underline. Words are assumed to be space-delimited.
00096 DOUBLE_PARAM_FLAG(underline_start_prob, 0,
00097                   "Fraction of words to underline (value in [0,1])");
00098 // Set the probability (value in [0, 1]) of continuing a started underline to
00099 // the next word.
00100 DOUBLE_PARAM_FLAG(underline_continuation_prob, 0,
00101                   "Fraction of words to underline (value in [0,1])");
00102 
00103 // Inter-line space (in pixels).
00104 INT_PARAM_FLAG(leading, 12, "Inter-line space (in pixels)");
00105 
00106 // Layout and glyph orientation on rendering.
00107 STRING_PARAM_FLAG(writing_mode, "horizontal",
00108                   "Specify one of the following writing"
00109                   " modes.\n"
00110                   "'horizontal' : Render regular horizontal text. (default)\n"
00111                   "'vertical' : Render vertical text. Glyph orientation is"
00112                   " selected by Pango.\n"
00113                   "'vertical-upright' : Render vertical text. Glyph "
00114                   " orientation is set to be upright.");
00115 
00116 INT_PARAM_FLAG(box_padding, 0, "Padding around produced bounding boxes");
00117 
00118 BOOL_PARAM_FLAG(strip_unrenderable_words, true,
00119                 "Remove unrenderable words from source text");
00120 
00121 // Font name.
00122 STRING_PARAM_FLAG(font, "Arial", "Font description name to use");
00123 
00124 BOOL_PARAM_FLAG(ligatures, false,
00125                 "Rebuild and render ligatures");
00126 
00127 BOOL_PARAM_FLAG(find_fonts, false,
00128                 "Search for all fonts that can render the text");
00129 BOOL_PARAM_FLAG(render_per_font, true,
00130                 "If find_fonts==true, render each font to its own image. "
00131                 "Image filenames are of the form output_name.font_name.tif");
00132 DOUBLE_PARAM_FLAG(min_coverage, 1.0,
00133                   "If find_fonts==true, the minimum coverage the font has of "
00134                   "the characters in the text file to include it, between "
00135                   "0 and 1.");
00136 
00137 BOOL_PARAM_FLAG(list_available_fonts, false, "List available fonts and quit.");
00138 
00139 BOOL_PARAM_FLAG(render_ngrams, false, "Put each space-separated entity from the"
00140                 " input file into one bounding box. The ngrams in the input"
00141                 " file will be randomly permuted before rendering (so that"
00142                 " there is sufficient variety of characters on each line).");
00143 
00144 BOOL_PARAM_FLAG(output_word_boxes, false,
00145                 "Output word bounding boxes instead of character boxes. "
00146                 "This is used for Cube training, and implied by "
00147                 "--render_ngrams.");
00148 
00149 STRING_PARAM_FLAG(unicharset_file, "",
00150                   "File with characters in the unicharset. If --render_ngrams"
00151                   " is true and --unicharset_file is specified, ngrams with"
00152                   " characters that are not in unicharset will be omitted");
00153 
00154 BOOL_PARAM_FLAG(bidirectional_rotation, false,
00155                 "Rotate the generated characters both ways.");
00156 
00157 BOOL_PARAM_FLAG(only_extract_font_properties, false,
00158                 "Assumes that the input file contains a list of ngrams. Renders"
00159                 " each ngram, extracts spacing properties and records them in"
00160                 " output_base/[font_name].fontinfo file.");
00161 
00162 // Use these flags to output zero-padded, square individual character images
00163 BOOL_PARAM_FLAG(output_individual_glyph_images, false,
00164                 "If true also outputs individual character images");
00165 INT_PARAM_FLAG(glyph_resized_size, 0,
00166                "Each glyph is square with this side length in pixels");
00167 INT_PARAM_FLAG(glyph_num_border_pixels_to_pad, 0,
00168                "Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad");
00169 
00170 namespace tesseract {
00171 
// Horizontal spacing metrics recorded for one unichar, together with any
// per-successor gaps stored in kerned_x_gaps.
struct SpacingProperties {
  // Default: both gaps are zero and no kerned gaps are recorded.
  SpacingProperties() {
    x_gap_before = 0;
    x_gap_after = 0;
  }
  // Initializes the gap before and the gap after, in that order.
  SpacingProperties(int gap_before, int gap_after)
      : x_gap_before(gap_before),
        x_gap_after(gap_after) {}

  // The two gap values below come from the FT_Glyph_Metrics struct used by
  // the FreeType font engine.
  // Horizontal x bearing.
  int x_gap_before;
  // Horizontal advance - x_gap_before - width.
  int x_gap_after;
  // Gap values stored per unichar string key.
  map<string, int> kerned_x_gaps;
};
00181 
00182 static bool IsWhitespaceBox(const BoxChar* boxchar) {
00183   return (boxchar->box() == NULL ||
00184           SpanUTF8Whitespace(boxchar->ch().c_str()));
00185 }
00186 
// Returns a copy of |in| in which every non-overlapping occurrence of
// |oldsub|, found scanning left to right, is replaced by |newsub|.
// An empty |oldsub| matches nothing, so |in| is returned unchanged.
static string StringReplace(const string& in,
                            const string& oldsub, const string& newsub) {
  // An empty pattern is "found" at the current position without ever
  // advancing the scan, which would loop forever appending |newsub|.
  if (oldsub.empty()) return in;

  string out;
  // Use the string's own index type so the comparison against npos is exact.
  string::size_type start_pos = 0;
  for (;;) {
    const string::size_type pos = in.find(oldsub, start_pos);
    if (pos == string::npos) break;
    // Copy the untouched text before the match, then the replacement.
    out.append(in, start_pos, pos - start_pos);
    out.append(newsub);
    start_pos = pos + oldsub.length();
  }
  // Copy whatever follows the last match (possibly the whole input).
  out.append(in, start_pos, string::npos);
  return out;
}
00201 
00202 // Assumes that each word (whitespace-separated entity) in text is a bigram.
00203 // Renders the bigrams and calls FontInfo::GetSpacingProperties() to
00204 // obtain spacing information. Produces the output .fontinfo file with a line
00205 // per unichar of the form:
00206 // unichar space_before space_after kerned1 kerned_space1 kerned2 ...
00207 // Fox example, if unichar "A" has spacing of 0 pixels before and -1 pixels
00208 // after, is kerned with "V" resulting in spacing of "AV" to be -7 and kerned
00209 // with "T", such that "AT" has spacing of -5, the entry/line for unichar "A"
00210 // in .fontinfo file will be:
00211 // A 0 -1 T -5 V -7
00212 void ExtractFontProperties(const string &utf8_text,
00213                            StringRenderer *render,
00214                            const string &output_base) {
00215   map<string, SpacingProperties> spacing_map;
00216   map<string, SpacingProperties>::iterator spacing_map_it0;
00217   map<string, SpacingProperties>::iterator spacing_map_it1;
00218   int x_bearing, x_advance;
00219   int len = utf8_text.length();
00220   int offset = 0;
00221   const char* text = utf8_text.c_str();
00222   while (offset < len) {
00223     offset += render->RenderToImage(text + offset, strlen(text + offset), NULL);
00224     const vector<BoxChar*> &boxes = render->GetBoxes();
00225 
00226     // If the page break split a bigram, correct the offset so we try the bigram
00227     // on the next iteration.
00228     if (boxes.size() > 2 && !IsWhitespaceBox(boxes[boxes.size() - 1]) &&
00229         IsWhitespaceBox(boxes[boxes.size() - 2])) {
00230       if (boxes.size() > 3) {
00231         tprintf("WARNING: Adjusting to bad page break after '%s%s'\n",
00232                 boxes[boxes.size() - 4]->ch().c_str(),
00233                 boxes[boxes.size() - 3]->ch().c_str());
00234       }
00235       offset -= boxes[boxes.size() - 1]->ch().size();
00236     }
00237 
00238     for (int b = 0; b < boxes.size(); b += 2) {
00239       while (b < boxes.size() && IsWhitespaceBox(boxes[b])) ++b;
00240       if (b + 1 >= boxes.size()) break;
00241       const string &ch0 = boxes[b]->ch();
00242       // We encountered a ligature. This happens in at least two scenarios:
00243       // One is when the rendered bigram forms a grapheme cluster (eg. the
00244       // second character in the bigram is a combining vowel), in which case we
00245       // correctly output only one bounding box.
00246       // A second far less frequent case is when caused some fonts like 'DejaVu
00247       // Sans Ultra-Light' force Pango to render a ligatured character even if
00248       // the input consists of the separated characters.  NOTE(ranjith): As per
00249       // behdad@ this is not currently controllable at the level of the Pango
00250       // API.
00251       // Safeguard against these cases here by just skipping the bigram.
00252       if (IsWhitespaceBox(boxes[b+1])) {
00253         continue;
00254       }
00255       int xgap = (boxes[b+1]->box()->x -
00256                   (boxes[b]->box()->x + boxes[b]->box()->w));
00257       spacing_map_it0 = spacing_map.find(ch0);
00258       int ok_count = 0;
00259       if (spacing_map_it0 == spacing_map.end() &&
00260           render->font().GetSpacingProperties(ch0, &x_bearing, &x_advance)) {
00261         spacing_map[ch0] = SpacingProperties(
00262             x_bearing, x_advance - x_bearing - boxes[b]->box()->w);
00263         spacing_map_it0 = spacing_map.find(ch0);
00264         ++ok_count;
00265       }
00266       const string &ch1 = boxes[b+1]->ch();
00267       tlog(3, "%s%s\n", ch0.c_str(), ch1.c_str());
00268       spacing_map_it1 = spacing_map.find(ch1);
00269       if (spacing_map_it1 == spacing_map.end() &&
00270           render->font().GetSpacingProperties(ch1, &x_bearing, &x_advance)) {
00271         spacing_map[ch1] = SpacingProperties(
00272             x_bearing, x_advance - x_bearing - boxes[b+1]->box()->w);
00273         spacing_map_it1 = spacing_map.find(ch1);
00274         ++ok_count;
00275       }
00276       if (ok_count == 2 && xgap != (spacing_map_it0->second.x_gap_after +
00277                                     spacing_map_it1->second.x_gap_before)) {
00278         spacing_map_it0->second.kerned_x_gaps[ch1] = xgap;
00279       }
00280     }
00281     render->ClearBoxes();
00282   }
00283   string output_string;
00284   const int kBufSize = 1024;
00285   char buf[kBufSize];
00286   snprintf(buf, kBufSize, "%d\n", static_cast<int>(spacing_map.size()));
00287   output_string.append(buf);
00288   map<string, SpacingProperties>::const_iterator spacing_map_it;
00289   for (spacing_map_it = spacing_map.begin();
00290        spacing_map_it != spacing_map.end(); ++spacing_map_it) {
00291     snprintf(buf, kBufSize,
00292              "%s %d %d %d", spacing_map_it->first.c_str(),
00293              spacing_map_it->second.x_gap_before,
00294              spacing_map_it->second.x_gap_after,
00295              static_cast<int>(spacing_map_it->second.kerned_x_gaps.size()));
00296     output_string.append(buf);
00297     map<string, int>::const_iterator kern_it;
00298     for (kern_it = spacing_map_it->second.kerned_x_gaps.begin();
00299          kern_it != spacing_map_it->second.kerned_x_gaps.end(); ++kern_it) {
00300       snprintf(buf, kBufSize,
00301                " %s %d", kern_it->first.c_str(), kern_it->second);
00302       output_string.append(buf);
00303     }
00304     output_string.append("\n");
00305   }
00306   File::WriteStringToFileOrDie(output_string, output_base + ".fontinfo");
00307 }
00308 
00309 bool MakeIndividualGlyphs(Pix* pix,
00310                           const vector<BoxChar*>& vbox,
00311                           const int input_tiff_page) {
00312   // If checks fail, return false without exiting text2image
00313   if (!pix) {
00314     tprintf("ERROR: MakeIndividualGlyphs(): Input Pix* is NULL\n");
00315     return false;
00316   } else if (FLAGS_glyph_resized_size <= 0) {
00317     tprintf("ERROR: --glyph_resized_size must be positive\n");
00318     return false;
00319   } else if (FLAGS_glyph_num_border_pixels_to_pad < 0) {
00320     tprintf("ERROR: --glyph_num_border_pixels_to_pad must be 0 or positive\n");
00321     return false;
00322   }
00323 
00324   const int n_boxes = vbox.size();
00325   int n_boxes_saved = 0;
00326   int current_tiff_page = 0;
00327   int y_previous = 0;
00328   static int glyph_count = 0;
00329   for (int i = 0; i < n_boxes; i++) {
00330     // Get one bounding box
00331     Box* b = vbox[i]->mutable_box();
00332     if (!b) continue;
00333     const int x = b->x;
00334     const int y = b->y;
00335     const int w = b->w;
00336     const int h = b->h;
00337     // Check present tiff page (for multipage tiff)
00338     if (y < y_previous-pixGetHeight(pix)/10) {
00339       tprintf("ERROR: Wrap-around encountered, at i=%d\n", i);
00340       current_tiff_page++;
00341     }
00342     if (current_tiff_page < input_tiff_page) continue;
00343     else if (current_tiff_page > input_tiff_page) break;
00344     // Check box validity
00345     if (x < 0 || y < 0 ||
00346         (x+w-1) >= pixGetWidth(pix) ||
00347         (y+h-1) >= pixGetHeight(pix)) {
00348       tprintf("ERROR: MakeIndividualGlyphs(): Index out of range, at i=%d"
00349               " (x=%d, y=%d, w=%d, h=%d\n)", i, x, y, w, h);
00350       continue;
00351     } else if (w < FLAGS_glyph_num_border_pixels_to_pad &&
00352                h < FLAGS_glyph_num_border_pixels_to_pad) {
00353       tprintf("ERROR: Input image too small to be a character, at i=%d\n", i);
00354       continue;
00355     }
00356     // Crop the boxed character
00357     Pix* pix_glyph = pixClipRectangle(pix, b, NULL);
00358     if (!pix_glyph) {
00359       tprintf("ERROR: MakeIndividualGlyphs(): Failed to clip, at i=%d\n", i);
00360       continue;
00361     }
00362     // Resize to square
00363     Pix* pix_glyph_sq = pixScaleToSize(pix_glyph,
00364                                        FLAGS_glyph_resized_size,
00365                                        FLAGS_glyph_resized_size);
00366     if (!pix_glyph_sq) {
00367       tprintf("ERROR: MakeIndividualGlyphs(): Failed to resize, at i=%d\n", i);
00368       continue;
00369     }
00370     // Zero-pad
00371     Pix* pix_glyph_sq_pad = pixAddBorder(pix_glyph_sq,
00372                                          FLAGS_glyph_num_border_pixels_to_pad,
00373                                          0);
00374     if (!pix_glyph_sq_pad) {
00375       tprintf("ERROR: MakeIndividualGlyphs(): Failed to zero-pad, at i=%d\n",
00376               i);
00377       continue;
00378     }
00379     // Write out
00380     Pix* pix_glyph_sq_pad_8 = pixConvertTo8(pix_glyph_sq_pad, false);
00381     char filename[1024];
00382     snprintf(filename, 1024, "%s_%d.jpg", FLAGS_outputbase.c_str(),
00383              glyph_count++);
00384     if (pixWriteJpeg(filename, pix_glyph_sq_pad_8, 100, 0)) {
00385       tprintf("ERROR: MakeIndividualGlyphs(): Failed to write JPEG to %s,"
00386               " at i=%d\n", filename, i);
00387       continue;
00388     }
00389 
00390     pixDestroy(&pix_glyph);
00391     pixDestroy(&pix_glyph_sq);
00392     pixDestroy(&pix_glyph_sq_pad);
00393     pixDestroy(&pix_glyph_sq_pad_8);
00394     n_boxes_saved++;
00395     y_previous = y;
00396   }
00397   if (n_boxes_saved == 0) {
00398     return false;
00399   } else {
00400     tprintf("Total number of characters saved = %d\n", n_boxes_saved);
00401     return true;
00402   }
00403 }
00404 }  // namespace tesseract
00405 
00406 using tesseract::DegradeImage;
00407 using tesseract::ExtractFontProperties;
00408 using tesseract::File;
00409 using tesseract::FontUtils;
00410 using tesseract::SpanUTF8NotWhitespace;
00411 using tesseract::SpanUTF8Whitespace;
00412 using tesseract::StringRenderer;
00413 
00414 int main(int argc, char** argv) {
00415   tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);
00416 
00417   if (FLAGS_list_available_fonts) {
00418     const vector<string>& all_fonts = FontUtils::ListAvailableFonts();
00419     for (int i = 0; i < all_fonts.size(); ++i) {
00420       tprintf("%3d: %s\n", i, all_fonts[i].c_str());
00421       ASSERT_HOST_MSG(FontUtils::IsAvailableFont(all_fonts[i].c_str()),
00422                       "Font %s is unrecognized.\n", all_fonts[i].c_str());
00423     }
00424     return EXIT_SUCCESS;
00425   }
00426   // Check validity of input flags.
00427   ASSERT_HOST_MSG(!FLAGS_text.empty(), "Text file missing!\n");
00428   ASSERT_HOST_MSG(!FLAGS_outputbase.empty(), "Output file missing!\n");
00429   ASSERT_HOST_MSG(FLAGS_render_ngrams || FLAGS_unicharset_file.empty(),
00430                   "Use --unicharset_file only if --render_ngrams is set.\n");
00431 
00432   if (!FLAGS_find_fonts && !FontUtils::IsAvailableFont(FLAGS_font.c_str())) {
00433     string pango_name;
00434     if (!FontUtils::IsAvailableFont(FLAGS_font.c_str(), &pango_name)) {
00435       tprintf("Could not find font named %s. Pango suggested font %s\n",
00436               FLAGS_font.c_str(), pango_name.c_str());
00437       TLOG_FATAL("Please correct --font arg.");
00438     }
00439   }
00440 
00441   if (FLAGS_render_ngrams)
00442     FLAGS_output_word_boxes = true;
00443 
00444   char font_desc_name[1024];
00445   snprintf(font_desc_name, 1024, "%s %d", FLAGS_font.c_str(),
00446            static_cast<int>(FLAGS_ptsize));
00447   StringRenderer render(font_desc_name, FLAGS_xsize, FLAGS_ysize);
00448   render.set_add_ligatures(FLAGS_ligatures);
00449   render.set_leading(FLAGS_leading);
00450   render.set_resolution(FLAGS_resolution);
00451   render.set_char_spacing(FLAGS_char_spacing * FLAGS_ptsize);
00452   render.set_h_margin(FLAGS_margin);
00453   render.set_v_margin(FLAGS_margin);
00454   render.set_output_word_boxes(FLAGS_output_word_boxes);
00455   render.set_box_padding(FLAGS_box_padding);
00456   render.set_strip_unrenderable_words(FLAGS_strip_unrenderable_words);
00457   render.set_underline_start_prob(FLAGS_underline_start_prob);
00458   render.set_underline_continuation_prob(FLAGS_underline_continuation_prob);
00459 
00460   // Set text rendering orientation and their forms.
00461   if (FLAGS_writing_mode == "horizontal") {
00462     // Render regular horizontal text (default).
00463     render.set_vertical_text(false);
00464     render.set_gravity_hint_strong(false);
00465     render.set_render_fullwidth_latin(false);
00466   } else if (FLAGS_writing_mode == "vertical") {
00467     // Render vertical text. Glyph orientation is selected by Pango.
00468     render.set_vertical_text(true);
00469     render.set_gravity_hint_strong(false);
00470     render.set_render_fullwidth_latin(false);
00471   } else if (FLAGS_writing_mode == "vertical-upright") {
00472     // Render vertical text. Glyph orientation is set to be upright.
00473     // Also Basic Latin characters are converted to their fullwidth forms
00474     // on rendering, since fullwidth Latin characters are well designed to fit
00475     // vertical text lines, while .box files store halfwidth Basic Latin
00476     // unichars.
00477     render.set_vertical_text(true);
00478     render.set_gravity_hint_strong(true);
00479     render.set_render_fullwidth_latin(true);
00480   } else {
00481     TLOG_FATAL("Invalid writing mode : %s\n", FLAGS_writing_mode.c_str());
00482   }
00483 
00484   string src_utf8;
00485   // This c_str is NOT redundant!
00486   File::ReadFileToStringOrDie(FLAGS_text.c_str(), &src_utf8);
00487 
00488   // Remove the unicode mark if present.
00489   if (strncmp(src_utf8.c_str(), "\xef\xbb\xbf", 3) == 0) {
00490     src_utf8.erase(0, 3);
00491   }
00492   tlog(1, "Render string of size %d\n", src_utf8.length());
00493 
00494   if (FLAGS_render_ngrams || FLAGS_only_extract_font_properties) {
00495     // Try to preserve behavior of old text2image by expanding inter-word
00496     // spaces by a factor of 4.
00497     const string kSeparator = FLAGS_render_ngrams ? "    " : " ";
00498     // Also restrict the number of charactes per line to try and avoid
00499     // line-breaking in the middle of words like "-A", "R$" etc. which are
00500     // otherwise allowed by the standard unicode line-breaking rules.
00501     const int kCharsPerLine = (FLAGS_ptsize > 20) ? 50 : 100;
00502     string rand_utf8;
00503     UNICHARSET unicharset;
00504     if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() &&
00505         !unicharset.load_from_file(FLAGS_unicharset_file.c_str())) {
00506       TLOG_FATAL("Failed to load unicharset from file %s\n",
00507                  FLAGS_unicharset_file.c_str());
00508     }
00509 
00510     // If we are rendering ngrams that will be OCRed later, shuffle them so that
00511     // tesseract does not have difficulties finding correct baseline, word
00512     // spaces, etc.
00513     const char *str8 = src_utf8.c_str();
00514     int len = src_utf8.length();
00515     int step;
00516     vector<pair<int, int> > offsets;
00517     int offset = SpanUTF8Whitespace(str8);
00518     while (offset < len) {
00519       step = SpanUTF8NotWhitespace(str8 + offset);
00520       offsets.push_back(make_pair(offset, step));
00521       offset += step;
00522       offset += SpanUTF8Whitespace(str8 + offset);
00523     }
00524     if (FLAGS_render_ngrams)
00525       std::random_shuffle(offsets.begin(), offsets.end());
00526 
00527     for (int i = 0, line = 1; i < offsets.size(); ++i) {
00528       const char *curr_pos = str8 + offsets[i].first;
00529       int ngram_len = offsets[i].second;
00530       // Skip words that contain characters not in found in unicharset.
00531       if (!FLAGS_unicharset_file.empty() &&
00532           !unicharset.encodable_string(curr_pos, NULL)) {
00533         continue;
00534       }
00535       rand_utf8.append(curr_pos, ngram_len);
00536       if (rand_utf8.length() > line * kCharsPerLine) {
00537         rand_utf8.append(" \n");
00538         ++line;
00539         if (line & 0x1) rand_utf8.append(kSeparator);
00540       } else {
00541         rand_utf8.append(kSeparator);
00542       }
00543     }
00544     tlog(1, "Rendered ngram string of size %d\n", rand_utf8.length());
00545     src_utf8.swap(rand_utf8);
00546   }
00547   if (FLAGS_only_extract_font_properties) {
00548     tprintf("Extracting font properties only\n");
00549     ExtractFontProperties(src_utf8, &render, FLAGS_outputbase.c_str());
00550     tprintf("Done!\n");
00551     return 0;
00552   }
00553 
00554   int im = 0;
00555   vector<float> page_rotation;
00556   const char* to_render_utf8 = src_utf8.c_str();
00557 
00558   tesseract::TRand randomizer;
00559   randomizer.set_seed(kRandomSeed);
00560   vector<string> font_names;
00561   // We use a two pass mechanism to rotate images in both direction.
00562   // The first pass(0) will rotate the images in random directions and
00563   // the second pass(1) will mirror those rotations.
00564   int num_pass = FLAGS_bidirectional_rotation ? 2 : 1;
00565   for (int pass = 0; pass < num_pass; ++pass) {
00566     int page_num = 0;
00567     string font_used;
00568     for (int offset = 0; offset < strlen(to_render_utf8); ++im, ++page_num) {
00569       tlog(1, "Starting page %d\n", im);
00570       Pix* pix = NULL;
00571       if (FLAGS_find_fonts) {
00572         offset += render.RenderAllFontsToImage(FLAGS_min_coverage,
00573                                                to_render_utf8 + offset,
00574                                                strlen(to_render_utf8 + offset),
00575                                                &font_used, &pix);
00576       } else {
00577         offset += render.RenderToImage(to_render_utf8 + offset,
00578                                        strlen(to_render_utf8 + offset), &pix);
00579       }
00580       if (pix != NULL) {
00581         float rotation = 0;
00582         if (pass == 1) {
00583           // Pass 2, do mirror rotation.
00584           rotation = -1 * page_rotation[page_num];
00585         }
00586         if (FLAGS_degrade_image) {
00587           pix = DegradeImage(pix, FLAGS_exposure, &randomizer, &rotation);
00588         }
00589         render.RotatePageBoxes(rotation);
00590 
00591         if (pass == 0) {
00592           // Pass 1, rotate randomly and store the rotation..
00593           page_rotation.push_back(rotation);
00594         }
00595 
00596         Pix* gray_pix = pixConvertTo8(pix, false);
00597         pixDestroy(&pix);
00598         Pix* binary = pixThresholdToBinary(gray_pix, 128);
00599         pixDestroy(&gray_pix);
00600         char tiff_name[1024];
00601         if (FLAGS_find_fonts) {
00602           if (FLAGS_render_per_font) {
00603             string fontname_for_file = tesseract::StringReplace(
00604                 font_used, " ", "_");
00605             snprintf(tiff_name, 1024, "%s.%s.tif", FLAGS_outputbase.c_str(),
00606                      fontname_for_file.c_str());
00607             pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, "w");
00608             tprintf("Rendered page %d to file %s\n", im, tiff_name);
00609           } else {
00610             font_names.push_back(font_used);
00611           }
00612         } else {
00613           snprintf(tiff_name, 1024, "%s.tif", FLAGS_outputbase.c_str());
00614           pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, im == 0 ? "w" : "a");
00615           tprintf("Rendered page %d to file %s\n", im, tiff_name);
00616         }
00617         // Make individual glyphs
00618         if (FLAGS_output_individual_glyph_images) {
00619           if (!MakeIndividualGlyphs(binary, render.GetBoxes(), im)) {
00620             tprintf("ERROR: Individual glyphs not saved\n");
00621           }
00622         }
00623         pixDestroy(&binary);
00624       }
00625       if (FLAGS_find_fonts && offset != 0) {
00626         // We just want a list of names, or some sample images so we don't need
00627         // to render more than the first page of the text.
00628         break;
00629       }
00630     }
00631   }
00632   if (!FLAGS_find_fonts) {
00633     string box_name = FLAGS_outputbase.c_str();
00634     box_name += ".box";
00635     render.WriteAllBoxes(box_name);
00636   } else if (!FLAGS_render_per_font && !font_names.empty()) {
00637     string filename = FLAGS_outputbase.c_str();
00638     filename += ".fontlist.txt";
00639     FILE* fp = fopen(filename.c_str(), "wb");
00640     if (fp == NULL) {
00641       tprintf("Failed to create output font list %s\n", filename.c_str());
00642     } else {
00643       for (int i = 0; i < font_names.size(); ++i) {
00644         fprintf(fp, "%s\n", font_names[i].c_str());
00645       }
00646       fclose(fp);
00647     }
00648   }
00649 
00650   return 0;
00651 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines