tesseract  4.1.0
renderer.h
Go to the documentation of this file.
1 // File: renderer.h
3 // Description: Rendering interface to inject into TessBaseAPI
4 //
5 // (C) Copyright 2011, Google Inc.
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
17 
18 #ifndef TESSERACT_API_RENDERER_H_
19 #define TESSERACT_API_RENDERER_H_
20 
21 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
22 // complexity of includes here. Use forward declarations wherever possible
23 // and hide includes of complex types in baseapi.cpp.
24 #include <string> // for std::string
25 #include "genericvector.h"
26 #include "platform.h"
27 
28 struct Pix;
29 
30 namespace tesseract {
31 
32 class TessBaseAPI;
33 
48  public:
49  virtual ~TessResultRenderer();
50 
51  // Takes ownership of the pointer, so it must be a new'd instance.
52  // Renderers aren't ordered, but this appends the sequences of the next
53  // parameter and the existing next(). The renderers should be unique across both lists.
54  void insert(TessResultRenderer* next);
55 
56  // Returns the next renderer or nullptr.
58  return next_;
59  }
60 
66  bool BeginDocument(const char* title);
67 
76  bool AddImage(TessBaseAPI* api);
77 
82  bool EndDocument();
83 
84  const char* file_extension() const {  // Returns file_extension_, the standard extension for generated output.
85  return file_extension_;
86  }
87  const char* title() const {  // Returns title_ (title of the document being rendered) via c_str().
88  return title_.c_str();
89  }
90 
91  // Is everything fine? Returns happy_; false means something went wrong.
92  bool happy() const {  // const, like the other accessors, so it works on const renderers
93  return happy_;
94  }
95 
105  int imagenum() const {  // Returns imagenum_, the index of the last image added.
106  return imagenum_;
107  }
108 
109  protected:
120  TessResultRenderer(const char* outputbase, const char* extension);
121 
122  // Hook for specialized handling in BeginDocument()
123  virtual bool BeginDocumentHandler();
124 
125  // This must be overridden to render the OCR'd results
126  virtual bool AddImageHandler(TessBaseAPI* api) = 0;
127 
128  // Hook for specialized handling in EndDocument()
129  virtual bool EndDocumentHandler();
130 
131  // Renderers can call this to append '\0' terminated strings into
132  // the output string returned by GetOutput.
133  // This method will grow the output buffer if needed.
134  void AppendString(const char* s);
135 
136  // Renderers can call this to append binary byte sequences into
137  // the output string returned by GetOutput. Note that s is not necessarily
138  // '\0' terminated (and can contain '\0' within it).
139  // This method will grow the output buffer if needed.
140  void AppendData(const char* s, int len);
141 
142  private:
143  const char* file_extension_; // standard extension for generated output
144  STRING title_; // title of document being rendered
145  int imagenum_; // index of last image added
146 
147  FILE* fout_; // output file pointer
148  TessResultRenderer* next_; // Can link multiple renderers together
149  bool happy_; // I get grumpy when the disk fills up, etc.
150 };
151 
156  public:
157  explicit TessTextRenderer(const char* outputbase);
158 
159  protected:
160  bool AddImageHandler(TessBaseAPI* api) override;
161 };
162 
167  public:
168  explicit TessHOcrRenderer(const char* outputbase, bool font_info);
169  explicit TessHOcrRenderer(const char* outputbase);
170 
171  protected:
172  bool BeginDocumentHandler() override;
173  bool AddImageHandler(TessBaseAPI* api) override;
174  bool EndDocumentHandler() override;
175 
176  private:
177  bool font_info_; // whether to print font information
178 };
179 
184  public:
185  explicit TessAltoRenderer(const char* outputbase);
186 
187  protected:
188  bool BeginDocumentHandler() override;
189  bool AddImageHandler(TessBaseAPI* api) override;
190  bool EndDocumentHandler() override;
191 };
192 
197  public:
198  explicit TessTsvRenderer(const char* outputbase, bool font_info);
199  explicit TessTsvRenderer(const char* outputbase);
200 
201  protected:
202  bool BeginDocumentHandler() override;
203  bool AddImageHandler(TessBaseAPI* api) override;
204  bool EndDocumentHandler() override;
205 
206  private:
207  bool font_info_; // whether to print font information
208 };
209 
214  public:
215  // datadir is the location of the TESSDATA. We need it because
216  // we load a custom PDF font from this location.
217  TessPDFRenderer(const char* outputbase, const char* datadir,
218  bool textonly = false);
219 
220  protected:
221  bool BeginDocumentHandler() override;
222  bool AddImageHandler(TessBaseAPI* api) override;
223  bool EndDocumentHandler() override;
224 
225  private:
226  // We don't want to have every image in memory at once,
227  // so we store some metadata as we go along producing
228  // PDFs one page at a time. At the end, that metadata is
229  // used to make everything that isn't easily handled in a
230  // streaming fashion.
231  long int obj_; // counter for PDF objects
232  GenericVector<long int> offsets_; // offset of every PDF object in bytes
233  GenericVector<long int> pages_; // object number for every /Page object
234  std::string datadir_; // where to find the custom font
235  bool textonly_; // skip images if set
236  // Bookkeeping only. DIY = Do It Yourself.
237  void AppendPDFObjectDIY(size_t objectsize);
238  // Bookkeeping + emit data.
239  void AppendPDFObject(const char* data);
240  // Create the /Contents object for an entire page.
241  char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
242  // Turn an image into a PDF object. Only transcode if we have to.
243  static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum,
244  char** pdf_object, long int* pdf_object_size,
245  int jpg_quality);
246 };
247 
252  public:
253  explicit TessUnlvRenderer(const char* outputbase);
254 
255  protected:
256  bool AddImageHandler(TessBaseAPI* api) override;
257 };
258 
263  public:
264  explicit TessLSTMBoxRenderer(const char* outputbase);
265 
266  protected:
267  bool AddImageHandler(TessBaseAPI* api) override;
268 };
269 
274  public:
275  explicit TessBoxTextRenderer(const char* outputbase);
276 
277  protected:
278  bool AddImageHandler(TessBaseAPI* api) override;
279 };
280 
285  public:
286  explicit TessWordStrBoxRenderer(const char* outputbase);
287 
288  protected:
289  bool AddImageHandler(TessBaseAPI* api) override;
290 };
291 
292 #ifndef DISABLED_LEGACY_ENGINE
293 
298  public:
299  explicit TessOsdRenderer(const char* outputbase);
300 
301  protected:
302  bool AddImageHandler(TessBaseAPI* api) override;
303 };
304 
305 #endif // ndef DISABLED_LEGACY_ENGINE
306 
307 } // namespace tesseract.
308 
309 #endif // TESSERACT_API_RENDERER_H_
struct TessTextRenderer TessTextRenderer
Definition: capi.h:88
Definition: strngs.h:45
const char * file_extension() const
Definition: renderer.h:84
struct TessResultRenderer TessResultRenderer
Definition: capi.h:87
#define TESS_API
Definition: platform.h:54
struct TessUnlvRenderer TessUnlvRenderer
Definition: capi.h:91
struct TessBaseAPI TessBaseAPI
Definition: capi.h:93
struct TessHOcrRenderer TessHOcrRenderer
Definition: capi.h:89
TessResultRenderer * next()
Definition: renderer.h:57
const char * title() const
Definition: renderer.h:87
struct TessPDFRenderer TessPDFRenderer
Definition: capi.h:90
void insert(LIST list, void *node)
Definition: oldlist.cpp:173
struct TessBoxTextRenderer TessBoxTextRenderer
Definition: capi.h:92