tesseract 3.04.01

api/renderer.cpp

Go to the documentation of this file.
00001 // Include automatically generated configuration file if running autoconf.
00002 #ifdef HAVE_CONFIG_H
00003 #include "config_auto.h"
00004 #endif
00005 
00006 #include <string.h>
00007 #include "baseapi.h"
00008 #include "genericvector.h"
00009 #include "renderer.h"
00010 
00011 namespace tesseract {
00012 
00013 /**********************************************************************
00014  * Base Renderer interface implementation
00015  **********************************************************************/
00016 TessResultRenderer::TessResultRenderer(const char *outputbase,
00017                                        const char* extension)
00018     : file_extension_(extension),
00019       title_(""), imagenum_(-1),
00020       fout_(stdout),
00021       next_(NULL),
00022       happy_(true) {
00023   if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
00024     STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
00025     fout_ = fopen(outfile.string(), "wb");
00026     if (fout_ == NULL) {
00027       happy_ = false;
00028     }
00029   }
00030 }
00031 
00032 TessResultRenderer::~TessResultRenderer() {
00033  if (fout_ != stdout)
00034     fclose(fout_);
00035   else
00036     clearerr(fout_);
00037   delete next_;
00038 }
00039 
00040 void TessResultRenderer::insert(TessResultRenderer* next) {
00041   if (next == NULL) return;
00042 
00043   TessResultRenderer* remainder = next_;
00044   next_ = next;
00045   if (remainder) {
00046     while (next->next_ != NULL) {
00047       next = next->next_;
00048     }
00049     next->next_ = remainder;
00050   }
00051 }
00052 
00053 bool TessResultRenderer::BeginDocument(const char* title) {
00054   if (!happy_) return false;
00055   title_ = title;
00056   imagenum_ = -1;
00057   bool ok = BeginDocumentHandler();
00058   if (next_) {
00059     ok = next_->BeginDocument(title) && ok;
00060   }
00061   return ok;
00062 }
00063 
00064 bool TessResultRenderer::AddImage(TessBaseAPI* api) {
00065   if (!happy_) return false;
00066   ++imagenum_;
00067   bool ok = AddImageHandler(api);
00068   if (next_) {
00069     ok = next_->AddImage(api) && ok;
00070   }
00071   return ok;
00072 }
00073 
00074 bool TessResultRenderer::EndDocument() {
00075   if (!happy_) return false;
00076   bool ok = EndDocumentHandler();
00077   if (next_) {
00078     ok = next_->EndDocument() && ok;
00079   }
00080   return ok;
00081 }
00082 
00083 void TessResultRenderer::AppendString(const char* s) {
00084   AppendData(s, strlen(s));
00085 }
00086 
00087 void TessResultRenderer::AppendData(const char* s, int len) {
00088   int n = fwrite(s, 1, len, fout_);
00089   if (n != len) happy_ = false;
00090 }
00091 
00092 bool TessResultRenderer::BeginDocumentHandler() {
00093   return happy_;
00094 }
00095 
00096 bool TessResultRenderer::EndDocumentHandler() {
00097   return happy_;
00098 }
00099 
00100 
00101 /**********************************************************************
00102  * UTF8 Text Renderer interface implementation
00103  **********************************************************************/
00104 TessTextRenderer::TessTextRenderer(const char *outputbase)
00105     : TessResultRenderer(outputbase, "txt") {
00106 }
00107 
00108 bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
00109   char* utf8 = api->GetUTF8Text();
00110   if (utf8 == NULL) {
00111     return false;
00112   }
00113 
00114   AppendString(utf8);
00115   delete[] utf8;
00116 
00117   bool pageBreak = false;
00118   api->GetBoolVariable("include_page_breaks", &pageBreak);
00119   const char* pageSeparator = api->GetStringVariable("page_separator");
00120   if (pageBreak) {
00121     AppendString(pageSeparator);
00122   }
00123 
00124   return true;
00125 }
00126 
00127 /**********************************************************************
00128  * HOcr Text Renderer interface implementation
00129  **********************************************************************/
00130 TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
00131     : TessResultRenderer(outputbase, "hocr") {
00132     font_info_ = false;
00133 }
00134 
00135 TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
00136     : TessResultRenderer(outputbase, "hocr") {
00137     font_info_ = font_info;
00138 }
00139 
00140 bool TessHOcrRenderer::BeginDocumentHandler() {
00141   AppendString(
00142         "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
00143         "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
00144         "    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
00145         "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
00146         "lang=\"en\">\n <head>\n  <title>");
00147   AppendString(title());
00148   AppendString(
00149       "</title>\n"
00150       "<meta http-equiv=\"Content-Type\" content=\"text/html;"
00151       "charset=utf-8\" />\n"
00152       "  <meta name='ocr-system' content='tesseract " TESSERACT_VERSION_STR
00153               "' />\n"
00154       "  <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
00155       " ocr_line ocrx_word");
00156   if (font_info_)
00157     AppendString(
00158       " ocrp_lang ocrp_dir ocrp_font ocrp_fsize ocrp_wconf");
00159   AppendString(
00160       "'/>\n"
00161       "</head>\n<body>\n");
00162 
00163   return true;
00164 }
00165 
00166 bool TessHOcrRenderer::EndDocumentHandler() {
00167   AppendString(" </body>\n</html>\n");
00168 
00169   return true;
00170 }
00171 
00172 bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
00173   char* hocr = api->GetHOCRText(imagenum());
00174   if (hocr == NULL) return false;
00175 
00176   AppendString(hocr);
00177   delete[] hocr;
00178 
00179   return true;
00180 }
00181 
00182 /**********************************************************************
00183  * UNLV Text Renderer interface implementation
00184  **********************************************************************/
00185 TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
00186     : TessResultRenderer(outputbase, "unlv") {
00187 }
00188 
00189 bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
00190   char* unlv = api->GetUNLVText();
00191   if (unlv == NULL) return false;
00192 
00193   AppendString(unlv);
00194   delete[] unlv;
00195 
00196   return true;
00197 }
00198 
00199 /**********************************************************************
00200  * BoxText Renderer interface implementation
00201  **********************************************************************/
00202 TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
00203     : TessResultRenderer(outputbase, "box") {
00204 }
00205 
00206 bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
00207   char* text = api->GetBoxText(imagenum());
00208   if (text == NULL) return false;
00209 
00210   AppendString(text);
00211   delete[] text;
00212 
00213   return true;
00214 }
00215 
00216 /**********************************************************************
00217  * Osd Text Renderer interface implementation
00218  **********************************************************************/
00219 TessOsdRenderer::TessOsdRenderer(const char* outputbase)
00220     : TessResultRenderer(outputbase, "osd") {
00221 }
00222 
00223 bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
00224   char* osd = api->GetOsdText(imagenum());
00225   if (osd == NULL) return false;
00226 
00227   AppendString(osd);
00228   delete[] osd;
00229 
00230   return true;
00231 }
00232 
00233 }  // namespace tesseract
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines