#include <renderer.h>
Renders Tesseract output into a searchable PDF.
Definition at line 168 of file renderer.h.
tesseract::TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir)
Definition at line 164 of file pdfrenderer.cpp.
TessResultRenderer(const char *outputbase, const char *extension)
bool tesseract::TessPDFRenderer::AddImageHandler(TessBaseAPI *api) [protected, virtual]
Implements tesseract::TessResultRenderer.
Definition at line 823 of file pdfrenderer.cpp.
826 Pix *pix =
api->GetInputImage();
828 int ppi =
api->GetSourceYResolution();
829 if (!pix || ppi <= 0)
831 double width = pixGetWidth(pix) * 72.0 / ppi;
832 double height = pixGetHeight(pix) * 72.0 / ppi;
835 n = snprintf(buf,
sizeof(buf),
840 " /MediaBox [0 0 %.2f %.2f]\n"
841 " /Contents %ld 0 R\n"
844 " /XObject << /Im1 %ld 0 R >>\n"
845 " /ProcSet [ /PDF /Text /ImageB /ImageI /ImageC ]\n"
846 " /Font << /f-0-0 %ld 0 R >>\n"
857 if (n >=
sizeof(buf))
return false;
859 AppendPDFObject(buf);
862 char* pdftext = GetPDFTextObjects(
api, width, height);
863 long pdftext_len = strlen(pdftext);
864 unsigned char *pdftext_casted =
reinterpret_cast<unsigned char *
>(pdftext);
866 unsigned char *comp_pdftext =
867 zlibCompress(pdftext_casted, pdftext_len, &len);
868 long comp_pdftext_len = len;
869 n = snprintf(buf,
sizeof(buf),
872 " /Length %ld /Filter /FlateDecode\n"
874 "stream\n", obj_, comp_pdftext_len);
875 if (n >=
sizeof(buf)) {
877 lept_free(comp_pdftext);
881 long objsize = strlen(buf);
882 AppendData(reinterpret_cast<char *>(comp_pdftext), comp_pdftext_len);
883 objsize += comp_pdftext_len;
884 lept_free(comp_pdftext);
890 objsize += strlen(b2);
891 AppendPDFObjectDIY(objsize);
894 if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) {
898 AppendPDFObjectDIY(objsize);
void AppendData(const char *s, int len)
void AppendString(const char *s)
bool tesseract::TessPDFRenderer::BeginDocumentHandler() [protected, virtual]
Reimplemented from tesseract::TessResultRenderer.
Definition at line 468 of file pdfrenderer.cpp.
472 n = snprintf(buf,
sizeof(buf),
475 0xDE, 0xAD, 0xBE, 0xEB);
476 if (n >=
sizeof(buf))
return false;
477 AppendPDFObject(buf);
480 n = snprintf(buf,
sizeof(buf),
488 if (n >=
sizeof(buf))
return false;
489 AppendPDFObject(buf);
497 n = snprintf(buf,
sizeof(buf),
500 " /BaseFont /GlyphLessFont\n"
501 " /DescendantFonts [ %ld 0 R ]\n"
502 " /Encoding /Identity-H\n"
504 " /ToUnicode %ld 0 R\n"
511 if (n >=
sizeof(buf))
return false;
512 AppendPDFObject(buf);
515 n = snprintf(buf,
sizeof(buf),
518 " /BaseFont /GlyphLessFont\n"
519 " /CIDToGIDMap %ld 0 R\n"
522 " /Ordering (Identity)\n"
523 " /Registry (Adobe)\n"
526 " /FontDescriptor %ld 0 R\n"
527 " /Subtype /CIDFontType2\n"
535 if (n >=
sizeof(buf))
return false;
536 AppendPDFObject(buf);
539 const int kCIDToGIDMapSize = 2 * (1 << 16);
540 unsigned char *cidtogidmap =
new unsigned char[kCIDToGIDMapSize];
541 for (
int i = 0; i < kCIDToGIDMapSize; i++) {
542 cidtogidmap[i] = (i % 2) ? 1 : 0;
545 unsigned char *comp =
546 zlibCompress(cidtogidmap, kCIDToGIDMapSize, &len);
547 delete[] cidtogidmap;
548 n = snprintf(buf,
sizeof(buf),
551 " /Length %lu /Filter /FlateDecode\n"
553 "stream\n", (
unsigned long)len);
554 if (n >=
sizeof(buf)) {
559 long objsize = strlen(buf);
560 AppendData(reinterpret_cast<char *>(comp), len);
563 const char *endstream_endobj =
567 objsize += strlen(endstream_endobj);
568 AppendPDFObjectDIY(objsize);
571 "/CIDInit /ProcSet findresource begin\n"
576 " /Registry (Adobe)\n"
580 "/CMapName /Adobe-Identify-UCS def\n"
582 "1 begincodespacerange\n"
584 "endcodespacerange\n"
586 "<0000> <FFFF> <0000>\n"
589 "CMapName currentdict /CMap defineresource pop\n"
594 n = snprintf(buf,
sizeof(buf),
596 "<< /Length %lu >>\n"
600 "endobj\n", (
unsigned long) strlen(stream), stream);
601 if (n >=
sizeof(buf))
return false;
602 AppendPDFObject(buf);
605 const int kCharHeight = 2;
606 n = snprintf(buf,
sizeof(buf),
613 " /FontBBox [ 0 0 %d %d ]\n"
614 " /FontFile2 %ld 0 R\n"
615 " /FontName /GlyphLessFont\n"
618 " /Type /FontDescriptor\n"
627 if (n >=
sizeof(buf))
return false;
628 AppendPDFObject(buf);
630 n = snprintf(buf,
sizeof(buf),
"%s/pdf.ttf", datadir_);
631 if (n >=
sizeof(buf))
return false;
632 FILE *fp = fopen(buf,
"rb");
634 tprintf(
"Can not open file \"%s\"!\n", buf);
637 fseek(fp, 0, SEEK_END);
638 long int size = ftell(fp);
639 fseek(fp, 0, SEEK_SET);
640 char *buffer =
new char[size];
641 if (fread(buffer, 1, size, fp) != size) {
648 n = snprintf(buf,
sizeof(buf),
654 "stream\n", size, size);
655 if (n >=
sizeof(buf)) {
660 objsize = strlen(buf);
665 objsize += strlen(endstream_endobj);
666 AppendPDFObjectDIY(objsize);
void AppendData(const char *s, int len)
void AppendString(const char *s)
bool tesseract::TessPDFRenderer::EndDocumentHandler() [protected, virtual]
Reimplemented from tesseract::TessResultRenderer.
Definition at line 904 of file pdfrenderer.cpp.
915 const long int kPagesObjectNumber = 2;
916 offsets_[kPagesObjectNumber] = offsets_.
back();
917 n = snprintf(buf,
sizeof(buf),
921 " /Kids [ ", kPagesObjectNumber);
922 if (n >=
sizeof(buf))
return false;
924 size_t pages_objsize = strlen(buf);
925 for (
size_t i = 0; i < pages_.
size(); i++) {
926 n = snprintf(buf,
sizeof(buf),
927 "%ld 0 R ", pages_[i]);
928 if (n >=
sizeof(buf))
return false;
930 pages_objsize += strlen(buf);
932 n = snprintf(buf,
sizeof(buf),
936 "endobj\n", pages_.
size());
937 if (n >=
sizeof(buf))
return false;
939 pages_objsize += strlen(buf);
940 offsets_.
back() += pages_objsize;
943 char* datestr = l_getFormattedDate();
944 n = snprintf(buf,
sizeof(buf),
947 " /Producer (Tesseract %s)\n"
948 " /CreationDate (D:%s)\n"
953 if (n >=
sizeof(buf))
return false;
954 AppendPDFObject(buf);
955 n = snprintf(buf,
sizeof(buf),
958 "0000000000 65535 f \n", obj_);
959 if (n >=
sizeof(buf))
return false;
961 for (
int i = 1; i < obj_; i++) {
962 n = snprintf(buf,
sizeof(buf),
"%010ld 00000 n \n", offsets_[i]);
963 if (n >=
sizeof(buf))
return false;
966 n = snprintf(buf,
sizeof(buf),
980 if (n >=
sizeof(buf))
return false;
#define TESSERACT_VERSION_STR
void AppendString(const char *s)
const char * title() const
The documentation for this class was generated from the following files: