31 #include "vcsversion.h"
35 #undef __STRICT_ANSI__
52 #include "allheaders.h"
114 osd_tesseract_(NULL),
120 paragraph_models_(NULL),
129 recognition_done_(false),
131 rect_left_(0), rect_top_(0), rect_width_(0), rect_height_(0),
132 image_width_(0), image_height_(0) {
143 #if defined(GIT_REV) && (defined(DEBUG) || defined(_DEBUG))
158 #if USE_DEVICE_SELECTION
164 #if USE_DEVICE_SELECTION
165 ds_device device = OpenclDevice::getDeviceSelection();
166 if (device.type == DS_DEVICE_OPENCL_DEVICE) {
167 *data =
reinterpret_cast<void*
>(
new cl_device_id);
168 memcpy(*data, &device.oclDeviceID,
sizeof(cl_device_id));
169 return sizeof(cl_device_id);
184 struct sigaction action;
185 memset(&action, 0,
sizeof(action));
187 action.sa_flags = SA_RESETHAND;
188 sigaction(SIGSEGV, &action, NULL);
189 sigaction(SIGFPE, &action, NULL);
190 sigaction(SIGBUS, &action, NULL);
193 tprintf(
"CatchSignals has no non-linux implementation!\n");
229 IntParam *p = ParamUtils::FindParam<IntParam>(
231 if (p == NULL)
return false;
232 *value = (
inT32)(*p);
237 BoolParam *p = ParamUtils::FindParam<BoolParam>(
239 if (p == NULL)
return false;
240 *value = (
BOOL8)(*p);
245 StringParam *p = ParamUtils::FindParam<StringParam>(
247 return (p != NULL) ? p->
string() : NULL;
251 DoubleParam *p = ParamUtils::FindParam<DoubleParam>(
253 if (p == NULL)
return false;
254 *value = (double)(*p);
280 bool set_only_non_debug_params) {
283 if (language == NULL) language =
"eng";
302 bool reset_classifier =
true;
304 reset_classifier =
false;
308 language,
oem, configs, configs_size, vars_vec, vars_values,
309 set_only_non_debug_params) != 0) {
330 if (reset_classifier) {
362 for (
int i = 0; i < num_subs; ++i)
376 char fname[_MAX_FNAME];
377 WIN32_FIND_DATA data;
379 HANDLE handle = FindFirstFile(pattern.
string(), &data);
380 if (handle != INVALID_HANDLE_VALUE) {
381 for (; result; result = FindNextFile(handle, &data)) {
382 _splitpath(data.cFileName, NULL, NULL, fname, NULL);
389 struct dirent *dirent;
396 while ((dirent = readdir(dir))) {
398 if (dirent->d_name[0] !=
'.') {
399 if (strstr(dirent->d_name, extension.
string()) != NULL) {
400 dot = strrchr(dirent->d_name,
'.');
402 if (strncmp(dot, extension.
string(),
403 strlen(extension.
string())) == 0) {
491 int width,
int height) {
497 int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
498 SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
499 bytes_per_pixel, bytes_per_line);
526 int width,
int height,
527 int bytes_per_pixel,
int bytes_per_line) {
530 bytes_per_pixel, bytes_per_line);
537 tprintf(
"Please call SetImage before SetSourceResolution.\n");
598 Pixa** pixa,
int** blockids,
int** paraids) {
600 pixa, blockids, paraids);
643 bool text_only,
bool raw_image,
644 const int raw_padding,
645 Pixa** pixa,
int** blockids,
654 int component_count = 0;
655 int left, top, right, bottom;
662 &left, &top, &right, &bottom);
668 level, &left, &top, &right, &bottom);
671 if (get_bbox->
Run() &&
674 }
while (page_it->
Next(level));
676 Boxa* boxa = boxaCreate(component_count);
678 *pixa = pixaCreate(component_count);
679 if (blockids != NULL)
680 *blockids =
new int[component_count];
682 *paraids =
new int[component_count];
686 int component_index = 0;
689 if (get_bbox->
Run() &&
691 Box* lbox = boxCreate(left, top, right - left, bottom - top);
692 boxaAddBox(boxa, lbox, L_INSERT);
701 pixaAddPix(*pixa, pix, L_INSERT);
702 pixaAddBox(*pixa, lbox, L_CLONE);
704 if (paraids != NULL) {
705 (*paraids)[component_index] = paraid;
709 if (blockids != NULL) {
710 (*blockids)[component_index] = blockid;
718 }
while (page_it->
Next(level));
735 FILE *fp = fopen(filename,
"wb");
737 int width = pixGetWidth(pix);
738 int height = pixGetHeight(pix);
739 l_uint32* data = pixGetData(pix);
740 fprintf(fp,
"P5 %d %d 255\n", width, height);
741 for (
int y = 0; y < height; ++y, data += pixGetWpl(pix)) {
742 for (
int x = 0; x < width; ++x) {
743 uinT8 b = GET_DATA_BIT(data, x) ? 0 : 255;
744 fwrite(&b, 1, 1, fp);
750 #ifndef NO_CUBE_BUILD
758 Boxa* boxa_words, Pixa* pixa_words,
759 const FCOORD& reskew, Pix* page_pix,
761 int block_count = boxaGetCount(boxa_blocks);
762 ASSERT_HOST(block_count == pixaGetCount(pixa_blocks));
764 for (
int i = 0; i < block_count; ++i) {
765 Pix* pix = pixaGetPix(pixa_blocks, i, L_CLONE);
766 pixDisplayWrite(pix, 1);
768 int word_count = boxaGetCount(boxa_words);
769 ASSERT_HOST(word_count == pixaGetCount(pixa_words));
773 page_res_it.
forward(), ++pr_word) {
779 if (choice != NULL) {
783 filename +=
"unclassified";
784 snprintf(numbuf, 32,
"%03d", pr_word);
788 Pix* pix = pixaGetPix(pixa_words, pr_word, L_CLONE);
789 pixWrite(filename.
string(), pix, IFF_TIFF_G4);
795 #endif // NO_CUBE_BUILD
872 #ifndef GRAPHICS_DISABLED
874 #endif // GRAPHICS_DISABLED
889 fclose(training_output_file);
892 bool wait_for_text =
true;
909 tprintf(
"Please call SetImage before attempting recognition.");
926 while (page_res_it.
word() != NULL) {
930 page_res_it.
row()->
row, word_res);
966 bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
968 const char* retry_config,
969 int timeout_millisec,
971 int tessedit_page_number) {
972 if (!flist && !buf)
return false;
973 int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
978 buf->
split(
'\n', &lines);
979 if (lines.
empty())
return false;
983 for (
int i = 0; i < page; i++) {
985 if (fgets(pagename,
sizeof(pagename), flist) == NULL)
break;
990 const char* kUnknownTitle =
"";
998 if (fgets(pagename,
sizeof(pagename), flist) == NULL)
break;
1000 if (page >= lines.
size())
break;
1001 snprintf(pagename,
sizeof(pagename),
"%s", lines[page].c_str());
1004 Pix *pix = pixRead(pagename);
1006 tprintf(
"Image file %s cannot be read!\n", pagename);
1009 tprintf(
"Page %d : %s\n", page, pagename);
1010 bool r =
ProcessPage(pix, page, pagename, retry_config,
1011 timeout_millisec, renderer);
1013 if (!r)
return false;
1014 if (tessedit_page_number >= 0)
break;
1025 bool TessBaseAPI::ProcessPagesMultipageTiff(
const l_uint8 *data,
1028 const char* retry_config,
1029 int timeout_millisec,
1031 int tessedit_page_number) {
1032 #ifndef ANDROID_BUILD
1036 #endif // USE_OPENCL
1037 int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
1039 if (tessedit_page_number >= 0)
1040 page = tessedit_page_number;
1042 if ( od.selectedDeviceIsOpenCL() ) {
1044 pix = od.pixReadMemTiffCl(data, size, page);
1046 #endif // USE_OPENCL
1047 pix = pixReadMemTiff(data, size, page);
1050 #endif // USE_OPENCL
1051 if (pix == NULL)
break;
1052 tprintf(
"Page %d\n", page + 1);
1056 bool r =
ProcessPage(pix, page, filename, retry_config,
1057 timeout_millisec, renderer);
1059 if (!r)
return false;
1060 if (tessedit_page_number >= 0)
break;
1071 int timeout_millisec,
1098 const char* retry_config,
1099 int timeout_millisec,
1101 #ifndef ANDROID_BUILD
1103 bool stdInput = !strcmp(filename,
"stdin") || !strcmp(filename,
"-");
1106 if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1107 tprintf(
"ERROR: cin to binary: %s", strerror(errno));
1112 return ProcessPagesFileList(stdin, NULL, retry_config,
1113 timeout_millisec, renderer,
1123 buf.
assign((std::istreambuf_iterator<char>(std::cin)),
1124 (std::istreambuf_iterator<char>()));
1126 std::ifstream ifs(filename, std::ios::binary);
1128 buf.assign((std::istreambuf_iterator<char>(ifs)),
1129 (std::istreambuf_iterator<char>()));
1131 tprintf(
"ERROR: Can not open input file %s\n", filename);
1138 const l_uint8 * data =
reinterpret_cast<const l_uint8 *
>(buf.c_str());
1139 findFileFormatBuffer(data, &format);
1142 if (format == IFF_UNKNOWN) {
1144 return ProcessPagesFileList(NULL, &s, retry_config,
1145 timeout_millisec, renderer,
1150 bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
1151 format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
1152 format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1153 format == IFF_TIFF_ZIP);
1158 pix = pixReadMem(data, buf.size());
1165 const char* kUnknownTitle =
"";
1174 r = ProcessPagesMultipageTiff(data, buf.size(),
filename, retry_config,
1175 timeout_millisec, renderer,
1179 timeout_millisec, renderer);
1184 if (!r || (renderer && !renderer->
EndDocument())) {
1195 const char* retry_config,
int timeout_millisec,
1200 bool failed =
false;
1213 }
else if (timeout_millisec > 0) {
1228 #ifndef ANDROID_BUILD
1230 pixWrite(
"tessinput.tif", page_pix, IFF_TIFF_G4);
1231 #endif // ANDROID_BUILD
1234 if (failed && retry_config != NULL && retry_config[0] !=
'\0') {
1247 if (renderer && !failed) {
1248 failed = !renderer->
AddImage(
this);
1315 char* result =
new char[text.
length() + 1];
1329 it->
Orientation(&orientation, &writing_direction, &textline_order,
1342 static void AddBaselineCoordsTohOCR(
const PageIterator *it,
1347 hocr_str->
add_str_int(
"; textangle ", 360 - orientation * 90);
1351 int left, top, right, bottom;
1352 it->BoundingBox(level, &left, &top, &right, &bottom);
1356 if (!it->Baseline(level, &x1, &y1, &x2, &y2))
1374 p1 = (y2 - y1) / static_cast<double>(x2 - x1);
1375 p0 = y1 -
static_cast<double>(p1 * x1);
1377 hocr_str->
add_str_double(
"; baseline ", round(p1 * 1000.0) / 1000.0);
1381 static void AddIdTohOCR(
STRING* hocr_str,
const std::string base,
int num1,
int num2) {
1382 unsigned long bufsize = base.length() + 2 *
kMaxIntSize;
1383 char id_buffer[bufsize];
1385 snprintf(id_buffer, bufsize - 1,
"%s_%d_%d", base.c_str(), num1, num2);
1387 snprintf(id_buffer, bufsize - 1,
"%s_%d", base.c_str(), num1);
1389 id_buffer[bufsize - 1] =
'\0';
1390 *hocr_str +=
" id='";
1391 *hocr_str += id_buffer;
1395 static void AddBoxTohOCR(
const ResultIterator *it,
1398 int left, top, right, bottom;
1399 it->BoundingBox(level, &left, &top, &right, &bottom);
1408 AddBaselineCoordsTohOCR(it, level, hocr_str);
1410 float row_height, descenders, ascenders;
1411 it->RowAttributes(&row_height, &descenders, &ascenders);
1433 int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1434 int page_id = page_number + 1;
1435 bool font_info =
false;
1447 wchar_t *uni16_str =
new WCHAR[str16_len];
1449 uni16_str, str16_len);
1450 int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL,
1452 char *utf8_str =
new char[utf8_len];
1453 WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
1454 utf8_len, NULL, NULL);
1460 hocr_str +=
" <div class='ocr_page'";
1461 AddIdTohOCR(&hocr_str,
"page", page_id, -1);
1462 hocr_str +=
" title='image \"";
1466 hocr_str +=
"unknown";
1484 hocr_str +=
" <div class='ocr_carea'";
1485 AddIdTohOCR(&hocr_str,
"block", page_id, bcnt);
1486 AddBoxTohOCR(res_it,
RIL_BLOCK, &hocr_str);
1489 hocr_str +=
"\n <p class='ocr_par'";
1491 hocr_str +=
" dir='ltr'";
1493 hocr_str +=
" dir='rtl'";
1495 AddIdTohOCR(&hocr_str,
"par", page_id, pcnt);
1496 AddBoxTohOCR(res_it,
RIL_PARA, &hocr_str);
1499 hocr_str +=
"\n <span class='ocr_line'";
1500 AddIdTohOCR(&hocr_str,
"line", page_id, lcnt);
1505 hocr_str +=
"<span class='ocrx_word'";
1506 AddIdTohOCR(&hocr_str,
"word", page_id, wcnt);
1507 int left, top, right, bottom;
1508 bool bold, italic, underlined, monospace, serif, smallcaps;
1509 int pointsize, font_id;
1510 const char *font_name;
1513 &monospace, &serif, &smallcaps,
1514 &pointsize, &font_id);
1522 hocr_str +=
"; x_font ";
1529 hocr_str +=
" lang='";
1543 if (bold) hocr_str +=
"<strong>";
1544 if (italic) hocr_str +=
"<em>";
1547 if (grapheme && grapheme[0] != 0) {
1553 if (italic) hocr_str +=
"</em>";
1554 if (bold) hocr_str +=
"</strong>";
1555 hocr_str +=
"</span> ";
1558 if (last_word_in_line) {
1559 hocr_str +=
"\n </span>";
1562 if (last_word_in_para) {
1563 hocr_str +=
"\n </p>\n";
1566 if (last_word_in_block) {
1567 hocr_str +=
" </div>\n";
1571 hocr_str +=
" </div>\n";
1573 char *ret =
new char[hocr_str.
length() + 1];
1574 strcpy(ret, hocr_str.
string());
1617 char* result =
new char[total_length];
1618 strcpy(result,
"\0");
1619 int output_length = 0;
1622 int left, top, right, bottom;
1627 for (
int i = 0; text[i] !=
'\0'; ++i) {
1631 snprintf(result + output_length, total_length - output_length,
1632 "%s %d %d %d %d %d\n",
1635 output_length += strlen(result + output_length);
1638 if (output_length + kMaxBytesPerLine > total_length)
1652 0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
1656 0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
1668 bool tilde_crunch_written =
false;
1669 bool last_char_was_newline =
true;
1670 bool last_char_was_tilde =
false;
1674 char* result =
new char[total_length];
1682 (!tilde_crunch_written ||
1693 last_char_was_tilde =
false;
1695 if (!last_char_was_tilde) {
1697 last_char_was_tilde =
true;
1699 tilde_crunch_written =
true;
1700 last_char_was_newline =
false;
1705 tilde_crunch_written =
false;
1709 int length = lengths.
length();
1713 if (last_char_was_tilde &&
1714 word->
word->
space() == 0 && wordstr[offset] ==
' ') {
1718 offset = lengths[i++];
1720 if (i < length && wordstr[offset] != 0) {
1721 if (!last_char_was_newline)
1724 last_char_was_newline =
false;
1725 for (; i < length; offset += lengths[i++]) {
1726 if (wordstr[offset] ==
' ' ||
1729 last_char_was_tilde =
true;
1733 UNICHAR ch(wordstr + offset, lengths[i]);
1735 for (
int j = 0;
kUniChs[j] != 0; ++j) {
1741 if (uni_ch <= 0xff) {
1742 *ptr++ =
static_cast<char>(uni_ch);
1743 last_char_was_tilde =
false;
1746 last_char_was_tilde =
true;
1755 tilde_crunch_written =
false;
1756 last_char_was_newline =
true;
1757 last_char_was_tilde =
false;
1782 const char* script_name =
1786 int orient_deg = orient_id * 90;
1791 char* osd_buf =
new char[255];
1792 snprintf(osd_buf, 255,
1794 "Orientation in degrees: %d\n"
1796 "Orientation confidence: %.2f\n"
1798 "Script confidence: %.2f\n",
1800 orient_deg, rotate, orient_conf,
1801 script_name, script_conf);
1809 if (!conf)
return 0;
1812 while (*pt >= 0) sum += *pt++;
1813 if (pt != conf) sum /= pt - conf;
1828 int* conf =
new int[n_word+1];
1833 int w_conf =
static_cast<int>(100 + 5 * choice->
certainty());
1835 if (w_conf < 0) w_conf = 0;
1836 if (w_conf > 100) w_conf = 100;
1837 conf[n_word++] = w_conf;
1856 bool success =
true;
1862 tprintf(
"Trying to adapt \"%s\" to \"%s\"\n", text, wordstr);
1867 if (word_res != NULL) {
1875 for (t = 0; text[t] !=
'\0'; ++t) {
1876 if (text[t] ==
'\n' || text[t] ==
' ')
1878 while (wordstr[w] !=
'\0' && wordstr[w] ==
' ')
1880 if (text[t] != wordstr[w])
1884 if (text[t] !=
'\0' || wordstr[w] !=
'\0') {
1892 if (pr_it.
word() == NULL)
1895 word_res = pr_it.
word();
2016 if (x2 <= x1) x2 = x1 + 1;
2018 *out_slope =
static_cast<float>(y2 - y1) / (x2 - x1);
2019 *out_offset =
static_cast<int>(y1 - *out_slope * x1);
2022 int left, top, right, bottom;
2032 *out_offset += bottom -
MAX(left_y, right_y);
2035 *out_slope = -*out_slope;
2062 for (
int i = 0; i < num_subs; ++i) {
2076 tprintf(
"Please call Init before attempting to set an image.");
2124 tprintf(
"Estimated resolution %d out of range! Corrected to %d\n",
2134 tprintf(
"Please call SetImage before attempting recognition.");
2150 tprintf(
"Image too large: (%d, %d)\n",
2173 NULL, 0, NULL, NULL,
false) == 0) {
2178 tprintf(
"Warning: Auto orientation and script detection requested,"
2179 " but osd language failed to load\n");
2228 int total_length = 2;
2229 int total_blobs = 0;
2235 if (choice != NULL) {
2236 total_blobs += choice->
length() + 2;
2244 if (blob_count != NULL)
2245 *blob_count = total_blobs;
2246 return total_length;
2283 bool** vertical_writing) {
2284 delete[] *block_orientation;
2285 *block_orientation = NULL;
2286 delete[] *vertical_writing;
2287 *vertical_writing = NULL;
2290 block_it.move_to_first();
2292 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2293 if (!block_it.data()->poly_block()->IsText()) {
2299 tprintf(
"WARNING: Found no blocks\n");
2302 *block_orientation =
new int[num_blocks];
2303 *vertical_writing =
new bool[num_blocks];
2304 block_it.move_to_first();
2306 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
2307 block_it.forward()) {
2308 if (!block_it.data()->poly_block()->IsText()) {
2311 FCOORD re_rotation = block_it.data()->re_rotation();
2312 float re_theta = re_rotation.
angle();
2313 FCOORD classify_rotation = block_it.data()->classify_rotation();
2314 float classify_theta = classify_rotation.
angle();
2315 double rot_theta = - (re_theta - classify_theta) * 2.0 /
PI;
2316 if (rot_theta < 0) rot_theta += 4;
2317 int num_rotations =
static_cast<int>(rot_theta + 0.5);
2318 (*block_orientation)[i] = num_rotations;
2321 (*vertical_writing)[i] = classify_rotation.
y() != 0.0f;
2351 inT32 xstarts[] = {-32000};
2352 double quad_coeffs[] = {0, 0, baseline};
2357 ascender - (baseline + xheight),
2358 descender - baseline,
2365 int width = pixGetWidth(pix);
2366 int height = pixGetHeight(pix);
2367 BLOCK block(
"a character",
TRUE, 0, 0, 0, 0, width, height);
2374 C_BLOB_IT c_blob_it(list);
2375 if (c_blob_it.empty())
2378 C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
2379 for (c_blob_it.forward();
2380 !c_blob_it.at_first();
2381 c_blob_it.forward()) {
2382 C_BLOB *c_blob = c_blob_it.data();
2383 ol_it.add_list_after(c_blob->
out_list());
2396 float x_center = (box.
left() + box.
right()) / 2.0f;
2399 tblob->
Normalize(NULL, NULL, NULL, x_center, baseline, scale, scale,
2408 float descender,
float ascender,
2409 bool numeric_mode, Pix* pix) {
2436 float best_rating = -100;
2440 BLOB_CHOICE_LIST choices;
2442 BLOB_CHOICE_IT choice_it;
2443 choice_it.set_to_list(&choices);
2444 for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
2445 choice_it.forward()) {
2446 if (choice_it.data()->rating() > best_rating) {
2447 best_rating = choice_it.data()->rating();
2470 pass1_result =
new PAGE_RES(
false, block_list,
2473 return pass1_result;
2477 int debug_level = 0;
2485 result_it, &models);
2498 length = (len == -1 ? strlen(repr) : len);
2513 static
void add_space(TESS_CHAR_IT* it) {
2514 TESS_CHAR *t =
new TESS_CHAR(0,
" ");
2515 it->add_after_then_move(t);
2519 static float rating_to_cost(
float rating) {
2520 rating = 100 + rating;
2524 if (rating < 0) rating = 0;
2532 static void extract_result(TESS_CHAR_IT* out,
2536 while (page_res_it.word() != NULL) {
2544 int n = strlen(len);
2545 for (
int i = 0; i < n; i++) {
2549 out->add_after_then_move(tc);
2553 page_res_it.forward();
2570 TESS_CHAR_LIST tess_chars;
2571 TESS_CHAR_IT tess_chars_it(&tess_chars);
2572 extract_result(&tess_chars_it, page_res);
2573 tess_chars_it.move_to_first();
2574 int n = tess_chars.length();
2576 *lengths =
new int[n];
2577 *costs =
new float[n];
2583 for (tess_chars_it.mark_cycle_pt();
2584 !tess_chars_it.cycled_list();
2585 tess_chars_it.forward(), i++) {
2587 text_len += (*lengths)[i] = tc->
length;
2588 (*costs)[i] = tc->
cost;
2592 (*y1)[i] = tc->
box.
top();
2594 char *p = *text =
new char[text_len];
2596 tess_chars_it.move_to_first();
2597 for (tess_chars_it.mark_cycle_pt();
2598 !tess_chars_it.cycled_list();
2599 tess_chars_it.forward()) {
2617 int* feature_outline_index) {
2623 &cn_features, &fx_info, &outline_counts);
2628 *num_features = cn_features.
size();
2629 memcpy(int_features, &cn_features[0], *num_features *
sizeof(cn_features[0]));
2631 if (feature_outline_index != NULL) {
2633 for (
int i = 0; i < outline_counts.
size(); ++i) {
2634 while (f < outline_counts[i])
2635 feature_outline_index[f++] = i;
2643 int left,
int top,
int right,
int bottom) {
2644 TBOX box(left, bottom, right, top);
2645 BLOCK_IT b_it(blocks);
2646 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
2647 BLOCK* block = b_it.data();
2651 for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
2652 ROW* row = r_it.data();
2656 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
2657 WERD* word = w_it.data();
2668 int num_max_matches,
2671 int* num_matches_returned) {
2672 BLOB_CHOICE_LIST* choices =
new BLOB_CHOICE_LIST;
2674 BLOB_CHOICE_IT choices_it(choices);
2675 int& index = *num_matches_returned;
2677 for (choices_it.mark_cycle_pt();
2678 !choices_it.cycled_list() && index < num_max_matches;
2679 choices_it.forward()) {
2682 ratings[index] = choice->
rating();
2685 *num_matches_returned = index;
2705 #ifndef NO_CUBE_BUILD
2710 #endif // NO_CUBE_BUILD
2716 for (ptr = text; *ptr; ptr++) {
2718 case '<': ret +=
"<";
break;
2719 case '>': ret +=
">";
break;
2720 case '&': ret +=
"&";
break;
2721 case '"': ret +=
""";
break;
2722 case '\'': ret +=
"'";
break;
2723 default: ret += *ptr;
int orientation_and_script_detection(STRING &filename, OSResults *osr, tesseract::Tesseract *tess)
C_BLOB_LIST * blob_list()
get blobs
void SavePixForCrash(int resolution, Pix *pix)
static void ResetToDefaults(ParamsVectors *member_params)
TBOX bounding_box() const
static const char * Version()
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
bool GetIntVariable(const char *name, int *value) const
void CorrectClassifyWords(PAGE_RES *page_res)
static size_t getOpenCLDevice(void **device)
const char * WordFontAttributes(bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
BOOL8 flag(WERD_FLAGS mask) const
bool classify_bln_numeric_mode
bool PTIsTextType(PolyBlockType type)
BLOCK_LIST * FindLinesCreateBlockList()
EquationDetect * equ_detect_
The equation detector.
static void DeleteBlockList(BLOCK_LIST *block_list)
int IntCastRounded(double x)
bool tessedit_train_from_boxes
void set_deadline_msecs(inT32 deadline_msecs)
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
void SetDictFunc(DictFunc f)
bool GetTextDirection(int *out_offset, float *out_slope)
void GetAvailableLanguagesAsVector(GenericVector< STRING > *langs) const
bool PSM_OSD_ENABLED(int pageseg_mode)
#define TESSERACT_VERSION_STR
void set_text(const char *new_text)
void ResetDocumentDictionary()
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
void set_pix_thresholds(Pix *thresholds)
virtual Pix * GetPixRectThresholds()
TruthCallback * truth_cb_
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
CubeRecoContext * GetCubeRecoContext() const
void SetFillLatticeFunc(FillLatticeFunc f)
void SetBlackAndWhitelist()
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
const char * WordRecognitionLanguage() const
const TBOX & BlobBox(int index) const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Pix ** mutable_pix_binary()
const int kBlnBaselineOffset
double matcher_good_threshold
void ResetAdaptiveClassifier()
bool Empty(PageIteratorLevel level) const
bool ParagraphIsLtr() const
PageIterator * AnalyseLayout()
bool textord_equation_detect
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
const int kMinCredibleResolution
Minimum believable resolution.
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
void split(const char c, GenericVector< STRING > *splited)
Boxa * GetWords(Pixa **pixa)
virtual bool Next(PageIteratorLevel level)
void ReadConfigFile(const char *filename)
GenericVector< ParagraphModel * > * paragraph_models_
void chomp_string(char *str)
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
ImageThresholder * thresholder_
Image thresholding module.
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
static bool GetParamAsString(const char *name, const ParamsVectors *member_params, STRING *value)
virtual char * GetUTF8Text(PageIteratorLevel level) const
bool SetVariable(const char *name, const char *value)
tesseract::ParamsVectors * GlobalParams()
TESS_LOCAL int TextLength(int *blob_count)
void set_pix_grey(Pix *grey_pix)
void set_source_resolution(int ppi)
TESS_CHAR(float _cost, const char *repr, int len=-1)
Pix * input_image_
Image used for searchable PDF.
STRING * language_
Last initialized language.
Boxa * GetStrips(Pixa **pixa, int **blockids)
static ROW * MakeTessOCRRow(float baseline, float xheight, float descender, float ascender)
char * GetOsdText(int page_number)
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
bool GetDoubleVariable(const char *name, double *value) const
Tesseract * get_sub_lang(int index) const
static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode)
FILE * init_recog_training(const STRING &fname)
bool tessedit_write_images
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
MutableIterator * GetMutableIterator()
Pix * GetBinaryImage(PageIteratorLevel level) const
const char * GetInitLanguagesAsString() const
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
int SegmentPage(const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
int tessedit_pageseg_mode
OcrEngineMode oem() const
Tesseract * tesseract() const
int * AllWordConfidences()
const char * GetInputName()
static TBLOB * MakeTBLOB(Pix *pix)
void GetFeaturesForBlob(TBLOB *blob, INT_FEATURE_STRUCT *int_features, int *num_features, int *feature_outline_index)
TESS_LOCAL bool InternalSetImage()
void SetRectangle(int left, int top, int width, int height)
bool IsBinary() const
Returns true if the source image is binary.
double(Dict::* ProbabilityInContextFunc)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
int GetScaledYResolution() const
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
static void ClearPersistentCache()
bool GetVariableAsString(const char *name, STRING *val)
float base_line(float xpos) const
TESS_LOCAL PAGE_RES * RecognitionPass2(BLOCK_LIST *block_list, PAGE_RES *pass1_result)
const char * GetDatapath()
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
PageSegMode GetPageSegMode() const
int OrientationIdToValue(const int &id)
const char * GetUnichar(int unichar_id)
PolyBlockType BlockType() const
float angle() const
find angle
BLOCK_RES * block() const
STRING * output_file_
Name used by debug code.
C_OUTLINE_LIST * out_list()
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
StrongScriptDirection WordDirection() const
const char * id_to_unichar(UNICHAR_ID id) const
void SetRectangle(int left, int top, int width, int height)
void ClearAdaptiveClassifier()
bool GetBoolVariable(const char *name, bool *value) const
void TidyUp(PAGE_RES *page_res)
Pix * GetThresholdedImage()
WERD_RES * restart_page()
static DawgCache * GlobalDawgCache()
virtual Pix * GetPixRectGrey()
const char * kOldVarsFile
CubeRecoContext * GetCubeRecoContext()
#define BOOL_VAR(name, val, comment)
const char * get_script_from_script_id(int id) const
void SetInputName(const char *name)
virtual bool IsAtBeginningOf(PageIteratorLevel level) const
static ROW * FindRowForBox(BLOCK_LIST *blocks, int left, int top, int right, int bottom)
char * GetHOCRText(int page_number)
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
int num_sub_langs() const
int Recognize(ETEXT_DESC *monitor)
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
bool tessedit_resegment_from_line_boxes
void delete_data_pointers()
bool interactive_display_mode
int GetScaledEstimatedResolution() const
const STRING & unichar_string() const
Orientation and script detection only.
GenericVector< IntParam * > int_params
PAGE_RES * page_res_
The page-level data.
int GetSourceYResolution() const
Automatic page segmentation, but no OSD, or OCR.
virtual TESS_LOCAL void Threshold(Pix **pix)
void read_config_file(const char *filename, SetParamConstraint constraint)
int GetSourceYResolution()
STRING * datapath_
Current location of tessdata.
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
Pix * GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const
TBOX bounding_box() const
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
int(Dict::* DictFunc)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
bool DetectOS(OSResults *)
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
void pgeditor_main(int width, int height, PAGE_RES *page_res)
TBLOB * make_tesseract_blob(float baseline, float xheight, float descender, float ascender, bool numeric_mode, Pix *pix)
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates)
#define MAX_NUM_INT_FEATURES
TESS_LOCAL int FindLines()
#define PERF_COUNT_SUB(SUB)
struct TessResultRenderer TessResultRenderer
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
bool tessedit_resegment_from_boxes
int RecognizeForChopTest(ETEXT_DESC *monitor)
virtual void ThresholdToPix(PageSegMode pageseg_mode, Pix **pix)
GenericVector< BoolParam * > bool_params
void SetInputImage(Pix *pix)
bool tessedit_ambigs_training
int IsValidWord(const char *word)
int CubeAPITest(Boxa *boxa_blocks, Pixa *pixa_blocks, Boxa *boxa_words, Pixa *pixa_words, const FCOORD &reskew, Pix *page_pix, PAGE_RES *page_res)
int GetScaleFactor() const
CRUNCH_MODE unlv_crunch_mode
void SetSourceYResolution(int ppi)
void SetSourceResolution(int ppi)
void InitForAnalysePage()
TESS_LOCAL PAGE_RES * RecognitionPass1(BLOCK_LIST *block_list)
const Dawg * GetDawg(int i) const
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
void DumpPGM(const char *filename)
Boxa * GetTextlines(const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
const int kBytesPerNumber
Assume a single uniform block of text. (Default.)
char * GetBoxText(int page_number)
void set_unlv_suspects(WERD_RES *word)
WERD_CHOICE * prev_word_best_choice_
bool major_overlap(const TBOX &box) const
void ReadDebugConfigFile(const char *filename)
void SetOutputName(const char *name)
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
virtual void Run(A1, A2, A3, A4)=0
bool SetDebugVariable(const char *name, const char *value)
const char * GetStringVariable(const char *name) const
void RunAdaptiveClassifier(TBLOB *blob, int num_max_matches, int *unichar_ids, float *ratings, int *num_matches_returned)
void set_min_orientation_margin(double margin)
void add_str_int(const char *str, int number)
bool IsEmpty() const
Return true if no image has been set.
void BestChoiceToCorrectText()
void assign(const char *cstr, int len)
void(Wordrec::* FillLatticeFunc)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
TBOX intersection(const TBOX &box) const
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)
int GetThresholdedImageScaleFactor() const
double min_orientation_margin
int(Dict::* letter_is_okay_)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
void ExtractFontName(const STRING &filename, STRING *fontname)
ResultIterator * GetIterator()
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params)
const UNICHARSET & getUnicharset() const
bool AdaptToWordStr(PageSegMode mode, const char *wordstr)
Tesseract * osd_tesseract_
For orientation & script detection.
GenericVector< DoubleParam * > double_params
TESS_API int get_best_script(int orientation_id) const
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
void SetEquationDetect(EquationDetect *detector)
void SetProbabilityInContextFunc(ProbabilityInContextFunc f)
BLOCK_LIST * block_list_
The page layout.
const char kTesseractReject
bool contains_unichar(const char *const unichar_repr) const
const int kMaxCredibleResolution
const int kBytesPer64BitNumber
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
#define PERF_COUNT_START(FUNCT_NAME)
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
void GetBlockTextOrientations(int **block_orientation, bool **vertical_writing)
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
STRING HOcrEscape(const char *text)
static void CatchSignals()
tesseract::BoxWord * box_word
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
int InitLangMod(const char *datapath, const char *language)
void extract_edges(Pix *pix, BLOCK *block)
const STRING & unichar_lengths() const
bool WriteTRFile(const STRING &filename)
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
void add_str_double(const char *str, double number)
float Confidence(PageIteratorLevel level) const
char * GetUTF8Text(PageIteratorLevel level) const
static TESS_LOCAL int TesseractExtractResult(char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res)
void GetLoadedLanguagesAsVector(GenericVector< STRING > *langs) const
void signal_exit(int signal_code)
void InitAdaptiveClassifier(bool load_pre_trained_templates)
virtual bool Next(PageIteratorLevel level)
void LearnWord(const char *fontname, WERD_RES *word)
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
TBOX bounding_box() const
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
ROW_LIST * row_list()
get rows
Boxa * GetConnectedComponents(Pixa **cc)
Tesseract * tesseract_
The underlying data object.
bool IsValidCharacter(const char *utf8_character)
const int kMaxBytesPerLine
const char * string() const
WERD_CHOICE * best_choice
bool BeginDocument(const char *title)
char * TesseractRect(const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height)
void PrintVariables(FILE *fp) const
void SetPageSegMode(PageSegMode mode)
bool tessedit_make_boxes_from_boxes
STRING * input_file_
Name used by training code.
GenericVector< StringParam * > string_params
Boxa * GetRegions(Pixa **pixa)
UNICHAR_ID unichar_id() const
ADAPT_TEMPLATES AdaptedTemplates
bool AddImage(TessBaseAPI *api)
bool recognition_done_
page_res_ contains recognition data.
const char * string() const
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language)
const int kNumbersPerBlob
void ReSegmentByClassification(PAGE_RES *page_res)
TESS_LOCAL LTRResultIterator * GetLTRIterator()
TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender)
const int kBytesPerBoxFileLine
const char * c_str() const