|
tesseract 3.04.01
|
#include <ltrresultiterator.h>
Public Member Functions | |
| LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height) | |
| virtual | ~LTRResultIterator () |
| char * | GetUTF8Text (PageIteratorLevel level) const |
| void | SetLineSeparator (const char *new_line) |
| void | SetParagraphSeparator (const char *new_para) |
| float | Confidence (PageIteratorLevel level) const |
| void | RowAttributes (float *row_height, float *descenders, float *ascenders) const |
| const char * | WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const |
| const char * | WordRecognitionLanguage () const |
| StrongScriptDirection | WordDirection () const |
| bool | WordIsFromDictionary () const |
| bool | WordIsNumeric () const |
| bool | HasBlamerInfo () const |
| const void * | GetParamsTrainingBundle () const |
| const char * | GetBlamerDebug () const |
| const char * | GetBlamerMisadaptionDebug () const |
| bool | HasTruthString () const |
| bool | EquivalentToTruth (const char *str) const |
| char * | WordTruthUTF8Text () const |
| char * | WordNormedUTF8Text () const |
| const char * | WordLattice (int *lattice_size) const |
| bool | SymbolIsSuperscript () const |
| bool | SymbolIsSubscript () const |
| bool | SymbolIsDropcap () const |
Protected Attributes | |
| const char * | line_separator_ |
| const char * | paragraph_separator_ |
Friends | |
| class | ChoiceIterator |
Definition at line 46 of file ltrresultiterator.h.
| tesseract::LTRResultIterator::LTRResultIterator | ( | PAGE_RES * | page_res, |
| Tesseract * | tesseract, | ||
| int | scale, | ||
| int | scaled_yres, | ||
| int | rect_left, | ||
| int | rect_top, | ||
| int | rect_width, | ||
| int | rect_height | ||
| ) |
Definition at line 31 of file ltrresultiterator.cpp.
// Forwards every constructor argument unchanged to the PageIterator base and
// points both line_separator_ and paragraph_separator_ at the literal "\n".
// The constructor body itself is empty.
: PageIterator(page_res, tesseract, scale, scaled_yres, rect_left, rect_top, rect_width, rect_height), line_separator_("\n"), paragraph_separator_("\n") { }
| tesseract::LTRResultIterator::~LTRResultIterator | ( | ) | [virtual] |
Definition at line 41 of file ltrresultiterator.cpp.
| float tesseract::LTRResultIterator::Confidence | ( | PageIteratorLevel | level | ) | const |
Definition at line 95 of file ltrresultiterator.cpp.
{
case RIL_BLOCK:
do {
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
mean_certainty += best_choice->certainty();
++certainty_count;
res_it.forward();
} while (res_it.block() == res_it.prev_block());
break;
case RIL_PARA:
do {
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
mean_certainty += best_choice->certainty();
++certainty_count;
res_it.forward();
} while (res_it.block() == res_it.prev_block() &&
res_it.row()->row->para() == res_it.prev_row()->row->para());
break;
case RIL_TEXTLINE:
do {
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
mean_certainty += best_choice->certainty();
++certainty_count;
res_it.forward();
} while (res_it.row() == res_it.prev_row());
break;
case RIL_WORD:
mean_certainty += best_choice->certainty();
++certainty_count;
break;
case RIL_SYMBOL:
mean_certainty += best_choice->certainty(blob_index_);
++certainty_count;
}
if (certainty_count > 0) {
mean_certainty /= certainty_count;
float confidence = 100 + 5 * mean_certainty;
if (confidence < 0.0f) confidence = 0.0f;
if (confidence > 100.0f) confidence = 100.0f;
return confidence;
}
return 0.0f;
}
| bool tesseract::LTRResultIterator::EquivalentToTruth | ( | const char * | str | ) | const |
Definition at line 269 of file ltrresultiterator.cpp.
| const char * tesseract::LTRResultIterator::GetBlamerDebug | ( | ) | const |
Definition at line 247 of file ltrresultiterator.cpp.
| const char * tesseract::LTRResultIterator::GetBlamerMisadaptionDebug | ( | ) | const |
Definition at line 253 of file ltrresultiterator.cpp.
| const void * tesseract::LTRResultIterator::GetParamsTrainingBundle | ( | ) | const |
Definition at line 240 of file ltrresultiterator.cpp.
: NULL; }
| char * tesseract::LTRResultIterator::GetUTF8Text | ( | PageIteratorLevel | level | ) | const |
Reimplemented in tesseract::ResultIterator.
Definition at line 46 of file ltrresultiterator.cpp.
{
text = res_it.word()->BestUTF8(blob_index_, false);
} else if (level == RIL_WORD) {
text = best_choice->unichar_string();
} else {
bool eol = false; // end of line?
bool eop = false; // end of paragraph?
do { // for each paragraph in a block
do { // for each text line in a paragraph
do { // for each word in a text line
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
text += best_choice->unichar_string();
text += " ";
res_it.forward();
eol = res_it.row() != res_it.prev_row();
} while (!eol);
text.truncate_at(text.length() - 1);
text += line_separator_;
eop = res_it.block() != res_it.prev_block() ||
res_it.row()->row->para() != res_it.prev_row()->row->para();
} while (level != RIL_TEXTLINE && !eop);
if (eop) text += paragraph_separator_;
} while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
}
int length = text.length() + 1;
char* result = new char[length];
strncpy(result, text.string(), length);
return result;
}
| bool tesseract::LTRResultIterator::HasBlamerInfo | ( | ) | const |
Definition at line 233 of file ltrresultiterator.cpp.
| bool tesseract::LTRResultIterator::HasTruthString | ( | ) | const |
Definition at line 258 of file ltrresultiterator.cpp.
{
return false; // no truth information for this word
}
return true;
}
| void tesseract::LTRResultIterator::RowAttributes | ( | float * | row_height, |
| float * | descenders, | ||
| float * | ascenders | ||
| ) | const |
| void tesseract::LTRResultIterator::SetLineSeparator | ( | const char * | new_line | ) |
Definition at line 84 of file ltrresultiterator.cpp.
| void tesseract::LTRResultIterator::SetParagraphSeparator | ( | const char * | new_para | ) |
Definition at line 89 of file ltrresultiterator.cpp.
| bool tesseract::LTRResultIterator::SymbolIsDropcap | ( | ) | const |
Definition at line 335 of file ltrresultiterator.cpp.
| bool tesseract::LTRResultIterator::SymbolIsSubscript | ( | ) | const |
Definition at line 326 of file ltrresultiterator.cpp.
| bool tesseract::LTRResultIterator::SymbolIsSuperscript | ( | ) | const |
Definition at line 316 of file ltrresultiterator.cpp.
| StrongScriptDirection tesseract::LTRResultIterator::WordDirection | ( | ) | const |
Definition at line 204 of file ltrresultiterator.cpp.
| const char * tesseract::LTRResultIterator::WordFontAttributes | ( | bool * | is_bold, |
| bool * | is_italic, | ||
| bool * | is_underlined, | ||
| bool * | is_monospace, | ||
| bool * | is_serif, | ||
| bool * | is_smallcaps, | ||
| int * | pointsize, | ||
| int * | font_id | ||
| ) | const |
Definition at line 166 of file ltrresultiterator.cpp.
// Reports the font attributes of the current word through the out-parameters
// and returns the font name.
//
// Out-parameters (all must be non-NULL; every one is always written):
//   is_bold, is_italic, is_monospace, is_serif - taken from the word's FontInfo.
//   is_underlined - always false (not implemented, see TODO below).
//   is_smallcaps  - taken from the word's small_caps flag.
//   pointsize     - row height (x_height + ascenders - descenders) converted
//                   with kPointsPerInch / scaled_yres_, rounded to nearest;
//                   0 when scaled_yres_ is not positive.
//   font_id       - FontInfo::universal_id, or -1 when there is no font info.
//
// Returns font_info.name, or NULL when the iterator has no current word or
// the word carries no font information. On a NULL return the out-parameters
// hold the defaults: all flags false, pointsize 0, font_id -1.
{
  // Give every out-parameter a defined value before any early return, so a
  // caller that inspects them after a NULL result never reads indeterminate
  // memory.
  *is_bold = false;
  *is_italic = false;
  *is_underlined = false;  // TODO(rays) fix this!
  *is_monospace = false;
  *is_serif = false;
  *is_smallcaps = false;
  *pointsize = 0;
  *font_id = -1;

  if (it_->word() == NULL) return NULL;  // Already at the end!
  if (it_->word()->fontinfo == NULL) return NULL;  // No font information.

  const FontInfo& font_info = *it_->word()->fontinfo;
  *font_id = font_info.universal_id;
  *is_bold = font_info.is_bold();
  *is_italic = font_info.is_italic();
  *is_monospace = font_info.is_fixed_pitch();
  *is_serif = font_info.is_serif();
  *is_smallcaps = it_->word()->small_caps;

  float row_height = it_->row()->row->x_height() +
      it_->row()->row->ascenders() - it_->row()->row->descenders();
  // Convert from pixels to printers points.
  *pointsize = scaled_yres_ > 0
      ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
      : 0;

  return font_info.name;
}
| bool tesseract::LTRResultIterator::WordIsFromDictionary | ( | ) | const |
Definition at line 218 of file ltrresultiterator.cpp.
| bool tesseract::LTRResultIterator::WordIsNumeric | ( | ) | const |
Definition at line 226 of file ltrresultiterator.cpp.
| const char * tesseract::LTRResultIterator::WordLattice | ( | int * | lattice_size | ) | const |
Definition at line 306 of file ltrresultiterator.cpp.
| char * tesseract::LTRResultIterator::WordNormedUTF8Text | ( | ) | const |
Definition at line 289 of file ltrresultiterator.cpp.
{
ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
}
int length = ocr_text.length() + 1;
char* result = new char[length];
strncpy(result, ocr_text.string(), length);
return result;
}
| const char * tesseract::LTRResultIterator::WordRecognitionLanguage | ( | ) | const |
Definition at line 198 of file ltrresultiterator.cpp.
| char * tesseract::LTRResultIterator::WordTruthUTF8Text | ( | ) | const |
Definition at line 278 of file ltrresultiterator.cpp.
friend class ChoiceIterator [friend] |
Definition at line 47 of file ltrresultiterator.h.
const char* tesseract::LTRResultIterator::line_separator_ [protected] |
Definition at line 181 of file ltrresultiterator.h.
const char* tesseract::LTRResultIterator::paragraph_separator_ [protected] |
Definition at line 182 of file ltrresultiterator.h.