|
tesseract 3.04.01
|
#include <pageres.h>
| WERD_RES::WERD_RES | ( | ) | [inline] |
Definition at line 319 of file pageres.h.
{
  // Bring every member to its default state. The two initializers write
  // disjoint sets of members, so their relative order does not matter.
  InitPointers();
  InitNonPointers();
}
| WERD_RES::WERD_RES | ( | WERD * | the_word | ) | [inline] |
Definition at line 323 of file pageres.h.
{
  // Default-initialize everything first; InitPointers() resets word to NULL,
  // so the_word has to be stored afterwards.
  InitPointers();
  InitNonPointers();
  word = the_word;
}
| WERD_RES::WERD_RES | ( | const WERD_RES & | source | ) | [inline] |
Definition at line 330 of file pageres.h.
: ELIST_LINK(source) {
  // Null the pointer members first so that the Clear() performed inside
  // operator= never sees uninitialized pointers.
  InitPointers();
  *this = source;  // see operator=
}
| WERD_RES::~WERD_RES | ( | ) |
Definition at line 1084 of file pageres.cpp.
{
  // All owned data is released by Clear().
  Clear();
}
| bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan | ( | float | threshold | ) | const |
Definition at line 430 of file pageres.cpp.
{
  // Returns true when no entry of best_choices other than the first has an
  // adjust_factor() at or below threshold.
  // The iterator type needs a non-const list; nothing is modified here.
  WERD_CHOICE_IT alt_it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
  // Step past the first entry, then walk until the iterator wraps round to
  // the first entry again.
  alt_it.forward();
  while (!alt_it.at_first()) {
    if (alt_it.data()->adjust_factor() <= threshold)
      return false;
    alt_it.forward();
  }
  return true;
}
| bool WERD_RES::AnyLtrCharsInWord | ( | ) | const [inline] |
Definition at line 389 of file pageres.h.
{
  // Returns true if any character of best_choice has left-to-right direction.
  // Without a unicharset or a non-empty best choice there is nothing to test.
  if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
    return false;
  for (int pos = 0; pos < best_choice->length(); ++pos) {
    int uid = best_choice->unichar_id(pos);
    // Ids outside the unicharset are illegal and simply skipped.
    bool legal = uid >= 0 && uid < uch_set->size();
    if (legal && uch_set->get_direction(uid) == UNICHARSET::U_LEFT_TO_RIGHT)
      return true;
  }
  return false;
}
| bool WERD_RES::AnyRtlCharsInWord | ( | ) | const [inline] |
Definition at line 372 of file pageres.h.
{
  // Returns true if any character of best_choice has a right-to-left
  // direction (plain RTL, Arabic RTL, or Arabic number).
  if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
    return false;
  for (int pos = 0; pos < best_choice->length(); ++pos) {
    int uid = best_choice->unichar_id(pos);
    if (uid < 0 || uid >= uch_set->size())
      continue;  // Ignore illegal chars.
    switch (uch_set->get_direction(uid)) {
      case UNICHARSET::U_RIGHT_TO_LEFT:
      case UNICHARSET::U_RIGHT_TO_LEFT_ARABIC:
      case UNICHARSET::U_ARABIC_NUMBER:
        return true;
      default:
        break;
    }
  }
  return false;
}
| void WERD_RES::BestChoiceToCorrectText | ( | ) |
Definition at line 917 of file pageres.cpp.
{
  // Rebuilds correct_text with one string per character of best_choice.
  correct_text.clear();
  ASSERT_HOST(best_choice != NULL);
  for (int pos = 0; pos < best_choice->length(); ++pos) {
    UNICHAR_ID uid = best_choice->unichar_id(pos);
    correct_text.push_back(STRING(uch_set->id_to_unichar(uid)));
  }
}
| const char* WERD_RES::BestUTF8 | ( | int | blob_index, |
| bool | in_rtl_context | ||
| ) | const [inline] |
Definition at line 342 of file pageres.h.
{
  // Returns the text of the best choice at blob_index, or NULL when the index
  // or the unichar id is out of range. In an RTL context a valid mirrored id
  // is used in place of the original.
  if (best_choice == NULL || blob_index < 0 ||
      blob_index >= best_choice->length())
    return NULL;
  UNICHAR_ID id = best_choice->unichar_id(blob_index);
  if (id == INVALID_UNICHAR_ID || id < 0 || id >= uch_set->size())
    return NULL;
  UNICHAR_ID mirrored = uch_set->get_mirror(id);
  bool use_mirror =
      in_rtl_context && mirrored > 0 && mirrored != INVALID_UNICHAR_ID;
  return uch_set->id_to_unichar_ext(use_mirror ? mirrored : id);
}
| UNICHAR_ID WERD_RES::BothHyphens | ( | UNICHAR_ID | id1, |
| UNICHAR_ID | id2 | ||
| ) |
Definition at line 1024 of file pageres.cpp.
{
  // Returns the id of "-" when both ids are single-character '-' or '~',
  // otherwise INVALID_UNICHAR_ID.
  const char *first = uch_set->id_to_unichar(id1);
  const char *second = uch_set->id_to_unichar(id2);
  bool first_is_dash =
      strlen(first) == 1 && (*first == '-' || *first == '~');
  bool second_is_dash =
      strlen(second) == 1 && (*second == '-' || *second == '~');
  if (!first_is_dash || !second_is_dash)
    return INVALID_UNICHAR_ID;
  return uch_set->unichar_to_id("-");
}
| UNICHAR_ID WERD_RES::BothQuotes | ( | UNICHAR_ID | id1, |
| UNICHAR_ID | id2 | ||
| ) |
Definition at line 1002 of file pageres.cpp.
{
  // Returns the id of the double-quote character when both ids pass
  // is_simple_quote, otherwise INVALID_UNICHAR_ID.
  const char *first = uch_set->id_to_unichar(id1);
  const char *second = uch_set->id_to_unichar(id2);
  if (!is_simple_quote(first, strlen(first)) ||
      !is_simple_quote(second, strlen(second)))
    return INVALID_UNICHAR_ID;
  return uch_set->unichar_to_id("\"");
}
| UNICHAR_ID WERD_RES::BothSpaces | ( | UNICHAR_ID | id1, |
| UNICHAR_ID | id2 | ||
| ) |
Definition at line 1053 of file pageres.cpp.
{
  // Returns id1 when both ids are the space character, otherwise
  // INVALID_UNICHAR_ID. The unicharset is only consulted when the ids match.
  if (id1 != id2 || id1 != uch_set->unichar_to_id(" "))
    return INVALID_UNICHAR_ID;
  return id1;
}
| void WERD_RES::Clear | ( | ) |
Definition at line 1130 of file pageres.cpp.
{
  // The word is deleted here only when this result is a combination.
  // delete on a null pointer is a no-op, so no NULL test is needed.
  if (combination)
    delete word;
  word = NULL;
  delete blamer_bundle;
  blamer_bundle = NULL;
  // Everything produced by recognition goes as well.
  ClearResults();
}
| void WERD_RES::ClearRatings | ( | ) |
Definition at line 1185 of file pageres.cpp.
{
  // Nothing to do when there is no ratings matrix.
  if (ratings == NULL)
    return;
  // The matrix contents are deleted before the matrix itself.
  ratings->delete_matrix_pointers();
  delete ratings;
  ratings = NULL;
}
| void WERD_RES::ClearResults | ( | ) |
Definition at line 1140 of file pageres.cpp.
{
  // Reset the status flag and the font information.
  done = false;
  fontinfo = NULL;
  fontinfo2 = NULL;
  fontinfo_id_count = 0;
  fontinfo_id2_count = 0;

  // Release the word representations. delete on a null pointer is a no-op,
  // so explicit NULL tests are unnecessary.
  delete bln_boxes;
  bln_boxes = NULL;
  blob_row = NULL;  // Only reset, not deleted.
  delete chopped_word;
  chopped_word = NULL;
  delete rebuild_word;
  rebuild_word = NULL;
  delete box_word;
  box_word = NULL;

  // Empty the per-blob vectors; the seams themselves are deleted before the
  // array is cleared.
  best_state.clear();
  correct_text.clear();
  seam_array.delete_data_pointers();
  seam_array.clear();
  blob_widths.clear();
  blob_gaps.clear();

  ClearRatings();
  ClearWordChoices();
  if (blamer_bundle != NULL)
    blamer_bundle->ClearResults();
}
| void WERD_RES::ClearWordChoices | ( | ) |
Definition at line 1173 of file pageres.cpp.
{
  // best_choice is only reset here; the entries of best_choices are removed
  // by the clear() below.
  best_choice = NULL;
  // delete on a null pointer is a no-op, so no NULL tests are needed.
  delete raw_choice;
  raw_choice = NULL;
  best_choices.clear();
  delete ep_choice;
  ep_choice = NULL;
}
| void WERD_RES::CloneChoppedToRebuild | ( | ) |
Definition at line 828 of file pageres.cpp.
{
  // Replace rebuild_word with a copy of chopped_word (delete on NULL is a
  // no-op) and derive the box word from it.
  delete rebuild_word;
  rebuild_word = new TWERD(*chopped_word);
  SetupBoxWord();
  // Append one state of 1 and one empty correct-text entry per box.
  int num_boxes = box_word->length();
  best_state.reserve(num_boxes);
  correct_text.reserve(num_boxes);
  for (int remaining = num_boxes; remaining > 0; --remaining) {
    best_state.push_back(1);
    correct_text.push_back(STRING(""));
  }
}
| void WERD_RES::ComputeAdaptionThresholds | ( | float | certainty_scale, |
| float | min_rating, | ||
| float | max_rating, | ||
| float | rating_margin, | ||
| float * | thresholds | ||
| ) |
Definition at line 553 of file pageres.cpp.
{
  // Writes one adaption threshold per character of best_choice into
  // thresholds[0 .. best_choice->length() - 1].
  //   certainty_scale, rating_margin: scale the averaged raw certainty.
  //   min_rating, max_rating: clamp range for every threshold.
  // best_choice and raw_choice are dereferenced without a NULL check.
  int chunk = 0;
  int end_chunk = best_choice->state(0);
  int end_raw_chunk = raw_choice->state(0);
  int raw_blob = 0;
  for (int i = 0; i < best_choice->length(); i++, thresholds++) {
    float avg_rating = 0.0f;
    int num_error_chunks = 0;
    // For each chunk in best choice blob i, count non-matching raw results.
    while (chunk < end_chunk) {
      // Move on to the raw blob that covers this chunk.
      if (chunk >= end_raw_chunk) {
        ++raw_blob;
        end_raw_chunk += raw_choice->state(raw_blob);
      }
      if (best_choice->unichar_id(i) !=
          raw_choice->unichar_id(raw_blob)) {
        avg_rating += raw_choice->certainty(raw_blob);
        ++num_error_chunks;
      }
      ++chunk;
    }
    if (num_error_chunks > 0) {
      avg_rating /= num_error_chunks;
      *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin);
    } else {
      *thresholds = max_rating;
    }
    if (*thresholds > max_rating)
      *thresholds = max_rating;
    if (*thresholds < min_rating)
      *thresholds = min_rating;
    // Extend the chunk limit to cover the next best-choice blob. Without
    // this, end_chunk stays at the end of blob 0 and every later character
    // sees no chunks and always receives max_rating.
    if (i + 1 < best_choice->length())
      end_chunk += best_choice->state(i + 1);
  }
}
| bool WERD_RES::ConditionalBlobMerge | ( | TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > * | class_cb, |
| TessResultCallback2< bool, const TBOX &, const TBOX & > * | box_cb | ||
| ) |
Definition at line 932 of file pageres.cpp.
{
// Merges adjacent blobs of best_choice wherever class_cb returns a valid
// replacement id for the pair and box_cb (if given) accepts their boxes.
// Both callbacks are deleted before returning, so the caller gives up
// ownership. Returns true if at least one merge was made.
ASSERT_HOST(best_choice->length() == 0 || ratings != NULL);
bool modified = false;
// best_choice shrinks on every merge, so the bound is re-read each pass.
for (int i = 0; i + 1 < best_choice->length(); ++i) {
UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i),
best_choice->unichar_id(i+1));
if (new_id != INVALID_UNICHAR_ID &&
(box_cb == NULL || box_cb->Run(box_word->BlobBox(i),
box_word->BlobBox(i + 1)))) {
// Raw choice should not be fixed.
best_choice->set_unichar_id(new_id, i);
modified = true;
MergeAdjacentBlobs(i);
// Widen the ratings band if the merged blob's coordinate no longer fits.
const MATRIX_COORD& coord = best_choice->MatrixCoord(i);
if (!coord.Valid(*ratings)) {
ratings->IncreaseBandSize(coord.row + 1 - coord.col);
}
BLOB_CHOICE_LIST* blob_choices = GetBlobChoices(i);
if (FindMatchingChoice(new_id, blob_choices) == NULL) {
// Insert a fake result.
BLOB_CHOICE* blob_choice = new BLOB_CHOICE;
blob_choice->set_unichar_id(new_id);
BLOB_CHOICE_IT bc_it(blob_choices);
bc_it.add_before_then_move(blob_choice);
}
}
}
// The callbacks are owned by this function.
delete class_cb;
delete box_cb;
return modified;
}
| void WERD_RES::ConsumeWordResults | ( | WERD_RES * | word | ) |
Definition at line 757 of file pageres.cpp.
{
// Takes over the recognition results of *word: pointer members are moved
// with MovePointerData, vectors are moved, and simple fields are copied.
// The parameter named word is the source WERD_RES, not the WERD member.
denorm = word->denorm;
blob_row = word->blob_row;
MovePointerData(&chopped_word, &word->chopped_word);
MovePointerData(&rebuild_word, &word->rebuild_word);
MovePointerData(&box_word, &word->box_word);
// Delete our own seams, take the source's, then empty the source array so
// the seams are referenced from one array only.
seam_array.delete_data_pointers();
seam_array = word->seam_array;
word->seam_array.clear();
best_state.move(&word->best_state);
correct_text.move(&word->correct_text);
blob_widths.move(&word->blob_widths);
blob_gaps.move(&word->blob_gaps);
// Delete the contents of our own matrix before the pointer is replaced.
if (ratings != NULL) ratings->delete_matrix_pointers();
MovePointerData(&ratings, &word->ratings);
best_choice = word->best_choice;
MovePointerData(&raw_choice, &word->raw_choice);
// Replace our cooked choices with the source's list.
best_choices.clear();
WERD_CHOICE_IT wc_it(&best_choices);
wc_it.add_list_after(&word->best_choices);
reject_map = word->reject_map;
// A source blamer bundle requires this object to already have one.
if (word->blamer_bundle != NULL) {
assert(blamer_bundle != NULL);
blamer_bundle->CopyResults(*(word->blamer_bundle));
}
CopySimpleFields(*word);
}
| void WERD_RES::copy_on | ( | WERD_RES * | word_res | ) | [inline] |
| void WERD_RES::CopySimpleFields | ( | const WERD_RES & | source | ) |
Definition at line 241 of file pageres.cpp.
{
  // Copies the plain value fields and the pointers assigned by value here
  // (fontinfo, fontinfo2, uch_set, tesseract) from source.

  // Recognition status.
  tess_failed = source.tess_failed;
  tess_accepted = source.tess_accepted;
  tess_would_adapt = source.tess_would_adapt;
  done = source.done;
  unlv_crunch_mode = source.unlv_crunch_mode;

  // Style attributes and font information.
  small_caps = source.small_caps;
  odd_size = source.odd_size;
  italic = source.italic;
  bold = source.bold;
  fontinfo = source.fontinfo;
  fontinfo2 = source.fontinfo2;
  fontinfo_id_count = source.fontinfo_id_count;
  fontinfo_id2_count = source.fontinfo_id2_count;

  // Size measurements.
  x_height = source.x_height;
  caps_height = source.caps_height;
  baseline_shift = source.baseline_shift;
  guessed_x_ht = source.guessed_x_ht;
  guessed_caps_ht = source.guessed_caps_ht;

  // Remaining simple fields.
  reject_spaces = source.reject_spaces;
  uch_set = source.uch_set;
  tesseract = source.tesseract;
}
| void WERD_RES::DebugTopChoice | ( | const char * | msg | ) | const |
Definition at line 490 of file pageres.cpp.
{
  // Prints the acceptance flags, then best_choice labelled with msg, or a
  // placeholder when there is no best choice.
  tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ",
          tess_accepted, tess_would_adapt, done);
  if (best_choice != NULL) {
    best_choice->print(msg);
  } else {
    tprintf("<Null choice>\n");
  }
}
| void WERD_RES::DebugWordChoices | ( | bool | debug, |
| const char * | word_to_debug | ||
| ) |
Definition at line 471 of file pageres.cpp.
{
  // Prints the raw choice and every cooked choice when debug is set, or when
  // the best choice's text equals the non-empty word_to_debug.
  if (!debug) {
    // The string comparison is only evaluated when debug is off.
    bool matches_target =
        word_to_debug != NULL && *word_to_debug != '\0' &&
        best_choice != NULL &&
        best_choice->unichar_string() == STRING(word_to_debug);
    if (!matches_target)
      return;
  }
  if (raw_choice != NULL)
    raw_choice->print("\nBest Raw Choice");
  WERD_CHOICE_IT cooked_it(&best_choices);
  int cooked_num = 0;
  for (cooked_it.mark_cycle_pt(); !cooked_it.cycled_list();
       cooked_it.forward(), ++cooked_num) {
    STRING label;
    label.add_str_int("\nCooked Choice #", cooked_num);
    cooked_it.data()->print(label.string());
  }
}
| void WERD_RES::FakeClassifyWord | ( | int | blob_count, |
| BLOB_CHOICE ** | choices | ||
| ) |
Definition at line 872 of file pageres.cpp.
{
  // Builds a ratings matrix holding exactly one supplied choice per blob and
  // derives the word choices from it. choices must hold blob_count entries;
  // each is put into a newly allocated list stored on the matrix diagonal.
  ASSERT_HOST(box_word != NULL);
  ASSERT_HOST(blob_count == box_word->length());
  ClearWordChoices();
  ClearRatings();
  ratings = new MATRIX(blob_count, 1);
  for (int blob = 0; blob < blob_count; ++blob) {
    BLOB_CHOICE_LIST* single_choice = new BLOB_CHOICE_LIST;
    BLOB_CHOICE_IT inserter(single_choice);
    inserter.add_after_then_move(choices[blob]);
    ratings->put(blob, blob, single_choice);
  }
  FakeWordFromRatings();
  reject_map.initialise(blob_count);
  done = true;
}
| void WERD_RES::FakeWordFromRatings | ( | ) |
Definition at line 892 of file pageres.cpp.
{
  // Builds a word choice from the first entry of each diagonal cell of the
  // ratings matrix and logs it as both the raw and the cooked choice.
  int diag_len = ratings->dimension();
  WERD_CHOICE* word_choice = new WERD_CHOICE(uch_set, diag_len);
  word_choice->set_permuter(TOP_CHOICE_PERM);
  for (int cell = 0; cell < diag_len; ++cell) {
    // Values used when the cell has no choices at all.
    UNICHAR_ID unichar_id = UNICHAR_SPACE;
    float rating = MAX_INT32;
    float certainty = -MAX_INT32;
    BLOB_CHOICE_LIST* cell_choices = ratings->get(cell, cell);
    if (cell_choices != NULL && !cell_choices->empty()) {
      BLOB_CHOICE_IT first_it(cell_choices);
      BLOB_CHOICE* first = first_it.data();
      unichar_id = first->unichar_id();
      rating = first->rating();
      certainty = first->certainty();
    }
    word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating,
                                                   certainty);
  }
  // LogNewRawChoice stores a copy of word_choice.
  LogNewRawChoice(word_choice);
  // Ownership of word_choice taken by word here.
  LogNewCookedChoice(1, false, word_choice);
}
| void WERD_RES::FilterWordChoices | ( | int | debug_level | ) |
Definition at line 504 of file pageres.cpp.
{
// Removes from best_choices every alternative (every entry after the first)
// in which some blob differs from the aligned best_choice blob and has a
// certainty lower than it by more than the stopper ambiguity threshold.
// debug_level >= 2 prints the best choice and each discarded alternative.
// Nothing to filter without a best choice or with a single entry.
if (best_choice == NULL || best_choices.singleton())
return;
if (debug_level >= 2)
best_choice->print("\nFiltering against best choice");
WERD_CHOICE_IT it(&best_choices);
int index = 0;
// Start at the second entry and stop when the iterator is back at the first.
for (it.forward(); !it.at_first(); it.forward(), ++index) {
WERD_CHOICE* choice = it.data();
float threshold = StopperAmbigThreshold(best_choice->adjust_factor(),
choice->adjust_factor());
// i, j index the blob choice in choice, best_choice.
// chunk is an index into the chopped_word blobs (AKA chunks).
// Since the two words may use different segmentations of the chunks, we
// iterate over the chunks to find out whether a comparable blob
// classification is much worse than the best result.
int i = 0, j = 0, chunk = 0;
// Each iteration of the while deals with 1 chunk. On entry choice_chunk
// and best_chunk are the indices of the first chunk in the NEXT blob,
// i.e. we don't have to increment i, j while chunk < choice_chunk and
// best_chunk respectively.
int choice_chunk = choice->state(0), best_chunk = best_choice->state(0);
while (i < choice->length() && j < best_choice->length()) {
if (choice->unichar_id(i) != best_choice->unichar_id(j) &&
choice->certainty(i) - best_choice->certainty(j) < threshold) {
if (debug_level >= 2) {
STRING label;
label.add_str_int("\nDiscarding bad choice #", index);
choice->print(label.string());
tprintf("i %d j %d Chunk %d Choice->Blob[i].Certainty %.4g"
" BestChoice->ChunkCertainty[Chunk] %g Threshold %g\n",
i, j, chunk, choice->certainty(i),
best_choice->certainty(j), threshold);
}
// Remove the alternative from the list and free it; no further blobs of
// it need to be examined.
delete it.extract();
break;
}
++chunk;
// If needed, advance choice_chunk to keep up with chunk.
while (choice_chunk < chunk && ++i < choice->length())
choice_chunk += choice->state(i);
// If needed, advance best_chunk to keep up with chunk.
while (best_chunk < chunk && ++j < best_choice->length())
best_chunk += best_choice->state(j);
}
}
}
| void WERD_RES::fix_hyphens | ( | ) |
Definition at line 1041 of file pageres.cpp.
{
  // Merges adjacent hyphen-like blobs whose boxes overlap, provided the
  // unicharset contains "-" and has it enabled.
  bool hyphen_allowed = uch_set->contains_unichar("-") &&
                        uch_set->get_enabled(uch_set->unichar_to_id("-"));
  if (!hyphen_allowed)
    return;  // Don't create it if it is disallowed.
  ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothHyphens),
      NewPermanentTessCallback(this, &WERD_RES::HyphenBoxesOverlap));
}
| void WERD_RES::fix_quotes | ( | ) |
Definition at line 1012 of file pageres.cpp.
{
  // Merges adjacent simple-quote blobs into a double quote, provided the
  // unicharset contains the double quote and has it enabled.
  bool quote_allowed = uch_set->contains_unichar("\"") &&
                       uch_set->get_enabled(uch_set->unichar_to_id("\""));
  if (!quote_allowed)
    return;  // Don't create it if it is disallowed.
  // No box callback: only the class test decides.
  ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothQuotes),
      NULL);
}
| BLOB_CHOICE * WERD_RES::GetBlobChoice | ( | int | index | ) | const |
Definition at line 742 of file pageres.cpp.
{
  // Returns the blob choice matching best_choice's unichar at index, or NULL
  // when index is out of range.
  bool in_range = index >= 0 && index < best_choice->length();
  if (!in_range)
    return NULL;
  BLOB_CHOICE_LIST* candidates = GetBlobChoices(index);
  UNICHAR_ID wanted = best_choice->unichar_id(index);
  return FindMatchingChoice(wanted, candidates);
}
| BLOB_CHOICE_LIST * WERD_RES::GetBlobChoices | ( | int | index | ) | const |
Definition at line 751 of file pageres.cpp.
{
  // Looks up the choice list for blob index of best_choice in ratings.
  // best_choice is dereferenced without a NULL check.
  BLOB_CHOICE_LIST* found = best_choice->blob_choices(index, ratings);
  return found;
}
| int WERD_RES::GetBlobsGap | ( | int | blob_index | ) |
Definition at line 732 of file pageres.cpp.
| int WERD_RES::GetBlobsWidth | ( | int | start_blob, |
| int | last_blob | ||
| ) |
Definition at line 722 of file pageres.cpp.
{
  // Sums the widths of blobs start_blob..last_blob (inclusive) plus the gaps
  // between them; the gap after last_blob is not included.
  int total = 0;
  int blob = start_blob;
  while (blob <= last_blob) {
    total += blob_widths[blob];
    if (blob != last_blob)
      total += blob_gaps[blob];
    ++blob;
  }
  return total;
}
Definition at line 1035 of file pageres.cpp.
| void WERD_RES::InitForRetryRecognition | ( | const WERD_RES & | source | ) |
Definition at line 269 of file pageres.cpp.
{
  // Points at the same word as source, copies the simple fields, and copies
  // the truth of source's blamer bundle (if any) into a new bundle.
  word = source.word;
  CopySimpleFields(source);
  if (source.blamer_bundle == NULL)
    return;
  blamer_bundle = new BlamerBundle();
  blamer_bundle->CopyTruth(*source.blamer_bundle);
}
| void WERD_RES::InitNonPointers | ( | ) |
Definition at line 1088 of file pageres.cpp.
{
  // Recognition status.
  tess_failed = FALSE;
  tess_accepted = FALSE;
  tess_would_adapt = FALSE;
  done = FALSE;
  unlv_crunch_mode = CR_NONE;

  // Style attributes.
  small_caps = false;
  odd_size = false;
  italic = FALSE;
  bold = FALSE;

  // The fontinfos and tesseract count as non-pointers as they point to
  // data owned elsewhere.
  fontinfo = NULL;
  fontinfo2 = NULL;
  tesseract = NULL;
  fontinfo_id_count = 0;
  fontinfo_id2_count = 0;

  // Size measurements; both heights start out flagged as guessed.
  x_height = 0.0;
  caps_height = 0.0;
  baseline_shift = 0.0f;
  guessed_x_ht = TRUE;
  guessed_caps_ht = TRUE;

  // Combination and rejection flags.
  combination = FALSE;
  part_of_combo = FALSE;
  reject_spaces = FALSE;
}
| void WERD_RES::InitPointers | ( | ) |
Definition at line 1115 of file pageres.cpp.
{
  // Sets every pointer member handled here to NULL without freeing anything.

  // Source word, row and unicharset.
  word = NULL;
  blob_row = NULL;
  uch_set = NULL;

  // Word representations.
  bln_boxes = NULL;
  chopped_word = NULL;
  rebuild_word = NULL;
  box_word = NULL;

  // Ratings and word choices.
  ratings = NULL;
  best_choice = NULL;
  raw_choice = NULL;
  ep_choice = NULL;

  blamer_bundle = NULL;
}
| void WERD_RES::InsertSeam | ( | int | blob_number, |
| SEAM * | seam | ||
| ) |
Definition at line 409 of file pageres.cpp.
{
  // Insert the seam into the SEAMS array.
  seam->PrepareToInsertSeam(seam_array, chopped_word->blobs, blob_number, true);
  seam_array.insert(seam, blob_number);
  // Without a ratings matrix nothing else is updated.
  if (ratings == NULL)
    return;
  // Expand the ratings matrix.
  ratings = ratings->ConsumeAndMakeBigger(blob_number);
  // Fix all the segmentation states: the raw choice first, then each cooked
  // choice.
  if (raw_choice != NULL)
    raw_choice->UpdateStateForSplit(blob_number);
  WERD_CHOICE_IT cooked_it(&best_choices);
  for (cooked_it.mark_cycle_pt(); !cooked_it.cycled_list();
       cooked_it.forward()) {
    cooked_it.data()->UpdateStateForSplit(blob_number);
  }
  SetupBlobWidthsAndGaps();
}
| bool WERD_RES::IsAmbiguous | ( | ) |
Definition at line 443 of file pageres.cpp.
{
  // Ambiguous when best_choices is not a singleton, or when the best choice
  // reports a dangerous ambiguity.
  if (!best_choices.singleton())
    return true;
  return best_choice->dangerous_ambig_found();
}
| bool WERD_RES::LogNewCookedChoice | ( | int | max_num_choices, |
| bool | debug, | ||
| WERD_CHOICE * | word_choice | ||
| ) |
Definition at line 612 of file pageres.cpp.
{
// Offers word_choice for insertion into best_choices, which is kept in order
// of increasing rating and limited to max_num_choices distinct strings.
// Returns true if word_choice was inserted into the list; otherwise
// word_choice is deleted here and false is returned. best_choice is updated
// when the new choice lands at the head of the list.
// debug: print the reason for discarding or the resulting placement.
if (best_choice != NULL) {
// Throw out obviously bad choices to save some work.
// TODO(rays) Get rid of this! This piece of code produces different
// results according to the order in which words are found, which is an
// undesirable behavior. It would be better to keep all the choices and
// prune them later when more information is available.
float max_certainty_delta =
StopperAmbigThreshold(best_choice->adjust_factor(),
word_choice->adjust_factor());
// Cap the allowed delta at -kStopperAmbiguityThresholdOffset.
if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
max_certainty_delta = -kStopperAmbiguityThresholdOffset;
if (word_choice->certainty() - best_choice->certainty() <
max_certainty_delta) {
if (debug) {
STRING bad_string;
word_choice->string_and_lengths(&bad_string, NULL);
tprintf("Discarding choice \"%s\" with an overly low certainty"
" %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
bad_string.string(), word_choice->certainty(),
best_choice->certainty(),
max_certainty_delta + best_choice->certainty());
}
delete word_choice;
return false;
}
}
// Insert in the list in order of increasing rating, but knock out worse
// string duplicates.
WERD_CHOICE_IT it(&best_choices);
const STRING& new_str = word_choice->unichar_string();
bool inserted = false;
// Counts the entries kept so far, including the new one once inserted.
int num_choices = 0;
if (!it.empty()) {
do {
WERD_CHOICE* choice = it.data();
if (choice->rating() > word_choice->rating() && !inserted) {
// Time to insert.
it.add_before_stay_put(word_choice);
inserted = true;
if (num_choices == 0)
best_choice = word_choice; // This is the new best.
++num_choices;
}
if (choice->unichar_string() == new_str) {
if (inserted) {
// New is better.
delete it.extract();
} else {
// Old is better.
if (debug) {
tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n",
new_str.string(), word_choice->rating(), choice->rating());
}
delete word_choice;
return false;
}
} else {
// A different string: keep it only while within the size limit.
++num_choices;
if (num_choices > max_num_choices)
delete it.extract();
}
it.forward();
} while (!it.at_first());
}
// Not better than any existing entry: append if there is still room.
if (!inserted && num_choices < max_num_choices) {
it.add_to_end(word_choice);
inserted = true;
if (num_choices == 0)
best_choice = word_choice; // This is the new best.
}
if (debug) {
if (inserted)
tprintf("New %s", best_choice == word_choice ? "Best" : "Secondary");
else
tprintf("Poor");
word_choice->print(" Word Choice");
}
if (!inserted) {
delete word_choice;
return false;
}
return true;
}
| bool WERD_RES::LogNewRawChoice | ( | WERD_CHOICE * | word_choice | ) |
Definition at line 596 of file pageres.cpp.
{
  // Keeps a copy of word_choice as raw_choice when there is none yet or when
  // word_choice has a lower rating. Returns true if raw_choice was replaced.
  // word_choice itself is not stored; the caller keeps it.
  if (raw_choice != NULL &&
      !(word_choice->rating() < raw_choice->rating()))
    return false;
  delete raw_choice;
  raw_choice = new WERD_CHOICE(*word_choice);
  raw_choice->set_permuter(TOP_CHOICE_PERM);
  return true;
}
| void WERD_RES::merge_tess_fails | ( | ) |
Definition at line 1061 of file pageres.cpp.
{
  // Merges adjacent pairs of space characters; after a merge the reject map
  // and the box word must match the best choice in length.
  bool merged = ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothSpaces), NULL);
  if (!merged)
    return;
  int len = best_choice->length();
  ASSERT_HOST(reject_map.length() == len);
  ASSERT_HOST(box_word->length() == len);
}
| void WERD_RES::MergeAdjacentBlobs | ( | int | index | ) |
Definition at line 968 of file pageres.cpp.
{
  // Folds blob index + 1 into blob index in every per-blob structure.
  int next = index + 1;
  // The reject map is only shortened while it is still in step with
  // best_choice.
  if (reject_map.length() == best_choice->length())
    reject_map.remove_pos(index);
  best_choice->remove_unichar_id(next);
  rebuild_word->MergeBlobs(index, index + 2);
  box_word->MergeBoxes(index, index + 2);
  if (next < best_state.length()) {
    best_state[index] += best_state[next];
    best_state.remove(next);
  }
}
| WERD_RES & WERD_RES::operator= | ( | const WERD_RES & | source | ) |
Definition at line 178 of file pageres.cpp.
{
  // Deep-copies source into *this: owned objects are cloned, the ratings
  // matrix and seam_array are not copied, and word is cloned only when
  // source is a combination (otherwise the pointer is shared).
  // Returns *this.
  this->ELIST_LINK::operator=(source);
  // Self-assignment guard: Clear() below would otherwise destroy the very
  // data that is about to be copied.
  if (this == &source)
    return *this;
  Clear();
  if (source.combination) {
    word = new WERD;
    *word = *(source.word);  // deep copy
  } else {
    word = source.word;  // pt to same word
  }
  if (source.bln_boxes != NULL)
    bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
  if (source.chopped_word != NULL)
    chopped_word = new TWERD(*source.chopped_word);
  if (source.rebuild_word != NULL)
    rebuild_word = new TWERD(*source.rebuild_word);
  // TODO(rays) Do we ever need to copy the seam_array?
  blob_row = source.blob_row;
  denorm = source.denorm;
  if (source.box_word != NULL)
    box_word = new tesseract::BoxWord(*source.box_word);
  best_state = source.best_state;
  correct_text = source.correct_text;
  blob_widths = source.blob_widths;
  blob_gaps = source.blob_gaps;
  // None of the uses of operator= require the ratings matrix to be copied,
  // so don't as it would be really slow.
  // Copy the cooked choices.
  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&source.best_choices));
  WERD_CHOICE_IT wc_dest_it(&best_choices);
  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
    const WERD_CHOICE *choice = wc_it.data();
    wc_dest_it.add_after_then_move(new WERD_CHOICE(*choice));
  }
  // best_choice is the first entry of the copied list, if there is one.
  if (!wc_dest_it.empty()) {
    wc_dest_it.move_to_first();
    best_choice = wc_dest_it.data();
  } else {
    best_choice = NULL;
  }
  if (source.raw_choice != NULL) {
    raw_choice = new WERD_CHOICE(*source.raw_choice);
  } else {
    raw_choice = NULL;
  }
  if (source.ep_choice != NULL) {
    ep_choice = new WERD_CHOICE(*source.ep_choice);
  } else {
    ep_choice = NULL;
  }
  reject_map = source.reject_map;
  combination = source.combination;
  part_of_combo = source.part_of_combo;
  CopySimpleFields(source);
  if (source.blamer_bundle != NULL) {
    blamer_bundle = new BlamerBundle(*(source.blamer_bundle));
  }
  return *this;
}
| bool WERD_RES::PiecesAllNatural | ( | int | start, |
| int | count | ||
| ) | const |
Definition at line 1072 of file pageres.cpp.
{
  // Returns true if none of the seams between pieces start and
  // start + count - 1 has any splits. Indices outside seam_array and NULL
  // seams are ignored.
  int end = start + count - 1;
  for (int pos = start; pos < end; ++pos) {
    if (pos < 0 || pos >= seam_array.size())
      continue;
    SEAM* seam = seam_array[pos];
    if (seam == NULL)
      continue;
    if (seam->HasAnySplits())
      return false;
  }
  return true;
}
| void WERD_RES::PrintBestChoices | ( | ) | const |
Definition at line 709 of file pageres.cpp.
{
  // Prints the best choice followed by every entry of best_choices as a
  // quoted, comma-separated list.
  STRING joined;
  // The iterator type needs a non-const list; nothing is modified here.
  WERD_CHOICE_IT alt_it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
  for (alt_it.mark_cycle_pt(); !alt_it.cycled_list(); alt_it.forward()) {
    // Separator before every entry except the first.
    if (!alt_it.at_first())
      joined += "\", \"";
    joined += alt_it.data()->unichar_string();
  }
  tprintf("Alternates for \"%s\": {\"%s\"}\n",
          best_choice->unichar_string().string(), joined.string());
}
| const char* WERD_RES::RawUTF8 | ( | int | blob_index | ) | const [inline] |
Definition at line 355 of file pageres.h.
{
  // Returns the text of the raw choice at blob_index, or NULL when there is
  // no raw choice, blob_index is out of range, or the unichar id is invalid.
  // The raw_choice NULL test mirrors the best_choice test in BestUTF8 and
  // prevents a null dereference before any raw choice has been logged.
  if (raw_choice == NULL || blob_index < 0 ||
      blob_index >= raw_choice->length())
    return NULL;
  UNICHAR_ID id = raw_choice->unichar_id(blob_index);
  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
    return NULL;
  return uch_set->id_to_unichar(id);
}
| void WERD_RES::RebuildBestState | ( | ) |
Definition at line 800 of file pageres.cpp.
{
// Rebuilds rebuild_word and best_state from the segmentation recorded in
// best_choice: for each character, its chunks in chopped_word are joined
// temporarily, the resulting blob is copied, and the chunks are split again.
ASSERT_HOST(best_choice != NULL);
if (rebuild_word != NULL)
delete rebuild_word;
rebuild_word = new TWERD;
// Create the seam list first if there is none yet.
if (seam_array.empty())
start_seam_list(chopped_word, &seam_array);
best_state.truncate(0);
// Index of the first chunk of the current character.
int start = 0;
for (int i = 0; i < best_choice->length(); ++i) {
// Number of chunks making up character i.
int length = best_choice->state(i);
best_state.push_back(length);
if (length > 1) {
SEAM::JoinPieces(seam_array, chopped_word->blobs, start,
start + length - 1);
}
// Copy the blob at start while the pieces are joined.
TBLOB* blob = chopped_word->blobs[start];
rebuild_word->blobs.push_back(new TBLOB(*blob));
if (length > 1) {
SEAM::BreakPieces(seam_array, chopped_word->blobs, start,
start + length - 1);
}
start += length;
}
}
| void WERD_RES::ReplaceBestChoice | ( | WERD_CHOICE * | choice | ) |
Definition at line 787 of file pageres.cpp.
{
  // Makes choice the best choice and rebuilds everything derived from it.
  best_choice = choice;
  RebuildBestState();
  SetupBoxWord();
  // Make up a fake reject map of the right length to keep the
  // rejection pass happy.
  reject_map.initialise(best_state.length());
  // The word is marked finished, accepted and adaptable.
  tess_would_adapt = true;
  tess_accepted = true;
  done = true;
  SetScriptPositions();
}
| void WERD_RES::SetAllScriptPositions | ( | tesseract::ScriptPos | position | ) |
Definition at line 860 of file pageres.cpp.
{
  // Applies position to the raw choice and to every cooked choice.
  // raw_choice is dereferenced without a NULL check.
  raw_choice->SetAllScriptPositions(position);
  WERD_CHOICE_IT cooked_it(&best_choices);
  cooked_it.mark_cycle_pt();
  while (!cooked_it.cycled_list()) {
    cooked_it.data()->SetAllScriptPositions(position);
    cooked_it.forward();
  }
}
| void WERD_RES::SetScriptPositions | ( | ) |
Definition at line 853 of file pageres.cpp.
| void WERD_RES::SetupBasicsFromChoppedWord | ( | const UNICHARSET & | unicharset_in | ) |
Definition at line 334 of file pageres.cpp.
| void WERD_RES::SetupBlamerBundle | ( | ) |
Definition at line 384 of file pageres.cpp.
{
  // Nothing to set up without a blamer bundle.
  if (blamer_bundle == NULL)
    return;
  blamer_bundle->SetupNormTruthWord(denorm);
}
| void WERD_RES::SetupBlobWidthsAndGaps | ( | ) |
Definition at line 391 of file pageres.cpp.
{
  // Recomputes blob_widths (one per blob of chopped_word) and blob_gaps (one
  // per adjacent pair) from the blob bounding boxes.
  blob_widths.truncate(0);
  blob_gaps.truncate(0);
  int blob_total = chopped_word->NumBlobs();
  for (int cur = 0; cur < blob_total; ++cur) {
    TBOX cur_box = chopped_word->blobs[cur]->bounding_box();
    blob_widths.push_back(cur_box.width());
    int nxt = cur + 1;
    if (nxt < blob_total) {
      // Gap runs from the right edge of this blob to the left edge of the
      // next one.
      blob_gaps.push_back(
          chopped_word->blobs[nxt]->bounding_box().left() - cur_box.right());
    }
  }
}
| void WERD_RES::SetupBoxWord | ( | ) |
Definition at line 843 of file pageres.cpp.
{
  // Rebuilds box_word from rebuild_word and clips it to the original word.
  // delete on a null pointer is a no-op, so no NULL test is needed.
  delete box_word;
  rebuild_word->ComputeBoundingBoxes();
  box_word = tesseract::BoxWord::CopyFromNormalized(rebuild_word);
  box_word->ClipToOriginalWord(denorm.block(), word);
}
| void WERD_RES::SetupFake | ( | const UNICHARSET & | unicharset_in | ) |
Definition at line 343 of file pageres.cpp.
{
  // Sets the word up as a failed recognition with placeholder results.
  ClearResults();
  SetupWordScript(unicharset_in);
  chopped_word = new TWERD;
  rebuild_word = new TWERD;
  bln_boxes = new tesseract::BoxWord;
  box_word = new tesseract::BoxWord;
  int blob_count = word->cblob_list()->length();
  if (blob_count > 0) {
    BLOB_CHOICE** fake_choices = new BLOB_CHOICE*[blob_count];
    // For non-text blocks, just pass any blobs through to the box_word
    // and call the word failed with a fake classification.
    C_BLOB_IT blob_it(word->cblob_list());
    int next_slot = 0;
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      TBOX blob_box = blob_it.data()->bounding_box();
      box_word->InsertBox(box_word->length(), blob_box);
      fake_choices[next_slot++] = new BLOB_CHOICE;
    }
    FakeClassifyWord(blob_count, fake_choices);
    // Only the array is freed; the choices were handed to FakeClassifyWord.
    delete [] fake_choices;
  } else {
    // No blobs at all: log a single bad word choice. The local gets its own
    // name so that it does not hide the word member.
    WERD_CHOICE* bad_choice = new WERD_CHOICE(&unicharset_in);
    bad_choice->make_bad();
    LogNewRawChoice(bad_choice);
    // Ownership of bad_choice is taken by *this WERD_RES in
    // LogNewCookedChoice.
    LogNewCookedChoice(1, false, bad_choice);
  }
  tess_failed = true;
  done = true;
}
| bool WERD_RES::SetupForRecognition | ( | const UNICHARSET & | unicharset_in, |
| tesseract::Tesseract * | tess, |
| Pix * | pix, | ||
| int | norm_mode, | ||
| const TBOX * | norm_box, | ||
| bool | numeric_mode, | ||
| bool | use_body_size, | ||
| bool | allow_detailed_fx, | ||
| ROW * | row, | ||
| const BLOCK * | block | ||
| ) |
Definition at line 294 of file pageres.cpp.
{
// Prepares the word for recognition: builds the normalized chopped_word,
// the basic per-blob data and an empty ratings matrix.
// Returns false, after setting up a fake failed result, when the word has
// no blobs (outside cube-only mode) or lies in a non-text poly block;
// returns true otherwise.
// norm_mode is reinterpreted as an OcrEngineMode.
tesseract::OcrEngineMode norm_mode_hint =
static_cast<tesseract::OcrEngineMode>(norm_mode);
tesseract = tess;
POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
if ((norm_mode_hint != tesseract::OEM_CUBE_ONLY &&
word->cblob_list()->empty()) || (pb != NULL && !pb->IsText())) {
// Empty words occur when all the blobs have been moved to the rej_blobs
// list, which seems to occur frequently in junk.
SetupFake(unicharset_in);
word->set_flag(W_REP_CHAR, false);
return false;
}
ClearResults();
SetupWordScript(unicharset_in);
chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
// Use the row's body size as the x-height when requested and available,
// otherwise this word's own x_height.
float word_xheight = use_body_size && row != NULL && row->body_size() > 0.0f
? row->body_size() : x_height;
chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
word_xheight, baseline_shift, numeric_mode,
norm_mode_hint, norm_box, &denorm);
blob_row = row;
SetupBasicsFromChoppedWord(unicharset_in);
SetupBlamerBundle();
// One ratings row/column per chopped blob.
int num_blobs = chopped_word->NumBlobs();
ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks);
tess_failed = false;
return true;
}
| void WERD_RES::SetupWordScript | ( | const UNICHARSET & | uch | ) |
Definition at line 375 of file pageres.cpp.
{
  // Remembers the unicharset and stamps the word with its default script and
  // the script-dependent flags.
  uch_set = &uch;
  int default_script = uch.default_sid();
  bool is_latin = default_script == uch.latin_sid();
  word->set_script_id(default_script);
  word->set_flag(W_SCRIPT_HAS_XHEIGHT, uch.script_has_xheight());
  word->set_flag(W_SCRIPT_IS_LATIN, is_latin);
}
| bool WERD_RES::StatesAllValid | ( | ) |
Definition at line 449 of file pageres.cpp.
{
  // Returns true if the total of states of the raw choice and of every
  // cooked choice equals the dimension of the ratings matrix. The first
  // mismatch is reported with tprintf and false is returned.
  // ratings and raw_choice are dereferenced without a NULL check.
  int ratings_dim = ratings->dimension();
  if (raw_choice->TotalOfStates() != ratings_dim) {
    tprintf("raw_choice has total of states = %d vs ratings dim of %d\n",
            raw_choice->TotalOfStates(), ratings_dim);
    return false;
  }
  WERD_CHOICE_IT it(&best_choices);
  int index = 0;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
    WERD_CHOICE* choice = it.data();
    if (choice->TotalOfStates() != ratings_dim) {
      // The format has three %d conversions; index supplies the first one.
      tprintf("Cooked #%d has total of states = %d vs ratings dim of %d\n",
              index, choice->TotalOfStates(), ratings_dim);
      return false;
    }
  }
  return true;
}
| UNICHARSET::Direction WERD_RES::SymbolDirection | ( | int | blob_index | ) | const [inline] |
Definition at line 364 of file pageres.h.
{
  // Returns the direction of the best choice's character at blob_index, or
  // U_OTHER_NEUTRAL when there is no best choice or the index is out of
  // range.
  bool in_range = best_choice != NULL &&
                  blob_index < best_choice->length() &&
                  blob_index >= 0;
  if (!in_range)
    return UNICHARSET::U_OTHER_NEUTRAL;
  return uch_set->get_direction(best_choice->unichar_id(blob_index));
}
| bool WERD_RES::UnicharsInReadingOrder | ( | ) | const [inline] |
Definition at line 406 of file pageres.h.
{
  // Forwards to best_choice, which is dereferenced without a NULL check.
  bool in_script_order = best_choice->unichars_in_script_order();
  return in_script_order;
}
| float WERD_RES::baseline_shift |
| WERD_CHOICE_LIST WERD_RES::best_choices |
| float WERD_RES::caps_height |
| const FontInfo* WERD_RES::fontinfo |
| const FontInfo* WERD_RES::fontinfo2 |
| bool WERD_RES::odd_size |
| bool WERD_RES::small_caps |
| const UNICHARSET* WERD_RES::uch_set |
| float WERD_RES::x_height |