|
tesseract 3.04.01
|
#include <blamer.h>
Public Member Functions | |
| BlamerBundle () | |
| BlamerBundle (const BlamerBundle &other) | |
| ~BlamerBundle () | |
| STRING | TruthString () const |
| IncorrectResultReason | incorrect_result_reason () const |
| bool | NoTruth () const |
| bool | HasDebugInfo () const |
| const STRING & | debug () const |
| const STRING & | misadaption_debug () const |
| void | UpdateBestRating (float rating) |
| int | correct_segmentation_length () const |
| bool | MatrixPositionCorrect (int index, const MATRIX_COORD &coord) |
| void | set_best_choice_is_dict_and_top_choice (bool value) |
| const char * | lattice_data () const |
| int | lattice_size () const |
| void | set_lattice_data (const char *data, int size) |
| const tesseract::ParamsTrainingBundle & | params_training_bundle () const |
| void | AddHypothesis (const tesseract::ParamsTrainingHypothesis &hypo) |
| void | SetWordTruth (const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box) |
| void | SetSymbolTruth (const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box) |
| void | SetRejectedTruth () |
| bool | ChoiceIsCorrect (const WERD_CHOICE *word_choice) const |
| void | ClearResults () |
| void | CopyTruth (const BlamerBundle &other) |
| void | CopyResults (const BlamerBundle &other) |
| const char * | IncorrectReason () const |
| void | FillDebugString (const STRING &msg, const WERD_CHOICE *choice, STRING *debug) |
| void | SetupNormTruthWord (const DENORM &denorm) |
| void | SplitBundle (int word1_right, int word2_left, bool debug, BlamerBundle *bundle1, BlamerBundle *bundle2) const |
| void | JoinBlames (const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug) |
| void | BlameClassifier (const UNICHARSET &unicharset, const TBOX &blob_box, const BLOB_CHOICE_LIST &choices, bool debug) |
| void | SetChopperBlame (const WERD_RES *word, bool debug) |
| void | BlameClassifierOrLangModel (const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug) |
| void | SetupCorrectSegmentation (const TWERD *word, bool debug) |
| bool | GuidedSegsearchNeeded (const WERD_CHOICE *best_choice) const |
| void | InitForSegSearch (const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id, bool debug, STRING *debug_str, TessResultCallback2< bool, int, int > *pp_cb) |
| bool | GuidedSegsearchStillGoing () const |
| void | FinishSegSearch (const WERD_CHOICE *best_choice, bool debug, STRING *debug_str) |
| void | SetMisAdaptionDebug (const WERD_CHOICE *best_choice, bool debug) |
Static Public Member Functions | |
| static const char * | IncorrectReasonName (IncorrectResultReason irr) |
| static void | LastChanceBlame (bool debug, WERD_RES *word) |
| BlamerBundle::BlamerBundle | ( | ) | [inline] |
Definition at line 90 of file blamer.h.
// Default state: no per-character truth boxes, the result is presumed
// correct, and there is no lattice buffer. lattice_data_ has to be NULL
// before ClearResults() runs, because ClearResults() calls delete[] on it;
// ClearResults() then resets the remaining per-result members.
: truth_has_char_boxes_(false), incorrect_result_reason_(IRR_CORRECT), lattice_data_(NULL) { ClearResults(); }
| BlamerBundle::BlamerBundle | ( | const BlamerBundle & | other | ) | [inline] |
Definition at line 93 of file blamer.h.
{
  // Duplicate the ground-truth members first, then the recognition-side
  // results. The order matters: both helpers write
  // incorrect_result_reason_, and the value from CopyResults() must win.
  CopyTruth(other);
  CopyResults(other);
}
| BlamerBundle::~BlamerBundle | ( | ) | [inline] |
| void BlamerBundle::AddHypothesis | ( | const tesseract::ParamsTrainingHypothesis & | hypo | ) | [inline] |
Definition at line 154 of file blamer.h.
{
  // Forward the hypothesis to the params-training bundle owned by this
  // blamer.
  this->params_training_bundle_.AddHypothesis(hypo);
}
| void BlamerBundle::BlameClassifier | ( | const UNICHARSET & | unicharset, |
| const TBOX & | blob_box, | ||
| const BLOB_CHOICE_LIST & | choices, | ||
| bool | debug | ||
| ) |
Definition at line 257 of file blamer.cpp.
{
  // Blames the classifier (or adaption) when the truth character for the
  // blob at blob_box is missing from, or outranked by an adapted entry in,
  // the classification list. Does nothing unless per-character truth boxes
  // are available and no blame has been assigned yet.
  if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_) {
    return;
  }
  for (int truth_idx = 0; truth_idx < norm_truth_word_.length(); ++truth_idx) {
    const TBOX &truth_box = norm_truth_word_.BlobBox(truth_idx);
    // The box comparison uses half the usual tolerance: it is stricter than
    // in the chopper or segmentation search because the previous and next
    // bounding boxes cannot be checked here.
    if (!blob_box.x_almost_equal(truth_box, norm_box_tolerance_ / 2)) {
      continue;
    }
    const char *truth_str = truth_text_[truth_idx].string();
    bool truth_in_list = false;
    bool adapted_seen_first = false;
    UNICHAR_ID adapted_id = INVALID_UNICHAR_ID;
    // The list and its contents are not modified: every element is accessed
    // through a const BLOB_CHOICE* only.
    BLOB_CHOICE_IT it(const_cast<BLOB_CHOICE_LIST *>(&choices));
    it.mark_cycle_pt();
    while (!it.cycled_list()) {
      const BLOB_CHOICE *candidate = it.data();
      const char *normed =
          unicharset.get_normed_unichar(candidate->unichar_id());
      if (strcmp(truth_str, normed) == 0) {
        truth_in_list = true;
        break;
      }
      // Remember the most recent adapted entry that precedes the truth.
      if (candidate->IsAdapted()) {
        adapted_seen_first = true;
        adapted_id = candidate->unichar_id();
      }
      it.forward();
    }
    if (!truth_in_list) {
      STRING debug_str("unichar ");
      debug_str += truth_str;
      debug_str += " not found in classification list";
      SetBlame(IRR_CLASSIFIER, debug_str, NULL, debug);
    } else if (adapted_seen_first) {
      STRING debug_str("better rating for adapted ");
      debug_str += unicharset.id_to_unichar(adapted_id);
      debug_str += " than for correct ";
      debug_str += truth_str;
      SetBlame(IRR_ADAPTION, debug_str, NULL, debug);
    }
    // Only the first truth box that lines up with the blob is examined.
    break;
  }
}
| void BlamerBundle::BlameClassifierOrLangModel | ( | const WERD_RES * | word, |
| const UNICHARSET & | unicharset, | ||
| bool | valid_permuter, | ||
| bool | debug | ||
| ) |
Definition at line 369 of file blamer.cpp.
{
  // Decides between blaming the classifier and blaming the classifier/old
  // language-model tradeoff for an incorrect best choice.
  // When valid_permuter is false the flag
  // best_choice_is_dict_and_top_choice_ keeps whatever value it already had.
  if (valid_permuter) {
    // Assume the best choice matches each blob's top non-fragment choice
    // until one blob disagrees.
    best_choice_is_dict_and_top_choice_ = true;
    const WERD_CHOICE *best = word->best_choice;
    for (int blob = 0; blob < best->length(); ++blob) {
      BLOB_CHOICE_IT it(word->GetBlobChoices(blob));
      ASSERT_HOST(!it.empty());
      // Walk past fragment entries to the first whole-character choice.
      BLOB_CHOICE *top_choice = NULL;
      it.mark_cycle_pt();
      while (top_choice == NULL && !it.cycled_list()) {
        if (unicharset.get_fragment(it.data()->unichar_id())) {
          it.forward();
        } else {
          top_choice = it.data();
        }
      }
      ASSERT_HOST(top_choice != NULL);
      if (top_choice->unichar_id() != best->unichar_id(blob)) {
        best_choice_is_dict_and_top_choice_ = false;
        break;
      }
    }
  }
  STRING debug_str;
  IncorrectResultReason reason;
  if (best_choice_is_dict_and_top_choice_) {
    reason = IRR_CLASSIFIER;
    debug_str = "Best choice is: incorrect, top choice, dictionary word";
    debug_str += " with permuter ";
    debug_str += word->best_choice->permuter_name();
  } else {
    reason = IRR_CLASS_OLD_LM_TRADEOFF;
    debug_str = "Classifier/Old LM tradeoff is to blame";
  }
  SetBlame(reason, debug_str, word->best_choice, debug);
}
| bool BlamerBundle::ChoiceIsCorrect | ( | const WERD_CHOICE * | word_choice | ) | const |
Definition at line 111 of file blamer.cpp.
{
  // Returns true when the normalized text of word_choice equals the truth
  // string. A NULL choice is never correct.
  if (word_choice == NULL) {
    return false;
  }
  const UNICHARSET *charset = word_choice->unicharset();
  // Concatenate the normalized form of every unichar in the choice.
  STRING normalized;
  const int num_chars = word_choice->length();
  for (int pos = 0; pos < num_chars; ++pos) {
    normalized += charset->get_normed_unichar(word_choice->unichar_id(pos));
  }
  return TruthString() == normalized;
}
| void BlamerBundle::ClearResults | ( | ) | [inline] |
Definition at line 173 of file blamer.h.
{
  // Resets everything produced by a recognition pass while leaving the
  // truth members (truth_word_, truth_text_, truth_has_char_boxes_) alone.

  // Release the lattice buffer.
  delete[] lattice_data_;
  lattice_data_ = NULL;
  lattice_size_ = 0;

  // Forget the normalized truth boxes and the correct-segmentation path.
  norm_truth_word_.DeleteAllBoxes();
  norm_box_tolerance_ = 0;
  correct_segmentation_cols_.clear();
  correct_segmentation_rows_.clear();

  // A "no truth" verdict (IRR_NO_TRUTH / IRR_PAGE_LAYOUT) is sticky; any
  // other verdict goes back to "correct".
  if (!NoTruth()) {
    incorrect_result_reason_ = IRR_CORRECT;
  }
  debug_ = "";

  // Segmentation-search bookkeeping.
  segsearch_is_looking_for_blame_ = false;
  best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
  best_choice_is_dict_and_top_choice_ = false;
}
| void BlamerBundle::CopyResults | ( | const BlamerBundle & | other | ) | [inline] |
Definition at line 194 of file blamer.h.
{
  // Copies the recognition-side state of `other` into this bundle: the
  // normalized truth boxes and tolerance, the blame verdict, the
  // segmentation-search bookkeeping and a deep copy of the lattice buffer.
  // debug_ and misadaption_debug_ are not copied here.
  norm_truth_word_ = other.norm_truth_word_;
  norm_box_tolerance_ = other.norm_box_tolerance_;
  incorrect_result_reason_ = other.incorrect_result_reason_;
  segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;
  best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;
  correct_segmentation_cols_ = other.correct_segmentation_cols_;
  correct_segmentation_rows_ = other.correct_segmentation_rows_;
  best_choice_is_dict_and_top_choice_ =
      other.best_choice_is_dict_and_top_choice_;
  // NOTE(review): the previous value of lattice_data_ is overwritten without
  // delete[]. The copy constructor appears to call this function while
  // lattice_data_ is still uninitialized, so freeing here would be unsafe;
  // callers copying into a live bundle should ClearResults() first.
  if (other.lattice_data_ != NULL) {
    lattice_data_ = new char[other.lattice_size_];
    memcpy(lattice_data_, other.lattice_data_, other.lattice_size_);
    lattice_size_ = other.lattice_size_;
  } else {
    lattice_data_ = NULL;
    // Keep the size consistent with the (absent) buffer. Previously
    // lattice_size_ was left untouched on this path, which gave a
    // copy-constructed bundle an indeterminate lattice_size().
    lattice_size_ = 0;
  }
}
| void BlamerBundle::CopyTruth | ( | const BlamerBundle & | other | ) | [inline] |
Definition at line 187 of file blamer.h.
{
  // Copies the ground-truth members of `other`. The blame verdict is carried
  // over only when `other` has no usable truth; otherwise it restarts at
  // IRR_CORRECT.
  truth_word_ = other.truth_word_;
  truth_text_ = other.truth_text_;
  truth_has_char_boxes_ = other.truth_has_char_boxes_;
  if (other.NoTruth()) {
    incorrect_result_reason_ = other.incorrect_result_reason_;
  } else {
    incorrect_result_reason_ = IRR_CORRECT;
  }
}
| int BlamerBundle::correct_segmentation_length | ( | ) | const [inline] |
| const STRING& BlamerBundle::debug | ( | ) | const [inline] |
| void BlamerBundle::FillDebugString | ( | const STRING & | msg, |
| const WERD_CHOICE * | choice, | ||
| STRING * | debug | ||
| ) |
Definition at line 123 of file blamer.cpp.
{
  // Appends to *debug: "Truth <truth text>", an optional " (no char boxes)"
  // marker, the text of `choice` when one is given, and finally `msg` on its
  // own line when it is non-empty. The output always ends with a newline.
  STRING &out = *debug;
  out += "Truth ";
  const int num_symbols = truth_text_.length();
  for (int sym = 0; sym < num_symbols; ++sym) {
    out += truth_text_[sym];
  }
  if (!truth_has_char_boxes_) {
    out += " (no char boxes)";
  }
  if (choice != NULL) {
    STRING choice_text;
    choice->string_and_lengths(&choice_text, NULL);
    out += " Choice ";
    out += choice_text;
  }
  if (msg.length() > 0) {
    out += "\n";
    out += msg;
  }
  out += "\n";
}
| void BlamerBundle::FinishSegSearch | ( | const WERD_CHOICE * | best_choice, |
| bool | debug, | ||
| STRING * | debug_str | ||
| ) |
Definition at line 506 of file blamer.cpp.
{
  // Called when the guided segmentation search ends. If blame is still being
  // sought (best_choice is incorrect but a path for the correct segmentation
  // could be built), pick one of three verdicts:
  //  * IRR_CLASSIFIER when the incorrect best choice is a dictionary word
  //    and is the classifier's top choice;
  //  * IRR_SEGSEARCH_PP when the correct segmentation rates better than
  //    best_choice, i.e. the language model would have chosen correctly but
  //    pain-point prioritization never explored that segmentation;
  //  * IRR_CLASS_LM_TRADEOFF otherwise, since the incorrect best_choice
  //    would have been picked even after exploring the correct path.
  if (!segsearch_is_looking_for_blame_) {
    return;
  }
  segsearch_is_looking_for_blame_ = false;

  if (best_choice_is_dict_and_top_choice_) {
    // This branch assigns rather than appends, so any text already
    // accumulated in *debug_str is replaced.
    *debug_str = "Best choice is: incorrect, top choice, dictionary word";
    *debug_str += " with permuter ";
    *debug_str += best_choice->permuter_name();
    SetBlame(IRR_CLASSIFIER, *debug_str, best_choice, debug);
    return;
  }

  if (best_correctly_segmented_rating_ < best_choice->rating()) {
    *debug_str += "Correct segmentation state was not explored";
    SetBlame(IRR_SEGSEARCH_PP, *debug_str, best_choice, debug);
    return;
  }

  if (best_correctly_segmented_rating_ >= WERD_CHOICE::kBadRating) {
    // The rating never improved on the "bad" sentinel.
    *debug_str += "Correct segmentation paths were pruned by LM\n";
  } else {
    debug_str->add_str_double("Best correct segmentation rating ",
                              best_correctly_segmented_rating_);
    debug_str->add_str_double(" vs. best choice rating ",
                              best_choice->rating());
  }
  SetBlame(IRR_CLASS_LM_TRADEOFF, *debug_str, best_choice, debug);
}
| bool BlamerBundle::GuidedSegsearchNeeded | ( | const WERD_CHOICE * | best_choice | ) | const |
Definition at line 461 of file blamer.cpp.
{
  // A guided search is only worthwhile when nothing has been blamed yet, no
  // guided search is already running, per-character truth boxes exist, and
  // the current best choice is wrong. Checks run in that order.
  if (incorrect_result_reason_ != IRR_CORRECT) {
    return false;
  }
  if (segsearch_is_looking_for_blame_) {
    return false;
  }
  if (!truth_has_char_boxes_) {
    return false;
  }
  return !ChoiceIsCorrect(best_choice);
}
| bool BlamerBundle::GuidedSegsearchStillGoing | ( | ) | const |
Definition at line 501 of file blamer.cpp.
{
  // True from InitForSegSearch() until a verdict is reached or
  // FinishSegSearch() / ClearResults() resets the flag.
  const bool still_looking = segsearch_is_looking_for_blame_;
  return still_looking;
}
| bool BlamerBundle::HasDebugInfo | ( | ) | const [inline] |
| IncorrectResultReason BlamerBundle::incorrect_result_reason | ( | ) | const [inline] |
| const char * BlamerBundle::IncorrectReason | ( | ) | const |
Definition at line 60 of file blamer.cpp.
{
  // Printable name of this bundle's current blame code, looked up in the
  // kIncorrectResultReasonNames table.
  const IncorrectResultReason reason = incorrect_result_reason_;
  return kIncorrectResultReasonNames[reason];
}
| const char * BlamerBundle::IncorrectReasonName | ( | IncorrectResultReason | irr | ) | [static] |
Definition at line 56 of file blamer.cpp.
{
  // Printable name for an arbitrary blame code. The table is indexed
  // directly with irr; no range check is performed.
  const char *name = kIncorrectResultReasonNames[irr];
  return name;
}
| void BlamerBundle::InitForSegSearch | ( | const WERD_CHOICE * | best_choice, |
| MATRIX * | ratings, | ||
| UNICHAR_ID | wildcard_id, | ||
| bool | debug, | ||
| STRING * | debug_str, | ||
| TessResultCallback2< bool, int, int > * | pp_cb | ||
| ) |
Definition at line 473 of file blamer.cpp.
{
  // Starts the guided segmentation search: every cell of the correct
  // segmentation path that is not yet classified in `ratings` is handed to
  // the callback. If the callback rejects a cell, the search is abandoned
  // and IRR_SEGSEARCH_HEUR is blamed.
  // NOTE(review): the body refers to the callback as `cb`, whereas the
  // listed signature names the parameter `pp_cb` - confirm which name the
  // definition actually uses.
  segsearch_is_looking_for_blame_ = true;
  if (debug) {
    tprintf("segsearch starting to look for blame\n");
  }
  *debug_str += "Correct segmentation:\n";
  for (int step = 0; step < correct_segmentation_cols_.length(); ++step) {
    const int col = correct_segmentation_cols_[step];
    const int row = correct_segmentation_rows_[step];
    debug_str->add_str_int("col=", col);
    debug_str->add_str_int(" row=", row);
    *debug_str += "\n";
    if (ratings->Classified(col, row, wildcard_id)) {
      continue;  // Already classified, nothing to request.
    }
    if (cb->Run(col, row)) {
      continue;  // Callback accepted the cell.
    }
    segsearch_is_looking_for_blame_ = false;
    *debug_str += "\nFailed to insert pain point\n";
    SetBlame(IRR_SEGSEARCH_HEUR, *debug_str, best_choice, debug);
    break;
  }
}
| void BlamerBundle::JoinBlames | ( | const BlamerBundle & | bundle1, |
| const BlamerBundle & | bundle2, | ||
| bool | debug | ||
| ) |
Definition at line 225 of file blamer.cpp.
{
  // Merges the verdicts of two part bundles into this one. A part
  // contributes only if its verdict is a real blame (not IRR_CORRECT,
  // IRR_NO_TRUTH or IRR_NO_TRUTH_SPLIT). Part 1 replaces the current
  // verdict; part 2 fills in when the verdict is still IRR_CORRECT and
  // otherwise degrades a disagreement to IRR_UNKNOWN.
  STRING debug_str;
  IncorrectResultReason irr = incorrect_result_reason_;
  // NOTE(review): debug_str is a freshly constructed local, so this
  // assignment has no visible effect - confirm whether the member debug_
  // was meant to be preserved for IRR_NO_TRUTH_SPLIT.
  if (irr != IRR_NO_TRUTH_SPLIT) {
    debug_str = "";
  }

  const IncorrectResultReason irr1 = bundle1.incorrect_result_reason_;
  const bool part1_has_blame = !(irr1 == IRR_CORRECT ||
                                 irr1 == IRR_NO_TRUTH ||
                                 irr1 == IRR_NO_TRUTH_SPLIT);
  if (part1_has_blame) {
    debug_str += "Blame from part 1: ";
    debug_str += bundle1.debug_;
    irr = irr1;
  }

  const IncorrectResultReason irr2 = bundle2.incorrect_result_reason_;
  const bool part2_has_blame = !(irr2 == IRR_CORRECT ||
                                 irr2 == IRR_NO_TRUTH ||
                                 irr2 == IRR_NO_TRUTH_SPLIT);
  if (part2_has_blame) {
    debug_str += "Blame from part 2: ";
    debug_str += bundle2.debug_;
    if (irr == IRR_CORRECT) {
      irr = irr2;
    } else if (irr != irr2) {
      irr = IRR_UNKNOWN;
    }
  }

  incorrect_result_reason_ = irr;
  const bool needs_blame = !(irr == IRR_CORRECT || irr == IRR_NO_TRUTH);
  if (needs_blame) {
    SetBlame(irr, debug_str, NULL, debug);
  }
}
| void BlamerBundle::LastChanceBlame | ( | bool | debug, |
| WERD_RES * | word | ||
| ) | [static] |
Definition at line 547 of file blamer.cpp.
{
  // Final reconciliation of a word's blame with its best choice.
  if (word->blamer_bundle == NULL) {
    // No bundle at all: create one and blame page layout.
    BlamerBundle *fresh = new BlamerBundle();
    word->blamer_bundle = fresh;
    fresh->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame",
                    word->best_choice, debug);
    return;
  }

  BlamerBundle *bundle = word->blamer_bundle;
  if (bundle->incorrect_result_reason_ == IRR_NO_TRUTH) {
    bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth",
                     word->best_choice, debug);
    return;
  }

  const bool correct = bundle->ChoiceIsCorrect(word->best_choice);
  const IncorrectResultReason irr = bundle->incorrect_result_reason_;
  if (irr == IRR_CORRECT) {
    if (!correct) {
      // Nothing was blamed, yet the final answer is wrong.
      STRING debug_str("Choice is incorrect after recognition");
      bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice, debug);
    }
  } else if (correct) {
    // Something was blamed, but the final answer is right: withdraw it.
    if (debug) {
      tprintf("Corrected %s\n", bundle->debug_.string());
    }
    bundle->incorrect_result_reason_ = IRR_CORRECT;
    bundle->debug_ = "";
  }
}
| const char* BlamerBundle::lattice_data | ( | ) | const [inline] |
| int BlamerBundle::lattice_size | ( | ) | const [inline] |
| bool BlamerBundle::MatrixPositionCorrect | ( | int | index, |
| const MATRIX_COORD & | coord | ||
| ) | [inline] |
| const STRING& BlamerBundle::misadaption_debug | ( | ) | const [inline] |
| bool BlamerBundle::NoTruth | ( | ) | const [inline] |
Definition at line 109 of file blamer.h.
{
  // True when the verdict says there is no usable truth for this word:
  // either IRR_NO_TRUTH or IRR_PAGE_LAYOUT.
  switch (incorrect_result_reason_) {
    case IRR_NO_TRUTH:
    case IRR_PAGE_LAYOUT:
      return true;
    default:
      return false;
  }
}
| const tesseract::ParamsTrainingBundle& BlamerBundle::params_training_bundle | ( | ) | const [inline] |
| void BlamerBundle::set_best_choice_is_dict_and_top_choice | ( | bool | value | ) | [inline] |
| void BlamerBundle::set_lattice_data | ( | const char * | data, |
| int | size | ||
| ) | [inline] |
| void BlamerBundle::SetChopperBlame | ( | const WERD_RES * | word, |
| bool | debug | ||
| ) |
Definition at line 310 of file blamer.cpp.
{
  // Blames the chopper (IRR_CHOPPER) when the maximally chopped word has no
  // blob boundary at the right edge of some truth character box.
  // Walks the chopped blobs and the normalized truth boxes in parallel,
  // comparing right edges within norm_box_tolerance_.
  // Does nothing when there is no truth, no per-character truth boxes, or
  // no chopped blobs.
  if (NoTruth() || !truth_has_char_boxes_ ||
      word->chopped_word->blobs.empty()) {
    return;
  }
  bool missing_chop = false;
  int num_blobs = word->chopped_word->blobs.size();
  int box_index = 0;
  int blob_index = 0;
  // Only reported when missing_chop is set, which implies the loop ran and
  // assigned it; initialized anyway so it is never read indeterminate.
  inT16 truth_x = 0;
  // The loop is bounded by norm_truth_word_ (the vector actually indexed)
  // rather than truth_word_, so the index can never run past its end.
  while (box_index < norm_truth_word_.length() && blob_index < num_blobs) {
    truth_x = norm_truth_word_.BlobBox(box_index).right();
    TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
    if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) {
      ++blob_index;
      continue;  // encountered an extra chop, keep looking
    } else if (curr_blob->bounding_box().right() >
               truth_x + norm_box_tolerance_) {
      missing_chop = true;
      break;
    } else {
      // The blob ends where the truth box ends: move on to the next truth
      // box as well. Without advancing box_index every blob was compared
      // against truth box 0 and the "unmatched truth boxes" test below was
      // always true.
      ++blob_index;
      ++box_index;
    }
  }
  if (missing_chop || box_index < norm_truth_word_.length()) {
    STRING debug_str;
    if (missing_chop) {
      debug_str.add_str_int("Detected missing chop (tolerance=",
                            norm_box_tolerance_);
      debug_str += ") at Bounding Box=";
      TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
      curr_blob->bounding_box().print_to_str(&debug_str);
      debug_str.add_str_int("\nNo chop for truth at x=", truth_x);
    } else {
      // Blobs ran out before every truth box found a matching chop.
      debug_str.add_str_int("Missing chops for last ",
                            norm_truth_word_.length() - box_index);
      debug_str += " truth box(es)";
    }
    debug_str += "\nMaximally chopped word boxes:\n";
    for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
      TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
      curr_blob->bounding_box().print_to_str(&debug_str);
      debug_str += '\n';
    }
    debug_str += "Truth bounding boxes:\n";
    for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
      norm_truth_word_.BlobBox(box_index).print_to_str(&debug_str);
      debug_str += '\n';
    }
    SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
  }
}
| void BlamerBundle::SetMisAdaptionDebug | ( | const WERD_CHOICE * | best_choice, |
| bool | debug | ||
| ) |
Definition at line 574 of file blamer.cpp.
{
  // Records a "misadapt to word" debug string when truth exists and
  // best_choice does not match it; optionally prints the string.
  // NOTE(review): ChoiceIsCorrect() returns false for a NULL choice, in
  // which case best_choice is dereferenced below - confirm callers never
  // pass NULL.
  if (incorrect_result_reason_ == IRR_NO_TRUTH) {
    return;
  }
  if (ChoiceIsCorrect(best_choice)) {
    return;
  }
  misadaption_debug_ = "misadapt to word (";
  misadaption_debug_ += best_choice->permuter_name();
  misadaption_debug_ += "): ";
  FillDebugString("", best_choice, &misadaption_debug_);
  if (debug) {
    tprintf("%s\n", misadaption_debug_.string());
  }
}
| void BlamerBundle::SetRejectedTruth | ( | ) |
Definition at line 105 of file blamer.cpp.
{
  // Marks the truth as rejected: no per-character boxes can be trusted and
  // the verdict becomes IRR_NO_TRUTH.
  truth_has_char_boxes_ = false;
  incorrect_result_reason_ = IRR_NO_TRUTH;
}
| void BlamerBundle::SetSymbolTruth | ( | const UNICHARSET & | unicharset, |
| const char * | char_str, | ||
| const TBOX & | char_box | ||
| ) |
Definition at line 86 of file blamer.cpp.
{
  // Appends one truth symbol and its box. The stored text is the normalized
  // unichar when char_str is known to the unicharset and its normalized form
  // is non-empty; otherwise char_str itself is stored.
  STRING text(char_str);
  const UNICHAR_ID id = unicharset.unichar_to_id(char_str);
  if (id != INVALID_UNICHAR_ID) {
    STRING normed(unicharset.get_normed_unichar(id));
    if (normed.length() > 0) {
      text = normed;
    }
  }

  const int prev_count = truth_word_.length();
  truth_text_.push_back(text);
  truth_word_.InsertBox(prev_count, char_box);

  if (prev_count == 0) {
    // First symbol: per-character boxes are assumed to be present.
    truth_has_char_boxes_ = true;
    return;
  }
  // A box identical to the previous symbol's box means the boxes are not
  // really per-character.
  if (truth_word_.BlobBox(prev_count - 1) == char_box) {
    truth_has_char_boxes_ = false;
  }
}
| void BlamerBundle::SetupCorrectSegmentation | ( | const TWERD * | word, |
| bool | debug | ||
| ) |
Definition at line 407 of file blamer.cpp.
{
// Computes the path through the ratings matrix that corresponds to the truth
// segmentation, storing it in correct_segmentation_cols_/rows_. Each entry
// covers one truth box: col is the index of the first blob in the group and
// row the index of the last. On failure the vectors are cleared and
// IRR_UNKNOWN is blamed.
// The hypothesis list is started unconditionally, before any early return.
params_training_bundle_.StartHypothesisList();
if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_)
return; // Nothing to do here.
STRING debug_str;
debug_str += "Blamer computing correct_segmentation_cols\n";
// curr_box_col: index of the first blob of the group being assembled.
// next_box_col: one past the index of the blob currently examined.
int curr_box_col = 0;
int next_box_col = 0;
int num_blobs = word->NumBlobs();
if (num_blobs == 0) return; // No blobs to play with.
int blob_index = 0;
inT16 next_box_x = word->blobs[blob_index]->bounding_box().right();
// The loop advances one blob per iteration; truth_idx only advances when a
// group of blobs has been matched to the current truth box.
for (int truth_idx = 0; blob_index < num_blobs &&
truth_idx < norm_truth_word_.length();
++blob_index) {
++next_box_col;
inT16 curr_box_x = next_box_x;
// Look ahead to the right edge of the following blob; for the last blob
// next_box_x keeps its previous value.
if (blob_index + 1 < num_blobs)
next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
inT16 truth_x = norm_truth_word_.BlobBox(truth_idx).right();
debug_str.add_str_int("Box x coord vs. truth: ", curr_box_x);
debug_str.add_str_int(" ", truth_x);
debug_str += "\n";
if (curr_box_x > (truth_x + norm_box_tolerance_)) {
break; // failed to find a matching box
} else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched
(blob_index + 1 >= num_blobs || // next box can't be included
next_box_x > truth_x + norm_box_tolerance_)) {
// Blobs curr_box_col..next_box_col-1 together form this truth box.
correct_segmentation_cols_.push_back(curr_box_col);
correct_segmentation_rows_.push_back(next_box_col-1);
++truth_idx;
debug_str.add_str_int("col=", curr_box_col);
debug_str.add_str_int(" row=", next_box_col-1);
debug_str += "\n";
curr_box_col = next_box_col;
}
}
// Failure if the loop stopped before consuming every blob (a break, or the
// truth boxes ran out first), or if not every truth box got a group.
if (blob_index < num_blobs || // trailing blobs
correct_segmentation_cols_.length() != norm_truth_word_.length()) {
debug_str.add_str_int("Blamer failed to find correct segmentation"
" (tolerance=", norm_box_tolerance_);
// Blobs were exhausted; the text of this marker is kept as-is.
if (blob_index >= num_blobs) debug_str += " blob == NULL";
debug_str += ")\n";
debug_str.add_str_int(" path length ", correct_segmentation_cols_.length());
debug_str.add_str_int(" vs. truth ", norm_truth_word_.length());
debug_str += "\n";
SetBlame(IRR_UNKNOWN, debug_str, NULL, debug);
correct_segmentation_cols_.clear();
correct_segmentation_rows_.clear();
}
}
| void BlamerBundle::SetupNormTruthWord | ( | const DENORM & | denorm | ) |
Definition at line 145 of file blamer.cpp.
{
  // Fills norm_truth_word_ with the truth boxes transformed by `denorm`, and
  // scales the box-matching tolerance by the same x scale.
  // TODO(rays) Is this the last use of denorm in WERD_RES and can it go?
  norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale();
  TPOINT src_top_left;
  TPOINT src_bottom_right;
  TPOINT dst_top_left;
  TPOINT dst_bottom_right;
  const int num_boxes = truth_word_.length();
  for (int idx = 0; idx < num_boxes; ++idx) {
    const TBOX &truth_box = truth_word_.BlobBox(idx);
    // Transform the two opposite corners of the box independently.
    src_top_left.x = truth_box.left();
    src_top_left.y = truth_box.top();
    src_bottom_right.x = truth_box.right();
    src_bottom_right.y = truth_box.bottom();
    denorm.NormTransform(NULL, src_top_left, &dst_top_left);
    denorm.NormTransform(NULL, src_bottom_right, &dst_bottom_right);
    // Rebuild the box from (left, bottom) and (right, top).
    TBOX norm_box(dst_top_left.x, dst_bottom_right.y,
                  dst_bottom_right.x, dst_top_left.y);
    norm_truth_word_.InsertBox(idx, norm_box);
  }
}
| void BlamerBundle::SetWordTruth | ( | const UNICHARSET & | unicharset, |
| const char * | truth_str, | ||
| const TBOX & | word_box | ||
| ) |
Definition at line 66 of file blamer.cpp.
{
  // Sets word-level truth: a single box for the whole word (so there are no
  // per-character boxes) plus one truth_text_ entry per encoded unichar of
  // truth_str.
  truth_word_.InsertBox(0, word_box);
  truth_has_char_boxes_ = false;
  // Encode the string as UNICHAR_IDs.
  GenericVector<UNICHAR_ID> encoding;
  GenericVector<char> lengths;
  unicharset.encode_string(truth_str, false, &encoding, &lengths, NULL);
  // total_length is the byte offset of symbol i within truth_str; it grows
  // by lengths[i] after each symbol.
  int total_length = 0;
  for (int i = 0; i < encoding.size(); total_length += lengths[i++]) {
    // uch already starts at the symbol's offset, so it has to be cut to the
    // symbol's own byte length. The previous `lengths[i] - total_length`
    // subtracted the offset a second time, giving a wrong (and, from the
    // third symbol on, typically negative) truncation index.
    STRING uch(truth_str + total_length);
    uch.truncate_at(lengths[i]);
    UNICHAR_ID id = encoding[i];
    // Prefer the normalized form whenever the symbol is in the unicharset;
    // the raw substring is only kept for symbols that failed to encode.
    if (id != INVALID_UNICHAR_ID) uch = unicharset.get_normed_unichar(id);
    truth_text_.push_back(uch);
  }
}
| void BlamerBundle::SplitBundle | ( | int | word1_right, |
| int | word2_left, | ||
| bool | debug, | ||
| BlamerBundle * | bundle1, | ||
| BlamerBundle * | bundle2 | ||
| ) | const |
Definition at line 169 of file blamer.cpp.
{
// Distributes this bundle's truth over bundle1 and bundle2 when a word is
// split in two. word1_right / word2_left are the x coordinates of the split
// in the same (normalized) space as norm_truth_word_.
STRING debug_str;
// Find truth boxes that correspond to the split in the blobs.
int b;
// Index of the first truth box belonging to the second word; -1 = not found.
int begin2_truth_index = -1;
if (incorrect_result_reason_ != IRR_NO_TRUTH &&
truth_has_char_boxes_) {
debug_str = "Looking for truth split at";
debug_str.add_str_int(" end1_x ", word1_right);
debug_str.add_str_int(" begin2_x ", word2_left);
debug_str += "\nnorm_truth_word boxes:\n";
// A split needs at least two truth boxes.
if (norm_truth_word_.length() > 1) {
norm_truth_word_.BlobBox(0).print_to_str(&debug_str);
for (b = 1; b < norm_truth_word_.length(); ++b) {
norm_truth_word_.BlobBox(b).print_to_str(&debug_str);
// The split lies between boxes b-1 and b when the right edge of b-1 and
// the left edge of b are both strictly within tolerance of the given
// coordinates.
if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) <
norm_box_tolerance_) &&
(abs(word2_left - norm_truth_word_.BlobBox(b).left()) <
norm_box_tolerance_)) {
begin2_truth_index = b;
debug_str += "Split found";
break;
}
}
debug_str += '\n';
}
}
// Populate truth information in word and word2 with the first and second
// part of the original truth.
if (begin2_truth_index > 0) {
bundle1->truth_has_char_boxes_ = true;
bundle1->norm_box_tolerance_ = norm_box_tolerance_;
bundle2->truth_has_char_boxes_ = true;
bundle2->norm_box_tolerance_ = norm_box_tolerance_;
// Boxes before begin2_truth_index go to bundle1, the rest to bundle2.
BlamerBundle *curr_bb = bundle1;
for (b = 0; b < norm_truth_word_.length(); ++b) {
if (b == begin2_truth_index) curr_bb = bundle2;
// NOTE(review): for bundle2 the insertion index b is the index in the
// original word, not in bundle2 - presumably InsertBox appends when the
// index is past the end; confirm.
curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
curr_bb->truth_text_.push_back(truth_text_[b]);
}
} else if (incorrect_result_reason_ == IRR_NO_TRUTH) {
// No truth to split: both halves inherit the "no truth" verdict.
bundle1->incorrect_result_reason_ = IRR_NO_TRUTH;
bundle2->incorrect_result_reason_ = IRR_NO_TRUTH;
} else {
// Truth exists but no matching split point was found (or there were no
// per-character boxes to search).
debug_str += "Truth split not found";
debug_str += truth_has_char_boxes_ ?
"\n" : " (no truth char boxes)\n";
bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, NULL, debug);
bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, NULL, debug);
}
}
| STRING BlamerBundle::TruthString | ( | ) | const [inline] |
| void BlamerBundle::UpdateBestRating | ( | float | rating | ) | [inline] |