|
tesseract 3.04.01
|
#include <errorcounter.h>
Classes | |
| struct | Counts |
Static Public Member Functions | |
| static double | ComputeErrorRate (ShapeClassifier *classifier, int report_level, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const GenericVector< Pix * > &page_images, SampleIterator *it, double *unichar_error, double *scaled_error, STRING *fonts_report) |
| static void | DebugNewErrors (ShapeClassifier *new_classifier, ShapeClassifier *old_classifier, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const GenericVector< Pix * > &page_images, SampleIterator *it) |
Definition at line 94 of file errorcounter.h.
| double tesseract::ErrorCounter::ComputeErrorRate | ( | ShapeClassifier * | classifier, |
| int | report_level, | ||
| CountTypes | boosting_mode, | ||
| const FontInfoTable & | fontinfo_table, | ||
| const GenericVector< Pix * > & | page_images, | ||
| SampleIterator * | it, | ||
| double * | unichar_error, | ||
| double * | scaled_error, | ||
| STRING * | fonts_report | ||
| ) | [static] |
Definition at line 42 of file errorcounter.cpp.
{
  // Classifies every sample produced by |it| with |classifier|, accumulates
  // the results in a local ErrorCounter, and then generates the error report.
  // Returns the value produced by ErrorCounter::ReportErrors. If scaled_error
  // is non-NULL it receives the counter's scaled_error_. unichar_error and
  // fonts_report are passed through to ReportErrors untouched.
  int fontsize = it->sample_set()->NumFonts();
  ErrorCounter counter(classifier->GetUnicharset(), fontsize);
  GenericVector<UnicharRating> results;

  clock_t start = clock();
  int total_samples = 0;
  // Budget of failing samples on which to run the classifier debug display.
  // It is only non-zero when report_level > 3.
  int error_samples = report_level > 3 ? report_level * report_level : 0;

  // Iterate over all the samples, accumulating errors.
  for (it->Begin(); !it->AtEnd(); it->Next()) {
    TrainingSample* mutable_sample = it->MutableSample();
    int page_index = mutable_sample->page_num();
    // Only use a page image if the sample's page number indexes page_images.
    Pix* page_pix = 0 <= page_index && page_index < page_images.size()
                        ? page_images[page_index] : NULL;
    // Plain classification pass: debug argument is 0 and no unichar id is
    // singled out (INVALID_UNICHAR_ID).
    classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
                                      INVALID_UNICHAR_ID, &results);

    int correct_id = mutable_sample->class_id();
    bool is_junk = counter.unicharset_.has_special_codes() &&
                   (correct_id == UNICHAR_SPACE ||
                    correct_id == UNICHAR_JOINED ||
                    correct_id == UNICHAR_BROKEN);
    // Junk samples (space/joined/broken) go to the special junk counter,
    // everything else to the regular error accumulator.
    bool debug_it =
        is_junk ? counter.AccumulateJunk(report_level > 3, results,
                                         mutable_sample)
                : counter.AccumulateErrors(report_level > 3, boosting_mode,
                                           fontinfo_table, results,
                                           mutable_sample);

    if (debug_it && error_samples > 0) {
      // Running debug, keep the correct answer, and debug the classifier.
      tprintf("Error on sample %d: %s Classifier debug output:\n",
              it->GlobalSampleIndex(),
              it->sample_set()->SampleToString(*mutable_sample).string());
      classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
      --error_samples;
    }
    ++total_samples;
  }
  double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC;

  // Create the appropriate error report.
  double unscaled_error = counter.ReportErrors(report_level, boosting_mode,
                                               fontinfo_table,
                                               *it, unichar_error,
                                               fonts_report);
  if (scaled_error != NULL) *scaled_error = counter.scaled_error_;

  if (report_level > 1) {
    // It is useful to know the time in microseconds/char. Guard against an
    // empty sample set so that we never divide by zero.
    double us_per_char =
        total_samples > 0 ? 1000000.0 * total_time / total_samples : 0.0;
    tprintf("Errors computed in %.2fs at %.1f μs/char\n",
            total_time, us_per_char);
  }
  return unscaled_error;
}
// Tests a pair of classifiers, debugging errors of the new against the old.
// See errorcounter.h for description of arguments.
| void tesseract::ErrorCounter::DebugNewErrors | ( | ShapeClassifier * | new_classifier, |
| ShapeClassifier * | old_classifier, | ||
| CountTypes | boosting_mode, | ||
| const FontInfoTable & | fontinfo_table, | ||
| const GenericVector< Pix * > & | page_images, | ||
| SampleIterator * | it | ||
| ) | [static] |
Definition at line 109 of file errorcounter.cpp.
{
  // Runs both classifiers over every sample produced by |it| and reports the
  // samples on which old_classifier is counted as correct but new_classifier
  // is counted as an error. Samples whose class_id is 0 are skipped entirely.
  // The total number of such new errors is printed at the end.
  int fontsize = it->sample_set()->NumFonts();
  ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize);
  ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize);
  GenericVector<UnicharRating> results;
  // Upper bound on how many new errors get a DebugDisplay call.
  int error_samples = 25;
  int total_new_errors = 0;

  // Iterate over all the samples, accumulating errors.
  for (it->Begin(); !it->AtEnd(); it->Next()) {
    TrainingSample* mutable_sample = it->MutableSample();
    int page_index = mutable_sample->page_num();
    // Only use a page image if the sample's page number indexes page_images.
    Pix* page_pix = 0 <= page_index && page_index < page_images.size()
                        ? page_images[page_index] : NULL;

    // Plain classification pass with the old classifier: debug argument is 0
    // and no unichar id is singled out (INVALID_UNICHAR_ID).
    old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
                                          INVALID_UNICHAR_ID, &results);
    int correct_id = mutable_sample->class_id();
    // Skip class 0 samples (without touching the counters) and samples the
    // old classifier already got wrong: neither can be a *new* error.
    if (correct_id == 0 ||
        old_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
                                     results, mutable_sample)) {
      continue;
    }

    // Old classifier was correct, check the new one.
    new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
                                          INVALID_UNICHAR_ID, &results);
    if (!new_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
                                      results, mutable_sample)) {
      continue;
    }

    tprintf("New Error on sample %d: Classifier debug output:\n",
            it->GlobalSampleIndex());
    ++total_new_errors;
    // Re-run the new classifier with the debug argument set to 1 and the
    // correct id supplied, then display it while the display budget lasts.
    new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 1,
                                          correct_id, &results);
    if (results.size() > 0 && error_samples > 0) {
      new_classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
      --error_samples;
    }
  }
  tprintf("Total new errors = %d\n", total_new_errors);
}
// Constructor is private. Only anticipated use of ErrorCounter is via
// the static ComputeErrorRate.