|
tesseract 3.04.01
|
#include <wordrec.h>
Public Member Functions | |||||||
| Wordrec () | |||||||
| virtual | ~Wordrec () | ||||||
| void | SaveAltChoices (const LIST &best_choices, WERD_RES *word) | ||||||
| void | FillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) | ||||||
| void | CallFillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) | ||||||
| void | SegSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) | ||||||
| void | WordSearch (WERD_RES *word_res) | ||||||
| void | InitialSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) | ||||||
| void | DoSegSearch (WERD_RES *word_res) | ||||||
| SEAM * | attempt_blob_chop (TWERD *word, TBLOB *blob, inT32 blob_number, bool italic_blob, const GenericVector< SEAM * > &seams) | ||||||
| SEAM * | chop_numbered_blob (TWERD *word, inT32 blob_number, bool italic_blob, const GenericVector< SEAM * > &seams) | ||||||
| SEAM * | chop_overlapping_blob (const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number) | ||||||
| void | add_seam_to_queue (float new_priority, SEAM *new_seam, SeamQueue *seams) | ||||||
| void | choose_best_seam (SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile) | ||||||
| void | combine_seam (const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue) | ||||||
| SEAM * | pick_good_seam (TBLOB *blob) | ||||||
| void | try_point_pairs (EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob) | ||||||
| void | try_vertical_splits (EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob) | ||||||
| PRIORITY | grade_split_length (register SPLIT *split) | ||||||
| PRIORITY | grade_sharpness (register SPLIT *split) | ||||||
| bool | near_point (EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt) | ||||||
| virtual BLOB_CHOICE_LIST * | classify_piece (const GenericVector< SEAM * > &seams, inT16 start, inT16 end, const char *description, TWERD *word, BlamerBundle *blamer_bundle) | ||||||
| void | merge_fragments (MATRIX *ratings, inT16 num_blobs) | ||||||
| void | get_fragment_lists (inT16 current_frag, inT16 current_row, inT16 start, inT16 num_frag_parts, inT16 num_blobs, MATRIX *ratings, BLOB_CHOICE_LIST *choice_lists) | ||||||
| void | merge_and_put_fragment_lists (inT16 row, inT16 column, inT16 num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings) | ||||||
| void | fill_filtered_fragment_list (BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices) | ||||||
program_editup | |||||||
Initialize all the things in the program that need to be initialized. init_permute determines whether to initialize the permute functions and Dawg models. | |||||||
| void | program_editup (const char *textbase, bool init_classifier, bool init_permute) | ||||||
cc_recog | |||||||
Recognize a word. | |||||||
| void | cc_recog (WERD_RES *word) | ||||||
program_editdown | |||||||
This function holds any necessary post processing for the Wise Owl program. | |||||||
| void | program_editdown (inT32 elasped_time) | ||||||
set_pass1 | |||||||
Get ready to do some pass 1 stuff. | |||||||
| void | set_pass1 () | ||||||
set_pass2 | |||||||
Get ready to do some pass 2 stuff. | |||||||
| void | set_pass2 () | ||||||
end_recog | |||||||
Cleanup and exit the recog program. | |||||||
| int | end_recog () | ||||||
call_matcher | |||||||
Called from Tess with a blob in tess form. The blob may need rotating to the correct orientation for classification. | |||||||
| BLOB_CHOICE_LIST * | call_matcher (TBLOB *blob) | ||||||
dict_word() | |||||||
Test the dictionaries, returning NO_PERM (0) if not found, or one of the PermuterType values if found, according to the dictionary. | |||||||
| int | dict_word (const WERD_CHOICE &word) | ||||||
classify_blob | |||||||
Classify this blob if it is not already recorded in the match table. Attempt to recognize this blob as a character. The recognition rating for this blob will be stored as a part of the blob. This value will also be returned to the caller.
| |||||||
| BLOB_CHOICE_LIST * | classify_blob (TBLOB *blob, const char *string, C_COL color, BlamerBundle *blamer_bundle) | ||||||
point_priority | |||||||
Assign a priority to an edge point that might be used as part of a split. The argument should be of type EDGEPT. | |||||||
| PRIORITY | point_priority (EDGEPT *point) | ||||||
add_point_to_list | |||||||
Add an edge point to a POINT_GROUP containing a list of other points. | |||||||
| void | add_point_to_list (PointHeap *point_heap, EDGEPT *point) | ||||||
| bool | is_inside_angle (EDGEPT *pt) | ||||||
angle_change | |||||||
Return the change in angle (degrees) of the line segments between points one and two, and two and three. | |||||||
| int | angle_change (EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) | ||||||
pick_close_point | |||||||
Choose the edge point that is closest to the critical point. This point may not be exactly vertical from the critical point. | |||||||
| EDGEPT * | pick_close_point (EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist) | ||||||
prioritize_points | |||||||
Find a list of edge points from the outer outline of this blob. For each of these points assign a priority. Sort these points using a heap structure so that they can be visited in order. | |||||||
| void | prioritize_points (TESSLINE *outline, PointHeap *points) | ||||||
new_min_point | |||||||
Found a new minimum point; try to decide whether to save it or not. Return the new value for the local minimum. If a point is saved then the local minimum is reset to NULL. | |||||||
| void | new_min_point (EDGEPT *local_min, PointHeap *points) | ||||||
new_max_point | |||||||
Found a new maximum point; try to decide whether to save it or not. Return the new value for the local maximum. If a point is saved then the local maximum is reset to NULL. | |||||||
| void | new_max_point (EDGEPT *local_max, PointHeap *points) | ||||||
vertical_projection_point | |||||||
For one point on the outline, find the corresponding point on the other side of the outline that is a likely projection for a split point. This is done by iterating through the edge points until the X value of the point being looked at is greater than the X value of the split point. Ensure that the point being returned is not right next to the split point. Return the edge point in *best_point as a result, and any points that were newly created are also saved on the new_points list. | |||||||
| void | vertical_projection_point (EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points) | ||||||
improve_one_blob | |||||||
Finds the best place to chop, based on the worst blob, fixpt, or next to a fragment, according to the input. Returns the SEAM corresponding to the chop point, if any is found, and the index in the ratings_matrix of the chopped blob. Note that blob_choices is just a copy of the pointers in the leading diagonal of the ratings MATRIX. Although the blob is chopped, the returned SEAM is yet to be inserted into word->seam_array and the resulting blobs are unclassified, so this function can be used by ApplyBox as well as during recognition. | |||||||
| SEAM * | improve_one_blob (const GenericVector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number) | ||||||
chop_one_blob | |||||||
Start with the current one-blob word and its classification. Find the worst blob and try to divide it up to improve the ratings. Used for testing chopper. | |||||||
| SEAM * | chop_one_blob (const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, int *blob_number) | ||||||
chop_word_main | |||||||
| void | chop_word_main (WERD_RES *word) | ||||||
improve_by_chopping | |||||||
Repeatedly chops the worst blob, classifying the new blobs fixing up all the data, and incrementally runs the segmentation search until a good word is found, or no more chops can be found. | |||||||
| void | improve_by_chopping (float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending) | ||||||
| int | select_blob_to_split (const GenericVector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment) | ||||||
| int | select_blob_to_split_from_fixpt (DANGERR *fixpt) | ||||||
Public Attributes | |||||||
| bool | merge_fragments_in_matrix = TRUE | ||||||
| bool | wordrec_no_block = FALSE | ||||||
| bool | wordrec_enable_assoc = TRUE | ||||||
| bool | force_word_assoc = FALSE | ||||||
| double | wordrec_worst_state = 1 | ||||||
| bool | fragments_guide_chopper = FALSE | ||||||
| int | repair_unchopped_blobs = 1 | ||||||
| double | tessedit_certainty_threshold = -2.25 | ||||||
| int | chop_debug = 0 | ||||||
| bool | chop_enable = 1 | ||||||
| bool | chop_vertical_creep = 0 | ||||||
| int | chop_split_length = 10000 | ||||||
| int | chop_same_distance = 2 | ||||||
| int | chop_min_outline_points = 6 | ||||||
| int | chop_seam_pile_size = 150 | ||||||
| bool | chop_new_seam_pile = 1 | ||||||
| int | chop_inside_angle = -50 | ||||||
| int | chop_min_outline_area = 2000 | ||||||
| double | chop_split_dist_knob = 0.5 | ||||||
| double | chop_overlap_knob = 0.9 | ||||||
| double | chop_center_knob = 0.15 | ||||||
| int | chop_centered_maxwidth = 90 | ||||||
| double | chop_sharpness_knob = 0.06 | ||||||
| double | chop_width_change_knob = 5.0 | ||||||
| double | chop_ok_split = 100.0 | ||||||
| double | chop_good_split = 50.0 | ||||||
| int | chop_x_y_weight = 3 | ||||||
| int | segment_adjust_debug = 0 | ||||||
| bool | assume_fixed_pitch_char_segment = FALSE | ||||||
| int | wordrec_debug_level = 0 | ||||||
| int | wordrec_max_join_chunks = 4 | ||||||
| bool | wordrec_skip_no_truth_words = false | ||||||
| bool | wordrec_debug_blamer = false | ||||||
| bool | wordrec_run_blamer = false | ||||||
| int | segsearch_debug_level = 0 | ||||||
| int | segsearch_max_pain_points = 2000 | ||||||
| int | segsearch_max_futile_classifications = 20 | ||||||
| double | segsearch_max_char_wh_ratio = 2.0 | ||||||
| bool | save_alt_choices = true | ||||||
| LanguageModel * | language_model_ | ||||||
| PRIORITY | pass2_ok_split | ||||||
| WERD_CHOICE * | prev_word_best_choice_ | ||||||
| GenericVector< int > | blame_reasons_ | ||||||
| void(Wordrec::* | fill_lattice_ )(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) | ||||||
Protected Member Functions | |||||||
| bool | SegSearchDone (int num_futile_classifications) | ||||||
| void | UpdateSegSearchNodes (float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) | ||||||
| void | ProcessSegSearchPainPoint (float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle) | ||||||
| void | ResetNGramSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending) | ||||||
| void | InitBlamerForSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug) | ||||||
| tesseract::Wordrec::Wordrec | ( | ) |
Definition at line 26 of file wordrec.cpp.
: // control parameters
  // Each *_MEMBER entry registers one tunable parameter with its default
  // value and help string. Adjacent string literals are concatenated by the
  // compiler, so every continuation literal must carry its own separating
  // space; two help strings previously ran words together ("is.This",
  // "thatdid") and are fixed below.
  BOOL_MEMBER(merge_fragments_in_matrix, TRUE,
              "Merge the fragments in the ratings matrix and delete them"
              " after merging", params()),
  BOOL_MEMBER(wordrec_no_block, FALSE, "Don't output block information",
              params()),
  BOOL_MEMBER(wordrec_enable_assoc, TRUE, "Associator Enable", params()),
  BOOL_MEMBER(force_word_assoc, FALSE,
              "force associator to run regardless of what enable_assoc is."
              " This is used for CJK where component grouping is necessary.",
              CCUtil::params()),
  double_MEMBER(wordrec_worst_state, 1.0, "Worst segmentation state",
                params()),
  BOOL_MEMBER(fragments_guide_chopper, FALSE,
              "Use information from fragments to guide chopping process",
              params()),
  INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped",
             params()),
  double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit",
                params()),
  INT_MEMBER(chop_debug, 0, "Chop debug", params()),
  BOOL_MEMBER(chop_enable, 1, "Chop enable", params()),
  BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep", params()),
  INT_MEMBER(chop_split_length, 10000, "Split Length", params()),
  INT_MEMBER(chop_same_distance, 2, "Same distance", params()),
  INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline",
             params()),
  INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile",
             params()),
  BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params()),
  INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend", params()),
  INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area", params()),
  double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment",
                params()),
  double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment",
                params()),
  double_MEMBER(chop_center_knob, 0.15, "Split center adjustment",
                params()),
  INT_MEMBER(chop_centered_maxwidth, 90,
             "Width of (smaller) chopped blobs "
             "above which we don't care that a chop is not near the center.",
             params()),
  double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment",
                params()),
  double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment",
                params()),
  double_MEMBER(chop_ok_split, 100.0, "OK split limit", params()),
  double_MEMBER(chop_good_split, 50.0, "Good split limit", params()),
  INT_MEMBER(chop_x_y_weight, 3, "X / Y length weight", params()),
  INT_MEMBER(segment_adjust_debug, 0, "Segmentation adjustment debug",
             params()),
  BOOL_MEMBER(assume_fixed_pitch_char_segment, FALSE,
              "include fixed-pitch heuristics in char segmentation",
              params()),
  INT_MEMBER(wordrec_debug_level, 0, "Debug level for wordrec", params()),
  INT_MEMBER(wordrec_max_join_chunks, 4,
             "Max number of broken pieces to associate", params()),
  BOOL_MEMBER(wordrec_skip_no_truth_words, false,
              "Only run OCR for words that had truth recorded in BlamerBundle",
              params()),
  BOOL_MEMBER(wordrec_debug_blamer, false, "Print blamer debug messages",
              params()),
  BOOL_MEMBER(wordrec_run_blamer, false, "Try to set the blame for errors",
              params()),
  INT_MEMBER(segsearch_debug_level, 0, "SegSearch debug level", params()),
  INT_MEMBER(segsearch_max_pain_points, 2000,
             "Maximum number of pain points stored in the queue", params()),
  INT_MEMBER(segsearch_max_futile_classifications, 20,
             "Maximum number of pain point classifications per chunk that"
             " did not result in finding a better word choice.", params()),
  double_MEMBER(segsearch_max_char_wh_ratio, 2.0,
                "Maximum character width-to-height ratio", params()),
  BOOL_MEMBER(save_alt_choices, true,
              "Save alternative paths found during chopping"
              " and segmentation search", params()) {
  // No previous word yet.
  prev_word_best_choice_ = NULL;
  // Owned by this object; released in the destructor.
  language_model_ = new LanguageModel(&get_fontinfo_table(), &(getDict()));
  // No lattice-filling handler is installed by default.
  fill_lattice_ = NULL;
}
| tesseract::Wordrec::~Wordrec | ( | ) | [virtual] |
Definition at line 123 of file wordrec.cpp.
{
// Release the LanguageModel that the constructor allocated with new.
delete language_model_;
}
| void tesseract::Wordrec::add_seam_to_queue | ( | float | new_priority, |
| SEAM * | new_seam, | ||
| SeamQueue * | seams | ||
| ) |
Definition at line 64 of file findseam.cpp.
{
  // Pushes new_seam onto the seams queue under new_priority. When the queue
  // already holds MAX_NUM_SEAMS entries, the worst entry is popped and only
  // the better of (worst, new) survives; a rejected new_seam is deleted here.
  if (new_seam == NULL)
    return;

  if (chop_debug) {
    tprintf("Pushing new seam with priority %g :", new_priority);
    new_seam->Print("seam: ");
  }

  if (seams->size() >= MAX_NUM_SEAMS) {
    SeamPair worst(0, NULL);
    const bool keep_worst =
        seams->PopWorst(&worst) && worst.key() <= new_priority;
    if (keep_worst) {
      // The existing worst entry is at least as good: put it back and
      // discard the newcomer.
      if (chop_debug) {
        tprintf("Old seam staying with priority %g\n", worst.key());
      }
      delete new_seam;
      seams->Push(&worst);
      return;
    }
    if (chop_debug) {
      tprintf("New seam with priority %g beats old worst seam with %g\n",
              new_priority, worst.key());
    }
  }

  SeamPair entry(new_priority, new_seam);
  seams->Push(&entry);
}
| SEAM * tesseract::Wordrec::attempt_blob_chop | ( | TWERD * | word, |
| TBLOB * | blob, | ||
| inT32 | blob_number, | ||
| bool | italic_blob, | ||
| const GenericVector< SEAM * > & | seams | ||
| ) |
Definition at line 170 of file chopper.cpp.
{
// Tries to split |blob| (at index blob_number in |word|) into two blobs.
// A shallow copy of the blob is inserted at blob_number + 1 to receive the
// second half, a seam is chosen and applied, and the result is validated by
// CheckSeam. Returns the finalized seam, or NULL if no usable seam was found.
if (repair_unchopped_blobs)
preserve_outline_tree (blob->outlines);
TBLOB *other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
// Insert it into the word.
word->blobs.insert(other_blob, blob_number + 1);
SEAM *seam = NULL;
// With prioritize_division set, a simple division of the blob is tried
// before the general seam search.
if (prioritize_division) {
TPOINT location;
if (divisible_blob(blob, italic_blob, &location)) {
seam = new SEAM(0.0f, location);
}
}
// Fall back to the general seam search when no division seam was made.
if (seam == NULL)
seam = pick_good_seam(blob);
if (chop_debug) {
if (seam != NULL)
seam->Print("Good seam picked=");
else
tprintf("\n** no seam picked *** \n");
}
if (seam) {
seam->ApplySeam(italic_blob, blob, other_blob);
}
// CheckSeam is called even when seam is NULL; its result replaces seam.
seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
seams, seam);
if (seam == NULL) {
if (repair_unchopped_blobs)
restore_outline_tree(blob->outlines);
if (allow_blob_division && !prioritize_division) {
// If the blob can simply be divided into outlines, then do that.
TPOINT location;
if (divisible_blob(blob, italic_blob, &location)) {
// NOTE(review): a fresh copy is created and inserted again here, which
// presumably relies on CheckSeam having removed the first copy from the
// word when it returned NULL -- confirm against CheckSeam.
other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
word->blobs.insert(other_blob, blob_number + 1);
seam = new SEAM(0.0f, location);
seam->ApplySeam(italic_blob, blob, other_blob);
seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
seams, seam);
}
}
}
if (seam != NULL) {
// Make sure this seam doesn't get chopped again.
seam->Finalize();
}
return seam;
}
| BLOB_CHOICE_LIST * tesseract::Wordrec::call_matcher | ( | TBLOB * | blob | ) |
Definition at line 134 of file tface.cpp.
{
  // Classifies tessblob with the adaptive classifier and returns a newly
  // allocated list of choices. If ClassifyNormalizeIfNeeded() yields a
  // separate blob, that blob is classified instead and deleted afterwards.
  TBLOB* normalized = tessblob->ClassifyNormalizeIfNeeded();
  TBLOB* to_classify = (normalized == NULL) ? tessblob : normalized;

  BLOB_CHOICE_LIST* result = new BLOB_CHOICE_LIST();  // matcher result
  AdaptiveClassifier(to_classify, result);

  // Only a distinct temporary is deleted; the caller's blob is never freed.
  if (to_classify != tessblob) {
    delete to_classify;
  }
  return result;
}
| void tesseract::Wordrec::CallFillLattice | ( | const MATRIX & | ratings, |
| const WERD_CHOICE_LIST & | best_choices, | ||
| const UNICHARSET & | unicharset, | ||
| BlamerBundle * | blamer_bundle | ||
| ) | [inline] |
Definition at line 195 of file wordrec.h.
{
  // Forwards to the member function that fill_lattice_ points to.
  // The constructor leaves fill_lattice_ as NULL, and calling through a null
  // pointer to member is undefined behavior, so the call becomes a no-op
  // when no handler has been installed.
  if (fill_lattice_ == NULL) return;
  (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
}
| void tesseract::Wordrec::cc_recog | ( | WERD_RES * | word | ) |
Definition at line 109 of file tface.cpp.
{
// Recognizes one word: resets the dictionary's hyphen state from the word's
// W_EOL flag, runs chop_word_main, then emits debug output for the choices.
getDict().reset_hyphen_vars(word->word->flag(W_EOL));
chop_word_main(word);
// The first argument is true only when stopper_debug_level is at least 1.
word->DebugWordChoices(getDict().stopper_debug_level >= 1,
getDict().word_to_debug.string());
// Fails hard if StatesAllValid() reports a problem after recognition.
ASSERT_HOST(word->StatesAllValid());
}
| void tesseract::Wordrec::choose_best_seam | ( | SeamQueue * | seam_queue, |
| const SPLIT * | split, | ||
| PRIORITY | priority, | ||
| SEAM ** | seam_result, | ||
| TBLOB * | blob, | ||
| SeamPile * | seam_pile | ||
| ) |
Definition at line 103 of file findseam.cpp.
{
// If |split| is given, builds a seam from it and adds it to seam_queue.
// Then repeatedly pops seams from the queue, computes each one's full
// priority, and keeps a copy of the best healthy one in *seam_result.
// Popped seams are also combined with seams on seam_pile to generate
// further queue entries.
SEAM *seam;
char str[80];
float my_priority;
/* Add seam of split */
my_priority = priority;
if (split != NULL) {
// The seam location is the midpoint of the two split end points.
TPOINT split_point = split->point1->pos;
split_point += split->point2->pos;
split_point /= 2;
seam = new SEAM(my_priority, split_point, *split);
if (chop_debug > 1) seam->Print("Partial priority ");
add_seam_to_queue(my_priority, seam, seam_queue);
// Stop here when the partial priority already exceeds chop_good_split.
if (my_priority > chop_good_split)
return;
}
TBOX bbox = blob->bounding_box();
/* Queue loop */
while (!seam_queue->empty()) {
SeamPair seam_pair;
seam_queue->Pop(&seam_pair);
// From here on this function is responsible for the popped seam.
seam = seam_pair.extract_data();
/* Set full priority */
my_priority = seam->FullPriority(bbox.left(), bbox.right(),
chop_overlap_knob, chop_centered_maxwidth,
chop_center_knob, chop_width_change_knob);
if (chop_debug) {
sprintf (str, "Full my_priority %0.0f, ", my_priority);
seam->Print(str);
}
// Candidate beats the current result (or there is none yet) and is below
// the chop_ok_split limit.
if ((*seam_result == NULL || (*seam_result)->priority() > my_priority) &&
my_priority < chop_ok_split) {
/* No crossing */
if (seam->IsHealthy(*blob, chop_min_outline_points,
chop_min_outline_area)) {
// *seam_result gets its own copy, so |seam| can still go on the pile.
delete *seam_result;
*seam_result = new SEAM(*seam);
(*seam_result)->set_priority(my_priority);
} else {
// Unhealthy seam: drop it and make sure it cannot end the search.
delete seam;
seam = NULL;
my_priority = BAD_PRIORITY;
}
}
if (my_priority < chop_good_split) {
if (seam)
delete seam;
return; /* Made good answer */
}
if (seam) {
/* Combine with others */
if (seam_pile->size() < chop_seam_pile_size) {
// Pile has room: combine, then store the seam under its queue key.
combine_seam(*seam_pile, seam, seam_queue);
SeamDecPair pair(seam_pair.key(), seam);
seam_pile->Push(&pair);
} else if (chop_new_seam_pile &&
seam_pile->size() == chop_seam_pile_size &&
seam_pile->PeekTop().key() > seam_pair.key()) {
// Pile is full: replace its top entry, whose key is larger than this
// seam's key.
combine_seam(*seam_pile, seam, seam_queue);
SeamDecPair pair;
seam_pile->Pop(&pair); // pop the worst.
// Replace the seam in pair (deleting the old one) with
// the new seam and score, then push back into the heap.
pair.set_key(seam_pair.key());
pair.set_data(seam);
seam_pile->Push(&pair);
} else {
delete seam;
}
}
// Look at the next queued priority to decide whether to keep searching.
my_priority = seam_queue->empty() ? NO_FULL_PRIORITY
: seam_queue->PeekTop().key();
// Give up once the next candidate is above chop_ok_split, or above
// chop_good_split when a split was supplied.
if ((my_priority > chop_ok_split) ||
(my_priority > chop_good_split && split))
return;
}
}
| SEAM * tesseract::Wordrec::chop_numbered_blob | ( | TWERD * | word, |
| inT32 | blob_number, | ||
| bool | italic_blob, | ||
| const GenericVector< SEAM * > & | seams | ||
| ) |
Definition at line 224 of file chopper.cpp.
{
  // Convenience wrapper: looks up the blob at blob_number in the word and
  // hands it to attempt_blob_chop.
  TBLOB *target = word->blobs[blob_number];
  return attempt_blob_chop(word, target, blob_number, italic_blob, seams);
}
| SEAM * tesseract::Wordrec::chop_one_blob | ( | const GenericVector< TBOX > & | boxes, |
| const GenericVector< BLOB_CHOICE * > & | blob_choices, | ||
| WERD_RES * | word_res, | ||
| int * | blob_number | ||
| ) |
Definition at line 374 of file chopper.cpp.
{
  // Chooses the chopping strategy from prioritize_division: either chop a
  // blob that overlaps the given boxes, or improve the worst-rated blob.
  // Both paths are invoked with italic_blob set to true.
  if (!prioritize_division) {
    return improve_one_blob(blob_choices, NULL, false, true, word_res,
                            blob_number);
  }
  return chop_overlapping_blob(boxes, true, word_res, blob_number);
}
| SEAM * tesseract::Wordrec::chop_overlapping_blob | ( | const GenericVector< TBOX > & | boxes, |
| bool | italic_blob, | ||
| WERD_RES * | word_res, | ||
| int * | blob_number | ||
| ) |
Definition at line 232 of file chopper.cpp.
{
  // Walks the blobs of word_res->chopped_word in order and tries to chop the
  // first one that is either divisible, or overlaps more than one of |boxes|
  // without closely matching any single box. On success returns the seam and
  // leaves the blob's index in *blob_number; otherwise returns NULL and sets
  // *blob_number to -1.
  TWERD *word = word_res->chopped_word;
  for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) {
    TBLOB *blob = word->blobs[*blob_number];
    // Query the bounding box once rather than once per coordinate.
    const TBOX blob_box = blob->bounding_box();
    TPOINT topleft, botright;
    topleft.x = blob_box.left();
    topleft.y = blob_box.top();
    botright.x = blob_box.right();
    botright.y = blob_box.bottom();

    // Map both corners through the word's denorm so the box can be compared
    // with |boxes|.
    TPOINT original_topleft, original_botright;
    word_res->denorm.DenormTransform(NULL, topleft, &original_topleft);
    word_res->denorm.DenormTransform(NULL, botright, &original_botright);

    TBOX original_box = TBOX(original_topleft.x, original_botright.y,
                             original_botright.x, original_topleft.y);

    bool almost_equal_box = false;
    int num_overlap = 0;
    for (int i = 0; i < boxes.size(); i++) {
      if (original_box.overlap_fraction(boxes[i]) > 0.125)
        num_overlap++;
      if (original_box.almost_equal(boxes[i], 3))
        almost_equal_box = true;
    }

    TPOINT location;
    if (divisible_blob(blob, italic_blob, &location) ||
        (!almost_equal_box && num_overlap > 1)) {
      SEAM *seam = attempt_blob_chop(word, blob, *blob_number,
                                     italic_blob, word_res->seam_array);
      if (seam != NULL)
        return seam;
    }
  }

  // No blob could be chopped.
  *blob_number = -1;
  return NULL;
}
| void tesseract::Wordrec::chop_word_main | ( | WERD_RES * | word | ) |
Definition at line 394 of file chopper.cpp.
{
// Top-level recognition of one word: makes sure the ratings matrix exists
// and holds initial classifications, runs the segmentation search, and then
// finalizes the word's best choice and related state.
int num_blobs = word->chopped_word->NumBlobs();
if (word->ratings == NULL) {
word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
}
// An empty (0, 0) cell is taken to mean nothing has been classified yet.
if (word->ratings->get(0, 0) == NULL) {
// Run initial classification.
for (int b = 0; b < num_blobs; ++b) {
BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
"Initial:", word->chopped_word,
word->blamer_bundle);
word->ratings->put(b, b, choices);
}
} else {
// Blobs have been pre-classified. Set matrix cell for all blob choices
for (int col = 0; col < word->ratings->dimension(); ++col) {
// Only rows from col up to col + bandwidth - 1 are visited.
for (int row = col; row < word->ratings->dimension() &&
row < col + word->ratings->bandwidth(); ++row) {
BLOB_CHOICE_LIST* choices = word->ratings->get(col, row);
if (choices != NULL) {
BLOB_CHOICE_IT bc_it(choices);
for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
bc_it.data()->set_matrix_cell(col, row);
}
}
}
}
}
// Run Segmentation Search.
BestChoiceBundle best_choice_bundle(word->ratings->dimension());
SegSearch(word, &best_choice_bundle, word->blamer_bundle);
if (word->best_choice == NULL) {
// SegSearch found no valid paths, so just use the leading diagonal.
word->FakeWordFromRatings();
}
word->RebuildBestState();
// If we finished without a hyphen at the end of the word, let the next word
// be found in the dictionary.
if (word->word->flag(W_EOL) &&
!getDict().has_hyphen_end(*word->best_choice)) {
getDict().reset_hyphen_vars(true);
}
// The lattice is filled only when both a blamer bundle and a handler exist.
if (word->blamer_bundle != NULL && this->fill_lattice_ != NULL) {
CallFillLattice(*word->ratings, word->best_choices,
*word->uch_set, word->blamer_bundle);
}
if (wordrec_debug_level > 0) {
tprintf("Final Ratings Matrix:\n");
word->ratings->print(getDict().getUnicharset());
}
word->FilterWordChoices(getDict().stopper_debug_level);
}
| BLOB_CHOICE_LIST * tesseract::Wordrec::classify_blob | ( | TBLOB * | blob, |
| const char * | string, | ||
| C_COL | color, | ||
| BlamerBundle * | blamer_bundle | ||
| ) |
Definition at line 56 of file wordclass.cpp.
{
// Classifies |blob| via call_matcher and returns the resulting choice list.
// |string| is used only as the label for the debug print of the ratings and
// |color| only for the optional blob display.
#ifndef GRAPHICS_DISABLED
if (wordrec_display_all_blobs)
display_blob(blob, color);
#endif
// TODO(rays) collapse with call_matcher and move all to wordrec.cpp.
BLOB_CHOICE_LIST* choices = call_matcher(blob);
// If a blob with the same bounding box as one of the truth character
// bounding boxes is not classified as the corresponding truth character
// blame character classifier for incorrect answer.
if (blamer_bundle != NULL) {
blamer_bundle->BlameClassifier(getDict().getUnicharset(),
blob->bounding_box(),
*choices,
wordrec_debug_blamer);
}
// NOTE(review): the ratings print below sits inside the GRAPHICS_DISABLED
// guard, so it is compiled out together with the window wait -- confirm
// that this is intended.
#ifndef GRAPHICS_DISABLED
if (classify_debug_level && string)
print_ratings_list(string, choices, getDict().getUnicharset());
if (wordrec_blob_pause)
window_wait(blob_window);
#endif
return choices;
}
| BLOB_CHOICE_LIST * tesseract::Wordrec::classify_piece | ( | const GenericVector< SEAM * > & | seams, |
| inT16 | start, | ||
| inT16 | end, | ||
| const char * | description, | ||
| TWERD * | word, | ||
| BlamerBundle * | blamer_bundle | ||
| ) | [virtual] |
Definition at line 57 of file pieces.cpp.
{
  // Classifies the piece made of blobs start..end. When the range spans more
  // than one blob, the pieces are joined before classification and broken
  // apart again afterwards. Every returned choice is tagged with the matrix
  // cell (start, end).
  const bool spans_several_blobs = end > start;
  if (spans_several_blobs) {
    SEAM::JoinPieces(seams, word->blobs, start, end);
  }

  BLOB_CHOICE_LIST *result =
      classify_blob(word->blobs[start], description, White, blamer_bundle);

  // Set the matrix_cell_ entries in all the BLOB_CHOICES.
  BLOB_CHOICE_IT choice_it(result);
  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
       choice_it.forward()) {
    choice_it.data()->set_matrix_cell(start, end);
  }

  if (spans_several_blobs) {
    SEAM::BreakPieces(seams, word->blobs, start, end);
  }
  return result;
}
| void tesseract::Wordrec::combine_seam | ( | const SeamPile & | seam_pile, |
| const SEAM * | seam, | ||
| SeamQueue * | seam_queue | ||
| ) |
Definition at line 197 of file findseam.cpp.
{
  // For every seam on the pile that |seam| can be combined with, queues a
  // new seam that is a copy of |seam| merged with that pile entry.
  for (int index = 0; index < seam_pile.size(); ++index) {
    const SEAM *candidate = seam_pile.get(index).data();
    if (!seam->CombineableWith(*candidate, SPLIT_CLOSENESS, chop_ok_split)) {
      continue;
    }
    SEAM *merged = new SEAM(*seam);
    merged->CombineWith(*candidate);
    if (chop_debug > 1) {
      merged->Print("Combo priority ");
    }
    // The merged seam is handed over to the queue.
    add_seam_to_queue(merged->priority(), merged, seam_queue);
  }
}
| int tesseract::Wordrec::dict_word | ( | const WERD_CHOICE & | word | ) |
| void tesseract::Wordrec::DoSegSearch | ( | WERD_RES * | word_res | ) |
Definition at line 31 of file segsearch.cpp.
| int tesseract::Wordrec::end_recog | ( | ) |
Definition at line 61 of file tface.cpp.
{
// Runs the post-processing hook with an elapsed time of 0 and always
// returns 0.
program_editdown (0);
return (0);
}
| void tesseract::Wordrec::fill_filtered_fragment_list | ( | BLOB_CHOICE_LIST * | choices, |
| int | fragment_pos, | ||
| int | num_frag_parts, | ||
| BLOB_CHOICE_LIST * | filtered_choices | ||
| ) |
Definition at line 106 of file pieces.cpp.
{
  // Copies into filtered_choices every choice that is a character fragment
  // at position fragment_pos out of num_frag_parts. Each copy has its
  // unichar id replaced by the id of the whole character the fragment
  // belongs to. The output list is then sorted by unichar id.
  BLOB_CHOICE_IT out_it(filtered_choices);
  BLOB_CHOICE_IT in_it(choices);

  for (in_it.mark_cycle_pt(); !in_it.cycled_list(); in_it.forward()) {
    UNICHAR_ID fragment_id = in_it.data()->unichar_id();
    const CHAR_FRAGMENT *fragment = unicharset.get_fragment(fragment_id);
    if (fragment == NULL || fragment->get_pos() != fragment_pos ||
        fragment->get_total() != num_frag_parts) {
      continue;  // Not a fragment, or not the part being collected.
    }
    // Recover the unichar_id of the unichar that this fragment is
    // a part of
    BLOB_CHOICE *whole_char_choice = new BLOB_CHOICE(*in_it.data());
    int whole_char_id = unicharset.unichar_to_id(fragment->get_unichar());
    whole_char_choice->set_unichar_id(whole_char_id);
    out_it.add_to_end(whole_char_choice);
  }

  filtered_choices->sort(SortByUnicharID<BLOB_CHOICE>);
}
| void tesseract::Wordrec::FillLattice | ( | const MATRIX & | ratings, |
| const WERD_CHOICE_LIST & | best_choices, | ||
| const UNICHARSET & | unicharset, | ||
| BlamerBundle * | blamer_bundle | ||
| ) |
| void tesseract::Wordrec::get_fragment_lists | ( | inT16 | current_frag, |
| inT16 | current_row, | ||
| inT16 | start, | ||
| inT16 | num_frag_parts, | ||
| inT16 | num_blobs, | ||
| MATRIX * | ratings, | ||
| BLOB_CHOICE_LIST * | choice_lists | ||
| ) |
Definition at line 283 of file pieces.cpp.
{
  // Recursively collects, for each fragment position in turn, the filtered
  // fragment choices from consecutive cells of the ratings matrix. Once all
  // num_frag_parts positions are filled, the lists are merged and stored.
  if (current_frag == num_frag_parts) {
    // All parts found: the merged piece spans start .. current_row - 1.
    merge_and_put_fragment_lists(start, current_row - 1, num_frag_parts,
                                 choice_lists, ratings);
    return;
  }

  for (inT16 end = current_row; end < num_blobs; end++) {
    BLOB_CHOICE_LIST *cell = ratings->get(current_row, end);
    if (cell == NULL) {
      continue;
    }
    fill_filtered_fragment_list(cell, current_frag, num_frag_parts,
                                &choice_lists[current_frag]);
    if (choice_lists[current_frag].empty()) {
      continue;  // No matching fragment in this cell.
    }
    // Look for the next fragment part starting right after this cell.
    get_fragment_lists(current_frag + 1, end + 1, start, num_frag_parts,
                       num_blobs, ratings, choice_lists);
    choice_lists[current_frag].clear();
  }
}
Definition at line 74 of file gradechop.cpp.
{
  // Grades a split from the point priorities of its two end points. The sum
  // is shifted up by 360 (sums below -360 become 0) and then scaled by
  // chop_sharpness_knob.
  PRIORITY sharpness =
      point_priority (split->point1) + point_priority (split->point2);
  /* Values 0 to -360 */
  if (sharpness < -360.0) {
    sharpness = 0;
  } else {
    sharpness += 360.0;
  }
  sharpness *= chop_sharpness_knob;
  return sharpness;
}
Definition at line 51 of file gradechop.cpp.
{
  // Grades a split by the weighted distance between its two end points:
  // the square root of that distance scaled by chop_split_dist_knob, or 0
  // for a non-positive distance. The result is never negative.
  float weighted_length =
      split->point1->WeightedDistance(*split->point2, chop_x_y_weight);
  PRIORITY length_grade;
  if (weighted_length <= 0) {
    length_grade = 0;
  } else {
    length_grade = sqrt (weighted_length) * chop_split_dist_knob;
  }
  return MAX (0.0, length_grade);
}
| void tesseract::Wordrec::improve_by_chopping | ( | float | rating_cert_scale, |
| WERD_RES * | word, | ||
| BestChoiceBundle * | best_choice_bundle, | ||
| BlamerBundle * | blamer_bundle, | ||
| LMPainPoints * | pain_points, | ||
| GenericVector< SegSearchPending > * | pending | ||
| ) |
Definition at line 457 of file chopper.cpp.
// Repeatedly chops one blob of the word at a time, updating the ratings
// matrix, beam, pain points and pending list after each chop, and re-running
// the language model. The loop stops when no further seam can be found, when
// the language model reports an acceptable choice, or when the ratings matrix
// reaches kMaxNumChunks. Afterwards the blamer (if present and still
// undecided) is asked to assign blame for an incorrect best choice.
{
int blob_number;
do { // improvement loop.
// Make a simple vector of BLOB_CHOICEs to make it easy to pick which
// one to chop.
GenericVector<BLOB_CHOICE*> blob_choices;
int num_blobs = word->ratings->dimension();
for (int i = 0; i < num_blobs; ++i) {
// Only the diagonal cells (single-blob classifications) are considered.
BLOB_CHOICE_LIST* choices = word->ratings->get(i, i);
if (choices == NULL || choices->empty()) {
blob_choices.push_back(NULL);
} else {
// Take the entry at the head of the list for this blob.
BLOB_CHOICE_IT bc_it(choices);
blob_choices.push_back(bc_it.data());
}
}
// Both split_next_to_fragment and italic_blob are passed as false here.
SEAM* seam = improve_one_blob(blob_choices, &best_choice_bundle->fixpt,
false, false, word, &blob_number);
// No seam means nothing more can be chopped: leave the loop.
if (seam == NULL) break;
// A chop has been made. We have to correct all the data structures to
// take into account the extra bottom-level blob.
// Put the seam into the seam_array and correct everything else on the
// word: ratings matrix (including matrix location in the BLOB_CHOICES),
// states in WERD_CHOICEs, and blob widths.
word->InsertSeam(blob_number, seam);
// Insert a new entry in the beam array.
best_choice_bundle->beam.insert(new LanguageModelState, blob_number);
// Fixpts are outdated, but will get recalculated.
best_choice_bundle->fixpt.clear();
// Remap existing pain points.
pain_points->RemapForSplit(blob_number);
// Insert a new pending at the chop point.
pending->insert(SegSearchPending(), blob_number);
// Classify the two newly created blobs using ProcessSegSearchPainPoint,
// as that updates the pending correctly and adds new pain points.
MATRIX_COORD pain_point(blob_number, blob_number);
ProcessSegSearchPainPoint(0.0f, pain_point, "Chop1", pending, word,
pain_points, blamer_bundle);
// Second half of the chopped blob sits on the next diagonal cell.
pain_point.col = blob_number + 1;
pain_point.row = blob_number + 1;
ProcessSegSearchPainPoint(0.0f, pain_point, "Chop2", pending, word,
pain_points, blamer_bundle);
if (language_model_->language_model_ngram_on) {
// N-gram evaluation depends on the number of blobs in a chunk, so we
// have to re-evaluate everything in the word.
ResetNGramSearch(word, best_choice_bundle, pending);
// Restart the update below from the first column.
blob_number = 0;
}
// Run language model incrementally. (Except with the n-gram model on.)
UpdateSegSearchNodes(rating_cert_scale, blob_number, pending,
word, pain_points, best_choice_bundle, blamer_bundle);
} while (!language_model_->AcceptableChoiceFound() &&
word->ratings->dimension() < kMaxNumChunks);
// If after running only the chopper best_choice is incorrect and no blame
// has been yet set, blame the classifier if best_choice is classifier's
// top choice and is a dictionary word (i.e. language model could not have
// helped). Otherwise blame the tradeoff between the classifier and
// the old language model (permuters).
// NOTE(review): this uses word->blamer_bundle rather than the blamer_bundle
// parameter - presumably the same object; confirm against callers.
if (word->blamer_bundle != NULL &&
word->blamer_bundle->incorrect_result_reason() == IRR_CORRECT &&
!word->blamer_bundle->ChoiceIsCorrect(word->best_choice)) {
bool valid_permuter = word->best_choice != NULL &&
Dict::valid_word_permuter(word->best_choice->permuter(), false);
word->blamer_bundle->BlameClassifierOrLangModel(word,
getDict().getUnicharset(),
valid_permuter,
wordrec_debug_blamer);
}
}
SEAM * tesseract::Wordrec::improve_one_blob(const GenericVector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number)
Definition at line 330 of file chopper.cpp.
// Picks a blob to chop and tries to chop it, retrying with other blobs until
// a seam is produced or no candidate remains.
//   blob_choices            per-blob choices, indexed by blob; may hold NULL.
//   fixpt                   dangerous-ambiguity hints; cleared once used.
//   split_next_to_fragment  forwarded to select_blob_to_split.
//   italic_blob             forwarded to chop_numbered_blob.
//   word                    supplies chopped_word and seam_array.
//   blob_number             out: index of the blob that was last selected.
// Returns the new seam, or NULL if nothing could be chopped.
{
  float rating_ceiling = MAX_FLOAT32;
  for (;;) {
    // First preference: a split position suggested by the fixpt hints.
    *blob_number = select_blob_to_split_from_fixpt(fixpt);
    if (chop_debug) tprintf("blob_number from fixpt = %d\n", *blob_number);
    const bool split_point_from_dict = (*blob_number != -1);
    if (split_point_from_dict) {
      // The hint is consumed so it is not proposed again next iteration.
      fixpt->clear();
    } else {
      // Fall back to the rating-based selection below the current ceiling.
      *blob_number = select_blob_to_split(blob_choices, rating_ceiling,
                                          split_next_to_fragment);
    }
    if (chop_debug) tprintf("blob_number = %d\n", *blob_number);
    if (*blob_number == -1)
      return NULL;  // No candidate left.

    // TODO(rays) it may eventually help to allow italic_blob to be true,
    SEAM *seam = chop_numbered_blob(word->chopped_word, *blob_number,
                                    italic_blob, word->seam_array);
    if (seam != NULL)
      return seam;  // Success!

    // The chop failed. Without a choice for this blob there is no rating to
    // lower the ceiling with, so give up.
    BLOB_CHOICE *failed_choice = blob_choices[*blob_number];
    if (failed_choice == NULL)
      return NULL;
    if (!split_point_from_dict) {
      // We chopped the worst rated blob, try something else next time.
      rating_ceiling = failed_choice->rating();
    }
  }
}
void tesseract::Wordrec::InitBlamerForSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug) [protected]
Definition at line 342 of file segsearch.cpp.
// Prepares the blamer for a guided segmentation search: empties the pain
// point heap, then lets blamer_bundle->InitForSegSearch run with a callback
// bound to LMPainPoints::GenerateForBlamer on pain_points.
{
pain_points->Clear(); // Clear pain points heap.
// Callback binds pain_points, the max char width/height ratio and word_res;
// the two int arguments are supplied by the callee.
TessResultCallback2<bool, int, int>* pp_cb = NewPermanentTessCallback(
pain_points, &LMPainPoints::GenerateForBlamer,
static_cast<double>(segsearch_max_char_wh_ratio), word_res);
blamer_bundle->InitForSegSearch(word_res->best_choice, word_res->ratings,
getDict().WildcardID(), wordrec_debug_blamer,
blamer_debug, pp_cb);
// The callback was created here, so it is released here once
// InitForSegSearch has returned.
delete pp_cb;
}
void tesseract::Wordrec::InitialSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition at line 150 of file segsearch.cpp.
// Sets up and runs the first pass of the segmentation search: generates the
// initial pain points, initialises the language model for this word, sizes
// the pending list to the ratings matrix dimension and runs
// UpdateSegSearchNodes from column 0.
// blamer_bundle may be NULL, in which case the blamer setup is skipped.
{
if (segsearch_debug_level > 0) {
tprintf("Starting SegSearch on ratings matrix%s:\n",
wordrec_enable_assoc ? " (with assoc)" : "");
word_res->ratings->print(getDict().getUnicharset());
}
pain_points->GenerateInitial(word_res);
// Compute scaling factor that will help us recover blob outline length
// from classifier rating and certainty for the blob.
float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale;
language_model_->InitForWord(prev_word_best_choice_,
assume_fixed_pitch_char_segment,
segsearch_max_char_wh_ratio, rating_cert_scale);
// Initialize blamer-related information: map character boxes recorded in
// blamer_bundle->norm_truth_word to the corresponding i,j indices in the
// ratings matrix. We expect this step to succeed, since when running the
// chopper we checked that the correct chops are present.
if (blamer_bundle != NULL) {
blamer_bundle->SetupCorrectSegmentation(word_res->chopped_word,
wordrec_debug_blamer);
}
// pending[col] tells whether there is update work to do to combine
// best_choice_bundle->beam[col - 1] with some BLOB_CHOICEs in matrix[col, *].
// As the language model state is updated, pending entries are modified to
// minimize duplication of work. It is important that during the update the
// children are considered in the non-decreasing order of their column, since
// this guarantees that all the parents would be up to date before an update
// of a child is done.
pending->init_to_size(word_res->ratings->dimension(), SegSearchPending());
// Search the ratings matrix for the initial best path.
// Column 0 is marked so the update below has work to start from.
(*pending)[0].SetColumnClassified();
UpdateSegSearchNodes(rating_cert_scale, 0, pending, word_res,
pain_points, best_choice_bundle, blamer_bundle);
}
void tesseract::Wordrec::merge_and_put_fragment_lists(inT16 row, inT16 column, inT16 num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings)
Definition at line 139 of file pieces.cpp.
// Merges num_frag_parts lists of character-fragment choices into whole
// character choices and stores the result in ratings(row, column).
//
// The lists are walked in lock-step: all iterators are advanced to the
// largest unichar_id currently pointed at, and when every list points at the
// same unichar_id a merged BLOB_CHOICE is appended to the target cell.
// NOTE(review): this walk only works if each list is ordered by ascending
// unichar_id - presumably guaranteed by the code that fills them; confirm.
//
//   row, column     target cell in the ratings matrix.
//   num_frag_parts  number of entries of choice_lists to merge.
//   choice_lists    array of at least num_frag_parts fragment lists.
//   ratings         matrix that receives the merged choices.
//
// Merged values: rating is the sum over the parts, certainty the minimum,
// the xheight range the intersection, and each font score the minimum over
// the parts that carry that font.
{
  // Nothing can be merged without parts, and an empty part can never match
  // the others, so bail out before any iterator is dereferenced.
  if (num_frag_parts <= 0)
    return;
  for (int i = 0; i < num_frag_parts; i++) {
    if (choice_lists[i].empty())
      return;
  }

  BLOB_CHOICE_IT *choice_lists_it = new BLOB_CHOICE_IT[num_frag_parts];
  for (int i = 0; i < num_frag_parts; i++) {
    choice_lists_it[i].set_to_list(&choice_lists[i]);
    choice_lists_it[i].mark_cycle_pt();
  }

  // Reuse the list already stored in the cell, if any. Remember whether the
  // list was allocated here: a list owned by the matrix must never be
  // deleted from this function, or the matrix is left with a dangling
  // pointer.
  BLOB_CHOICE_LIST *merged_choice = ratings->get(row, column);
  const bool owns_merged_choice = (merged_choice == NULL);
  if (owns_merged_choice)
    merged_choice = new BLOB_CHOICE_LIST;

  bool end_of_list = false;
  BLOB_CHOICE_IT merged_choice_it(merged_choice);
  while (!end_of_list) {
    // Find the maximum unichar_id of the current entry the iterators
    // are pointing at
    UNICHAR_ID max_unichar_id = choice_lists_it[0].data()->unichar_id();
    for (int i = 0; i < num_frag_parts; i++) {
      UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
      if (max_unichar_id < unichar_id) {
        max_unichar_id = unichar_id;
      }
    }

    // Move the each iterators until it gets to an entry that has a
    // value greater than or equal to max_unichar_id
    for (int i = 0; i < num_frag_parts; i++) {
      UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
      while (!choice_lists_it[i].cycled_list() &&
             unichar_id < max_unichar_id) {
        choice_lists_it[i].forward();
        unichar_id = choice_lists_it[i].data()->unichar_id();
      }
      if (choice_lists_it[i].cycled_list()) {
        end_of_list = true;
        break;
      }
    }

    if (end_of_list)
      break;

    // Checks if the fragments are parts of the same character
    UNICHAR_ID first_unichar_id = choice_lists_it[0].data()->unichar_id();
    bool same_unichar = true;
    for (int i = 1; i < num_frag_parts; i++) {
      UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
      if (unichar_id != first_unichar_id) {
        same_unichar = false;
        break;
      }
    }

    if (same_unichar) {
      // Add the merged character to the result
      UNICHAR_ID merged_unichar_id = first_unichar_id;
      GenericVector<ScoredFont> merged_fonts =
          choice_lists_it[0].data()->fonts();
      float merged_min_xheight = choice_lists_it[0].data()->min_xheight();
      float merged_max_xheight = choice_lists_it[0].data()->max_xheight();
      float positive_yshift = 0, negative_yshift = 0;
      int merged_script_id = choice_lists_it[0].data()->script_id();
      BlobChoiceClassifier classifier = choice_lists_it[0].data()->classifier();

      float merged_rating = 0, merged_certainty = 0;
      for (int i = 0; i < num_frag_parts; i++) {
        // Take every attribute from the part that is being merged. The
        // iterator is advanced only at the bottom of this loop body;
        // reading after forward() would pick up the NEXT entry of the list
        // (or the first one again after wrap-around).
        BLOB_CHOICE *part = choice_lists_it[i].data();

        float rating = part->rating();
        float certainty = part->certainty();
        if (i == 0 || certainty < merged_certainty)
          merged_certainty = certainty;
        merged_rating += rating;

        IntersectRange(part->min_xheight(), part->max_xheight(),
                       &merged_min_xheight, &merged_max_xheight);
        float yshift = part->yshift();
        if (yshift > positive_yshift) positive_yshift = yshift;
        if (yshift < negative_yshift) negative_yshift = yshift;

        // Use the min font rating over the parts.
        // TODO(rays) font lists are unsorted. Need to be faster?
        const GenericVector<ScoredFont>& frag_fonts = part->fonts();
        for (int f = 0; f < frag_fonts.size(); ++f) {
          int merged_f = 0;
          for (merged_f = 0; merged_f < merged_fonts.size() &&
               merged_fonts[merged_f].fontinfo_id != frag_fonts[f].fontinfo_id;
               ++merged_f) {}
          if (merged_f == merged_fonts.size()) {
            merged_fonts.push_back(frag_fonts[f]);
          } else if (merged_fonts[merged_f].score > frag_fonts[f].score) {
            merged_fonts[merged_f].score = frag_fonts[f].score;
          }
        }

        // Step past the consumed entry; a wrap-around ends the merge.
        choice_lists_it[i].forward();
        if (choice_lists_it[i].cycled_list())
          end_of_list = true;
      }

      // Shifts in both directions cancel to zero; otherwise keep whichever
      // direction was seen.
      float merged_yshift = positive_yshift != 0
          ? (negative_yshift != 0 ? 0 : positive_yshift)
          : negative_yshift;
      BLOB_CHOICE* choice = new BLOB_CHOICE(merged_unichar_id,
                                            merged_rating,
                                            merged_certainty,
                                            merged_script_id,
                                            merged_min_xheight,
                                            merged_max_xheight,
                                            merged_yshift,
                                            classifier);
      choice->set_fonts(merged_fonts);
      merged_choice_it.add_to_end(choice);
    }
  }

  if (classify_debug_level)
    print_ratings_list("Merged Fragments", merged_choice,
                       unicharset);

  if (!merged_choice->empty()) {
    ratings->put(row, column, merged_choice);
  } else if (owns_merged_choice) {
    // Only a list allocated above may be discarded; an empty list that came
    // from the matrix stays where it is.
    delete merged_choice;
  }

  delete [] choice_lists_it;
}
Definition at line 315 of file pieces.cpp.
// Body listed for pieces.cpp:315 (presumably merge_fragments; the signature
// is not visible here). First asks get_fragment_lists to process every start
// blob with every fragment count from 2 to CHAR_FRAGMENT::kMaxChunks, then
// removes all remaining fragment choices from the ratings matrix.
{
  // Scratch lists handed to get_fragment_lists, one per fragment position.
  BLOB_CHOICE_LIST choice_lists[CHAR_FRAGMENT::kMaxChunks];
  for (inT16 start = 0; start < num_blobs; start++) {
    for (int frag_parts = 2; frag_parts <= CHAR_FRAGMENT::kMaxChunks;
         frag_parts++) {
      get_fragment_lists(0, start, start, frag_parts, num_blobs,
                         ratings, choice_lists);
    }
  }

  // Delete fragments from the rating matrix
  for (inT16 col = 0; col < num_blobs; col++) {
    for (inT16 row = col; row < num_blobs; row++) {
      BLOB_CHOICE_LIST *cell = ratings->get(col, row);
      if (cell == NULL)
        continue;
      BLOB_CHOICE_IT cell_it(cell);
      for (cell_it.mark_cycle_pt(); !cell_it.cycled_list();
           cell_it.forward()) {
        // A non-NULL fragment lookup marks this choice as a fragment.
        if (unicharset.get_fragment(cell_it.data()->unichar_id()) != NULL)
          delete cell_it.extract();
      }
    }
  }
}
bool tesseract::Wordrec::near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt)
Definition at line 49 of file outlines.cpp.
// Projects point onto the line through line_pt_0 and line_pt_1.
// If the projected position passes is_on_line for the two line points and
// differs from both of them, a new edge point is created there with
// make_edgept, stored in *near_pt, and true is returned. Otherwise *near_pt
// is set to closest(point, line_pt_0, line_pt_1) and false is returned.
{
TPOINT p;
float slope;
float intercept;
float x0 = line_pt_0->pos.x;
float x1 = line_pt_1->pos.x;
float y0 = line_pt_0->pos.y;
float y1 = line_pt_1->pos.y;
if (x0 == x1) {
/* Handle vertical line */
// The projection keeps the line's x and the point's y.
p.x = (inT16) x0;
p.y = point->pos.y;
}
else {
/* Slope and intercept */
slope = (y0 - y1) / (x0 - x1);
intercept = y1 - x1 * slope;
/* Find perpendicular */
// Foot of the perpendicular, truncated to inT16 by the casts.
p.x = (inT16) ((point->pos.x + (point->pos.y - intercept) * slope) /
(slope * slope + 1));
p.y = (inT16) (slope * p.x + intercept);
}
if (is_on_line (p, line_pt_0->pos, line_pt_1->pos) &&
(!same_point (p, line_pt_0->pos)) && (!same_point (p, line_pt_1->pos))) {
/* Intersection on line */
*near_pt = make_edgept(p.x, p.y, line_pt_1, line_pt_0);
return true;
} else { /* Intersection not on line */
*near_pt = closest(point, line_pt_0, line_pt_1);
return false;
}
}
Definition at line 249 of file chop.cpp.
{
add_point_to_list(points, local_max);
return;
}
if (dir == 0 && point_priority (local_max) < 0) {
add_point_to_list(points, local_max);
return;
}
}
Definition at line 225 of file chop.cpp.
{
add_point_to_list(points, local_min);
return;
}
if (dir == 0 && point_priority (local_min) < 0) {
add_point_to_list(points, local_min);
return;
}
}
EDGEPT * tesseract::Wordrec::pick_close_point(EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist)
Definition at line 128 of file chop.cpp.
{
found_better = FALSE;
this_distance = edgept_dist (critical_point, vertical_point);
if (this_distance <= *best_dist) {
if (!(same_point (critical_point->pos, vertical_point->pos) ||
same_point (critical_point->pos, vertical_point->next->pos) ||
(best_point && same_point (best_point->pos, vertical_point->pos)) ||
is_exterior_point (critical_point, vertical_point))) {
*best_dist = this_distance;
best_point = vertical_point;
if (chop_vertical_creep)
found_better = TRUE;
}
}
vertical_point = vertical_point->next;
}
while (found_better == TRUE);
return (best_point);
}
Definition at line 216 of file findseam.cpp.
// Body listed for findseam.cpp:216 (presumably pick_good_seam(TBLOB *blob);
// the signature is not visible here). Collects prioritized outline points of
// the blob, tries splits between point pairs and vertical splits, picks the
// best resulting seam, removes helper edge points that the chosen seam does
// not use, and returns the seam or NULL if none is at most chop_ok_split.
{
SeamPile seam_pile(chop_seam_pile_size);
EDGEPT *points[MAX_NUM_POINTS];
EDGEPT_CLIST new_points;
SEAM *seam = NULL;
TESSLINE *outline;
inT16 num_points = 0;
#ifndef GRAPHICS_DISABLED
// Only the set_value call is guarded by the if; draw_blob_edges is called
// whenever graphics are compiled in.
if (chop_debug > 2)
wordrec_display_splits.set_value(true);
draw_blob_edges(blob);
#endif
PointHeap point_heap(MAX_NUM_POINTS);
for (outline = blob->outlines; outline; outline = outline->next)
prioritize_points(outline, &point_heap);
// Drain the heap into the flat points array, at most MAX_NUM_POINTS entries.
while (!point_heap.empty() && num_points < MAX_NUM_POINTS) {
points[num_points++] = point_heap.PeekTop().data;
point_heap.Pop(NULL);
}
/* Initialize queue */
SeamQueue seam_queue(MAX_NUM_SEAMS);
try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
try_vertical_splits(points, num_points, &new_points,
&seam_queue, &seam_pile, &seam, blob);
// With no seam yet, or one whose priority exceeds chop_good_split, run
// choose_best_seam once more without a new split (NULL).
if (seam == NULL) {
choose_best_seam(&seam_queue, NULL, BAD_PRIORITY, &seam, blob, &seam_pile);
} else if (seam->priority() > chop_good_split) {
choose_best_seam(&seam_queue, NULL, seam->priority(), &seam, blob,
&seam_pile);
}
// Remove the points recorded in new_points unless the chosen seam uses them.
EDGEPT_C_IT it(&new_points);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
EDGEPT *inserted_point = it.data();
if (seam == NULL || !seam->UsesPoint(inserted_point)) {
for (outline = blob->outlines; outline; outline = outline->next) {
// Keep the outline's loop pointer off the point about to be removed.
if (outline->loop == inserted_point) {
outline->loop = outline->loop->next;
}
}
remove_edgept(inserted_point);
}
}
if (seam) {
// A seam with priority above chop_ok_split is rejected.
if (seam->priority() > chop_ok_split) {
delete seam;
seam = NULL;
}
#ifndef GRAPHICS_DISABLED
else if (wordrec_display_splits) {
seam->Mark(edge_window);
if (chop_debug > 2) {
update_edge_window();
edge_window_wait();
}
}
#endif
}
// Reset the display flag whenever chop debugging is on.
if (chop_debug)
wordrec_display_splits.set_value(false);
return (seam);
}
Definition at line 166 of file chop.cpp.
{
if (this_point->vec.y < 0) {
/* Look for minima */
if (local_max != NULL)
new_max_point(local_max, points);
else if (is_inside_angle (this_point))
add_point_to_list(points, this_point);
local_max = NULL;
local_min = this_point->next;
}
else if (this_point->vec.y > 0) {
/* Look for maxima */
if (local_min != NULL)
new_min_point(local_min, points);
else if (is_inside_angle (this_point))
add_point_to_list(points, this_point);
local_min = NULL;
local_max = this_point->next;
}
else {
/* Flat area */
if (local_max != NULL) {
if (local_max->prev->vec.y != 0) {
new_max_point(local_max, points);
}
local_max = this_point->next;
local_min = NULL;
}
else {
if (local_min->prev->vec.y != 0) {
new_min_point(local_min, points);
}
local_min = this_point->next;
local_max = NULL;
}
}
/* Next point */
this_point = this_point->next;
}
while (this_point != outline->loop);
}
void tesseract::Wordrec::ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle) [protected]
Definition at line 262 of file segsearch.cpp.
// Classifies the blob range [pain_point.col, pain_point.row], stores the
// resulting choices in the ratings matrix (widening its band first when the
// coordinate is not yet valid), generates follow-up pain points towards the
// left and right neighbours, and marks the row as classified in the pending
// entry of the column.
// pain_point_priority and pain_point_type are only used for debug output and
// as the classify_piece description.
{
if (segsearch_debug_level > 0) {
tprintf("Classifying pain point %s priority=%.4f, col=%d, row=%d\n",
pain_point_type, pain_point_priority,
pain_point.col, pain_point.row);
}
ASSERT_HOST(pain_points != NULL);
MATRIX *ratings = word_res->ratings;
// Classify blob [pain_point.col pain_point.row]
if (!pain_point.Valid(*ratings)) {
ratings->IncreaseBandSize(pain_point.row + 1 - pain_point.col);
}
ASSERT_HOST(pain_point.Valid(*ratings));
BLOB_CHOICE_LIST *classified = classify_piece(word_res->seam_array,
pain_point.col, pain_point.row,
pain_point_type,
word_res->chopped_word,
blamer_bundle);
BLOB_CHOICE_LIST *lst = ratings->get(pain_point.col, pain_point.row);
if (lst == NULL) {
// Empty cell: the matrix takes the freshly classified list as is.
ratings->put(pain_point.col, pain_point.row, classified);
} else {
// We can not delete old BLOB_CHOICEs, since they might contain
// ViterbiStateEntries that are parents of other "active" entries.
// Thus if the matrix cell already contains classifications we add
// the new ones to the beginning of the list.
BLOB_CHOICE_IT it(lst);
it.add_list_before(classified);
delete classified; // safe to delete, since empty after add_list_before()
classified = NULL;
}
if (segsearch_debug_level > 0) {
print_ratings_list("Updated ratings matrix with a new entry:",
ratings->get(pain_point.col, pain_point.row),
getDict().getUnicharset());
ratings->print(getDict().getUnicharset());
}
// Insert initial "pain points" to join the newly classified blob
// with its left and right neighbors.
// NOTE(review): classified is NULL after the merge branch above, so these
// neighbour pain points are only generated when the cell was empty before.
// Confirm whether that is intended.
if (classified != NULL && !classified->empty()) {
if (pain_point.col > 0) {
pain_points->GeneratePainPoint(
pain_point.col - 1, pain_point.row, LM_PPTYPE_SHAPE, 0.0,
true, segsearch_max_char_wh_ratio, word_res);
}
if (pain_point.row + 1 < ratings->dimension()) {
pain_points->GeneratePainPoint(
pain_point.col, pain_point.row + 1, LM_PPTYPE_SHAPE, 0.0,
true, segsearch_max_char_wh_ratio, word_res);
}
}
(*pending)[pain_point.col].SetBlobClassified(pain_point.row);
}
void tesseract::Wordrec::program_editdown(inT32 elasped_time)
Definition at line 74 of file tface.cpp.
// Shuts down the adaptive classifier and then the dictionary.
// The elasped_time argument is not used in this body.
{
EndAdaptiveClassifier();
getDict().End();
}
void tesseract::Wordrec::program_editup(const char *textbase, bool init_classifier, bool init_permute)
Definition at line 46 of file tface.cpp.
// Initialises the recognizer: records the image base name (when given), sets
// up the feature definitions and the adaptive classifier, optionally loads
// the dictionary, and remembers the current chop_ok_split in pass2_ok_split
// (set_pass2 reads that value back).
{
if (textbase != NULL) imagefile = textbase;
InitFeatureDefs(&feature_defs_);
InitAdaptiveClassifier(init_classifier);
// NOTE(review): the body reads init_dict while the documented parameter
// list names the third argument init_permute - presumably the definition
// and the declaration use different parameter names; confirm.
if (init_dict) getDict().Load(Dict::GlobalDawgCache());
pass2_ok_split = chop_ok_split;
}
void tesseract::Wordrec::ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending) [protected]
Definition at line 325 of file segsearch.cpp.
// Throws away the current search state so the whole word can be searched
// again: clears every beam column, drops the word choices and the best
// viterbi entry, and leaves only column 0 of the pending list marked as
// classified.
{
  // TODO(rays) More refactoring required here.
  // Delete existing viterbi states.
  int beam_col = 0;
  while (beam_col < best_choice_bundle->beam.size()) {
    best_choice_bundle->beam[beam_col]->Clear();
    ++beam_col;
  }

  // Reset best_choice_bundle.
  word_res->ClearWordChoices();
  best_choice_bundle->best_vse = NULL;

  // Clear out all existing pendings and add a new one for the first column.
  GenericVector<SegSearchPending> &pending_cols = *pending;
  pending_cols[0].SetColumnClassified();
  for (int pending_col = 1; pending_col < pending_cols.size(); ++pending_col) {
    pending_cols[pending_col].Clear();
  }
}
void tesseract::Wordrec::SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition at line 37 of file segsearch.cpp.
// Full segmentation search for one word: runs the initial search, then (if
// not already done) improves by chopping, and finally keeps classifying
// "pain points" from the queue until SegSearchDone reports completion, the
// queue is exhausted, or wordrec_enable_assoc is off.
// blamer_bundle may be NULL; all blamer steps are skipped in that case.
{
LMPainPoints pain_points(segsearch_max_pain_points,
segsearch_max_char_wh_ratio,
assume_fixed_pitch_char_segment,
&getDict(), segsearch_debug_level);
// Compute scaling factor that will help us recover blob outline length
// from classifier rating and certainty for the blob.
float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale;
GenericVector<SegSearchPending> pending;
InitialSegSearch(word_res, &pain_points, &pending, best_choice_bundle,
blamer_bundle);
if (!SegSearchDone(0)) { // find a better choice
if (chop_enable && word_res->chopped_word != NULL) {
improve_by_chopping(rating_cert_scale, word_res, best_choice_bundle,
blamer_bundle, &pain_points, &pending);
}
if (chop_debug) SEAM::PrintSeams("Final seam list:", word_res->seam_array);
if (blamer_bundle != NULL &&
!blamer_bundle->ChoiceIsCorrect(word_res->best_choice)) {
blamer_bundle->SetChopperBlame(word_res, wordrec_debug_blamer);
}
}
// Keep trying to find a better path by fixing the "pain points".
MATRIX_COORD pain_point;
float pain_point_priority;
int num_futile_classifications = 0;
STRING blamer_debug;
// The loop also keeps going while the blamer's guided search is active, even
// if SegSearchDone would otherwise stop it.
while (wordrec_enable_assoc &&
(!SegSearchDone(num_futile_classifications) ||
(blamer_bundle != NULL &&
blamer_bundle->GuidedSegsearchStillGoing()))) {
// Get the next valid "pain point".
bool found_nothing = true;
LMPainPointsType pp_type;
// LM_PPTYPE_NUM from Deque ends this inner loop (the queue has nothing more).
while ((pp_type = pain_points.Deque(&pain_point, &pain_point_priority)) !=
LM_PPTYPE_NUM) {
if (!pain_point.Valid(*word_res->ratings)) {
word_res->ratings->IncreaseBandSize(
pain_point.row - pain_point.col + 1);
}
// Skip pain points whose cell has already been classified.
if (pain_point.Valid(*word_res->ratings) &&
!word_res->ratings->Classified(pain_point.col, pain_point.row,
getDict().WildcardID())) {
found_nothing = false;
break;
}
}
if (found_nothing) {
if (segsearch_debug_level > 0) tprintf("Pain points queue is empty\n");
break;
}
ProcessSegSearchPainPoint(pain_point_priority, pain_point,
LMPainPoints::PainPointDescription(pp_type),
&pending, word_res, &pain_points, blamer_bundle);
UpdateSegSearchNodes(rating_cert_scale, pain_point.col, &pending,
word_res, &pain_points, best_choice_bundle,
blamer_bundle);
// A classification that did not update the best choice counts as futile.
if (!best_choice_bundle->updated) ++num_futile_classifications;
if (segsearch_debug_level > 0) {
tprintf("num_futile_classifications %d\n", num_futile_classifications);
}
best_choice_bundle->updated = false; // reset updated
// See if it's time to terminate SegSearch or time for starting a guided
// search for the true path to find the blame for the incorrect best_choice.
if (SegSearchDone(num_futile_classifications) &&
blamer_bundle != NULL &&
blamer_bundle->GuidedSegsearchNeeded(word_res->best_choice)) {
InitBlamerForSegSearch(word_res, &pain_points, blamer_bundle,
&blamer_debug);
}
} // end while loop exploring alternative paths
if (blamer_bundle != NULL) {
blamer_bundle->FinishSegSearch(word_res->best_choice,
wordrec_debug_blamer, &blamer_debug);
}
if (segsearch_debug_level > 0) {
tprintf("Done with SegSearch (AcceptableChoiceFound: %d)\n",
language_model_->AcceptableChoiceFound());
}
}
bool tesseract::Wordrec::SegSearchDone(int num_futile_classifications) [inline, protected]
Definition at line 426 of file wordrec.h.
// The search is finished once the language model has an acceptable choice,
// or once the number of futile classifications has reached
// segsearch_max_futile_classifications.
{
  if (language_model_->AcceptableChoiceFound())
    return true;
  return num_futile_classifications >= segsearch_max_futile_classifications;
}
int tesseract::Wordrec::select_blob_to_split(const GenericVector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
Definition at line 541 of file chopper.cpp.
// Chooses the index of the blob to split next.
//  - If any entry of blob_choices is NULL, the index of the first such entry
//    is returned immediately.
//  - Otherwise only blobs with rating < rating_ceiling and certainty <
//    tessedit_certainty_threshold are candidates, and the one with the
//    highest rating is the "worst".
//  - With split_next_to_fragment set, the worst candidate adjacent to a
//    fragment that can be expanded is preferred over the overall worst.
// Returns -1 when there is no candidate.
{
BLOB_CHOICE *blob_choice;
int x;
float worst = -MAX_FLOAT32;
int worst_index = -1;
float worst_near_fragment = -MAX_FLOAT32;
int worst_index_near_fragment = -1;
// fragments[i] is the fragment info of blob i's choice (NULL if the choice
// is missing or get_fragment returns NULL). Only allocated when
// split_next_to_fragment is set.
const CHAR_FRAGMENT **fragments = NULL;
if (chop_debug) {
if (rating_ceiling < MAX_FLOAT32)
tprintf("rating_ceiling = %8.4f\n", rating_ceiling);
else
tprintf("rating_ceiling = No Limit\n");
}
if (split_next_to_fragment && blob_choices.size() > 0) {
fragments = new const CHAR_FRAGMENT *[blob_choices.length()];
// Entry 0 is filled here; entry x + 1 is filled while visiting x below.
if (blob_choices[0] != NULL) {
fragments[0] = getDict().getUnicharset().get_fragment(
blob_choices[0]->unichar_id());
} else {
fragments[0] = NULL;
}
}
for (x = 0; x < blob_choices.size(); ++x) {
if (blob_choices[x] == NULL) {
// A blob without a choice is returned right away; release the scratch
// array first.
if (fragments != NULL) {
delete[] fragments;
}
return x;
} else {
blob_choice = blob_choices[x];
// Populate fragments for the following position.
if (split_next_to_fragment && x+1 < blob_choices.size()) {
if (blob_choices[x + 1] != NULL) {
fragments[x + 1] = getDict().getUnicharset().get_fragment(
blob_choices[x + 1]->unichar_id());
} else {
fragments[x + 1] = NULL;
}
}
if (blob_choice->rating() < rating_ceiling &&
blob_choice->certainty() < tessedit_certainty_threshold) {
// Update worst and worst_index.
if (blob_choice->rating() > worst) {
worst_index = x;
worst = blob_choice->rating();
}
if (split_next_to_fragment) {
// Update worst_near_fragment and worst_index_near_fragment.
// The next blob is a fragment that is not a beginning piece.
bool expand_following_fragment =
(x + 1 < blob_choices.size() &&
fragments[x+1] != NULL && !fragments[x+1]->is_beginning());
// The previous blob is a fragment that is not an ending piece.
bool expand_preceding_fragment =
(x > 0 && fragments[x-1] != NULL && !fragments[x-1]->is_ending());
if ((expand_following_fragment || expand_preceding_fragment) &&
blob_choice->rating() > worst_near_fragment) {
worst_index_near_fragment = x;
worst_near_fragment = blob_choice->rating();
if (chop_debug) {
tprintf("worst_index_near_fragment=%d"
" expand_following_fragment=%d"
" expand_preceding_fragment=%d\n",
worst_index_near_fragment,
expand_following_fragment,
expand_preceding_fragment);
}
}
}
}
}
}
if (fragments != NULL) {
delete[] fragments;
}
// TODO(daria): maybe a threshold of badness for
// worst_near_fragment would be useful.
return worst_index_near_fragment != -1 ?
worst_index_near_fragment : worst_index;
}
int tesseract::Wordrec::select_blob_to_split_from_fixpt(DANGERR *fixpt)
Definition at line 633 of file chopper.cpp.
// Scans the fixpt entries for the first one that covers exactly one blob
// (begin + 1 == end), is flagged dangerous and has correct_is_ngram set, and
// returns its begin index. Returns -1 if fixpt is NULL or nothing matches.
{
  if (!fixpt)
    return -1;
  for (int idx = 0; idx < fixpt->size(); idx++) {
    const bool covers_single_blob =
        (*fixpt)[idx].begin + 1 == (*fixpt)[idx].end;
    if (covers_single_blob &&
        (*fixpt)[idx].dangerous &&
        (*fixpt)[idx].correct_is_ngram) {
      return (*fixpt)[idx].begin;
    }
  }
  return -1;
}
void tesseract::Wordrec::set_pass1()
Definition at line 85 of file tface.cpp.
// Configures the recognizer for pass 1: sets chop_ok_split to 70.0, switches
// the language model's params model to PTRAIN_PASS1 and calls SettupPass1().
{
chop_ok_split.set_value(70.0);
language_model_->getParamsModel().SetPass(ParamsModel::PTRAIN_PASS1);
SettupPass1();
}
void tesseract::Wordrec::set_pass2()
Definition at line 97 of file tface.cpp.
// Configures the recognizer for pass 2: restores chop_ok_split from
// pass2_ok_split, switches the language model's params model to PTRAIN_PASS2
// and calls SettupPass2().
{
chop_ok_split.set_value(pass2_ok_split);
language_model_->getParamsModel().SetPass(ParamsModel::PTRAIN_PASS2);
SettupPass2();
}
void tesseract::Wordrec::try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition at line 297 of file findseam.cpp.
// Tries a split between every unordered pair of the given points and feeds
// each admissible split to choose_best_seam. A pair is admissible when the
// second point is non-NULL, the weighted distance is below
// chop_split_length, the points are not direct neighbours on the outline,
// and neither is_exterior_point check (in either argument order) fires.
{
  for (inT16 first = 0; first < num_points; first++) {
    for (inT16 second = first + 1; second < num_points; second++) {
      // The checks run in the same order as a single && chain would.
      if (!points[second])
        continue;
      if (!(points[first]->WeightedDistance(*points[second],
                                            chop_x_y_weight) <
            chop_split_length))
        continue;
      if (points[first] == points[second]->next ||
          points[second] == points[first]->next)
        continue;
      if (is_exterior_point(points[first], points[second]) ||
          is_exterior_point(points[second], points[first]))
        continue;

      SPLIT split(points[first], points[second]);
      const PRIORITY priority = partial_split_priority(&split);
      choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
    }
  }
}
void tesseract::Wordrec::try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition at line 335 of file findseam.cpp.
// For every given point, asks vertical_projection_point (once per outline of
// the blob) for a matching point, and if one is found that is not a direct
// outline neighbour and lies within chop_split_length (weighted distance),
// feeds the resulting split to choose_best_seam. new_points is passed
// through to vertical_projection_point.
{
  for (inT16 idx = 0; idx < num_points; idx++) {
    EDGEPT *projected = NULL;
    for (TESSLINE *outline = blob->outlines; outline;
         outline = outline->next) {
      vertical_projection_point(points[idx], outline->loop,
                                &projected, new_points);
    }

    // The checks run in the same order as a single && chain would.
    if (!projected)
      continue;
    if (points[idx] == projected->next || projected == points[idx]->next)
      continue;
    if (!(points[idx]->WeightedDistance(*projected, chop_x_y_weight) <
          chop_split_length))
      continue;

    SPLIT split(points[idx], projected);
    const PRIORITY priority = partial_split_priority(&split);
    choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
  }
}
void tesseract::Wordrec::UpdateSegSearchNodes(float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) [protected]
Definition at line 194 of file segsearch.cpp.
// Runs the language model over all pending columns from starting_col
// onwards, propagating changes to child columns, then generates new pain
// points from the best path (and from the ambiguity fixpts), and finally
// clears every pending entry and every viterbi entry's updated flag.
// Requires pending and best_choice_bundle->beam to have exactly one entry
// per ratings matrix column (asserted below).
{
MATRIX *ratings = word_res->ratings;
ASSERT_HOST(ratings->dimension() == pending->size());
ASSERT_HOST(ratings->dimension() == best_choice_bundle->beam.size());
for (int col = starting_col; col < ratings->dimension(); ++col) {
if (!(*pending)[col].WorkToDo()) continue;
// Default row range: from the diagonal to the end of the band, clipped to
// the matrix dimension.
int first_row = col;
int last_row = MIN(ratings->dimension() - 1,
col + ratings->bandwidth() - 1);
// A non-negative SingleRow() narrows the work to just that row.
if ((*pending)[col].SingleRow() >= 0) {
first_row = last_row = (*pending)[col].SingleRow();
}
if (segsearch_debug_level > 0) {
tprintf("\n\nUpdateSegSearchNodes: col=%d, rows=[%d,%d], alljust=%d\n",
col, first_row, last_row,
(*pending)[col].IsRowJustClassified(MAX_INT32));
}
// Iterate over the pending list for this column.
for (int row = first_row; row <= last_row; ++row) {
// Update language model state of this child+parent pair.
BLOB_CHOICE_LIST *current_node = ratings->get(col, row);
// Column 0 has no parent state.
LanguageModelState *parent_node =
col == 0 ? NULL : best_choice_bundle->beam[col - 1];
if (current_node != NULL &&
language_model_->UpdateState((*pending)[col].IsRowJustClassified(row),
col, row, current_node, parent_node,
pain_points, word_res,
best_choice_bundle, blamer_bundle) &&
row + 1 < ratings->dimension()) {
// Since the language model state of this entry changed, process all
// the child column.
(*pending)[row + 1].RevisitWholeColumn();
if (segsearch_debug_level > 0) {
tprintf("Added child col=%d to pending\n", row + 1);
}
} // end if UpdateState.
} // end for row.
} // end for col.
if (best_choice_bundle->best_vse != NULL) {
ASSERT_HOST(word_res->StatesAllValid());
// New pain points are only generated when the best entry was updated.
if (best_choice_bundle->best_vse->updated) {
pain_points->GenerateFromPath(rating_cert_scale,
best_choice_bundle->best_vse, word_res);
if (!best_choice_bundle->fixpt.empty()) {
pain_points->GenerateFromAmbigs(best_choice_bundle->fixpt,
best_choice_bundle->best_vse, word_res);
}
}
}
// The segsearch is completed. Reset all updated flags on all VSEs and reset
// all pendings.
for (int col = 0; col < pending->size(); ++col) {
(*pending)[col].Clear();
ViterbiStateEntry_IT
vse_it(&best_choice_bundle->beam[col]->viterbi_state_entries);
for (vse_it.mark_cycle_pt(); !vse_it.cycled_list(); vse_it.forward()) {
vse_it.data()->updated = false;
}
}
}
void tesseract::Wordrec::vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
Definition at line 278 of file chop.cpp.
{
if (((p->pos.x <= x && x <= p->next->pos.x) ||
(p->next->pos.x <= x && x <= p->pos.x)) &&
!same_point(split_point->pos, p->pos) &&
!same_point(split_point->pos, p->next->pos) &&
!p->IsChopPt() &&
(*best_point == NULL || !same_point((*best_point)->pos, p->pos))) {
if (near_point(split_point, p, p->next, &this_edgept)) {
new_point_it.add_before_then_move(this_edgept);
}
if (*best_point == NULL)
best_dist = edgept_dist (split_point, this_edgept);
this_edgept =
pick_close_point(split_point, this_edgept, &best_dist);
if (this_edgept)
*best_point = this_edgept;
}
p = p->next;
}
while (p != target_point);
}
} // namespace tesseract
void tesseract::Wordrec::WordSearch(WERD_RES *word_res)
Definition at line 130 of file segsearch.cpp.
// Runs only the initial segmentation search on the word's ratings matrix,
// using a locally created pain point heap, pending list and
// BestChoiceBundle, and no blamer (NULL). No chopping and no pain point loop
// is performed here.
{
LMPainPoints pain_points(segsearch_max_pain_points,
segsearch_max_char_wh_ratio,
assume_fixed_pitch_char_segment,
&getDict(), segsearch_debug_level);
GenericVector<SegSearchPending> pending;
// The bundle is sized to the ratings matrix dimension.
BestChoiceBundle best_choice_bundle(word_res->ratings->dimension());
// Run Segmentation Search.
InitialSegSearch(word_res, &pain_points, &pending, &best_choice_bundle, NULL);
if (segsearch_debug_level > 0) {
tprintf("Ending ratings matrix%s:\n",
wordrec_enable_assoc ? " (with assoc)" : "");
word_res->ratings->print(getDict().getUnicharset());
}
}
| bool tesseract::Wordrec::assume_fixed_pitch_char_segment = FALSE |
| double tesseract::Wordrec::chop_center_knob = 0.15 |
| int tesseract::Wordrec::chop_debug = 0 |
| bool tesseract::Wordrec::chop_enable = 1 |
| double tesseract::Wordrec::chop_good_split = 50.0 |
| int tesseract::Wordrec::chop_inside_angle = -50 |
| int tesseract::Wordrec::chop_min_outline_area = 2000 |
| double tesseract::Wordrec::chop_ok_split = 100.0 |
| double tesseract::Wordrec::chop_overlap_knob = 0.9 |
| int tesseract::Wordrec::chop_seam_pile_size = 150 |
| double tesseract::Wordrec::chop_sharpness_knob = 0.06 |
| double tesseract::Wordrec::chop_split_dist_knob = 0.5 |
| int tesseract::Wordrec::chop_split_length = 10000 |
| double tesseract::Wordrec::chop_width_change_knob = 5.0 |
| void(Wordrec::* tesseract::Wordrec::fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) |
| bool tesseract::Wordrec::force_word_assoc = FALSE |
| bool tesseract::Wordrec::fragments_guide_chopper = FALSE |
| bool tesseract::Wordrec::merge_fragments_in_matrix = TRUE |
| bool tesseract::Wordrec::save_alt_choices = true |
| double tesseract::Wordrec::segsearch_max_char_wh_ratio = 2.0 |
| double tesseract::Wordrec::tessedit_certainty_threshold = -2.25 |
| bool tesseract::Wordrec::wordrec_debug_blamer = false |
| bool tesseract::Wordrec::wordrec_enable_assoc = TRUE |
| bool tesseract::Wordrec::wordrec_no_block = FALSE |
| bool tesseract::Wordrec::wordrec_run_blamer = false |
| bool tesseract::Wordrec::wordrec_skip_no_truth_words = false |
| double tesseract::Wordrec::wordrec_worst_state = 1 |