tesseract  4.1.0
recodebeam.cpp
Go to the documentation of this file.
1 // File: recodebeam.cpp
3 // Description: Beam search to decode from the re-encoded CJK as a sequence of
4 // smaller numbers in place of a single large code.
5 // Author: Ray Smith
6 //
7 // (C) Copyright 2015, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #include "recodebeam.h"
21 #include "networkio.h"
22 #include "pageres.h"
23 #include "unicharcompress.h"
24 #include <deque>
25 #include <map>
26 #include <set>
27 #include <tuple>
28 #include <vector>
29 
30 #include <algorithm>
31 
32 namespace tesseract {
33 
34 // Clipping value for certainty inside Tesseract. Reflects the minimum value
35 // of certainty that will be returned by ExtractBestPathAsUnicharIds.
36 // Supposedly on a uniform scale that can be compared across languages and
37 // engines.
38 const float RecodeBeamSearch::kMinCertainty = -20.0f;
39 
40 // The beam width at each code position.
41 const int RecodeBeamSearch::kBeamWidths[RecodedCharID::kMaxCodeLen + 1] = {
42  5, 10, 16, 16, 16, 16, 16, 16, 16, 16,
43 };
44 
45 static const char* kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"};
46 
47 // Prints debug details of the node.
48 void RecodeNode::Print(int null_char, const UNICHARSET& unicharset,
49  int depth) const {
50  if (code == null_char) {
51  tprintf("null_char");
52  } else {
53  tprintf("label=%d, uid=%d=%s", code, unichar_id,
54  unicharset.debug_str(unichar_id).string());
55  }
56  tprintf(" score=%g, c=%g,%s%s%s perm=%d, hash=%llx", score, certainty,
57  start_of_dawg ? " DawgStart" : "", start_of_word ? " Start" : "",
58  end_of_word ? " End" : "", permuter, code_hash);
59  if (depth > 0 && prev != nullptr) {
60  tprintf(" prev:");
61  prev->Print(null_char, unicharset, depth - 1);
62  } else {
63  tprintf("\n");
64  }
65 }
66 
67 // Borrows the pointer, which is expected to survive until *this is deleted.
69  int null_char, bool simple_text, Dict* dict)
70  : recoder_(recoder),
71  beam_size_(0),
72  top_code_(-1),
73  second_code_(-1),
74  dict_(dict),
75  space_delimited_(true),
76  is_simple_text_(simple_text),
77  null_char_(null_char) {
78  if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) space_delimited_ = false;
79 }
80 
81 // Decodes the set of network outputs, storing the lattice internally.
82 void RecodeBeamSearch::Decode(const NetworkIO& output, double dict_ratio,
83  double cert_offset, double worst_dict_cert,
84  const UNICHARSET* charset, int lstm_choice_mode) {
85  beam_size_ = 0;
86  int width = output.Width();
87  if (lstm_choice_mode)
88  timesteps.clear();
89  for (int t = 0; t < width; ++t) {
90  ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
91  DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
92  charset);
93  if (lstm_choice_mode) {
94  SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
95  }
96  }
97 }
99  double dict_ratio, double cert_offset,
100  double worst_dict_cert,
101  const UNICHARSET* charset) {
102  beam_size_ = 0;
103  int width = output.dim1();
104  for (int t = 0; t < width; ++t) {
105  ComputeTopN(output[t], output.dim2(), kBeamWidths[0]);
106  DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
107  }
108 }
109 
110 void RecodeBeamSearch::SaveMostCertainChoices(const float* outputs,
111  int num_outputs,
112  const UNICHARSET* charset,
113  int xCoord) {
114  std::vector<std::pair<const char*, float>> choices;
115  for (int i = 0; i < num_outputs; ++i) {
116  if (outputs[i] >= 0.01f) {
117  const char* character;
118  if (i + 2 >= num_outputs) {
119  character = "";
120  } else if (i > 0) {
121  character = charset->id_to_unichar_ext(i + 2);
122  } else {
123  character = charset->id_to_unichar_ext(i);
124  }
125  size_t pos = 0;
126  //order the possible choices within one timestep
127  //beginning with the most likely
128  while (choices.size() > pos && choices[pos].second > outputs[i]) {
129  pos++;
130  }
131  choices.insert(choices.begin() + pos,
132  std::pair<const char*, float>(character, outputs[i]));
133  }
134  }
135  timesteps.push_back(choices);
136 }
137 
138 // Returns the best path as labels/scores/xcoords similar to simple CTC.
140  GenericVector<int>* labels, GenericVector<int>* xcoords) const {
141  labels->truncate(0);
142  xcoords->truncate(0);
144  ExtractBestPaths(&best_nodes, nullptr);
145  // Now just run CTC on the best nodes.
146  int t = 0;
147  int width = best_nodes.size();
148  while (t < width) {
149  int label = best_nodes[t]->code;
150  if (label != null_char_) {
151  labels->push_back(label);
152  xcoords->push_back(t);
153  }
154  while (++t < width && !is_simple_text_ && best_nodes[t]->code == label) {
155  }
156  }
157  xcoords->push_back(width);
158 }
159 
160 // Returns the best path as unichar-ids/certs/ratings/xcoords skipping
161 // duplicates, nulls and intermediate parts.
163  bool debug, const UNICHARSET* unicharset, GenericVector<int>* unichar_ids,
165  GenericVector<int>* xcoords) const {
167  ExtractBestPaths(&best_nodes, nullptr);
168  ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
169  if (debug) {
170  DebugPath(unicharset, best_nodes);
171  DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings,
172  *xcoords);
173  }
174 }
175 
176 // Returns the best path as a set of WERD_RES.
178  float scale_factor, bool debug,
179  const UNICHARSET* unicharset,
181  int lstm_choice_mode) {
182  words->truncate(0);
183  GenericVector<int> unichar_ids;
184  GenericVector<float> certs;
185  GenericVector<float> ratings;
186  GenericVector<int> xcoords;
189  std::deque<std::tuple<int, int>> best_choices;
190  ExtractBestPaths(&best_nodes, &second_nodes);
191  if (debug) {
192  DebugPath(unicharset, best_nodes);
193  ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
194  &xcoords);
195  tprintf("\nSecond choice path:\n");
196  DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
197  xcoords);
198  }
199  int timestepEnd= 0;
200  //if lstm choice mode is required in granularity level 2 it stores the x
201  //Coordinates of every chosen character to match the alternative choices to it
202  if (lstm_choice_mode == 2) {
203  ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
204  &xcoords, &best_choices);
205  if (best_choices.size() > 0) {
206  timestepEnd = std::get<1>(best_choices.front());
207  best_choices.pop_front();
208  }
209  } else {
210  ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
211  &xcoords);
212  }
213  int num_ids = unichar_ids.size();
214  if (debug) {
215  DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
216  xcoords);
217  }
218  // Convert labels to unichar-ids.
219  int word_end = 0;
220  float prev_space_cert = 0.0f;
221  for (int word_start = 0; word_start < num_ids; word_start = word_end) {
222  for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
223  // A word is terminated when a space character or start_of_word flag is
224  // hit. We also want to force a separate word for every non
225  // space-delimited character when not in a dictionary context.
226  if (unichar_ids[word_end] == UNICHAR_SPACE) break;
227  int index = xcoords[word_end];
228  if (best_nodes[index]->start_of_word) break;
229  if (best_nodes[index]->permuter == TOP_CHOICE_PERM &&
230  (!unicharset->IsSpaceDelimited(unichar_ids[word_end]) ||
231  !unicharset->IsSpaceDelimited(unichar_ids[word_end - 1])))
232  break;
233  }
234  float space_cert = 0.0f;
235  if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
236  space_cert = certs[word_end];
237  bool leading_space =
238  word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
239  // Create a WERD_RES for the output word.
240  WERD_RES* word_res = InitializeWord(
241  leading_space, line_box, word_start, word_end,
242  std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
243  if (lstm_choice_mode == 1) {
244  for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
245  word_res->timesteps.push_back(timesteps[i]);
246  }
247  timestepEnd = xcoords[word_end];
248  } else if (lstm_choice_mode == 2){
249  // Accumulated Timesteps (choice mode 2 processing)
250  float sum = 0;
251  std::vector<std::pair<const char*, float>> choice_pairs;
252  for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
253  for (std::pair<const char*, float> choice : timesteps[i]) {
254  if (std::strcmp(choice.first, "")) {
255  sum += choice.second;
256  choice_pairs.push_back(choice);
257  }
258  }
259  if ((best_choices.size() > 0 && i == std::get<1>(best_choices.front()) - 1)
260  || i == xcoords[word_end]-1) {
261  std::map<const char*, float> summed_propabilities;
262  for (auto & choice_pair : choice_pairs) {
263  summed_propabilities[choice_pair.first] += choice_pair.second;
264  }
265  std::vector<std::pair<const char*, float>> accumulated_timestep;
266  for (auto& summed_propability : summed_propabilities) {
267  if(sum == 0) break;
268  summed_propability.second/=sum;
269  size_t pos = 0;
270  while (accumulated_timestep.size() > pos
271  && accumulated_timestep[pos].second > summed_propability.second) {
272  pos++;
273  }
274  accumulated_timestep.insert(accumulated_timestep.begin() + pos,
275  std::pair<const char*,float>(summed_propability.first,
276  summed_propability.second));
277  }
278  if (best_choices.size() > 0) {
279  best_choices.pop_front();
280  }
281  choice_pairs.clear();
282  word_res->timesteps.push_back(accumulated_timestep);
283  sum = 0;
284  }
285  }
286  timestepEnd = xcoords[word_end];
287  }
288  for (int i = word_start; i < word_end; ++i) {
289  auto* choices = new BLOB_CHOICE_LIST;
290  BLOB_CHOICE_IT bc_it(choices);
291  auto* choice = new BLOB_CHOICE(
292  unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
293  static_cast<float>(INT16_MAX), 0.0f, BCC_STATIC_CLASSIFIER);
294  int col = i - word_start;
295  choice->set_matrix_cell(col, col);
296  bc_it.add_after_then_move(choice);
297  word_res->ratings->put(col, col, choices);
298  }
299  int index = xcoords[word_end - 1];
300  word_res->FakeWordFromRatings(best_nodes[index]->permuter);
301  words->push_back(word_res);
302  prev_space_cert = space_cert;
303  if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
304  ++word_end;
305  }
306 }
307 
308 // Generates debug output of the content of the beams after a Decode.
309 void RecodeBeamSearch::DebugBeams(const UNICHARSET& unicharset) const {
310  for (int p = 0; p < beam_size_; ++p) {
311  for (int d = 0; d < 2; ++d) {
312  for (int c = 0; c < NC_COUNT; ++c) {
313  auto cont = static_cast<NodeContinuation>(c);
314  int index = BeamIndex(d, cont, 0);
315  if (beam_[p]->beams_[index].empty()) continue;
316  // Print all the best scoring nodes for each unichar found.
317  tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict",
318  kNodeContNames[c]);
319  DebugBeamPos(unicharset, beam_[p]->beams_[index]);
320  }
321  }
322  }
323 }
324 
325 // Generates debug output of the content of a single beam position.
326 void RecodeBeamSearch::DebugBeamPos(const UNICHARSET& unicharset,
327  const RecodeHeap& heap) const {
328  GenericVector<const RecodeNode*> unichar_bests;
329  unichar_bests.init_to_size(unicharset.size(), nullptr);
330  const RecodeNode* null_best = nullptr;
331  int heap_size = heap.size();
332  for (int i = 0; i < heap_size; ++i) {
333  const RecodeNode* node = &heap.get(i).data;
334  if (node->unichar_id == INVALID_UNICHAR_ID) {
335  if (null_best == nullptr || null_best->score < node->score) null_best = node;
336  } else {
337  if (unichar_bests[node->unichar_id] == nullptr ||
338  unichar_bests[node->unichar_id]->score < node->score) {
339  unichar_bests[node->unichar_id] = node;
340  }
341  }
342  }
343  for (int u = 0; u < unichar_bests.size(); ++u) {
344  if (unichar_bests[u] != nullptr) {
345  const RecodeNode& node = *unichar_bests[u];
346  node.Print(null_char_, unicharset, 1);
347  }
348  }
349  if (null_best != nullptr) {
350  null_best->Print(null_char_, unicharset, 1);
351  }
352 }
353 
354 // Returns the given best_nodes as unichar-ids/certs/ratings/xcoords skipping
355 // duplicates, nulls and intermediate parts.
356 /* static */
357 void RecodeBeamSearch::ExtractPathAsUnicharIds(
358  const GenericVector<const RecodeNode*>& best_nodes,
359  GenericVector<int>* unichar_ids, GenericVector<float>* certs,
360  GenericVector<float>* ratings, GenericVector<int>* xcoords,
361  std::deque<std::tuple<int, int>>* best_choices) {
362  unichar_ids->truncate(0);
363  certs->truncate(0);
364  ratings->truncate(0);
365  xcoords->truncate(0);
366  // Backtrack extracting only valid, non-duplicate unichar-ids.
367  int t = 0;
368  int width = best_nodes.size();
369  while (t < width) {
370  int id;
371  int tposition;
372  double certainty = 0.0;
373  double rating = 0.0;
374  while (t < width && best_nodes[t]->unichar_id == INVALID_UNICHAR_ID) {
375  double cert = best_nodes[t++]->certainty;
376  if (cert < certainty) certainty = cert;
377  rating -= cert;
378  }
379  if (t < width) {
380  int unichar_id = best_nodes[t]->unichar_id;
381  if (unichar_id == UNICHAR_SPACE && !certs->empty() &&
382  best_nodes[t]->permuter != NO_PERM) {
383  // All the rating and certainty go on the previous character except
384  // for the space itself.
385  if (certainty < certs->back()) certs->back() = certainty;
386  ratings->back() += rating;
387  certainty = 0.0;
388  rating = 0.0;
389  }
390  unichar_ids->push_back(unichar_id);
391  xcoords->push_back(t);
392  if (best_choices != nullptr) {
393  tposition = t;
394  id = unichar_id;
395  }
396  do {
397  double cert = best_nodes[t++]->certainty;
398  // Special-case NO-PERM space to forget the certainty of the previous
399  // nulls. See long comment in ContinueContext.
400  if (cert < certainty || (unichar_id == UNICHAR_SPACE &&
401  best_nodes[t - 1]->permuter == NO_PERM)) {
402  certainty = cert;
403  }
404  rating -= cert;
405  } while (t < width && best_nodes[t]->duplicate);
406  certs->push_back(certainty);
407  ratings->push_back(rating);
408  } else if (!certs->empty()) {
409  if (certainty < certs->back()) certs->back() = certainty;
410  ratings->back() += rating;
411  }
412  if (best_choices != nullptr) {
413  best_choices->push_back(
414  std::tuple<int, int>(id, tposition));
415  }
416  }
417  xcoords->push_back(width);
418 }
419 
420 // Sets up a word with the ratings matrix and fake blobs with boxes in the
421 // right places.
422 WERD_RES* RecodeBeamSearch::InitializeWord(bool leading_space,
423  const TBOX& line_box, int word_start,
424  int word_end, float space_certainty,
425  const UNICHARSET* unicharset,
426  const GenericVector<int>& xcoords,
427  float scale_factor) {
428  // Make a fake blob for each non-zero label.
429  C_BLOB_LIST blobs;
430  C_BLOB_IT b_it(&blobs);
431  for (int i = word_start; i < word_end; ++i) {
432  int min_half_width = xcoords[i + 1] - xcoords[i];
433  if (i > 0 && xcoords[i] - xcoords[i - 1] < min_half_width)
434  min_half_width = xcoords[i] - xcoords[i - 1];
435  if (min_half_width < 1) min_half_width = 1;
436  // Make a fake blob.
437  TBOX box(xcoords[i] - min_half_width, 0, xcoords[i] + min_half_width,
438  line_box.height());
439  box.scale(scale_factor);
440  box.move(ICOORD(line_box.left(), line_box.bottom()));
441  box.set_top(line_box.top());
442  b_it.add_after_then_move(C_BLOB::FakeBlob(box));
443  }
444  // Make a fake word from the blobs.
445  WERD* word = new WERD(&blobs, leading_space, nullptr);
446  // Make a WERD_RES from the word.
447  auto* word_res = new WERD_RES(word);
448  word_res->uch_set = unicharset;
449  word_res->combination = true; // Give it ownership of the word.
450  word_res->space_certainty = space_certainty;
451  word_res->ratings = new MATRIX(word_end - word_start, 1);
452  return word_res;
453 }
454 
455 // Fills top_n_flags_ with bools that are true iff the corresponding output
456 // is one of the top_n.
457 void RecodeBeamSearch::ComputeTopN(const float* outputs, int num_outputs,
458  int top_n) {
459  top_n_flags_.init_to_size(num_outputs, TN_ALSO_RAN);
460  top_code_ = -1;
461  second_code_ = -1;
462  top_heap_.clear();
463  for (int i = 0; i < num_outputs; ++i) {
464  if (top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key) {
465  TopPair entry(outputs[i], i);
466  top_heap_.Push(&entry);
467  if (top_heap_.size() > top_n) top_heap_.Pop(&entry);
468  }
469  }
470  while (!top_heap_.empty()) {
471  TopPair entry;
472  top_heap_.Pop(&entry);
473  if (top_heap_.size() > 1) {
474  top_n_flags_[entry.data] = TN_TOPN;
475  } else {
476  top_n_flags_[entry.data] = TN_TOP2;
477  if (top_heap_.empty())
478  top_code_ = entry.data;
479  else
480  second_code_ = entry.data;
481  }
482  }
483  top_n_flags_[null_char_] = TN_TOP2;
484 }
485 
486 // Adds the computation for the current time-step to the beam. Call at each
487 // time-step in sequence from left to right. outputs is the activation vector
488 // for the current timestep.
489 void RecodeBeamSearch::DecodeStep(const float* outputs, int t,
490  double dict_ratio, double cert_offset,
491  double worst_dict_cert,
492  const UNICHARSET* charset, bool debug) {
493  if (t == beam_.size()) beam_.push_back(new RecodeBeam);
494  RecodeBeam* step = beam_[t];
495  beam_size_ = t + 1;
496  step->Clear();
497  if (t == 0) {
498  // The first step can only use singles and initials.
499  ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2,
500  charset, dict_ratio, cert_offset, worst_dict_cert, step);
501  if (dict_ != nullptr) {
502  ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, TN_TOP2,
503  charset, dict_ratio, cert_offset, worst_dict_cert, step);
504  }
505  } else {
506  RecodeBeam* prev = beam_[t - 1];
507  if (debug) {
508  int beam_index = BeamIndex(true, NC_ANYTHING, 0);
509  for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
511  ExtractPath(&prev->beams_[beam_index].get(i).data, &path);
512  tprintf("Step %d: Dawg beam %d:\n", t, i);
513  DebugPath(charset, path);
514  }
515  beam_index = BeamIndex(false, NC_ANYTHING, 0);
516  for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
518  ExtractPath(&prev->beams_[beam_index].get(i).data, &path);
519  tprintf("Step %d: Non-Dawg beam %d:\n", t, i);
520  DebugPath(charset, path);
521  }
522  }
523  int total_beam = 0;
524  // Work through the scores by group (top-2, top-n, the rest) while the beam
525  // is empty. This enables extending the context using only the top-n results
526  // first, which may have an empty intersection with the valid codes, so we
527  // fall back to the rest if the beam is empty.
528  for (int tn = 0; tn < TN_COUNT && total_beam == 0; ++tn) {
529  auto top_n = static_cast<TopNState>(tn);
530  for (int index = 0; index < kNumBeams; ++index) {
531  // Working backwards through the heaps doesn't guarantee that we see the
532  // best first, but it comes before a lot of the worst, so it is slightly
533  // more efficient than going forwards.
534  for (int i = prev->beams_[index].size() - 1; i >= 0; --i) {
535  ContinueContext(&prev->beams_[index].get(i).data, index, outputs, top_n,
536  charset, dict_ratio, cert_offset, worst_dict_cert, step);
537  }
538  }
539  for (int index = 0; index < kNumBeams; ++index) {
541  total_beam += step->beams_[index].size();
542  }
543  }
544  // Special case for the best initial dawg. Push it on the heap if good
545  // enough, but there is only one, so it doesn't blow up the beam.
546  for (int c = 0; c < NC_COUNT; ++c) {
547  if (step->best_initial_dawgs_[c].code >= 0) {
548  int index = BeamIndex(true, static_cast<NodeContinuation>(c), 0);
549  RecodeHeap* dawg_heap = &step->beams_[index];
550  PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c],
551  dawg_heap);
552  }
553  }
554  }
555 }
556 
557 // Adds to the appropriate beams the legal (according to recoder)
558 // continuations of context prev, which is of the given length, using the
559 // given network outputs to provide scores to the choices. Uses only those
560 // choices for which top_n_flags[index] == top_n_flag.
561 void RecodeBeamSearch::ContinueContext(const RecodeNode* prev, int index,
562  const float* outputs,
563  TopNState top_n_flag,
564  const UNICHARSET* charset,
565  double dict_ratio,
566  double cert_offset,
567  double worst_dict_cert,
568  RecodeBeam* step) {
569  RecodedCharID prefix;
570  RecodedCharID full_code;
571  const RecodeNode* previous = prev;
572  int length = LengthFromBeamsIndex(index);
573  bool use_dawgs = IsDawgFromBeamsIndex(index);
574  NodeContinuation prev_cont = ContinuationFromBeamsIndex(index);
575  for (int p = length - 1; p >= 0; --p, previous = previous->prev) {
576  while (previous != nullptr &&
577  (previous->duplicate || previous->code == null_char_)) {
578  previous = previous->prev;
579  }
580  if (previous != nullptr) {
581  prefix.Set(p, previous->code);
582  full_code.Set(p, previous->code);
583  }
584  }
585  if (prev != nullptr && !is_simple_text_) {
586  if (top_n_flags_[prev->code] == top_n_flag) {
587  if (prev_cont != NC_NO_DUP) {
588  float cert =
589  NetworkIO::ProbToCertainty(outputs[prev->code]) + cert_offset;
590  PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id,
591  cert, worst_dict_cert, dict_ratio, use_dawgs,
592  NC_ANYTHING, prev, step);
593  }
594  if (prev_cont == NC_ANYTHING && top_n_flag == TN_TOP2 &&
595  prev->code != null_char_) {
596  float cert = NetworkIO::ProbToCertainty(outputs[prev->code] +
597  outputs[null_char_]) +
598  cert_offset;
599  PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id,
600  cert, worst_dict_cert, dict_ratio, use_dawgs,
601  NC_NO_DUP, prev, step);
602  }
603  }
604  if (prev_cont == NC_ONLY_DUP) return;
605  if (prev->code != null_char_ && length > 0 &&
606  top_n_flags_[null_char_] == top_n_flag) {
607  // Allow nulls within multi code sequences, as the nulls within are not
608  // explicitly included in the code sequence.
609  float cert =
610  NetworkIO::ProbToCertainty(outputs[null_char_]) + cert_offset;
611  PushDupOrNoDawgIfBetter(length, false, null_char_, INVALID_UNICHAR_ID,
612  cert, worst_dict_cert, dict_ratio, use_dawgs,
613  NC_ANYTHING, prev, step);
614  }
615  }
616  const GenericVector<int>* final_codes = recoder_.GetFinalCodes(prefix);
617  if (final_codes != nullptr) {
618  for (int i = 0; i < final_codes->size(); ++i) {
619  int code = (*final_codes)[i];
620  if (top_n_flags_[code] != top_n_flag) continue;
621  if (prev != nullptr && prev->code == code && !is_simple_text_) continue;
622  float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset;
623  if (cert < kMinCertainty && code != null_char_) continue;
624  full_code.Set(length, code);
625  int unichar_id = recoder_.DecodeUnichar(full_code);
626  // Map the null char to INVALID.
627  if (length == 0 && code == null_char_) unichar_id = INVALID_UNICHAR_ID;
628  if (unichar_id != INVALID_UNICHAR_ID &&
629  charset != nullptr &&
630  !charset->get_enabled(unichar_id))
631  continue; // disabled by whitelist/blacklist
632  ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
633  use_dawgs, NC_ANYTHING, prev, step);
634  if (top_n_flag == TN_TOP2 && code != null_char_) {
635  float prob = outputs[code] + outputs[null_char_];
636  if (prev != nullptr && prev_cont == NC_ANYTHING &&
637  prev->code != null_char_ &&
638  ((prev->code == top_code_ && code == second_code_) ||
639  (code == top_code_ && prev->code == second_code_))) {
640  prob += outputs[prev->code];
641  }
642  float cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
643  ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
644  use_dawgs, NC_ONLY_DUP, prev, step);
645  }
646  }
647  }
648  const GenericVector<int>* next_codes = recoder_.GetNextCodes(prefix);
649  if (next_codes != nullptr) {
650  for (int i = 0; i < next_codes->size(); ++i) {
651  int code = (*next_codes)[i];
652  if (top_n_flags_[code] != top_n_flag) continue;
653  if (prev != nullptr && prev->code == code && !is_simple_text_) continue;
654  float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset;
655  PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert,
656  worst_dict_cert, dict_ratio, use_dawgs,
657  NC_ANYTHING, prev, step);
658  if (top_n_flag == TN_TOP2 && code != null_char_) {
659  float prob = outputs[code] + outputs[null_char_];
660  if (prev != nullptr && prev_cont == NC_ANYTHING &&
661  prev->code != null_char_ &&
662  ((prev->code == top_code_ && code == second_code_) ||
663  (code == top_code_ && prev->code == second_code_))) {
664  prob += outputs[prev->code];
665  }
666  float cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
667  PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID,
668  cert, worst_dict_cert, dict_ratio, use_dawgs,
669  NC_ONLY_DUP, prev, step);
670  }
671  }
672  }
673 }
674 
675 // Continues for a new unichar, using dawg or non-dawg as per flag.
676 void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert,
677  float worst_dict_cert, float dict_ratio,
678  bool use_dawgs, NodeContinuation cont,
679  const RecodeNode* prev,
680  RecodeBeam* step) {
681  if (use_dawgs) {
682  if (cert > worst_dict_cert) {
683  ContinueDawg(code, unichar_id, cert, cont, prev, step);
684  }
685  } else {
686  RecodeHeap* nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
687  PushHeapIfBetter(kBeamWidths[0], code, unichar_id, TOP_CHOICE_PERM, false,
688  false, false, false, cert * dict_ratio, prev, nullptr,
689  nodawg_heap);
690  if (dict_ != nullptr &&
691  ((unichar_id == UNICHAR_SPACE && cert > worst_dict_cert) ||
692  !dict_->getUnicharset().IsSpaceDelimited(unichar_id))) {
693  // Any top choice position that can start a new word, ie a space or
694  // any non-space-delimited character, should also be considered
695  // by the dawg search, so push initial dawg to the dawg heap.
696  float dawg_cert = cert;
697  PermuterType permuter = TOP_CHOICE_PERM;
698  // Since we use the space either side of a dictionary word in the
699  // certainty of the word, (to properly handle weak spaces) and the
700  // space is coming from a non-dict word, we need special conditions
701  // to avoid degrading the certainty of the dict word that follows.
702  // With a space we don't multiply the certainty by dict_ratio, and we
703  // flag the space with NO_PERM to indicate that we should not use the
704  // predecessor nulls to generate the confidence for the space, as they
705  // have already been multiplied by dict_ratio, and we can't go back to
706  // insert more entries in any previous heaps.
707  if (unichar_id == UNICHAR_SPACE)
708  permuter = NO_PERM;
709  else
710  dawg_cert *= dict_ratio;
711  PushInitialDawgIfBetter(code, unichar_id, permuter, false, false,
712  dawg_cert, cont, prev, step);
713  }
714  }
715 }
716 
717 // Adds a RecodeNode composed of the tuple (code, unichar_id, cert, prev,
718 // appropriate-dawg-args, cert) to the given heap (dawg_beam_) if unichar_id
719 // is a valid continuation of whatever is in prev.
720 void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert,
721  NodeContinuation cont,
722  const RecodeNode* prev, RecodeBeam* step) {
723  RecodeHeap* dawg_heap = &step->beams_[BeamIndex(true, cont, 0)];
724  RecodeHeap* nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
725  if (unichar_id == INVALID_UNICHAR_ID) {
726  PushHeapIfBetter(kBeamWidths[0], code, unichar_id, NO_PERM, false, false,
727  false, false, cert, prev, nullptr, dawg_heap);
728  return;
729  }
730  // Avoid dictionary probe if score a total loss.
731  float score = cert;
732  if (prev != nullptr) score += prev->score;
733  if (dawg_heap->size() >= kBeamWidths[0] &&
734  score <= dawg_heap->PeekTop().data.score &&
735  nodawg_heap->size() >= kBeamWidths[0] &&
736  score <= nodawg_heap->PeekTop().data.score) {
737  return;
738  }
739  const RecodeNode* uni_prev = prev;
740  // Prev may be a partial code, null_char, or duplicate, so scan back to the
741  // last valid unichar_id.
742  while (uni_prev != nullptr &&
743  (uni_prev->unichar_id == INVALID_UNICHAR_ID || uni_prev->duplicate))
744  uni_prev = uni_prev->prev;
745  if (unichar_id == UNICHAR_SPACE) {
746  if (uni_prev != nullptr && uni_prev->end_of_word) {
747  // Space is good. Push initial state, to the dawg beam and a regular
748  // space to the top choice beam.
749  PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter, false,
750  false, cert, cont, prev, step);
751  PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter,
752  false, false, false, false, cert, prev, nullptr,
753  nodawg_heap);
754  }
755  return;
756  } else if (uni_prev != nullptr && uni_prev->start_of_dawg &&
757  uni_prev->unichar_id != UNICHAR_SPACE &&
758  dict_->getUnicharset().IsSpaceDelimited(uni_prev->unichar_id) &&
759  dict_->getUnicharset().IsSpaceDelimited(unichar_id)) {
760  return; // Can't break words between space delimited chars.
761  }
762  DawgPositionVector initial_dawgs;
763  auto* updated_dawgs = new DawgPositionVector;
764  DawgArgs dawg_args(&initial_dawgs, updated_dawgs, NO_PERM);
765  bool word_start = false;
766  if (uni_prev == nullptr) {
767  // Starting from beginning of line.
768  dict_->default_dawgs(&initial_dawgs, false);
769  word_start = true;
770  } else if (uni_prev->dawgs != nullptr) {
771  // Continuing a previous dict word.
772  dawg_args.active_dawgs = uni_prev->dawgs;
773  word_start = uni_prev->start_of_dawg;
774  } else {
775  return; // Can't continue if not a dict word.
776  }
777  auto permuter = static_cast<PermuterType>(
778  dict_->def_letter_is_okay(&dawg_args,
779  dict_->getUnicharset(), unichar_id, false));
780  if (permuter != NO_PERM) {
781  PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false,
782  word_start, dawg_args.valid_end, false, cert, prev,
783  dawg_args.updated_dawgs, dawg_heap);
784  if (dawg_args.valid_end && !space_delimited_) {
785  // We can start another word right away, so push initial state as well,
786  // to the dawg beam, and the regular character to the top choice beam,
787  // since non-dict words can start here too.
788  PushInitialDawgIfBetter(code, unichar_id, permuter, word_start, true,
789  cert, cont, prev, step);
790  PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false,
791  word_start, true, false, cert, prev, nullptr, nodawg_heap);
792  }
793  } else {
794  delete updated_dawgs;
795  }
796 }
797 
798 // Adds a RecodeNode composed of the tuple (code, unichar_id,
799 // initial-dawg-state, prev, cert) to the given heap if/ there is room or if
800 // better than the current worst element if already full.
801 void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id,
802  PermuterType permuter,
803  bool start, bool end, float cert,
804  NodeContinuation cont,
805  const RecodeNode* prev,
806  RecodeBeam* step) {
807  RecodeNode* best_initial_dawg = &step->best_initial_dawgs_[cont];
808  float score = cert;
809  if (prev != nullptr) score += prev->score;
810  if (best_initial_dawg->code < 0 || score > best_initial_dawg->score) {
811  auto* initial_dawgs = new DawgPositionVector;
812  dict_->default_dawgs(initial_dawgs, false);
813  RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert,
814  score, prev, initial_dawgs,
815  ComputeCodeHash(code, false, prev));
816  *best_initial_dawg = node;
817  }
818 }
819 
820 // Adds a RecodeNode composed of the tuple (code, unichar_id, permuter,
821 // false, false, false, false, cert, prev, nullptr) to heap if there is room
822 // or if better than the current worst element if already full.
823 /* static */
824 void RecodeBeamSearch::PushDupOrNoDawgIfBetter(
825  int length, bool dup, int code, int unichar_id, float cert,
826  float worst_dict_cert, float dict_ratio, bool use_dawgs,
827  NodeContinuation cont, const RecodeNode* prev, RecodeBeam* step) {
828  int index = BeamIndex(use_dawgs, cont, length);
829  if (use_dawgs) {
830  if (cert > worst_dict_cert) {
831  PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
832  prev ? prev->permuter : NO_PERM, false, false, false,
833  dup, cert, prev, nullptr, &step->beams_[index]);
834  }
835  } else {
836  cert *= dict_ratio;
837  if (cert >= kMinCertainty || code == null_char_) {
838  PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
839  prev ? prev->permuter : TOP_CHOICE_PERM, false, false,
840  false, dup, cert, prev, nullptr, &step->beams_[index]);
841  }
842  }
843 }
844 
845 // Adds a RecodeNode composed of the tuple (code, unichar_id, permuter,
846 // dawg_start, word_start, end, dup, cert, prev, d) to heap if there is room
847 // or if better than the current worst element if already full.
848 void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
849  PermuterType permuter, bool dawg_start,
850  bool word_start, bool end, bool dup,
851  float cert, const RecodeNode* prev,
853  RecodeHeap* heap) {
854  float score = cert;
855  if (prev != nullptr) score += prev->score;
856  if (heap->size() < max_size || score > heap->PeekTop().data.score) {
857  uint64_t hash = ComputeCodeHash(code, dup, prev);
858  RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end,
859  dup, cert, score, prev, d, hash);
860  if (UpdateHeapIfMatched(&node, heap)) return;
861  RecodePair entry(score, node);
862  heap->Push(&entry);
863  ASSERT_HOST(entry.data.dawgs == nullptr);
864  if (heap->size() > max_size) heap->Pop(&entry);
865  } else {
866  delete d;
867  }
868 }
869 
870 // Adds a RecodeNode to heap if there is room
871 // or if better than the current worst element if already full.
872 void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode* node,
873  RecodeHeap* heap) {
874  if (heap->size() < max_size || node->score > heap->PeekTop().data.score) {
875  if (UpdateHeapIfMatched(node, heap)) {
876  return;
877  }
878  RecodePair entry(node->score, *node);
879  heap->Push(&entry);
880  ASSERT_HOST(entry.data.dawgs == nullptr);
881  if (heap->size() > max_size) heap->Pop(&entry);
882  }
883 }
884 
885 // Searches the heap for a matching entry, and updates the score with
886 // reshuffle if needed. Returns true if there was a match.
887 bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode* new_node,
888  RecodeHeap* heap) {
889  // TODO(rays) consider hash map instead of linear search.
890  // It might not be faster because the hash map would have to be updated
891  // every time a heap reshuffle happens, and that would be a lot of overhead.
892  GenericVector<RecodePair>* nodes = heap->heap();
893  for (int i = 0; i < nodes->size(); ++i) {
894  RecodeNode& node = (*nodes)[i].data;
895  if (node.code == new_node->code && node.code_hash == new_node->code_hash &&
896  node.permuter == new_node->permuter &&
897  node.start_of_dawg == new_node->start_of_dawg) {
898  if (new_node->score > node.score) {
899  // The new one is better. Update the entire node in the heap and
900  // reshuffle.
901  node = *new_node;
902  (*nodes)[i].key = node.score;
903  heap->Reshuffle(&(*nodes)[i]);
904  }
905  return true;
906  }
907  }
908  return false;
909 }
910 
911 // Computes and returns the code-hash for the given code and prev.
912 uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup,
913  const RecodeNode* prev) const {
914  uint64_t hash = prev == nullptr ? 0 : prev->code_hash;
915  if (!dup && code != null_char_) {
916  int num_classes = recoder_.code_range();
917  uint64_t carry = (((hash >> 32) * num_classes) >> 32);
918  hash *= num_classes;
919  hash += carry;
920  hash += code;
921  }
922  return hash;
923 }
924 
925 // Backtracks to extract the best path through the lattice that was built
926 // during Decode. On return the best_nodes vector essentially contains the set
927 // of code, score pairs that make the optimal path with the constraint that
928 // the recoder can decode the code sequence back to a sequence of unichar-ids.
929 void RecodeBeamSearch::ExtractBestPaths(
931  GenericVector<const RecodeNode*>* second_nodes) const {
932  // Scan both beams to extract the best and second best paths.
933  const RecodeNode* best_node = nullptr;
934  const RecodeNode* second_best_node = nullptr;
935  const RecodeBeam* last_beam = beam_[beam_size_ - 1];
936  for (int c = 0; c < NC_COUNT; ++c) {
937  if (c == NC_ONLY_DUP) continue;
938  auto cont = static_cast<NodeContinuation>(c);
939  for (int is_dawg = 0; is_dawg < 2; ++is_dawg) {
940  int beam_index = BeamIndex(is_dawg, cont, 0);
941  int heap_size = last_beam->beams_[beam_index].size();
942  for (int h = 0; h < heap_size; ++h) {
943  const RecodeNode* node = &last_beam->beams_[beam_index].get(h).data;
944  if (is_dawg) {
945  // dawg_node may be a null_char, or duplicate, so scan back to the
946  // last valid unichar_id.
947  const RecodeNode* dawg_node = node;
948  while (dawg_node != nullptr &&
949  (dawg_node->unichar_id == INVALID_UNICHAR_ID ||
950  dawg_node->duplicate))
951  dawg_node = dawg_node->prev;
952  if (dawg_node == nullptr || (!dawg_node->end_of_word &&
953  dawg_node->unichar_id != UNICHAR_SPACE)) {
954  // Dawg node is not valid.
955  continue;
956  }
957  }
958  if (best_node == nullptr || node->score > best_node->score) {
959  second_best_node = best_node;
960  best_node = node;
961  } else if (second_best_node == nullptr ||
962  node->score > second_best_node->score) {
963  second_best_node = node;
964  }
965  }
966  }
967  }
968  if (second_nodes != nullptr) ExtractPath(second_best_node, second_nodes);
969  ExtractPath(best_node, best_nodes);
970 }
971 
972 // Helper backtracks through the lattice from the given node, storing the
973 // path and reversing it.
974 void RecodeBeamSearch::ExtractPath(
975  const RecodeNode* node, GenericVector<const RecodeNode*>* path) const {
976  path->truncate(0);
977  while (node != nullptr) {
978  path->push_back(node);
979  node = node->prev;
980  }
981  path->reverse();
982 }
983 
984 // Helper prints debug information on the given lattice path.
985 void RecodeBeamSearch::DebugPath(
986  const UNICHARSET* unicharset,
987  const GenericVector<const RecodeNode*>& path) const {
988  for (int c = 0; c < path.size(); ++c) {
989  const RecodeNode& node = *path[c];
990  tprintf("%d ", c);
991  node.Print(null_char_, *unicharset, 1);
992  }
993 }
994 
995 // Helper prints debug information on the given unichar path.
996 void RecodeBeamSearch::DebugUnicharPath(
997  const UNICHARSET* unicharset, const GenericVector<const RecodeNode*>& path,
998  const GenericVector<int>& unichar_ids, const GenericVector<float>& certs,
999  const GenericVector<float>& ratings,
1000  const GenericVector<int>& xcoords) const {
1001  int num_ids = unichar_ids.size();
1002  double total_rating = 0.0;
1003  for (int c = 0; c < num_ids; ++c) {
1004  int coord = xcoords[c];
1005  tprintf("%d %d=%s r=%g, c=%g, s=%d, e=%d, perm=%d\n", coord, unichar_ids[c],
1006  unicharset->debug_str(unichar_ids[c]).string(), ratings[c],
1007  certs[c], path[coord]->start_of_word, path[coord]->end_of_word,
1008  path[coord]->permuter);
1009  total_rating += ratings[c];
1010  }
1011  tprintf("Path total rating = %g\n", total_rating);
1012 }
1013 
1014 } // namespace tesseract.
void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
Definition: recodebeam.cpp:82
Definition: werd.h:56
int16_t top() const
Definition: rect.h:58
RecodeBeamSearch(const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict)
Definition: recodebeam.cpp:68
NodeContinuation
Definition: recodebeam.h:72
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:878
PermuterType
Definition: ratngs.h:242
Definition: rect.h:34
void Push(Pair *entry)
Definition: genericheap.h:95
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: pageres.h:223
static NodeContinuation ContinuationFromBeamsIndex(int index)
Definition: recodebeam.h:230
bool valid_end
Definition: dict.h:84
DawgPositionVector * updated_dawgs
Definition: dict.h:81
const GenericVector< int > * GetNextCodes(const RecodedCharID &code) const
static const int kNumBeams
Definition: recodebeam.h:227
GenericVector< Pair > * heap()
Definition: genericheap.h:83
void ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET *unicharset, GenericVector< int > *unichar_ids, GenericVector< float > *certs, GenericVector< float > *ratings, GenericVector< int > *xcoords) const
Definition: recodebeam.cpp:162
int def_letter_is_okay(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.cpp:404
DawgPositionVector * active_dawgs
Definition: dict.h:80
void Set(int index, int value)
int size() const
Definition: unicharset.h:341
integer coordinate
Definition: points.h:31
static int LengthFromBeamsIndex(int index)
Definition: recodebeam.h:229
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:902
static const int kMaxCodeLen
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const
Definition: unicharset.h:652
void init_to_size(int size, const T &t)
static const float kMinCertainty
Definition: recodebeam.h:222
int DecodeUnichar(const RecodedCharID &code) const
const char * id_to_unichar_ext(UNICHAR_ID id) const
Definition: unicharset.cpp:299
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: recodebeam.h:216
static float ProbToCertainty(float prob)
Definition: networkio.cpp:568
int16_t height() const
Definition: rect.h:108
DawgPositionVector * dawgs
Definition: recodebeam.h:169
const Pair & PeekTop() const
Definition: genericheap.h:108
void ExtractBestPathAsLabels(GenericVector< int > *labels, GenericVector< int > *xcoords) const
Definition: recodebeam.cpp:139
int dim1() const
Definition: matrix.h:209
void put(ICOORD pos, const T &thing)
Definition: matrix.h:223
const char * string() const
Definition: strngs.cpp:194
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
void truncate(int size)
bool empty() const
Definition: genericvector.h:89
const GenericVector< int > * GetFinalCodes(const RecodedCharID &code) const
int push_back(T object)
PermuterType permuter
Definition: recodebeam.h:147
static int BeamIndex(bool is_dawg, NodeContinuation cont, int length)
Definition: recodebeam.h:237
int NumFeatures() const
Definition: networkio.h:111
Definition: matrix.h:578
int16_t bottom() const
Definition: rect.h:65
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:343
const RecodeNode * prev
Definition: recodebeam.h:167
bool IsSpaceDelimitedLang() const
Returns true if the language is space-delimited (not CJ, or T).
Definition: dict.cpp:892
void DebugBeams(const UNICHARSET &unicharset) const
Definition: recodebeam.cpp:309
MATRIX * ratings
Definition: pageres.h:230
#define ASSERT_HOST(x)
Definition: errcode.h:88
int16_t left() const
Definition: rect.h:72
const UNICHARSET & getUnicharset() const
Definition: dict.h:97
float * f(int t)
Definition: networkio.h:115
int dim2() const
Definition: matrix.h:210
void default_dawgs(DawgPositionVector *anylength_dawgs, bool suppress_patterns) const
Definition: dict.cpp:626
void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
Definition: recodebeam.cpp:177
bool Pop(Pair *entry)
Definition: genericheap.h:118
void Reshuffle(Pair *pair)
Definition: genericheap.h:182
int size() const
Definition: genericvector.h:70
void Print(int null_char, const UNICHARSET &unicharset, int depth) const
Definition: recodebeam.cpp:48
static bool IsDawgFromBeamsIndex(int index)
Definition: recodebeam.h:233
const Pair & get(int index) const
Definition: genericheap.h:87
void scale(const float f)
Definition: rect.h:175
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:241
T & back() const
int Width() const
Definition: networkio.h:107