tesseract  4.1.0
tesseract::RecodeBeamSearch Class Reference

#include <recodebeam.h>

Public Member Functions

 RecodeBeamSearch (const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict)
 
void Decode (const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
 
void Decode (const GENERIC_2D_ARRAY< float > &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset)
 
void ExtractBestPathAsLabels (GenericVector< int > *labels, GenericVector< int > *xcoords) const
 
void ExtractBestPathAsUnicharIds (bool debug, const UNICHARSET *unicharset, GenericVector< int > *unichar_ids, GenericVector< float > *certs, GenericVector< float > *ratings, GenericVector< int > *xcoords) const
 
void ExtractBestPathAsWords (const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
 
void DebugBeams (const UNICHARSET &unicharset) const
 

Static Public Member Functions

static int LengthFromBeamsIndex (int index)
 
static NodeContinuation ContinuationFromBeamsIndex (int index)
 
static bool IsDawgFromBeamsIndex (int index)
 
static int BeamIndex (bool is_dawg, NodeContinuation cont, int length)
 

Public Attributes

std::vector< std::vector< std::pair< const char *, float > > > timesteps
 

Static Public Attributes

static const float kMinCertainty = -20.0f
 
static const int kNumLengths = RecodedCharID::kMaxCodeLen + 1
 
static const int kNumBeams = 2 * NC_COUNT * kNumLengths
 

Detailed Description

Definition at line 179 of file recodebeam.h.

Constructor & Destructor Documentation

tesseract::RecodeBeamSearch::RecodeBeamSearch ( const UnicharCompress &  recoder,
int  null_char,
bool  simple_text,
Dict *  dict 
)

Definition at line 68 of file recodebeam.cpp.

70  : recoder_(recoder),
71  beam_size_(0),
72  top_code_(-1),
73  second_code_(-1),
74  dict_(dict),
75  space_delimited_(true),
76  is_simple_text_(simple_text),
77  null_char_(null_char) {
78  if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) space_delimited_ = false;
79 }
bool IsSpaceDelimitedLang() const
Returns true if the language is space-delimited (i.e. not Chinese, Japanese, or Thai).
Definition: dict.cpp:892

Member Function Documentation

static int tesseract::RecodeBeamSearch::BeamIndex ( bool  is_dawg,
NodeContinuation  cont,
int  length 
)
inline static

Definition at line 237 of file recodebeam.h.

237  {
238  return (is_dawg * NC_COUNT + cont) * kNumLengths + length;
239  }
static const int kNumLengths
Definition: recodebeam.h:224
static NodeContinuation tesseract::RecodeBeamSearch::ContinuationFromBeamsIndex ( int  index)
inline static

Definition at line 230 of file recodebeam.h.

230  {
231  return static_cast<NodeContinuation>((index / kNumLengths) % NC_COUNT);
232  }
NodeContinuation
Definition: recodebeam.h:72
static const int kNumLengths
Definition: recodebeam.h:224
void tesseract::RecodeBeamSearch::DebugBeams ( const UNICHARSET &  unicharset) const

Definition at line 309 of file recodebeam.cpp.

309  {
310  for (int p = 0; p < beam_size_; ++p) {
311  for (int d = 0; d < 2; ++d) {
312  for (int c = 0; c < NC_COUNT; ++c) {
313  auto cont = static_cast<NodeContinuation>(c);
314  int index = BeamIndex(d, cont, 0);
315  if (beam_[p]->beams_[index].empty()) continue;
316  // Print all the best scoring nodes for each unichar found.
317  tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict",
318  kNodeContNames[c]);
319  DebugBeamPos(unicharset, beam_[p]->beams_[index]);
320  }
321  }
322  }
323 }
NodeContinuation
Definition: recodebeam.h:72
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
static int BeamIndex(bool is_dawg, NodeContinuation cont, int length)
Definition: recodebeam.h:237
void tesseract::RecodeBeamSearch::Decode ( const NetworkIO &  output,
double  dict_ratio,
double  cert_offset,
double  worst_dict_cert,
const UNICHARSET *  charset,
int  lstm_choice_mode = 0 
)

Definition at line 82 of file recodebeam.cpp.

84  {
85  beam_size_ = 0;
86  int width = output.Width();
87  if (lstm_choice_mode)
88  timesteps.clear();
89  for (int t = 0; t < width; ++t) {
90  ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
91  DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
92  charset);
93  if (lstm_choice_mode) {
94  SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
95  }
96  }
97 }
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: recodebeam.h:216
void tesseract::RecodeBeamSearch::Decode ( const GENERIC_2D_ARRAY< float > &  output,
double  dict_ratio,
double  cert_offset,
double  worst_dict_cert,
const UNICHARSET *  charset 
)

Definition at line 98 of file recodebeam.cpp.

101  {
102  beam_size_ = 0;
103  int width = output.dim1();
104  for (int t = 0; t < width; ++t) {
105  ComputeTopN(output[t], output.dim2(), kBeamWidths[0]);
106  DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
107  }
108 }
int dim1() const
Definition: matrix.h:209
int dim2() const
Definition: matrix.h:210
void tesseract::RecodeBeamSearch::ExtractBestPathAsLabels ( GenericVector< int > *  labels,
GenericVector< int > *  xcoords 
) const

Definition at line 139 of file recodebeam.cpp.

140  {
141  labels->truncate(0);
142  xcoords->truncate(0);
144  ExtractBestPaths(&best_nodes, nullptr);
145  // Now just run CTC on the best nodes.
146  int t = 0;
147  int width = best_nodes.size();
148  while (t < width) {
149  int label = best_nodes[t]->code;
150  if (label != null_char_) {
151  labels->push_back(label);
152  xcoords->push_back(t);
153  }
154  while (++t < width && !is_simple_text_ && best_nodes[t]->code == label) {
155  }
156  }
157  xcoords->push_back(width);
158 }
void truncate(int size)
int push_back(T object)
int size() const
Definition: genericvector.h:70
void tesseract::RecodeBeamSearch::ExtractBestPathAsUnicharIds ( bool  debug,
const UNICHARSET *  unicharset,
GenericVector< int > *  unichar_ids,
GenericVector< float > *  certs,
GenericVector< float > *  ratings,
GenericVector< int > *  xcoords 
) const

Definition at line 162 of file recodebeam.cpp.

165  {
167  ExtractBestPaths(&best_nodes, nullptr);
168  ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
169  if (debug) {
170  DebugPath(unicharset, best_nodes);
171  DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings,
172  *xcoords);
173  }
174 }
void tesseract::RecodeBeamSearch::ExtractBestPathAsWords ( const TBOX &  line_box,
float  scale_factor,
bool  debug,
const UNICHARSET *  unicharset,
PointerVector< WERD_RES > *  words,
int  lstm_choice_mode = 0 
)

Definition at line 177 of file recodebeam.cpp.

181  {
182  words->truncate(0);
183  GenericVector<int> unichar_ids;
184  GenericVector<float> certs;
185  GenericVector<float> ratings;
186  GenericVector<int> xcoords;
189  std::deque<std::tuple<int, int>> best_choices;
190  ExtractBestPaths(&best_nodes, &second_nodes);
191  if (debug) {
192  DebugPath(unicharset, best_nodes);
193  ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
194  &xcoords);
195  tprintf("\nSecond choice path:\n");
196  DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
197  xcoords);
198  }
199  int timestepEnd= 0;
200  // If LSTM choice mode 2 is requested, also record the x-coordinates of
201  // every chosen character so that the alternative choices can be matched to it.
202  if (lstm_choice_mode == 2) {
203  ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
204  &xcoords, &best_choices);
205  if (best_choices.size() > 0) {
206  timestepEnd = std::get<1>(best_choices.front());
207  best_choices.pop_front();
208  }
209  } else {
210  ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
211  &xcoords);
212  }
213  int num_ids = unichar_ids.size();
214  if (debug) {
215  DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
216  xcoords);
217  }
218  // Convert labels to unichar-ids.
219  int word_end = 0;
220  float prev_space_cert = 0.0f;
221  for (int word_start = 0; word_start < num_ids; word_start = word_end) {
222  for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
223  // A word is terminated when a space character or start_of_word flag is
224  // hit. We also want to force a separate word for every non
225  // space-delimited character when not in a dictionary context.
226  if (unichar_ids[word_end] == UNICHAR_SPACE) break;
227  int index = xcoords[word_end];
228  if (best_nodes[index]->start_of_word) break;
229  if (best_nodes[index]->permuter == TOP_CHOICE_PERM &&
230  (!unicharset->IsSpaceDelimited(unichar_ids[word_end]) ||
231  !unicharset->IsSpaceDelimited(unichar_ids[word_end - 1])))
232  break;
233  }
234  float space_cert = 0.0f;
235  if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
236  space_cert = certs[word_end];
237  bool leading_space =
238  word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
239  // Create a WERD_RES for the output word.
240  WERD_RES* word_res = InitializeWord(
241  leading_space, line_box, word_start, word_end,
242  std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
243  if (lstm_choice_mode == 1) {
244  for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
245  word_res->timesteps.push_back(timesteps[i]);
246  }
247  timestepEnd = xcoords[word_end];
248  } else if (lstm_choice_mode == 2){
249  // Accumulated Timesteps (choice mode 2 processing)
250  float sum = 0;
251  std::vector<std::pair<const char*, float>> choice_pairs;
252  for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
253  for (std::pair<const char*, float> choice : timesteps[i]) {
254  if (std::strcmp(choice.first, "")) {
255  sum += choice.second;
256  choice_pairs.push_back(choice);
257  }
258  }
259  if ((best_choices.size() > 0 && i == std::get<1>(best_choices.front()) - 1)
260  || i == xcoords[word_end]-1) {
261  std::map<const char*, float> summed_propabilities;
262  for (auto & choice_pair : choice_pairs) {
263  summed_propabilities[choice_pair.first] += choice_pair.second;
264  }
265  std::vector<std::pair<const char*, float>> accumulated_timestep;
266  for (auto& summed_propability : summed_propabilities) {
267  if(sum == 0) break;
268  summed_propability.second/=sum;
269  size_t pos = 0;
270  while (accumulated_timestep.size() > pos
271  && accumulated_timestep[pos].second > summed_propability.second) {
272  pos++;
273  }
274  accumulated_timestep.insert(accumulated_timestep.begin() + pos,
275  std::pair<const char*,float>(summed_propability.first,
276  summed_propability.second));
277  }
278  if (best_choices.size() > 0) {
279  best_choices.pop_front();
280  }
281  choice_pairs.clear();
282  word_res->timesteps.push_back(accumulated_timestep);
283  sum = 0;
284  }
285  }
286  timestepEnd = xcoords[word_end];
287  }
288  for (int i = word_start; i < word_end; ++i) {
289  auto* choices = new BLOB_CHOICE_LIST;
290  BLOB_CHOICE_IT bc_it(choices);
291  auto* choice = new BLOB_CHOICE(
292  unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
293  static_cast<float>(INT16_MAX), 0.0f, BCC_STATIC_CLASSIFIER);
294  int col = i - word_start;
295  choice->set_matrix_cell(col, col);
296  bc_it.add_after_then_move(choice);
297  word_res->ratings->put(col, col, choices);
298  }
299  int index = xcoords[word_end - 1];
300  word_res->FakeWordFromRatings(best_nodes[index]->permuter);
301  words->push_back(word_res);
302  prev_space_cert = space_cert;
303  if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
304  ++word_end;
305  }
306 }
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: pageres.h:223
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:902
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const
Definition: unicharset.h:652
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: recodebeam.h:216
void put(ICOORD pos, const T &thing)
Definition: matrix.h:223
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
int push_back(T object)
MATRIX * ratings
Definition: pageres.h:230
int size() const
Definition: genericvector.h:70
static bool tesseract::RecodeBeamSearch::IsDawgFromBeamsIndex ( int  index)
inline static

Definition at line 233 of file recodebeam.h.

233  {
234  return index / (kNumLengths * NC_COUNT) > 0;
235  }
static const int kNumLengths
Definition: recodebeam.h:224
static int tesseract::RecodeBeamSearch::LengthFromBeamsIndex ( int  index)
inline static

Definition at line 229 of file recodebeam.h.

229 { return index % kNumLengths; }
static const int kNumLengths
Definition: recodebeam.h:224

Member Data Documentation

const float tesseract::RecodeBeamSearch::kMinCertainty = -20.0f
static

Definition at line 222 of file recodebeam.h.

const int tesseract::RecodeBeamSearch::kNumBeams = 2 * NC_COUNT * kNumLengths
static

Definition at line 227 of file recodebeam.h.

const int tesseract::RecodeBeamSearch::kNumLengths = RecodedCharID::kMaxCodeLen + 1
static

Definition at line 224 of file recodebeam.h.

std::vector< std::vector<std::pair<const char*, float> > > tesseract::RecodeBeamSearch::timesteps

Definition at line 216 of file recodebeam.h.


The documentation for this class was generated from the following files: