21 # include "config_auto.h" 26 #include "allheaders.h" 52 training_iteration_(0),
60 debug_win_(nullptr) {}
74 if (lang ==
nullptr)
return true;
82 bool include_charsets = mgr ==
nullptr ||
103 if (
network_ ==
nullptr)
return false;
104 bool include_charsets = mgr ==
nullptr ||
117 if (include_charsets && !
LoadRecoder(fp))
return false;
118 if (!include_charsets && !
LoadCharsets(mgr))
return false;
141 tprintf(
"Space was garbled in recoding!!\n");
170 tprintf(
"Failed to load any lstm-specific dictionaries for lang %s!!\n",
180 bool debug,
double worst_dict_cert,
181 const TBOX& line_box,
183 int lstm_choice_mode) {
187 if (!
RecognizeLine(image_data, invert, debug,
false,
false, &scale_factor,
194 search_->
Decode(outputs, kDictRatio, kCertOffset, worst_dict_cert,
202 float* mean_output,
float* sd) {
203 const int kOutputScale = INT8_MAX;
204 STATS stats(0, kOutputScale + 1);
205 for (
int t = 0; t < outputs.
Width(); ++t) {
206 int best_label = outputs.
BestLabel(t,
nullptr);
208 float best_output = outputs.
f(t)[best_label];
209 stats.
add(static_cast<int>(kOutputScale * best_output), 1);
219 *min_output =
static_cast<float>(stats.
min_bucket()) / kOutputScale;
220 *mean_output = stats.
mean() / kOutputScale;
221 *sd = stats.
sd() / kOutputScale;
228 bool debug,
bool re_invert,
bool upside_down,
232 const int kMaxImageWidth = 2560;
238 if (pix ==
nullptr) {
239 tprintf(
"Line cannot be recognized!!\n");
243 tprintf(
"Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix),
248 if (upside_down) pixRotate180(pix, pix);
250 *scale_factor = min_width / *scale_factor;
256 float pos_min, pos_mean, pos_sd;
257 OutputStats(*outputs, &pos_min, &pos_mean, &pos_sd);
258 if (invert && pos_min < 0.5) {
268 float inv_min, inv_mean, inv_sd;
269 OutputStats(inv_outputs, &inv_min, &inv_mean, &inv_sd);
270 if (inv_min > pos_min && inv_mean > pos_mean && inv_sd < pos_sd) {
273 tprintf(
"Inverting image: old min=%g, mean=%g, sd=%g, inv %g,%g,%g\n",
274 pos_min, pos_mean, pos_sd, inv_min, inv_mean, inv_sd);
276 *outputs = inv_outputs;
277 *inputs = inv_inputs;
278 }
else if (re_invert) {
300 for (
int start = 0; start < labels.
size(); start = end) {
304 result +=
DecodeLabel(labels, start, &end,
nullptr);
315 const char* window_name,
317 #ifndef GRAPHICS_DISABLED // do nothing if there's no graphics 318 Pix* input_pix = inputs.
ToPix();
320 pixGetHeight(input_pix), window);
323 #endif // GRAPHICS_DISABLED 331 #ifndef GRAPHICS_DISABLED // do nothing if there's no graphics 335 for (
int start = 0; start < labels.
size(); start = end) {
336 int xpos = xcoords[start] * x_scale;
342 const char* str =
DecodeLabel(labels, start, &end,
nullptr);
343 if (*str ==
'\\') str =
"\\\\";
344 xpos = xcoords[(start + end) / 2] * x_scale;
345 window->
Text(xpos, height, str);
347 window->
Line(xpos, 0, xpos, height * 3 / 2);
350 #endif // GRAPHICS_DISABLED 361 for (
int start = 0; start < labels.
size(); start = end) {
369 const char* label =
DecodeLabel(labels, start, &end, &decoded);
372 for (
int i = start + 1; i < end; ++i) {
374 xcoords[i], xcoords[i + 1]);
383 const char* label,
int best_choice,
384 int x_start,
int x_end) {
385 tprintf(
"%s=%d On [%d, %d), scores=", label, best_choice, x_start, x_end);
386 double max_score = 0.0;
387 double mean_score = 0.0;
388 const int width = x_end - x_start;
389 for (
int x = x_start; x < x_end; ++x) {
390 const float* line = outputs.
f(x);
391 const double score = line[best_choice] * 100.0;
392 if (score > max_score) max_score = score;
393 mean_score += score / width;
395 double best_score = 0.0;
397 if (c != best_choice && line[c] > best_score) {
399 best_score = line[c];
405 tprintf(
", Mean=%g, max=%g\n", mean_score, max_score);
411 #if 0 // TODO: unused, remove if still unused after 2020. 412 static bool NullIsBest(
const NetworkIO& output,
float null_thr,
414 if (output.
f(t)[
null_char] >= null_thr)
return true;
456 const int width = output.
Width();
457 for (
int t = 0; t < width; ++t) {
459 const int label = output.
BestLabel(t, &score);
471 int start,
int* end,
int* decoded) {
477 if (decoded !=
nullptr) {
484 while (index < labels.
size() &&
486 code.
Set(code.
length(), labels[index++]);
487 while (index < labels.
size() && labels[index] ==
null_char_) ++index;
491 if (uni_id != INVALID_UNICHAR_ID &&
492 (index == labels.
size() ||
496 if (decoded !=
nullptr) *decoded = uni_id;
501 return "<Undecodable>";
503 if (decoded !=
nullptr) *decoded = labels[start];
504 if (labels[start] ==
null_char_)
return "<null>";
519 if (label == INVALID_UNICHAR_ID)
return "..";
void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
NetworkScratch scratch_space_
bool save_to_file(const char *const filename) const
void Text(int x, int y, const char *mystring)
void DebugActivationRange(const NetworkIO &outputs, const char *label, int best_choice, int x_start, int x_end)
bool Serialize(const char *data, size_t count=1)
const char * DecodeSingleLabel(int label)
char * user_patterns_suffix
bool Serialize(const TessdataManager *mgr, TFile *fp) const
bool IsComponentAvailable(TessdataType type) const
bool LoadCharsets(const TessdataManager *mgr)
void LabelsFromOutputs(const NetworkIO &outputs, GenericVector< int > *labels, GenericVector< int > *xcoords)
bool Serialize(TFile *fp) const
virtual void SetRandomizer(TRand *randomizer)
void OutputStats(const NetworkIO &outputs, float *min_output, float *mean_output, float *sd)
bool DeSerialize(TFile *fp)
const UNICHARSET & GetUnicharset() const
bool IsValidFirstCode(int code) const
void LabelsViaReEncode(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
void Set(int index, int value)
static void PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *randomizer, NetworkIO *input)
bool GetComponent(TessdataType type, TFile *fp)
void LoadLSTM(const STRING &lang, TessdataManager *data_file)
static const int kMaxCodeLen
void SetupForLoad(DawgCache *dawg_cache)
STRING DecodeLabels(const GenericVector< int > &labels)
void SetupPassThrough(const UNICHARSET &unicharset)
void add(int32_t value, int32_t count)
static const float kMinCertainty
int DecodeUnichar(const RecodedCharID &code) const
RecodeBeamSearch * search_
void DisplayForward(const NetworkIO &inputs, const GenericVector< int > &labels, const GenericVector< int > &label_coords, const char *window_name, ScrollView **window)
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
virtual void CacheXScaleFactor(int factor)
bool SimpleTextOutput() const
bool Load(const ParamsVectors *params, const char *lang, TessdataManager *mgr)
void ExtractBestPathAsLabels(GenericVector< int > *labels, GenericVector< int > *xcoords) const
void TextAttributes(const char *font, int pixel_size, bool bold, bool italic, bool underlined)
int32_t min_bucket() const
virtual StaticShape InputShape() const
void LabelsViaSimpleText(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
const char * DecodeLabel(const GenericVector< int > &labels, int start, int *end, int *decoded)
virtual void Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose, NetworkScratch *scratch, NetworkIO *output)=0
char * user_patterns_file
void set_int_mode(bool is_quantized)
DLLSYM void tprintf(const char *format,...)
void RecognizeLine(const ImageData &image_data, bool invert, bool debug, double worst_dict_cert, const TBOX &line_box, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
bool DeSerialize(bool swap, FILE *fp)
static Pix * PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width, TRand *randomizer, float *image_scale)
bool DeSerialize(char *data, size_t count=1)
void DisplayLSTMOutput(const GenericVector< int > &labels, const GenericVector< int > &xcoords, int height, ScrollView *window)
bool LoadRecoder(TFile *fp)
bool load_from_file(const char *const filename, bool skip_fragments)
int32_t get_total() const
static Network * CreateFromFile(TFile *fp)
virtual int XScaleFactor() const
int EncodeUnichar(int unichar_id, RecodedCharID *code) const
int32_t sample_iteration_
void Line(int x1, int y1, int x2, int y2)
int BestLabel(int t, float *score) const
static TESS_API DawgCache * GlobalDawgCache()
static void ClearWindow(bool tess_coords, const char *window_name, int width, int height, ScrollView **window)
bool LoadDictionary(const ParamsVectors *params, const char *lang, TessdataManager *mgr)
bool Serialize(FILE *fp) const
void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
void DebugActivationPath(const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > &xcoords)
bool DeSerialize(const TessdataManager *mgr, TFile *fp)
int32_t training_iteration_
virtual bool Serialize(TFile *fp) const
static int DisplayImage(Pix *pix, ScrollView *window)