tesseract  4.1.0
ocrblock.h File Reference
#include "ocrpara.h"
#include "ocrrow.h"
#include "pdblock.h"

Go to the source code of this file.

Classes

class  BLOCK
 

Functions

void PrintSegmentationStats (BLOCK_LIST *block_list)
 
void ExtractBlobsFromSegmentation (BLOCK_LIST *blocks, C_BLOB_LIST *output_blob_list)
 
void RefreshWordBlobsFromNewBlobs (BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs, C_BLOB_LIST *not_found_blobs)
 

Function Documentation

void ExtractBlobsFromSegmentation ( BLOCK_LIST *  blocks,
C_BLOB_LIST *  output_blob_list 
)

Definition at line 439 of file ocrblock.cpp.

440  {
441  C_BLOB_IT return_list_it(output_blob_list);
442  BLOCK_IT block_it(blocks);
443  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
444  BLOCK* block = block_it.data();
445  ROW_IT row_it(block->row_list());
446  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
447  ROW* row = row_it.data();
448  // Iterate over all werds in the row.
449  WERD_IT werd_it(row->word_list());
450  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
451  WERD* werd = werd_it.data();
452  return_list_it.move_to_last();
453  return_list_it.add_list_after(werd->cblob_list());
454  return_list_it.move_to_last();
455  return_list_it.add_list_after(werd->rej_cblob_list());
456  }
457  }
458  }
459 }
WERD
Definition: werd.h:56
C_BLOB_LIST * rej_cblob_list()
Definition: werd.h:90
WERD_LIST * word_list()
Definition: ocrrow.h:55
ROW
Definition: ocrrow.h:36
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:117
BLOCK
Definition: ocrblock.h:29
C_BLOB_LIST * cblob_list()
Definition: werd.h:95
void PrintSegmentationStats ( BLOCK_LIST *  block_list)

Definition at line 406 of file ocrblock.cpp.

406  {
407  int num_blocks = 0;
408  int num_rows = 0;
409  int num_words = 0;
410  int num_blobs = 0;
411  BLOCK_IT block_it(block_list);
412  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
413  BLOCK* block = block_it.data();
414  ++num_blocks;
415  ROW_IT row_it(block->row_list());
416  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
417  ++num_rows;
418  ROW* row = row_it.data();
419  // Iterate over all werds in the row.
420  WERD_IT werd_it(row->word_list());
421  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
422  WERD* werd = werd_it.data();
423  ++num_words;
424  num_blobs += werd->cblob_list()->length();
425  }
426  }
427  }
428  tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n",
429  num_blocks, num_rows, num_words, num_blobs);
430 }
WERD
Definition: werd.h:56
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:36
WERD_LIST * word_list()
Definition: ocrrow.h:55
ROW
Definition: ocrrow.h:36
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:117
BLOCK
Definition: ocrblock.h:29
C_BLOB_LIST * cblob_list()
Definition: werd.h:95
void RefreshWordBlobsFromNewBlobs ( BLOCK_LIST *  block_list,
C_BLOB_LIST *  new_blobs,
C_BLOB_LIST *  not_found_blobs 
)

Definition at line 474 of file ocrblock.cpp.

476  {
477  // Now iterate over all the blobs in the segmentation_block_list_, and just
478  // replace the corresponding c-blobs inside the werds.
479  BLOCK_IT block_it(block_list);
480  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
481  BLOCK* block = block_it.data();
482  if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText())
483  continue; // Don't touch non-text blocks.
484  // Iterate over all rows in the block.
485  ROW_IT row_it(block->row_list());
486  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
487  ROW* row = row_it.data();
488  // Iterate over all werds in the row.
489  WERD_IT werd_it(row->word_list());
490  WERD_LIST new_words;
491  WERD_IT new_words_it(&new_words);
492  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
493  WERD* werd = werd_it.extract();
494  WERD* new_werd = werd->ConstructWerdWithNewBlobs(new_blobs,
495  not_found_blobs);
496  if (new_werd) {
497  // Insert this new werd into the actual row's werd-list. Remove the
498  // existing one.
499  new_words_it.add_after_then_move(new_werd);
500  delete werd;
501  } else {
502  // Reinsert the older word back, for lack of better options.
503  // This is critical since dropping the words messes up segmentation:
504  // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on.
505  new_words_it.add_after_then_move(werd);
506  }
507  }
508  // Get rid of the old word list & replace it with the new one.
509  row->word_list()->clear();
510  werd_it.move_to_first();
511  werd_it.add_list_after(&new_words);
512  }
513  }
514 }
WERD
Definition: werd.h:56
POLY_BLOCK * poly_block() const
Definition: pdblock.h:56
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:191
WERD * ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs)
Definition: werd.cpp:390
WERD_LIST * word_list()
Definition: ocrrow.h:55
bool IsText() const
Definition: polyblk.h:49
ROW
Definition: ocrrow.h:36
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:117
BLOCK
Definition: ocrblock.h:29