tesseract 3.04.01

tesseract::ColumnFinder Class Reference

#include <colfind.h>

Inheritance diagram for tesseract::ColumnFinder:
tesseract::TabFind tesseract::AlignedBlob tesseract::BlobGrid tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > tesseract::GridBase

List of all members.

Public Member Functions

 ColumnFinder (int gridsize, const ICOORD &bleft, const ICOORD &tright, int resolution, bool cjk_script, double aligned_gap_fraction, TabVector_LIST *vlines, TabVector_LIST *hlines, int vertical_x, int vertical_y)
virtual ~ColumnFinder ()
const DENORM * denorm () const
const TextlineProjection * projection () const
void set_cjk_script (bool is_cjk)
void SetupAndFilterNoise (PageSegMode pageseg_mode, Pix *photo_mask_pix, TO_BLOCK *input_block)
bool IsVerticallyAlignedText (double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
void CorrectOrientation (TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation)
int FindBlocks (PageSegMode pageseg_mode, Pix *scaled_color, int scaled_factor, TO_BLOCK *block, Pix *photo_mask_pix, Pix *thresholds_pix, Pix *grey_pix, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks)
void GetDeskewVectors (FCOORD *deskew, FCOORD *reskew)
void SetEquationDetect (EquationDetectBase *detect)

Detailed Description

Definition at line 52 of file colfind.h.


Constructor & Destructor Documentation

tesseract::ColumnFinder::ColumnFinder ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright,
int  resolution,
bool  cjk_script,
double  aligned_gap_fraction,
TabVector_LIST *  vlines,
TabVector_LIST *  hlines,
int  vertical_x,
int  vertical_y 
)

Definition at line 88 of file colfind.cpp.

  : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y,
            resolution),
    cjk_script_(cjk_script),
    min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize)),
    mean_column_gap_(tright.x() - bleft.x()),
    tabfind_aligned_gap_fraction_(aligned_gap_fraction),
    reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f),
    best_columns_(NULL), stroke_width_(NULL),
    part_grid_(gridsize, bleft, tright), nontext_map_(NULL),
    projection_(resolution),
    denorm_(NULL), input_blobs_win_(NULL), equation_detect_(NULL) {
  TabVector_IT h_it(&horizontal_lines_);
  h_it.add_list_after(hlines);
}

ColumnFinder::~ColumnFinder() {
  column_sets_.delete_data_pointers();
  if (best_columns_ != NULL) {
tesseract::ColumnFinder::~ColumnFinder ( ) [virtual]

Definition at line 109 of file colfind.cpp.

                          {
    DENORM* dead_denorm = denorm_;
    denorm_ = const_cast<DENORM*>(denorm_->predecessor());
    delete dead_denorm;
  }

  // The ColPartitions are destroyed automatically, but any boxes in
  // the noise_parts_ list are owned and need to be deleted explicitly.
  ColPartition_IT part_it(&noise_parts_);
  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
    ColPartition* part = part_it.data();
    part->DeleteBoxes();
  }
  // Likewise any boxes in the good_parts_ list need to be deleted.
  // These are just the image parts. Text parts have already given their
  // boxes on to the TO_BLOCK, and have empty lists.
  part_it.set_to_list(&good_parts_);
  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
    ColPartition* part = part_it.data();
    part->DeleteBoxes();
  }
  // Also, any blobs on the image_bblobs_ list need to have their cblobs
  // deleted. This only happens if there has been an early return from
  // FindColumns, as in a normal return, the blobs go into the grid and
  // end up in noise_parts_, good_parts_ or the output blocks.
  BLOBNBOX_IT bb_it(&image_bblobs_);
  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
    BLOBNBOX* bblob = bb_it.data();
    delete bblob->cblob();
  }
}

// Performs initial processing on the blobs in the input_block:
// Setup the part_grid, stroke_width_, nontext_map.
// Obvious noise blobs are filtered out and used to mark the nontext_map_.

Member Function Documentation

void tesseract::ColumnFinder::CorrectOrientation ( TO_BLOCK *  block,
bool  vertical_text_lines,
int  recognition_rotation 
)

Definition at line 213 of file colfind.cpp.

                                 {
    rotation_ = anticlockwise90;
  } else if (recognition_rotation == 2) {
    rotation_ = rotation180;
  } else if (recognition_rotation == 3) {
    rotation_ = clockwise90;
  }
  // We infer text writing direction to be vertical if there are several
  // vertical text lines detected, and horizontal if not. But if the page
  // orientation was determined to be 90 or 270 degrees, the true writing
  // direction is the opposite of what we inferred.
  if (recognition_rotation & 1) {
    vertical_text_lines = !vertical_text_lines;
  }
  // If we still believe the writing direction is vertical, we use the
  // convention of rotating the page ccw 90 degrees to make the text lines
  // horizontal, and mark the blobs for rotation cw 90 degrees for
  // classification so that the text order is correct after recognition.
  if (vertical_text_lines) {
    rotation_.rotate(anticlockwise90);
    text_rotation_.rotate(clockwise90);
  }
  // Set rerotate_ to the inverse of rotation_.
  rerotate_ = FCOORD(rotation_.x(), -rotation_.y());
  if (rotation_.x() != 1.0f || rotation_.y() != 0.0f) {
    // Rotate all the blobs and tab vectors.
    RotateBlobList(rotation_, &block->large_blobs);
    RotateBlobList(rotation_, &block->blobs);
    RotateBlobList(rotation_, &block->small_blobs);
    RotateBlobList(rotation_, &block->noise_blobs);
    TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_,
                                  &min_gutter_width_);
    part_grid_.Init(gridsize(), bleft(), tright());
    // Reset all blobs to initial state and filter by size.
    // Since they have rotated, the list they belong on could have changed.
    block->ReSetAndReFilterBlobs();
    SetBlockRuleEdges(block);
    stroke_width_->CorrectForRotation(rerotate_, &part_grid_);
  }
  if (textord_debug_tabfind) {
    tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n",
            vertical_text_lines, recognition_rotation,
            rotation_.x(), rotation_.y(),
            text_rotation_.x(), text_rotation_.y());
  }
  // Setup the denormalization.
  ASSERT_HOST(denorm_ == NULL);
  denorm_ = new DENORM;
  denorm_->SetupNormalization(NULL, &rotation_, NULL,
                              0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
}

// Finds blocks of text, image, rule line, table etc, returning them in the
// blocks and to_blocks
// (Each TO_BLOCK points to the basic BLOCK and adds more information.)
const DENORM* tesseract::ColumnFinder::denorm ( ) const [inline]

Definition at line 70 of file colfind.h.

                               {
    return denorm_;
  }
int tesseract::ColumnFinder::FindBlocks ( PageSegMode  pageseg_mode,
Pix *  scaled_color,
int  scaled_factor,
TO_BLOCK *  block,
Pix *  photo_mask_pix,
Pix *  thresholds_pix,
Pix *  grey_pix,
BLOCK_LIST *  blocks,
BLOBNBOX_LIST *  diacritic_blobs,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 301 of file colfind.cpp.

                                                       {
  pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
  stroke_width_->FindLeaderPartitions(input_block, &part_grid_);
  stroke_width_->RemoveLineResidue(&big_parts_);
  FindInitialTabVectors(NULL, min_gutter_width_, tabfind_aligned_gap_fraction_,
                        input_block);
  SetBlockRuleEdges(input_block);
  stroke_width_->GradeBlobsIntoPartitions(
      pageseg_mode, rerotate_, input_block, nontext_map_, denorm_, cjk_script_,
      &projection_, diacritic_blobs, &part_grid_, &big_parts_);
  if (!PSM_SPARSE(pageseg_mode)) {
    ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
                                   input_block, this, &part_grid_, &big_parts_);
    ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_,
                                             photo_mask_pix);
    ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
                                   input_block, this, &part_grid_, &big_parts_);
  }
  part_grid_.ReTypeBlobs(&image_bblobs_);
  TidyBlobs(input_block);
  Reset();
  // TODO(rays) need to properly handle big_parts_.
  ColPartition_IT p_it(&big_parts_);
  for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward())
    p_it.data()->DisownBoxesNoAssert();
  big_parts_.clear();
  delete stroke_width_;
  stroke_width_ = NULL;
  // Compute the edge offsets whether or not there is a grey_pix. It is done
  // here as the c_blobs haven't been touched by rotation or anything yet,
  // so no denorm is required, yet the text has been separated from image, so
  // no time is wasted running it on image blobs.
  input_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);

  // A note about handling right-to-left scripts (Hebrew/Arabic):
  // The columns must be reversed and come out in right-to-left instead of
  // the normal left-to-right order. Because the left-to-right ordering
  // is implicit in many data structures, it is simpler to fool the algorithms
  // into thinking they are dealing with left-to-right text.
  // To do this, we reflect the needed data in the y-axis and then reflect
  // the blocks back after they have been created. This is a temporary
  // arrangement that is confined to this function only, so the reflection
  // is completely invisible in the output blocks.
  // The only objects reflected are:
  // The vertical separator lines that have already been found;
  // The bounding boxes of all BLOBNBOXES on all lists on the input_block
  // plus the image_bblobs. The outlines are not touched, since they are
  // not looked at.
  bool input_is_rtl = input_block->block->right_to_left();
  if (input_is_rtl) {
    // Reflect the vertical separator lines (member of TabFind).
    ReflectInYAxis();
    // Reflect the blob boxes.
    ReflectForRtl(input_block, &image_bblobs_);
    part_grid_.ReflectInYAxis();
  }

  if (!PSM_SPARSE(pageseg_mode)) {
    if (!PSM_COL_FIND_ENABLED(pageseg_mode)) {
      // No tab stops needed. Just the grid that FindTabVectors makes.
      DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_);
    } else {
      SetBlockRuleEdges(input_block);
      // Find the tab stops, estimate skew, and deskew the tabs, blobs and
      // part_grid_.
      FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block,
                     min_gutter_width_, tabfind_aligned_gap_fraction_,
                     &part_grid_, &deskew_, &reskew_);
      // Add the deskew to the denorm_.
      DENORM* new_denorm = new DENORM;
      new_denorm->SetupNormalization(NULL, &deskew_, denorm_,
                                     0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
      denorm_ = new_denorm;
    }
    SetBlockRuleEdges(input_block);
    part_grid_.SetTabStops(this);

    // Make the column_sets_.
    if (!MakeColumns(false)) {
      tprintf("Empty page!!\n");
      part_grid_.DeleteParts();
      return 0;  // This is an empty page.
    }

    // Refill the grid using rectangular spreading, and get the benefit
    // of the completed tab vectors marking the rule edges of each blob.
    Clear();
    #ifndef GRAPHICS_DISABLED
    if (textord_tabfind_show_reject_blobs) {
      ScrollView* rej_win = MakeWindow(500, 300, "Rejected blobs");
      input_block->plot_graded_blobs(rej_win);
    }
    #endif  // GRAPHICS_DISABLED
    InsertBlobsToGrid(false, false, &image_bblobs_, this);
    InsertBlobsToGrid(true, true, &input_block->blobs, this);

    part_grid_.GridFindMargins(best_columns_);
    // Split and merge the partitions by looking at local neighbours.
    GridSplitPartitions();
    // Resolve unknown partitions by adding to an existing partition, fixing
    // the type, or declaring them noise.
    part_grid_.GridFindMargins(best_columns_);
    GridMergePartitions();
    // Insert any unused noise blobs that are close enough to an appropriate
    // partition.
    InsertRemainingNoise(input_block);
    // Add horizontal line separators as partitions.
    GridInsertHLinePartitions();
    GridInsertVLinePartitions();
    // Recompute margins based on a local neighbourhood search.
    part_grid_.GridFindMargins(best_columns_);
    SetPartitionTypes();
  }
  if (textord_tabfind_show_initial_partitions) {
    ScrollView* part_win = MakeWindow(100, 300, "InitialPartitions");
    part_grid_.DisplayBoxes(part_win);
    DisplayTabVectors(part_win);
  }

  if (!PSM_SPARSE(pageseg_mode)) {
    if (equation_detect_) {
      equation_detect_->FindEquationParts(&part_grid_, best_columns_);
    }
    if (textord_tabfind_find_tables) {
      TableFinder table_finder;
      table_finder.Init(gridsize(), bleft(), tright());
      table_finder.set_resolution(resolution_);
      table_finder.set_left_to_right_language(
          !input_block->block->right_to_left());
      // Copy cleaned partitions from part_grid_ to clean_part_grid_ and
      // insert dot-like noise into period_grid_
      table_finder.InsertCleanPartitions(&part_grid_, input_block);
      // Get Table Regions
      table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_);
    }
    GridRemoveUnderlinePartitions();
    part_grid_.DeleteUnknownParts(input_block);

    // Build the partitions into chains that belong in the same block and
    // refine into one-to-one links, then smooth the types within each chain.
    part_grid_.FindPartitionPartners();
    part_grid_.FindFigureCaptions();
    part_grid_.RefinePartitionPartners(true);
    SmoothPartnerRuns();

    #ifndef GRAPHICS_DISABLED
    if (textord_tabfind_show_partitions) {
      ScrollView* window = MakeWindow(400, 300, "Partitions");
      if (window != NULL) {
        if (textord_debug_images)
          window->Image(AlignedBlob::textord_debug_pix().string(),
                        image_origin().x(), image_origin().y());
        part_grid_.DisplayBoxes(window);
        if (!textord_debug_printable)
          DisplayTabVectors(window);
        if (window != NULL && textord_tabfind_show_partitions > 1) {
          delete window->AwaitEvent(SVET_DESTROY);
        }
      }
    }
    #endif  // GRAPHICS_DISABLED
    part_grid_.AssertNoDuplicates();
  }
  // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here,
  // and ownership of the BLOBNBOXes moves to the ColPartitions.
  // (They were previously owned by the block or the image_bblobs list.)
  ReleaseBlobsAndCleanupUnused(input_block);
  // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and
  // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves
  // from the ColPartitions to the output TO_BLOCK. In non-text, the
  // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor.
  if (PSM_SPARSE(pageseg_mode))
    part_grid_.ExtractPartitionsAsBlocks(blocks, to_blocks);
  else
    TransformToBlocks(blocks, to_blocks);
  if (textord_debug_tabfind) {
    tprintf("Found %d blocks, %d to_blocks\n",
            blocks->length(), to_blocks->length());
  }

  DisplayBlocks(blocks);
  RotateAndReskewBlocks(input_is_rtl, to_blocks);
  int result = 0;
  #ifndef GRAPHICS_DISABLED
  if (blocks_win_ != NULL) {
    bool waiting = false;
    do {
      waiting = false;
      SVEvent* event = blocks_win_->AwaitEvent(SVET_ANY);
      if (event->type == SVET_INPUT && event->parameter != NULL) {
        if (*event->parameter == 'd')
          result = -1;
        else
          blocks->clear();
      } else if (event->type == SVET_DESTROY) {
        blocks_win_ = NULL;
      } else {
        waiting = true;
      }
      delete event;
    } while (waiting);
  }
  #endif  // GRAPHICS_DISABLED
  return result;
}

// Get the rotation required to deskew, and its inverse rotation.
void ColumnFinder::GetDeskewVectors(FCOORD* deskew, FCOORD* reskew) {
  *reskew = reskew_;
void tesseract::ColumnFinder::GetDeskewVectors ( FCOORD *  deskew,
FCOORD *  reskew 
)

Definition at line 513 of file colfind.cpp.

                                                               {
  equation_detect_ = detect;
}
bool tesseract::ColumnFinder::IsVerticallyAlignedText ( double  find_vertical_text_ratio,
TO_BLOCK *  block,
BLOBNBOX_CLIST *  osd_blobs 
)

Definition at line 195 of file colfind.cpp.

const TextlineProjection* tesseract::ColumnFinder::projection ( ) const [inline]

Definition at line 73 of file colfind.h.

                                               {
    return &projection_;
  }
void tesseract::ColumnFinder::set_cjk_script ( bool  is_cjk) [inline]

Definition at line 76 of file colfind.h.

                                   {
    cjk_script_ = is_cjk;
  }
void tesseract::ColumnFinder::SetEquationDetect ( EquationDetectBase *  detect)

Definition at line 519 of file colfind.cpp.

void tesseract::ColumnFinder::SetupAndFilterNoise ( PageSegMode  pageseg_mode,
Pix *  photo_mask_pix,
TO_BLOCK *  input_block 
)

Definition at line 157 of file colfind.cpp.

                                   {
    input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs");
    input_block->plot_graded_blobs(input_blobs_win_);
  }
  #endif  // GRAPHICS_DISABLED
  SetBlockRuleEdges(input_block);
  pixDestroy(&nontext_map_);
  // Run a preliminary strokewidth neighbour detection on the medium blobs.
  stroke_width_->SetNeighboursOnMediumBlobs(input_block);
  CCNonTextDetect nontext_detect(gridsize(), bleft(), tright());
  // Remove obvious noise and make the initial non-text map.
  nontext_map_ = nontext_detect.ComputeNonTextMask(textord_debug_tabfind,
                                                   photo_mask_pix, input_block);
  stroke_width_->FindTextlineDirectionAndFixBrokenCJK(pageseg_mode, cjk_script_,
                                                      input_block);
  // Clear the strokewidth grid ready for rotation or leader finding.
  stroke_width_->Clear();
}

// Tests for vertical alignment of text (returning true if so), and generates
// a list of blobs of moderate aspect ratio, in the most frequent writing
// direction (in osd_blobs) for orientation and script detection to test

The documentation for this class was generated from the following files:
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines