tesseract 3.04.01

WERD_CHOICE Class Reference

#include <ratngs.h>

Inheritance diagram for WERD_CHOICE:
ELIST_LINK

List of all members.

Public Member Functions

 WERD_CHOICE (const UNICHARSET *unicharset)
 WERD_CHOICE (const UNICHARSET *unicharset, int reserved)
 WERD_CHOICE (const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uinT8 src_permuter, const UNICHARSET &unicharset)
 WERD_CHOICE (const char *src_string, const UNICHARSET &unicharset)
 WERD_CHOICE (const WERD_CHOICE &word)
 ~WERD_CHOICE ()
const UNICHARSET * unicharset () const
int length () const
float adjust_factor () const
void set_adjust_factor (float factor)
const UNICHAR_ID * unichar_ids () const
UNICHAR_ID unichar_id (int index) const
int state (int index) const
tesseract::ScriptPos BlobPosition (int index) const
float rating () const
float certainty () const
float certainty (int index) const
float min_x_height () const
float max_x_height () const
void set_x_heights (float min_height, float max_height)
uinT8 permuter () const
const char * permuter_name () const
BLOB_CHOICE_LIST * blob_choices (int index, MATRIX *ratings) const
MATRIX_COORD MatrixCoord (int index) const
void set_unichar_id (UNICHAR_ID unichar_id, int index)
bool dangerous_ambig_found () const
void set_dangerous_ambig_found_ (bool value)
void set_rating (float new_val)
void set_certainty (float new_val)
void set_permuter (uinT8 perm)
void set_length (int len)
void double_the_size ()
 Make more space in the unichar_ids_, script_pos_, state_ and certainties_ arrays.
void init (int reserved)
void init (const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uinT8 src_permuter)
void make_bad ()
 Set the fields in this choice to be default (bad) values.
void append_unichar_id_space_allocated (UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
void append_unichar_id (UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
void set_unichar_id (UNICHAR_ID unichar_id, int blob_count, float rating, float certainty, int index)
void set_blob_choice (int index, int blob_count, const BLOB_CHOICE *blob_choice)
bool contains_unichar_id (UNICHAR_ID unichar_id) const
void remove_unichar_ids (int index, int num)
void remove_last_unichar_id ()
void remove_unichar_id (int index)
bool has_rtl_unichar_id () const
void reverse_and_mirror_unichar_ids ()
void punct_stripped (int *start_core, int *end_core) const
void GetNonSuperscriptSpan (int *start, int *end) const
WERD_CHOICE shallow_copy (int start, int end) const
void string_and_lengths (STRING *word_str, STRING *word_lengths_str) const
const STRING debug_string () const
bool set_unichars_in_script_order (bool in_script_order)
bool unichars_in_script_order () const
const STRING & unichar_string () const
const STRING & unichar_lengths () const
void SetScriptPositions (bool small_caps, TWERD *word)
void SetScriptPositions (const tesseract::ScriptPos *positions, int length)
void SetAllScriptPositions (tesseract::ScriptPos position)
int GetTopScriptID () const
void UpdateStateForSplit (int blob_position)
int TotalOfStates () const
void print () const
void print (const char *msg) const
void print_state (const char *msg) const
void DisplaySegmentation (TWERD *word)
WERD_CHOICE & operator+= (const WERD_CHOICE &second)
WERD_CHOICE & operator= (const WERD_CHOICE &source)

Static Public Member Functions

static const char * permuter_name (uinT8 permuter)
static tesseract::ScriptPos ScriptPositionOf (bool print_debug, const UNICHARSET &unicharset, const TBOX &blob_box, UNICHAR_ID unichar_id)

Static Public Attributes

static const float kBadRating = 100000.0

Detailed Description

Definition at line 271 of file ratngs.h.


Constructor & Destructor Documentation

WERD_CHOICE::WERD_CHOICE ( const UNICHARSET *  unicharset) [inline]

Definition at line 276 of file ratngs.h.

    : unicharset_(unicharset) { this->init(8); }
WERD_CHOICE::WERD_CHOICE ( const UNICHARSET *  unicharset,
int  reserved 
) [inline]

Definition at line 278 of file ratngs.h.

    : unicharset_(unicharset) { this->init(reserved); }
WERD_CHOICE::WERD_CHOICE ( const char *  src_string,
const char *  src_lengths,
float  src_rating,
float  src_certainty,
uinT8  src_permuter,
const UNICHARSET &  unicharset 
) [inline]

Definition at line 280 of file ratngs.h.

    : unicharset_(&unicharset) {
    this->init(src_string, src_lengths, src_rating,
               src_certainty, src_permuter);
  }
WERD_CHOICE::WERD_CHOICE ( const char *  src_string,
const UNICHARSET &  unicharset 
)

WERD_CHOICE::WERD_CHOICE

Constructor to build a WERD_CHOICE from the given string. The function assumes that src_string is not NULL.

Definition at line 198 of file ratngs.cpp.

    : unicharset_(&unicharset){
  GenericVector<UNICHAR_ID> encoding;
  GenericVector<char> lengths;
  if (unicharset.encode_string(src_string, true, &encoding, &lengths, NULL)) {
    lengths.push_back('\0');
    STRING src_lengths = &lengths[0];
    this->init(src_string, src_lengths.string(), 0.0, 0.0, NO_PERM);
  } else {  // There must have been an invalid unichar in the string.
    this->init(8);
    this->make_bad();
  }
}
WERD_CHOICE::WERD_CHOICE ( const WERD_CHOICE &  word) [inline]

Definition at line 291 of file ratngs.h.

                                       : ELIST_LINK(word), unicharset_(word.unicharset_) {
    this->init(word.length());
    this->operator=(word);
  }
WERD_CHOICE::~WERD_CHOICE ( )

WERD_CHOICE::~WERD_CHOICE

Definition at line 254 of file ratngs.cpp.

                          {
  delete[] unichar_ids_;
  delete[] script_pos_;
  delete[] state_;
  delete[] certainties_;
}

Member Function Documentation

float WERD_CHOICE::adjust_factor ( ) const [inline]

Definition at line 303 of file ratngs.h.

                              {
    return adjust_factor_;
  }
void WERD_CHOICE::append_unichar_id ( UNICHAR_ID  unichar_id,
int  blob_count,
float  rating,
float  certainty 
)

append_unichar_id

Make sure there is enough space in the word for the new unichar id and call append_unichar_id_space_allocated().

Definition at line 446 of file ratngs.cpp.

                                   {
  if (length_ == reserved_) {
    this->double_the_size();
  }
  this->append_unichar_id_space_allocated(unichar_id, blob_count,
                                          rating, certainty);
}
void WERD_CHOICE::append_unichar_id_space_allocated ( UNICHAR_ID  unichar_id,
int  blob_count,
float  rating,
float  certainty 
) [inline]

This function assumes that there is enough space reserved in the WERD_CHOICE for adding another unichar. This is an efficient alternative to append_unichar_id().

Definition at line 449 of file ratngs.h.

                                     {
    assert(reserved_ > length_);
    length_++;
    this->set_unichar_id(unichar_id, blob_count,
                         rating, certainty, length_-1);
  }
BLOB_CHOICE_LIST * WERD_CHOICE::blob_choices ( int  index,
MATRIX *  ratings 
) const

Definition at line 268 of file ratngs.cpp.

                                                                            {
  MATRIX_COORD coord = MatrixCoord(index);
  BLOB_CHOICE_LIST* result = ratings->get(coord.col, coord.row);
  if (result == NULL) {
    result = new BLOB_CHOICE_LIST;
    ratings->put(coord.col, coord.row, result);
  }
  return result;
}
tesseract::ScriptPos WERD_CHOICE::BlobPosition ( int  index) const [inline]

Definition at line 319 of file ratngs.h.

                                                   {
    if (index < 0 || index >= length_)
      return tesseract::SP_NORMAL;
    return script_pos_[index];
  }
float WERD_CHOICE::certainty ( ) const [inline]

Definition at line 327 of file ratngs.h.

                                 {
    return certainty_;
  }
float WERD_CHOICE::certainty ( int  index) const [inline]

Definition at line 330 of file ratngs.h.

                                          {
    return certainties_[index];
  }
bool WERD_CHOICE::contains_unichar_id ( UNICHAR_ID  unichar_id) const

contains_unichar_id

Returns true if unichar_ids_ contain the given unichar_id, false otherwise.

Definition at line 304 of file ratngs.cpp.

                                                                 {
  for (int i = 0; i < length_; ++i) {
    if (unichar_ids_[i] == unichar_id) {
      return true;
    }
  }
  return false;
}
bool WERD_CHOICE::dangerous_ambig_found ( ) const [inline]

Definition at line 360 of file ratngs.h.

                                     {
    return dangerous_ambig_found_;
  }
const STRING WERD_CHOICE::debug_string ( ) const [inline]

Definition at line 502 of file ratngs.h.

                                    {
    STRING word_str;
    for (int i = 0; i < length_; ++i) {
      word_str += unicharset_->debug_str(unichar_ids_[i]);
      word_str += " ";
    }
    return word_str;
  }
void WERD_CHOICE::DisplaySegmentation ( TWERD *  word)

Definition at line 747 of file ratngs.cpp.

                                                 {
#ifndef GRAPHICS_DISABLED
  // Number of different colors to draw with.
  const int kNumColors = 6;
  static ScrollView *segm_window = NULL;
  // Check the state against the static prev_drawn_state.
  static GenericVector<int> prev_drawn_state;
  bool already_done = prev_drawn_state.size() == length_;
  if (!already_done) prev_drawn_state.init_to_size(length_, 0);
  for (int i = 0; i < length_; ++i) {
    if (prev_drawn_state[i] != state_[i]) {
      already_done = false;
    }
    prev_drawn_state[i] = state_[i];
  }
  if (already_done || word->blobs.empty()) return;

  // Create the window if needed.
  if (segm_window == NULL) {
    segm_window = new ScrollView("Segmentation", 5, 10, 500, 256,
                                 2000.0, 256.0, true);
  } else {
    segm_window->Clear();
  }

  TBOX bbox;
  int blob_index = 0;
  for (int c = 0; c < length_; ++c) {
    ScrollView::Color color =
        static_cast<ScrollView::Color>(c % kNumColors + 3);
    for (int i = 0; i < state_[c]; ++i, ++blob_index) {
      TBLOB* blob = word->blobs[blob_index];
      bbox += blob->bounding_box();
      blob->plot(segm_window, color, color);
    }
  }
  segm_window->ZoomToRectangle(bbox.left(), bbox.top(),
                               bbox.right(), bbox.bottom());
  segm_window->Update();
  window_wait(segm_window);
#endif
}
void WERD_CHOICE::double_the_size ( ) [inline]

Make more space in the unichar_ids_, script_pos_, state_ and certainties_ arrays: the reserved capacity is doubled, or set to 1 if nothing was reserved yet.

Definition at line 384 of file ratngs.h.

                                {
    if (reserved_ > 0) {
      unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy(
          reserved_, unichar_ids_);
      script_pos_ = GenericVector<tesseract::ScriptPos>::double_the_size_memcpy(
          reserved_, script_pos_);
      state_ = GenericVector<int>::double_the_size_memcpy(
          reserved_, state_);
      certainties_ = GenericVector<float>::double_the_size_memcpy(
          reserved_, certainties_);
      reserved_ *= 2;
    } else {
      unichar_ids_ = new UNICHAR_ID[1];
      script_pos_ = new tesseract::ScriptPos[1];
      state_ = new int[1];
      certainties_ = new float[1];
      reserved_ = 1;
    }
  }
void WERD_CHOICE::GetNonSuperscriptSpan ( int *  pstart,
int *  pend 
) const

Definition at line 375 of file ratngs.cpp.

                                                                    {
  int end = length();
  while (end > 0 &&
         unicharset_->get_isdigit(unichar_ids_[end - 1]) &&
         BlobPosition(end - 1) == tesseract::SP_SUPERSCRIPT) {
    end--;
  }
  int start = 0;
  while (start < end &&
         unicharset_->get_isdigit(unichar_ids_[start]) &&
         BlobPosition(start) == tesseract::SP_SUPERSCRIPT) {
    start++;
  }
  *pstart = start;
  *pend = end;
}
int WERD_CHOICE::GetTopScriptID ( ) const

Definition at line 653 of file ratngs.cpp.

                                      {
  int max_script = unicharset_->get_script_table_size();
  int *sid = new int[max_script];
  int x;
  for (x = 0; x < max_script; x++) sid[x] = 0;
  for (x = 0; x < length_; ++x) {
    int script_id = unicharset_->get_script(unichar_id(x));
    sid[script_id]++;
  }
  if (unicharset_->han_sid() != unicharset_->null_sid()) {
    // Add the Hiragana & Katakana counts to Han and zero them out.
    if (unicharset_->hiragana_sid() != unicharset_->null_sid()) {
      sid[unicharset_->han_sid()] += sid[unicharset_->hiragana_sid()];
      sid[unicharset_->hiragana_sid()] = 0;
    }
    if (unicharset_->katakana_sid() != unicharset_->null_sid()) {
      sid[unicharset_->han_sid()] += sid[unicharset_->katakana_sid()];
      sid[unicharset_->katakana_sid()] = 0;
    }
  }
  // Note that high script ID overrides lower one on a tie, thus biasing
  // towards non-Common script (if sorted that way in unicharset file).
  int max_sid = 0;
  for (x = 1; x < max_script; x++)
    if (sid[x] >= sid[max_sid]) max_sid = x;
  if (sid[max_sid] < length_ / 2)
    max_sid = unicharset_->null_sid();
  delete[] sid;
  return max_sid;
}
bool WERD_CHOICE::has_rtl_unichar_id ( ) const

has_rtl_unichar_id

Returns true if unichar_ids contain at least one "strongly" RTL unichar.

Definition at line 409 of file ratngs.cpp.

                                           {
  int i;
  for (i = 0; i < length_; ++i) {
    UNICHARSET::Direction dir = unicharset_->get_direction(unichar_ids_[i]);
    if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
        dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) {
      return true;
    }
  }
  return false;
}
void WERD_CHOICE::init ( const char *  src_string,
const char *  src_lengths,
float  src_rating,
float  src_certainty,
uinT8  src_permuter 
)

Helper function to build a WERD_CHOICE from the given string, fragment lengths, rating, certainty and permuter. The function assumes that src_string is not NULL. src_lengths argument could be NULL, in which case the unichars in src_string are assumed to all be of length 1.

WERD_CHOICE::init

Helper function to build a WERD_CHOICE from the given string, fragment lengths, rating, certainty and permuter.

The function assumes that src_string is not NULL. src_lengths argument could be NULL, in which case the unichars in src_string are assumed to all be of length 1.

Definition at line 223 of file ratngs.cpp.

                                           {
  int src_string_len = strlen(src_string);
  if (src_string_len == 0) {
    this->init(8);
  } else {
    this->init(src_lengths ? strlen(src_lengths): src_string_len);
    length_ = reserved_;
    int offset = 0;
    for (int i = 0; i < length_; ++i) {
      int unichar_length = src_lengths ? src_lengths[i] : 1;
      unichar_ids_[i] =
          unicharset_->unichar_to_id(src_string+offset, unichar_length);
      state_[i] = 1;
      certainties_[i] = src_certainty;
      offset += unichar_length;
    }
  }
  adjust_factor_ = 1.0f;
  rating_ = src_rating;
  certainty_ = src_certainty;
  permuter_ = src_permuter;
  dangerous_ambig_found_ = false;
}
void WERD_CHOICE::init ( int  reserved) [inline]

Initializes WERD_CHOICE - reserves the requested number of slots in the unichar_ids_, script_pos_, state_ and certainties_ arrays (or leaves them NULL if reserved is not positive). Sets other values to default (blank) values.

Definition at line 406 of file ratngs.h.

                                 {
    reserved_ = reserved;
    if (reserved > 0) {
      unichar_ids_ = new UNICHAR_ID[reserved];
      script_pos_ = new tesseract::ScriptPos[reserved];
      state_ = new int[reserved];
      certainties_ = new float[reserved];
    } else {
      unichar_ids_ = NULL;
      script_pos_ = NULL;
      state_ = NULL;
      certainties_ = NULL;
    }
    length_ = 0;
    adjust_factor_ = 1.0f;
    rating_ = 0.0;
    certainty_ = MAX_FLOAT32;
    min_x_height_ = 0.0f;
    max_x_height_ = MAX_FLOAT32;
    permuter_ = NO_PERM;
    unichars_in_script_order_ = false;  // Tesseract is strict left-to-right.
    dangerous_ambig_found_ = false;
  }
int WERD_CHOICE::length ( ) const [inline]

Definition at line 300 of file ratngs.h.

                            {
    return length_;
  }
void WERD_CHOICE::make_bad ( ) [inline]

Set the fields in this choice to be default (bad) values.

Definition at line 440 of file ratngs.h.

                         {
    length_ = 0;
    rating_ = kBadRating;
    certainty_ = -MAX_FLOAT32;
  }
MATRIX_COORD WERD_CHOICE::MatrixCoord ( int  index) const

Definition at line 280 of file ratngs.cpp.

                                                     {
  int col = 0;
  for (int i = 0; i < index; ++i)
    col += state_[i];
  int row = col + state_[index] - 1;
  return MATRIX_COORD(col, row);
}
float WERD_CHOICE::max_x_height ( ) const [inline]

Definition at line 336 of file ratngs.h.

                                    {
    return max_x_height_;
  }
float WERD_CHOICE::min_x_height ( ) const [inline]

Definition at line 333 of file ratngs.h.

                                    {
    return min_x_height_;
  }
WERD_CHOICE & WERD_CHOICE::operator+= ( const WERD_CHOICE &  second)

WERD_CHOICE::operator+=

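Concatenate a second word choice onto the end of this one. The ratings are added and the certainty is the minimum of the two. The larger adjust factor is kept, and the dangerous-ambiguity flag is set if it is set on either word. If this word's permuter is NO_PERM it takes the second word's permuter; otherwise, if the second permuter is not NO_PERM and the permuters are NOT the same, the permuter is set to COMPOUND_PERM.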

Definition at line 463 of file ratngs.cpp.

                                                                 {
  ASSERT_HOST(unicharset_ == second.unicharset_);
  while (reserved_ < length_ + second.length()) {
    this->double_the_size();
  }
  const UNICHAR_ID *other_unichar_ids = second.unichar_ids();
  for (int i = 0; i < second.length(); ++i) {
    unichar_ids_[length_ + i] = other_unichar_ids[i];
    state_[length_ + i] = second.state_[i];
    certainties_[length_ + i] = second.certainties_[i];
    script_pos_[length_ + i] = second.BlobPosition(i);
  }
  length_ += second.length();
  if (second.adjust_factor_ > adjust_factor_)
    adjust_factor_ = second.adjust_factor_;
  rating_ += second.rating();  // add ratings
  if (second.certainty() < certainty_) // take min
    certainty_ = second.certainty();
  if (second.dangerous_ambig_found_)
    dangerous_ambig_found_ = true;
  if (permuter_ == NO_PERM) {
    permuter_ = second.permuter();
  } else if (second.permuter() != NO_PERM &&
             second.permuter() != permuter_) {
    permuter_ = COMPOUND_PERM;
  }
  return *this;
}
WERD_CHOICE & WERD_CHOICE::operator= ( const WERD_CHOICE &  source)

WERD_CHOICE::operator=

Allocate enough memory to hold a copy of source and copy over all the information from source to this WERD_CHOICE.

Definition at line 499 of file ratngs.cpp.

                                                             {
  while (reserved_ < source.length()) {
    this->double_the_size();
  }

  unicharset_ = source.unicharset_;
  const UNICHAR_ID *other_unichar_ids = source.unichar_ids();
  for (int i = 0; i < source.length(); ++i) {
    unichar_ids_[i] = other_unichar_ids[i];
    state_[i] = source.state_[i];
    certainties_[i] = source.certainties_[i];
    script_pos_[i] = source.BlobPosition(i);
  }
  length_ = source.length();
  adjust_factor_ = source.adjust_factor_;
  rating_ = source.rating();
  certainty_ = source.certainty();
  min_x_height_ = source.min_x_height();
  max_x_height_ = source.max_x_height();
  permuter_ = source.permuter();
  dangerous_ambig_found_ = source.dangerous_ambig_found_;
  return *this;
}
uinT8 WERD_CHOICE::permuter ( ) const [inline]

Definition at line 343 of file ratngs.h.

                                {
    return permuter_;
  }
const char * WERD_CHOICE::permuter_name ( ) const

Definition at line 261 of file ratngs.cpp.

                                             {
  return kPermuterTypeNames[permuter_];
}
const char * WERD_CHOICE::permuter_name ( uinT8  permuter) [static]

Definition at line 174 of file ratngs.cpp.

                                                     {
  return kPermuterTypeNames[permuter];
}
void WERD_CHOICE::print ( const char *  msg) const

WERD_CHOICE::print

Print WERD_CHOICE to stdout.

Definition at line 710 of file ratngs.cpp.

                                             {
  tprintf("%s : ", msg);
  for (int i = 0; i < length_; ++i) {
    tprintf("%s", unicharset_->id_to_unichar(unichar_ids_[i]));
  }
  tprintf(" : R=%g, C=%g, F=%g, Perm=%d, xht=[%g,%g], ambig=%d\n",
          rating_, certainty_, adjust_factor_, permuter_,
          min_x_height_, max_x_height_, dangerous_ambig_found_);
  tprintf("pos");
  for (int i = 0; i < length_; ++i) {
    tprintf("\t%s", ScriptPosToString(script_pos_[i]));
  }
  tprintf("\nstr");
  for (int i = 0; i < length_; ++i) {
    tprintf("\t%s", unicharset_->id_to_unichar(unichar_ids_[i]));
  }
  tprintf("\nstate:");
  for (int i = 0; i < length_; ++i) {
    tprintf("\t%d ", state_[i]);
  }
  tprintf("\nC");
  for (int i = 0; i < length_; ++i) {
    tprintf("\t%.3f", certainties_[i]);
  }
  tprintf("\n");
}
void WERD_CHOICE::print ( ) const [inline]

Definition at line 563 of file ratngs.h.

{ this->print(""); }
void WERD_CHOICE::print_state ( const char *  msg) const

Definition at line 738 of file ratngs.cpp.

                                                   {
  tprintf("%s", msg);
  for (int i = 0; i < length_; ++i)
    tprintf(" %d", state_[i]);
  tprintf("\n");
}
void WERD_CHOICE::punct_stripped ( int *  start,
int *  end 
) const

punct_stripped

Returns the half-open interval of unichar_id indices [start, end) which enclose the core portion of this word -- the part after stripping punctuation from the left and right.

Definition at line 361 of file ratngs.cpp.

                                                           {
  *start = 0;
  *end = length() - 1;
  while (*start < length() &&
         unicharset()->get_ispunctuation(unichar_id(*start))) {
    (*start)++;
  }
  while (*end > -1 &&
         unicharset()->get_ispunctuation(unichar_id(*end))) {
    (*end)--;
  }
  (*end)++;
}
float WERD_CHOICE::rating ( ) const [inline]

Definition at line 324 of file ratngs.h.

                              {
    return rating_;
  }
void WERD_CHOICE::remove_last_unichar_id ( ) [inline]

Definition at line 480 of file ratngs.h.

{ --length_; }
void WERD_CHOICE::remove_unichar_id ( int  index) [inline]

Definition at line 481 of file ratngs.h.

                                           {
    this->remove_unichar_ids(index, 1);
  }
void WERD_CHOICE::remove_unichar_ids ( int  start,
int  num 
)

remove_unichar_ids

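Removes num unichar ids starting from index start from unichar_ids_ (together with the matching script_pos_, state_ and certainties_ entries) and updates length_ to reflect this change. The blob counts (state_) of the removed entries are added to a neighbouring entry to account for the merged blobs. Note: this function does not modify rating_ and certainty_.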

Definition at line 320 of file ratngs.cpp.

                                                       {
  ASSERT_HOST(start >= 0 && start + num <= length_);
  // Accumulate the states to account for the merged blobs.
  for (int i = 0; i < num; ++i) {
    if (start > 0)
      state_[start - 1] += state_[start + i];
    else if (start + num < length_)
      state_[start + num] += state_[start + i];
  }
  for (int i = start; i + num < length_; ++i) {
    unichar_ids_[i] = unichar_ids_[i + num];
    script_pos_[i] = script_pos_[i + num];
    state_[i] = state_[i + num];
    certainties_[i] = certainties_[i + num];
  }
  length_ -= num;
}
void WERD_CHOICE::reverse_and_mirror_unichar_ids ( )

reverse_and_mirror_unichar_ids

Reverses and mirrors unichars in unichar_ids.

Definition at line 343 of file ratngs.cpp.

                                                 {
  for (int i = 0; i < length_ / 2; ++i) {
    UNICHAR_ID tmp_id = unichar_ids_[i];
    unichar_ids_[i] = unicharset_->get_mirror(unichar_ids_[length_-1-i]);
    unichar_ids_[length_-1-i] = unicharset_->get_mirror(tmp_id);
  }
  if (length_ % 2 != 0) {
    unichar_ids_[length_/2] = unicharset_->get_mirror(unichar_ids_[length_/2]);
  }
}
ScriptPos WERD_CHOICE::ScriptPositionOf ( bool  print_debug,
const UNICHARSET &  unicharset,
const TBOX &  blob_box,
UNICHAR_ID  unichar_id 
) [static]

Definition at line 615 of file ratngs.cpp.

                                                               {
  ScriptPos retval = tesseract::SP_NORMAL;
  int top = blob_box.top();
  int bottom = blob_box.bottom();
  int min_bottom, max_bottom, min_top, max_top;
  unicharset.get_top_bottom(unichar_id,
                            &min_bottom, &max_bottom,
                            &min_top, &max_top);

  int sub_thresh_top = min_top - kMinSubscriptOffset;
  int sub_thresh_bot = kBlnBaselineOffset - kMinSubscriptOffset;
  int sup_thresh_bot = max_bottom + kMinSuperscriptOffset;
  if (bottom <= kMaxDropCapBottom) {
    retval = tesseract::SP_DROPCAP;
  } else if (top < sub_thresh_top && bottom < sub_thresh_bot) {
    retval = tesseract::SP_SUBSCRIPT;
  } else if (bottom > sup_thresh_bot) {
    retval = tesseract::SP_SUPERSCRIPT;
  }

  if (print_debug) {
    const char *pos = ScriptPosToString(retval);
    tprintf("%s Character %s[bot:%d top: %d]  "
            "bot_range[%d,%d]  top_range[%d, %d] "
            "sub_thresh[bot:%d top:%d]  sup_thresh_bot %d\n",
            pos, unicharset.id_to_unichar(unichar_id),
            bottom, top,
            min_bottom, max_bottom, min_top, max_top,
            sub_thresh_bot, sub_thresh_top,
            sup_thresh_bot);
  }
  return retval;
}
void WERD_CHOICE::set_adjust_factor ( float  factor) [inline]

Definition at line 306 of file ratngs.h.

                                       {
    adjust_factor_ = factor;
  }
void WERD_CHOICE::set_blob_choice ( int  index,
int  blob_count,
const BLOB_CHOICE *  blob_choice 
)

Definition at line 290 of file ratngs.cpp.

                                                                  {
  unichar_ids_[index] = blob_choice->unichar_id();
  script_pos_[index] = tesseract::SP_NORMAL;
  state_[index] = blob_count;
  certainties_[index] = blob_choice->certainty();
}
void WERD_CHOICE::set_certainty ( float  new_val) [inline]

Definition at line 369 of file ratngs.h.

                                           {
    certainty_ = new_val;
  }
void WERD_CHOICE::set_dangerous_ambig_found_ ( bool  value) [inline]

Definition at line 363 of file ratngs.h.

                                              {
    dangerous_ambig_found_ = value;
  }
void WERD_CHOICE::set_length ( int  len) [inline]

Definition at line 378 of file ratngs.h.

                                  {
    ASSERT_HOST(reserved_ >= len);
    length_ = len;
  }
void WERD_CHOICE::set_permuter ( uinT8  perm) [inline]

Definition at line 372 of file ratngs.h.

                                       {
    permuter_ = perm;
  }
void WERD_CHOICE::set_rating ( float  new_val) [inline]

Definition at line 366 of file ratngs.h.

                                        {
    rating_ = new_val;
  }
void WERD_CHOICE::set_unichar_id ( UNICHAR_ID  unichar_id,
int  blob_count,
float  rating,
float  certainty,
int  index 
) [inline]

Definition at line 461 of file ratngs.h.

                                                                       {
    assert(index < length_);
    unichar_ids_[index] = unichar_id;
    state_[index] = blob_count;
    certainties_[index] = certainty;
    script_pos_[index] = tesseract::SP_NORMAL;
    rating_ += rating;
    if (certainty < certainty_) {
      certainty_ = certainty;
    }
  }
void WERD_CHOICE::set_unichar_id ( UNICHAR_ID  unichar_id,
int  index 
) [inline]

Definition at line 356 of file ratngs.h.

                                                               {
    assert(index < length_);
    unichar_ids_[index] = unichar_id;
  }
bool WERD_CHOICE::set_unichars_in_script_order ( bool  in_script_order) [inline]

Definition at line 514 of file ratngs.h.

                                                          {
    return unichars_in_script_order_ = in_script_order;
  }
void WERD_CHOICE::set_x_heights ( float  min_height,
float  max_height 
) [inline]

Definition at line 339 of file ratngs.h.

                                                                {
    min_x_height_ = min_height;
    max_x_height_ = max_height;
  }
void WERD_CHOICE::SetAllScriptPositions ( tesseract::ScriptPos  position)

Definition at line 609 of file ratngs.cpp.

                                                                   {
  for (int i = 0; i < length_; ++i)
    script_pos_[i] = position;
}
void WERD_CHOICE::SetScriptPositions ( const tesseract::ScriptPos *  positions,
int  length 
)

Definition at line 599 of file ratngs.cpp.

                                                 {
  ASSERT_HOST(length == length_);
  if (positions != script_pos_) {
    delete [] script_pos_;
    script_pos_ = new ScriptPos[length];
    memcpy(script_pos_, positions, sizeof(positions[0]) * length);
  }
}
void WERD_CHOICE::SetScriptPositions ( bool  small_caps,
TWERD *  word 
)

Definition at line 528 of file ratngs.cpp.

                                                                 {
  // Since WERD_CHOICE isn't supposed to depend on a Tesseract,
  // we don't have easy access to the flags Tesseract stores.  Therefore, debug
  // for this module is hard compiled in.
  int debug = 0;

  // Initialize to normal.
  for (int i = 0; i < length_; ++i)
    script_pos_[i] = tesseract::SP_NORMAL;
  if (word->blobs.empty() || word->NumBlobs() != TotalOfStates()) {
    return;
  }

  int position_counts[4];
  for (int i = 0; i < 4; i++) {
    position_counts[i] = 0;
  }

  int chunk_index = 0;
  for (int blob_index = 0; blob_index < length_; ++blob_index, ++chunk_index) {
    TBLOB* tblob = word->blobs[chunk_index];
    int uni_id = unichar_id(blob_index);
    TBOX blob_box = tblob->bounding_box();
    if (state_ != NULL) {
      for (int i = 1; i <  state_[blob_index]; ++i) {
        ++chunk_index;
        tblob = word->blobs[chunk_index];
        blob_box += tblob->bounding_box();
      }
    }
    script_pos_[blob_index] = ScriptPositionOf(false, *unicharset_, blob_box,
                                               uni_id);
    if (small_caps && script_pos_[blob_index] != tesseract::SP_DROPCAP) {
      script_pos_[blob_index] = tesseract::SP_NORMAL;
    }
    position_counts[script_pos_[blob_index]]++;
  }
  // If almost everything looks like a superscript or subscript,
  // we most likely just got the baseline wrong.
  if (position_counts[tesseract::SP_SUBSCRIPT] > 0.75 * length_ ||
      position_counts[tesseract::SP_SUPERSCRIPT] > 0.75 * length_) {
    if (debug >= 2) {
      tprintf("Most characters of %s are subscript or superscript.\n"
              "That seems wrong, so I'll assume we got the baseline wrong\n",
              unichar_string().string());
    }
    for (int i = 0; i < length_; i++) {
      ScriptPos sp = script_pos_[i];
      if (sp == tesseract::SP_SUBSCRIPT || sp == tesseract::SP_SUPERSCRIPT) {
        position_counts[sp]--;
        position_counts[tesseract::SP_NORMAL]++;
        script_pos_[i] = tesseract::SP_NORMAL;
      }
    }
  }

  if ((debug >= 1 && position_counts[tesseract::SP_NORMAL] < length_) ||
      debug >= 2) {
    tprintf("SetScriptPosition on %s\n", unichar_string().string());
    int chunk_index = 0;
    for (int blob_index = 0; blob_index < length_; ++blob_index) {
      if (debug >= 2 || script_pos_[blob_index] != tesseract::SP_NORMAL) {
        TBLOB* tblob = word->blobs[chunk_index];
        ScriptPositionOf(true, *unicharset_, tblob->bounding_box(),
                         unichar_id(blob_index));
      }
      chunk_index += state_ != NULL ? state_[blob_index] : 1;
    }
  }
}
WERD_CHOICE WERD_CHOICE::shallow_copy ( int  start,
int  end 
) const

Definition at line 392 of file ratngs.cpp.

                                                              {
  ASSERT_HOST(start >= 0 && start <= length_);
  ASSERT_HOST(end >= 0 && end <= length_);
  if (end < start) { end = start; }
  WERD_CHOICE retval(unicharset_, end - start);
  for (int i = start; i < end; i++) {
    retval.append_unichar_id_space_allocated(
        unichar_ids_[i], state_[i], 0.0f, certainties_[i]);
  }
  return retval;
}
int WERD_CHOICE::state ( int  index) const [inline]

Definition at line 316 of file ratngs.h.

                                    {
    return state_[index];
  }
void WERD_CHOICE::string_and_lengths ( STRING *  word_str,
STRING *  word_lengths_str 
) const

string_and_lengths

Populates the given word_str with unichars from unichar_ids and, if word_lengths_str is not NULL, populates word_lengths_str with the corresponding unichar lengths.

Definition at line 427 of file ratngs.cpp.

                                                                     {
  *word_str = "";
  if (word_lengths_str != NULL) *word_lengths_str = "";
  for (int i = 0; i < length_; ++i) {
    const char *ch = unicharset_->id_to_unichar_ext(unichar_ids_[i]);
    *word_str += ch;
    if (word_lengths_str != NULL) {
      *word_lengths_str += strlen(ch);
    }
  }
}
int WERD_CHOICE::TotalOfStates ( ) const

Definition at line 697 of file ratngs.cpp.

                                     {
  int total_chunks = 0;
  for (int i = 0; i < length_; ++i) {
    total_chunks += state_[i];
  }
  return total_chunks;
}
UNICHAR_ID WERD_CHOICE::unichar_id ( int  index) const [inline]

Definition at line 312 of file ratngs.h.

                                                {
    assert(index < length_);
    return unichar_ids_[index];
  }
const UNICHAR_ID* WERD_CHOICE::unichar_ids ( ) const [inline]

Definition at line 309 of file ratngs.h.

                                               {
    return unichar_ids_;
  }
const STRING& WERD_CHOICE::unichar_lengths ( ) const [inline]

Definition at line 531 of file ratngs.h.

                                        {
    this->string_and_lengths(&unichar_string_, &unichar_lengths_);
    return unichar_lengths_;
  }
const STRING& WERD_CHOICE::unichar_string ( ) const [inline]

Definition at line 524 of file ratngs.h.

                                       {
    this->string_and_lengths(&unichar_string_, &unichar_lengths_);
    return unichar_string_;
  }
bool WERD_CHOICE::unichars_in_script_order ( ) const [inline]

Definition at line 518 of file ratngs.h.

                                        {
    return unichars_in_script_order_;
  }
const UNICHARSET* WERD_CHOICE::unicharset ( ) const [inline]

Definition at line 297 of file ratngs.h.

                                       {
    return unicharset_;
  }
void WERD_CHOICE::UpdateStateForSplit ( int  blob_position)

Definition at line 685 of file ratngs.cpp.

                                                       {
  int total_chunks = 0;
  for (int i = 0; i < length_; ++i) {
    total_chunks += state_[i];
    if (total_chunks > blob_position) {
      ++state_[i];
      return;
    }
  }
}

Member Data Documentation

const float WERD_CHOICE::kBadRating = 100000.0 [static]

Definition at line 273 of file ratngs.h.


The documentation for this class was generated from the following files:
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines