tesseract 3.04.01

WERD_RES Class Reference

#include <pageres.h>

Inheritance diagram for WERD_RES:
ELIST_LINK

List of all members.

Public Member Functions

 WERD_RES ()
 WERD_RES (WERD *the_word)
 WERD_RES (const WERD_RES &source)
 ~WERD_RES ()
const char * BestUTF8 (int blob_index, bool in_rtl_context) const
const char * RawUTF8 (int blob_index) const
UNICHARSET::Direction SymbolDirection (int blob_index) const
bool AnyRtlCharsInWord () const
bool AnyLtrCharsInWord () const
bool UnicharsInReadingOrder () const
void InitNonPointers ()
void InitPointers ()
void Clear ()
void ClearResults ()
void ClearWordChoices ()
void ClearRatings ()
WERD_RES & operator= (const WERD_RES &source)
void CopySimpleFields (const WERD_RES &source)
void InitForRetryRecognition (const WERD_RES &source)
bool SetupForRecognition (const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block)
void SetupBasicsFromChoppedWord (const UNICHARSET &unicharset_in)
void SetupFake (const UNICHARSET &uch)
void SetupWordScript (const UNICHARSET &unicharset_in)
void SetupBlamerBundle ()
void SetupBlobWidthsAndGaps ()
void InsertSeam (int blob_number, SEAM *seam)
bool AlternativeChoiceAdjustmentsWorseThan (float threshold) const
bool IsAmbiguous ()
bool StatesAllValid ()
void DebugWordChoices (bool debug, const char *word_to_debug)
void DebugTopChoice (const char *msg) const
void FilterWordChoices (int debug_level)
void ComputeAdaptionThresholds (float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
bool LogNewRawChoice (WERD_CHOICE *word_choice)
bool LogNewCookedChoice (int max_num_choices, bool debug, WERD_CHOICE *word_choice)
void PrintBestChoices () const
int GetBlobsWidth (int start_blob, int last_blob)
int GetBlobsGap (int blob_index)
BLOB_CHOICE * GetBlobChoice (int index) const
BLOB_CHOICE_LIST * GetBlobChoices (int index) const
void ConsumeWordResults (WERD_RES *word)
void ReplaceBestChoice (WERD_CHOICE *choice)
void RebuildBestState ()
void CloneChoppedToRebuild ()
void SetupBoxWord ()
void SetScriptPositions ()
void SetAllScriptPositions (tesseract::ScriptPos position)
void FakeClassifyWord (int blob_count, BLOB_CHOICE **choices)
void FakeWordFromRatings ()
void BestChoiceToCorrectText ()
bool ConditionalBlobMerge (TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb)
void MergeAdjacentBlobs (int index)
UNICHAR_ID BothQuotes (UNICHAR_ID id1, UNICHAR_ID id2)
void fix_quotes ()
UNICHAR_ID BothHyphens (UNICHAR_ID id1, UNICHAR_ID id2)
bool HyphenBoxesOverlap (const TBOX &box1, const TBOX &box2)
void fix_hyphens ()
UNICHAR_ID BothSpaces (UNICHAR_ID id1, UNICHAR_ID id2)
void merge_tess_fails ()
void copy_on (WERD_RES *word_res)
bool PiecesAllNatural (int start, int count) const

Static Public Member Functions

static WERD_RES * deep_copy (const WERD_RES *src)

Public Attributes

WERD * word
tesseract::BoxWord * bln_boxes
ROW * blob_row
DENORM denorm
const UNICHARSET * uch_set
TWERD * chopped_word
GenericVector< SEAM * > seam_array
GenericVector< int > blob_widths
GenericVector< int > blob_gaps
MATRIX * ratings
WERD_CHOICE * best_choice
WERD_CHOICE * raw_choice
WERD_CHOICE_LIST best_choices
BlamerBundle * blamer_bundle
TWERD * rebuild_word
tesseract::BoxWord * box_word
GenericVector< int > best_state
GenericVector< STRING > correct_text
tesseract::Tesseract * tesseract
WERD_CHOICE * ep_choice
REJMAP reject_map
BOOL8 tess_failed
BOOL8 tess_accepted
BOOL8 tess_would_adapt
BOOL8 done
bool small_caps
bool odd_size
inT8 italic
inT8 bold
const FontInfo * fontinfo
const FontInfo * fontinfo2
inT8 fontinfo_id_count
inT8 fontinfo_id2_count
BOOL8 guessed_x_ht
BOOL8 guessed_caps_ht
CRUNCH_MODE unlv_crunch_mode
float x_height
float caps_height
float baseline_shift
BOOL8 combination
BOOL8 part_of_combo
BOOL8 reject_spaces

Detailed Description

Definition at line 155 of file pageres.h.


Constructor & Destructor Documentation

WERD_RES::WERD_RES ( ) [inline]

Definition at line 319 of file pageres.h.

WERD_RES::WERD_RES ( WERD * the_word) [inline]

Definition at line 323 of file pageres.h.

                           {
    InitNonPointers();
    InitPointers();
    word = the_word;
  }
WERD_RES::WERD_RES ( const WERD_RES & source) [inline]

Definition at line 330 of file pageres.h.

                                   : ELIST_LINK(source) {
    InitPointers();
    *this = source;            // see operator=
  }
WERD_RES::~WERD_RES ( )

Definition at line 1084 of file pageres.cpp.

                     {
  Clear();
}

Member Function Documentation

bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan ( float  threshold) const

Definition at line 430 of file pageres.cpp.

                                                                          {
  // The choices are not changed by this iteration.
  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
  for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) {
    WERD_CHOICE* choice = wc_it.data();
    if (choice->adjust_factor() <= threshold)
      return false;
  }
  return true;
}
bool WERD_RES::AnyLtrCharsInWord ( ) const [inline]

Definition at line 389 of file pageres.h.

                                 {
    if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
      return false;
    for (int id = 0; id < best_choice->length(); id++) {
      int unichar_id = best_choice->unichar_id(id);
      if (unichar_id < 0 || unichar_id >= uch_set->size())
        continue;  // Ignore illegal chars.
      UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
      if (dir == UNICHARSET::U_LEFT_TO_RIGHT)
        return true;
    }
    return false;
  }
bool WERD_RES::AnyRtlCharsInWord ( ) const [inline]

Definition at line 372 of file pageres.h.

                                 {
    if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
      return false;
    for (int id = 0; id < best_choice->length(); id++) {
      int unichar_id = best_choice->unichar_id(id);
      if (unichar_id < 0 || unichar_id >= uch_set->size())
        continue;  // Ignore illegal chars.
      UNICHARSET::Direction dir =
          uch_set->get_direction(unichar_id);
      if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
          dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC ||
          dir == UNICHARSET::U_ARABIC_NUMBER)
        return true;
    }
    return false;
  }
void WERD_RES::BestChoiceToCorrectText ( )

Definition at line 917 of file pageres.cpp.

                                       {
  correct_text.clear();
  ASSERT_HOST(best_choice != NULL);
  for (int i = 0; i < best_choice->length(); ++i) {
    UNICHAR_ID choice_id = best_choice->unichar_id(i);
    const char* blob_choice = uch_set->id_to_unichar(choice_id);
    correct_text.push_back(STRING(blob_choice));
  }
}
const char* WERD_RES::BestUTF8 ( int  blob_index,
bool  in_rtl_context 
) const [inline]

Definition at line 342 of file pageres.h.

                                                                  {
    if (blob_index < 0 || best_choice == NULL ||
        blob_index >= best_choice->length())
      return NULL;
    UNICHAR_ID id = best_choice->unichar_id(blob_index);
    if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
      return NULL;
    UNICHAR_ID mirrored = uch_set->get_mirror(id);
    if (in_rtl_context && mirrored > 0 && mirrored != INVALID_UNICHAR_ID)
      id = mirrored;
    return uch_set->id_to_unichar_ext(id);
  }
UNICHAR_ID WERD_RES::BothHyphens ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1024 of file pageres.cpp.

                                                               {
  const char *ch = uch_set->id_to_unichar(id1);
  const char *next_ch = uch_set->id_to_unichar(id2);
  if (strlen(ch) == 1 && strlen(next_ch) == 1 &&
      (*ch == '-' || *ch == '~') && (*next_ch == '-' || *next_ch == '~'))
    return uch_set->unichar_to_id("-");
  return INVALID_UNICHAR_ID;
}
UNICHAR_ID WERD_RES::BothQuotes ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1002 of file pageres.cpp.

                                                              {
  const char *ch = uch_set->id_to_unichar(id1);
  const char *next_ch = uch_set->id_to_unichar(id2);
  if (is_simple_quote(ch, strlen(ch)) &&
      is_simple_quote(next_ch, strlen(next_ch)))
    return uch_set->unichar_to_id("\"");
  return INVALID_UNICHAR_ID;
}
UNICHAR_ID WERD_RES::BothSpaces ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1053 of file pageres.cpp.

                                                              {
  if (id1 == id2 && id1 == uch_set->unichar_to_id(" "))
    return id1;
  else
    return INVALID_UNICHAR_ID;
}
void WERD_RES::Clear ( )

Definition at line 1130 of file pageres.cpp.

                     {
  if (word != NULL && combination) {
    delete word;
  }
  word = NULL;
  delete blamer_bundle;
  blamer_bundle = NULL;
  ClearResults();
}
void WERD_RES::ClearRatings ( )

Definition at line 1185 of file pageres.cpp.

                            {
  if (ratings != NULL) {
    ratings->delete_matrix_pointers();
    delete ratings;
    ratings = NULL;
  }
}
void WERD_RES::ClearResults ( )

Definition at line 1140 of file pageres.cpp.

                            {
  done = false;
  fontinfo = NULL;
  fontinfo2 = NULL;
  fontinfo_id_count = 0;
  fontinfo_id2_count = 0;
  if (bln_boxes != NULL) {
    delete bln_boxes;
    bln_boxes = NULL;
  }
  blob_row = NULL;
  if (chopped_word != NULL) {
    delete chopped_word;
    chopped_word = NULL;
  }
  if (rebuild_word != NULL) {
    delete rebuild_word;
    rebuild_word = NULL;
  }
  if (box_word != NULL) {
    delete box_word;
    box_word = NULL;
  }
  best_state.clear();
  correct_text.clear();
  seam_array.delete_data_pointers();
  seam_array.clear();
  blob_widths.clear();
  blob_gaps.clear();
  ClearRatings();
  ClearWordChoices();
  if (blamer_bundle != NULL) blamer_bundle->ClearResults();
}
void WERD_RES::ClearWordChoices ( )

Definition at line 1173 of file pageres.cpp.

                                {
  best_choice = NULL;
  if (raw_choice != NULL) {
    delete raw_choice;
    raw_choice = NULL;
  }
  best_choices.clear();
  if (ep_choice != NULL) {
    delete ep_choice;
    ep_choice = NULL;
  }
}
void WERD_RES::CloneChoppedToRebuild ( )

Definition at line 828 of file pageres.cpp.

                                     {
  if (rebuild_word != NULL)
    delete rebuild_word;
  rebuild_word = new TWERD(*chopped_word);
  SetupBoxWord();
  int word_len = box_word->length();
  best_state.reserve(word_len);
  correct_text.reserve(word_len);
  for (int i = 0; i < word_len; ++i) {
    best_state.push_back(1);
    correct_text.push_back(STRING(""));
  }
}
void WERD_RES::ComputeAdaptionThresholds ( float  certainty_scale,
float  min_rating,
float  max_rating,
float  rating_margin,
float *  thresholds 
)

Definition at line 553 of file pageres.cpp.

                                                            {
  int chunk = 0;
  int end_chunk = best_choice->state(0);
  int end_raw_chunk = raw_choice->state(0);
  int raw_blob = 0;
  for (int i = 0; i < best_choice->length(); i++, thresholds++) {
    float avg_rating = 0.0f;
    int num_error_chunks = 0;

    // For each chunk in best choice blob i, count non-matching raw results.
    while (chunk < end_chunk) {
      if (chunk >= end_raw_chunk) {
        ++raw_blob;
        end_raw_chunk += raw_choice->state(raw_blob);
      }
      if (best_choice->unichar_id(i) !=
          raw_choice->unichar_id(raw_blob)) {
        avg_rating += raw_choice->certainty(raw_blob);
        ++num_error_chunks;
      }
      ++chunk;
    }

    if (num_error_chunks > 0) {
      avg_rating /= num_error_chunks;
      *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin);
    } else {
      *thresholds = max_rating;
    }

    if (*thresholds > max_rating)
      *thresholds = max_rating;
    if (*thresholds < min_rating)
      *thresholds = min_rating;
  }
}
bool WERD_RES::ConditionalBlobMerge ( TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *  class_cb,
TessResultCallback2< bool, const TBOX &, const TBOX & > *  box_cb 
)

Definition at line 932 of file pageres.cpp.

                                                                 {
  ASSERT_HOST(best_choice->length() == 0 || ratings != NULL);
  bool modified = false;
  for (int i = 0; i + 1 < best_choice->length(); ++i) {
    UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i),
                                      best_choice->unichar_id(i+1));
    if (new_id != INVALID_UNICHAR_ID &&
        (box_cb == NULL || box_cb->Run(box_word->BlobBox(i),
                                       box_word->BlobBox(i + 1)))) {
      // Raw choice should not be fixed.
      best_choice->set_unichar_id(new_id, i);
      modified = true;
      MergeAdjacentBlobs(i);
      const MATRIX_COORD& coord = best_choice->MatrixCoord(i);
      if (!coord.Valid(*ratings)) {
        ratings->IncreaseBandSize(coord.row + 1 - coord.col);
      }
      BLOB_CHOICE_LIST* blob_choices = GetBlobChoices(i);
      if (FindMatchingChoice(new_id, blob_choices) == NULL) {
        // Insert a fake result.
        BLOB_CHOICE* blob_choice = new BLOB_CHOICE;
        blob_choice->set_unichar_id(new_id);
        BLOB_CHOICE_IT bc_it(blob_choices);
        bc_it.add_before_then_move(blob_choice);
      }
    }
  }
  delete class_cb;
  delete box_cb;
  return modified;
}
void WERD_RES::ConsumeWordResults ( WERD_RES * word)

Definition at line 757 of file pageres.cpp.

                                                {
  denorm = word->denorm;
  blob_row = word->blob_row;
  MovePointerData(&chopped_word, &word->chopped_word);
  MovePointerData(&rebuild_word, &word->rebuild_word);
  MovePointerData(&box_word, &word->box_word);
  seam_array.delete_data_pointers();
  seam_array = word->seam_array;
  word->seam_array.clear();
  best_state.move(&word->best_state);
  correct_text.move(&word->correct_text);
  blob_widths.move(&word->blob_widths);
  blob_gaps.move(&word->blob_gaps);
  if (ratings != NULL) ratings->delete_matrix_pointers();
  MovePointerData(&ratings, &word->ratings);
  best_choice = word->best_choice;
  MovePointerData(&raw_choice, &word->raw_choice);
  best_choices.clear();
  WERD_CHOICE_IT wc_it(&best_choices);
  wc_it.add_list_after(&word->best_choices);
  reject_map = word->reject_map;
  if (word->blamer_bundle != NULL) {
    assert(blamer_bundle != NULL);
    blamer_bundle->CopyResults(*(word->blamer_bundle));
  }
  CopySimpleFields(*word);
}
void WERD_RES::copy_on ( WERD_RES * word_res) [inline]

Definition at line 641 of file pageres.h.

                                   {  //from this word
    word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL));
    word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL));
    word->copy_on(word_res->word);
  }
void WERD_RES::CopySimpleFields ( const WERD_RES & source)
void WERD_RES::DebugTopChoice ( const char *  msg) const

Definition at line 490 of file pageres.cpp.

                                                   {
  tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ",
          tess_accepted, tess_would_adapt, done);
  if (best_choice == NULL)
    tprintf("<Null choice>\n");
  else
    best_choice->print(msg);
}
void WERD_RES::DebugWordChoices ( bool  debug,
const char *  word_to_debug 
)

Definition at line 471 of file pageres.cpp.

                                                                     {
  if (debug ||
      (word_to_debug != NULL && *word_to_debug != '\0' && best_choice != NULL &&
       best_choice->unichar_string() == STRING(word_to_debug))) {
    if (raw_choice != NULL)
      raw_choice->print("\nBest Raw Choice");

    WERD_CHOICE_IT it(&best_choices);
    int index = 0;
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
      WERD_CHOICE* choice = it.data();
      STRING label;
      label.add_str_int("\nCooked Choice #", index);
      choice->print(label.string());
    }
  }
}
static WERD_RES* WERD_RES::deep_copy ( const WERD_RES * src) [inline, static]

Definition at line 630 of file pageres.h.

                                                  {
    WERD_RES* result = new WERD_RES(*src);
    // That didn't copy the ratings, but we want a copy if there is one to
    // begin width.
    if (src->ratings != NULL)
      result->ratings = src->ratings->DeepCopy();
    return result;
  }
void WERD_RES::FakeClassifyWord ( int  blob_count,
BLOB_CHOICE **  choices 
)

Definition at line 872 of file pageres.cpp.

                                                                     {
  // Setup the WERD_RES.
  ASSERT_HOST(box_word != NULL);
  ASSERT_HOST(blob_count == box_word->length());
  ClearWordChoices();
  ClearRatings();
  ratings = new MATRIX(blob_count, 1);
  for (int c = 0; c < blob_count; ++c) {
    BLOB_CHOICE_LIST* choice_list = new BLOB_CHOICE_LIST;
    BLOB_CHOICE_IT choice_it(choice_list);
    choice_it.add_after_then_move(choices[c]);
    ratings->put(c, c, choice_list);
  }
  FakeWordFromRatings();
  reject_map.initialise(blob_count);
  done = true;
}
void WERD_RES::FakeWordFromRatings ( )

Definition at line 892 of file pageres.cpp.

                                   {
  int num_blobs = ratings->dimension();
  WERD_CHOICE* word_choice = new WERD_CHOICE(uch_set, num_blobs);
  word_choice->set_permuter(TOP_CHOICE_PERM);
  for (int b = 0; b < num_blobs; ++b) {
    UNICHAR_ID unichar_id = UNICHAR_SPACE;
    float rating = MAX_INT32;
    float certainty = -MAX_INT32;
    BLOB_CHOICE_LIST* choices = ratings->get(b, b);
    if (choices != NULL && !choices->empty()) {
      BLOB_CHOICE_IT bc_it(choices);
      BLOB_CHOICE* choice = bc_it.data();
      unichar_id = choice->unichar_id();
      rating = choice->rating();
      certainty = choice->certainty();
    }
    word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating,
                                                   certainty);
  }
  LogNewRawChoice(word_choice);
  // Ownership of word_choice taken by word here.
  LogNewCookedChoice(1, false, word_choice);
}
void WERD_RES::FilterWordChoices ( int  debug_level)

Definition at line 504 of file pageres.cpp.

                                                {
  if (best_choice == NULL || best_choices.singleton())
    return;

  if (debug_level >= 2)
    best_choice->print("\nFiltering against best choice");
  WERD_CHOICE_IT it(&best_choices);
  int index = 0;
  for (it.forward(); !it.at_first(); it.forward(), ++index) {
    WERD_CHOICE* choice = it.data();
    float threshold = StopperAmbigThreshold(best_choice->adjust_factor(),
                                            choice->adjust_factor());
    // i, j index the blob choice in choice, best_choice.
    // chunk is an index into the chopped_word blobs (AKA chunks).
    // Since the two words may use different segmentations of the chunks, we
    // iterate over the chunks to find out whether a comparable blob
    // classification is much worse than the best result.
    int i = 0, j = 0, chunk = 0;
    // Each iteration of the while deals with 1 chunk. On entry choice_chunk
    // and best_chunk are the indices of the first chunk in the NEXT blob,
    // i.e. we don't have to increment i, j while chunk < choice_chunk and
    // best_chunk respectively.
    int choice_chunk = choice->state(0), best_chunk = best_choice->state(0);
    while (i < choice->length() && j < best_choice->length()) {
      if (choice->unichar_id(i) != best_choice->unichar_id(j) &&
          choice->certainty(i) - best_choice->certainty(j) < threshold) {
        if (debug_level >= 2) {
          STRING label;
          label.add_str_int("\nDiscarding bad choice #", index);
          choice->print(label.string());
          tprintf("i %d j %d Chunk %d Choice->Blob[i].Certainty %.4g"
              " BestChoice->ChunkCertainty[Chunk] %g Threshold %g\n",
              i, j, chunk, choice->certainty(i),
              best_choice->certainty(j), threshold);
        }
        delete it.extract();
        break;
      }
      ++chunk;
      // If needed, advance choice_chunk to keep up with chunk.
      while (choice_chunk < chunk && ++i < choice->length())
        choice_chunk += choice->state(i);
      // If needed, advance best_chunk to keep up with chunk.
      while (best_chunk < chunk && ++j < best_choice->length())
        best_chunk += best_choice->state(j);
    }
  }
}
void WERD_RES::fix_hyphens ( )

Definition at line 1041 of file pageres.cpp.

void WERD_RES::fix_quotes ( )

Definition at line 1012 of file pageres.cpp.

                          {
  if (!uch_set->contains_unichar("\"") ||
      !uch_set->get_enabled(uch_set->unichar_to_id("\"")))
    return;  // Don't create it if it is disallowed.

  ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothQuotes),
      NULL);
}
BLOB_CHOICE * WERD_RES::GetBlobChoice ( int  index) const

Definition at line 742 of file pageres.cpp.

                                                    {
  if (index < 0 || index >= best_choice->length()) return NULL;
  BLOB_CHOICE_LIST* choices = GetBlobChoices(index);
  return FindMatchingChoice(best_choice->unichar_id(index), choices);
}
BLOB_CHOICE_LIST * WERD_RES::GetBlobChoices ( int  index) const

Definition at line 751 of file pageres.cpp.

                                                          {
  return best_choice->blob_choices(index, ratings);
}
int WERD_RES::GetBlobsGap ( int  blob_index)

Definition at line 732 of file pageres.cpp.

                                        {
  if (blob_index < 0 || blob_index >= blob_gaps.size())
    return 0;
  return blob_gaps[blob_index];
}
int WERD_RES::GetBlobsWidth ( int  start_blob,
int  last_blob 
)

Definition at line 722 of file pageres.cpp.

                                                         {
  int result = 0;
  for (int b = start_blob; b <= last_blob; ++b) {
    result += blob_widths[b];
    if (b < last_blob)
      result += blob_gaps[b];
  }
  return result;
}
bool WERD_RES::HyphenBoxesOverlap ( const TBOX & box1,
const TBOX & box2 
)

Definition at line 1035 of file pageres.cpp.

                                                                    {
  return box1.right() >= box2.left();
}
void WERD_RES::InitForRetryRecognition ( const WERD_RES & source)

Definition at line 269 of file pageres.cpp.

                                                             {
  word = source.word;
  CopySimpleFields(source);
  if (source.blamer_bundle != NULL) {
    blamer_bundle = new BlamerBundle();
    blamer_bundle->CopyTruth(*source.blamer_bundle);
  }
}
void WERD_RES::InitNonPointers ( )

Definition at line 1088 of file pageres.cpp.

                               {
  tess_failed = FALSE;
  tess_accepted = FALSE;
  tess_would_adapt = FALSE;
  done = FALSE;
  unlv_crunch_mode = CR_NONE;
  small_caps = false;
  odd_size = false;
  italic = FALSE;
  bold = FALSE;
  // The fontinfos and tesseract count as non-pointers as they point to
  // data owned elsewhere.
  fontinfo = NULL;
  fontinfo2 = NULL;
  tesseract = NULL;
  fontinfo_id_count = 0;
  fontinfo_id2_count = 0;
  x_height = 0.0;
  caps_height = 0.0;
  baseline_shift = 0.0f;
  guessed_x_ht = TRUE;
  guessed_caps_ht = TRUE;
  combination = FALSE;
  part_of_combo = FALSE;
  reject_spaces = FALSE;
}
void WERD_RES::InitPointers ( )

Definition at line 1115 of file pageres.cpp.

                            {
  word = NULL;
  bln_boxes = NULL;
  blob_row = NULL;
  uch_set = NULL;
  chopped_word = NULL;
  rebuild_word = NULL;
  box_word = NULL;
  ratings = NULL;
  best_choice = NULL;
  raw_choice = NULL;
  ep_choice = NULL;
  blamer_bundle = NULL;
}
void WERD_RES::InsertSeam ( int  blob_number,
SEAM * seam 
)

Definition at line 409 of file pageres.cpp.

                                                     {
  // Insert the seam into the SEAMS array.
  seam->PrepareToInsertSeam(seam_array, chopped_word->blobs, blob_number, true);
  seam_array.insert(seam, blob_number);
  if (ratings != NULL) {
    // Expand the ratings matrix.
    ratings = ratings->ConsumeAndMakeBigger(blob_number);
    // Fix all the segmentation states.
    if (raw_choice != NULL)
      raw_choice->UpdateStateForSplit(blob_number);
    WERD_CHOICE_IT wc_it(&best_choices);
    for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
      WERD_CHOICE* choice = wc_it.data();
      choice->UpdateStateForSplit(blob_number);
    }
    SetupBlobWidthsAndGaps();
  }
}
bool WERD_RES::IsAmbiguous ( )

Definition at line 443 of file pageres.cpp.

                           {
  return !best_choices.singleton() || best_choice->dangerous_ambig_found();
}
bool WERD_RES::LogNewCookedChoice ( int  max_num_choices,
bool  debug,
WERD_CHOICE * word_choice 
)

Definition at line 612 of file pageres.cpp.

                                                            {
  if (best_choice != NULL) {
    // Throw out obviously bad choices to save some work.
    // TODO(rays) Get rid of this! This piece of code produces different
    // results according to the order in which words are found, which is an
    // undesirable behavior. It would be better to keep all the choices and
    // prune them later when more information is available.
    float max_certainty_delta =
        StopperAmbigThreshold(best_choice->adjust_factor(),
                              word_choice->adjust_factor());
    if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
      max_certainty_delta = -kStopperAmbiguityThresholdOffset;
    if (word_choice->certainty() - best_choice->certainty() <
        max_certainty_delta) {
      if (debug) {
        STRING bad_string;
        word_choice->string_and_lengths(&bad_string, NULL);
        tprintf("Discarding choice \"%s\" with an overly low certainty"
                " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
                bad_string.string(), word_choice->certainty(),
                best_choice->certainty(),
                max_certainty_delta + best_choice->certainty());
      }
      delete word_choice;
      return false;
    }
  }

  // Insert in the list in order of increasing rating, but knock out worse
  // string duplicates.
  WERD_CHOICE_IT it(&best_choices);
  const STRING& new_str = word_choice->unichar_string();
  bool inserted = false;
  int num_choices = 0;
  if (!it.empty()) {
    do {
      WERD_CHOICE* choice = it.data();
      if (choice->rating() > word_choice->rating() && !inserted) {
        // Time to insert.
        it.add_before_stay_put(word_choice);
        inserted = true;
        if (num_choices == 0)
          best_choice = word_choice;  // This is the new best.
        ++num_choices;
      }
      if (choice->unichar_string() == new_str) {
        if (inserted) {
          // New is better.
          delete it.extract();
        } else {
          // Old is better.
          if (debug) {
            tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n",
                    new_str.string(), word_choice->rating(), choice->rating());
          }
          delete word_choice;
          return false;
        }
      } else {
        ++num_choices;
        if (num_choices > max_num_choices)
          delete it.extract();
      }
      it.forward();
    } while (!it.at_first());
  }
  if (!inserted && num_choices < max_num_choices) {
    it.add_to_end(word_choice);
    inserted = true;
    if (num_choices == 0)
      best_choice = word_choice;  // This is the new best.
  }
  if (debug) {
    if (inserted)
      tprintf("New %s", best_choice == word_choice ? "Best" : "Secondary");
    else
      tprintf("Poor");
    word_choice->print(" Word Choice");
  }
  if (!inserted) {
    delete word_choice;
    return false;
  }
  return true;
}
bool WERD_RES::LogNewRawChoice ( WERD_CHOICE * word_choice)

Definition at line 596 of file pageres.cpp.

                                                       {
  if (raw_choice == NULL || word_choice->rating() < raw_choice->rating()) {
    delete raw_choice;
    raw_choice = new WERD_CHOICE(*word_choice);
    raw_choice->set_permuter(TOP_CHOICE_PERM);
    return true;
  }
  return false;
}
void WERD_RES::merge_tess_fails ( )

Definition at line 1061 of file pageres.cpp.

void WERD_RES::MergeAdjacentBlobs ( int  index)

Definition at line 968 of file pageres.cpp.

                                           {
  if (reject_map.length() == best_choice->length())
    reject_map.remove_pos(index);
  best_choice->remove_unichar_id(index + 1);
  rebuild_word->MergeBlobs(index, index + 2);
  box_word->MergeBoxes(index, index + 2);
  if (index + 1 < best_state.length()) {
    best_state[index] += best_state[index + 1];
    best_state.remove(index + 1);
  }
}
WERD_RES & WERD_RES::operator= ( const WERD_RES & source)

Definition at line 178 of file pageres.cpp.

                                                     {
  this->ELIST_LINK::operator=(source);
  Clear();
  if (source.combination) {
    word = new WERD;
    *word = *(source.word);      // deep copy
  } else {
    word = source.word;          // pt to same word
  }
  if (source.bln_boxes != NULL)
    bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
  if (source.chopped_word != NULL)
    chopped_word = new TWERD(*source.chopped_word);
  if (source.rebuild_word != NULL)
    rebuild_word = new TWERD(*source.rebuild_word);
  // TODO(rays) Do we ever need to copy the seam_array?
  blob_row = source.blob_row;
  denorm = source.denorm;
  if (source.box_word != NULL)
    box_word = new tesseract::BoxWord(*source.box_word);
  best_state = source.best_state;
  correct_text = source.correct_text;
  blob_widths = source.blob_widths;
  blob_gaps = source.blob_gaps;
  // None of the uses of operator= require the ratings matrix to be copied,
  // so don't as it would be really slow.

  // Copy the cooked choices.
  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&source.best_choices));
  WERD_CHOICE_IT wc_dest_it(&best_choices);
  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
    const WERD_CHOICE *choice = wc_it.data();
    wc_dest_it.add_after_then_move(new WERD_CHOICE(*choice));
  }
  if (!wc_dest_it.empty()) {
    wc_dest_it.move_to_first();
    best_choice = wc_dest_it.data();
  } else {
    best_choice = NULL;
  }

  if (source.raw_choice != NULL) {
    raw_choice = new WERD_CHOICE(*source.raw_choice);
  } else {
    raw_choice = NULL;
  }
  if (source.ep_choice != NULL) {
    ep_choice = new WERD_CHOICE(*source.ep_choice);
  } else {
    ep_choice = NULL;
  }
  reject_map = source.reject_map;
  combination = source.combination;
  part_of_combo = source.part_of_combo;
  CopySimpleFields(source);
  if (source.blamer_bundle != NULL) {
    blamer_bundle =  new BlamerBundle(*(source.blamer_bundle));
  }
  return *this;
}
bool WERD_RES::PiecesAllNatural ( int  start,
int  count 
) const

Definition at line 1072 of file pageres.cpp.

                                                          {
  // all seams must have no splits.
  for (int index = start; index < start + count - 1; ++index) {
    if (index >= 0 && index < seam_array.size()) {
      SEAM* seam = seam_array[index];
      if (seam != NULL && seam->HasAnySplits()) return false;
    }
  }
  return true;
}
void WERD_RES::PrintBestChoices ( ) const

Definition at line 709 of file pageres.cpp.

                                      {
  STRING alternates_str;
  WERD_CHOICE_IT it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    if (!it.at_first()) alternates_str += "\", \"";
    alternates_str += it.data()->unichar_string();
  }
  tprintf("Alternates for \"%s\": {\"%s\"}\n",
          best_choice->unichar_string().string(), alternates_str.string());
}
const char* WERD_RES::RawUTF8 ( int  blob_index) const [inline]

Definition at line 355 of file pageres.h.

                                            {
    // An index outside raw_choice has no unichar to report.
    const bool index_ok =
        blob_index >= 0 && blob_index < raw_choice->length();
    if (!index_ok) {
      return NULL;
    }
    UNICHAR_ID raw_id = raw_choice->unichar_id(blob_index);
    // The id must be a usable entry of uch_set before it is looked up.
    const bool id_ok = raw_id >= 0 && raw_id < uch_set->size() &&
                       raw_id != INVALID_UNICHAR_ID;
    if (!id_ok) {
      return NULL;
    }
    return uch_set->id_to_unichar(raw_id);
  }
void WERD_RES::RebuildBestState ( )

Definition at line 800 of file pageres.cpp.

                                {
  // Regenerates rebuild_word and best_state from best_choice, which must
  // already be set.
  ASSERT_HOST(best_choice != NULL);
  // Drop any previously built word before allocating a fresh one.
  if (rebuild_word != NULL)
    delete rebuild_word;
  rebuild_word = new TWERD;
  // An empty seam_array is populated from chopped_word before it is used.
  if (seam_array.empty())
    start_seam_list(chopped_word, &seam_array);
  best_state.truncate(0);
  // Index into chopped_word->blobs of the first piece of the current unit.
  int start = 0;
  for (int i = 0; i < best_choice->length(); ++i) {
    // state(i) is used as the number of consecutive chopped blobs that
    // make up unit i (start advances by it below).
    int length = best_choice->state(i);
    best_state.push_back(length);
    // Units of more than one piece are joined before the copy and broken
    // apart again afterwards.
    // NOTE(review): presumably JoinPieces merges the pieces into
    // blobs[start] and BreakPieces restores chopped_word - confirm against
    // SEAM::JoinPieces / SEAM::BreakPieces.
    if (length > 1) {
      SEAM::JoinPieces(seam_array, chopped_word->blobs, start,
                       start + length - 1);
    }
    TBLOB* blob = chopped_word->blobs[start];
    // rebuild_word receives its own copy of the blob.
    rebuild_word->blobs.push_back(new TBLOB(*blob));
    if (length > 1) {
      SEAM::BreakPieces(seam_array, chopped_word->blobs, start,
                        start + length - 1);
    }
    start += length;
  }
}
void WERD_RES::ReplaceBestChoice ( WERD_CHOICE * choice)

Definition at line 787 of file pageres.cpp.

                                                    {
  // Adopt the supplied choice, then regenerate the state derived from it.
  best_choice = choice;
  RebuildBestState();
  SetupBoxWord();
  // The rejection pass needs a reject map of matching length, so build a
  // placeholder one sized from best_state.
  reject_map.initialise(best_state.length());
  // Flag the word as finished and accepted.
  tess_would_adapt = true;
  tess_accepted = true;
  done = true;
  SetScriptPositions();
}
void WERD_RES::SetAllScriptPositions ( tesseract::ScriptPos  position)

Definition at line 860 of file pageres.cpp.

                                                                {
  // Apply the position to the raw choice first, then to each entry of
  // best_choices in list order.
  raw_choice->SetAllScriptPositions(position);
  WERD_CHOICE_IT cooked_it(&best_choices);
  cooked_it.mark_cycle_pt();
  while (!cooked_it.cycled_list()) {
    cooked_it.data()->SetAllScriptPositions(position);
    cooked_it.forward();
  }
}
void WERD_RES::SetScriptPositions ( )
void WERD_RES::SetupBasicsFromChoppedWord ( const UNICHARSET & unicharset_in)
void WERD_RES::SetupBlamerBundle ( )

Definition at line 384 of file pageres.cpp.

void WERD_RES::SetupBlobWidthsAndGaps ( )

Definition at line 391 of file pageres.cpp.

                                      {
  // Start from empty vectors, then record one width per chopped blob and
  // one gap per adjacent pair of blobs.
  blob_widths.truncate(0);
  blob_gaps.truncate(0);
  const int blob_total = chopped_word->NumBlobs();
  for (int i = 0; i < blob_total; ++i) {
    TBOX current = chopped_word->blobs[i]->bounding_box();
    blob_widths.push_back(current.width());
    // The last blob has no following blob, hence no gap.
    const int next = i + 1;
    if (next >= blob_total) continue;
    TBOX following = chopped_word->blobs[next]->bounding_box();
    blob_gaps.push_back(following.left() - current.right());
  }
}
void WERD_RES::SetupBoxWord ( )
void WERD_RES::SetupFake ( const UNICHARSET & uch)

Definition at line 343 of file pageres.cpp.

                                                        {
  // Reset previous results and allocate empty containers for the fake word.
  ClearResults();
  SetupWordScript(unicharset_in);
  chopped_word = new TWERD;
  rebuild_word = new TWERD;
  bln_boxes = new tesseract::BoxWord;
  box_word = new tesseract::BoxWord;
  const int num_cblobs = word->cblob_list()->length();
  if (num_cblobs <= 0) {
    // No blobs at all: log one deliberately bad choice as both the raw and
    // the cooked result.
    WERD_CHOICE* bad_choice = new WERD_CHOICE(&unicharset_in);
    bad_choice->make_bad();
    LogNewRawChoice(bad_choice);
    // LogNewCookedChoice takes ownership of bad_choice for *this WERD_RES.
    LogNewCookedChoice(1, false, bad_choice);
  } else {
    // For non-text blocks, forward each blob's box to box_word and mark the
    // word as failed using a fake classification.
    BLOB_CHOICE** placeholders = new BLOB_CHOICE*[num_cblobs];
    C_BLOB_IT blob_it(word->cblob_list());
    int next_slot = 0;
    blob_it.mark_cycle_pt();
    while (!blob_it.cycled_list()) {
      TBOX bbox = blob_it.data()->bounding_box();
      box_word->InsertBox(box_word->length(), bbox);
      placeholders[next_slot] = new BLOB_CHOICE;
      ++next_slot;
      blob_it.forward();
    }
    FakeClassifyWord(num_cblobs, placeholders);
    delete [] placeholders;
  }
  tess_failed = true;
  done = true;
}
bool WERD_RES::SetupForRecognition ( const UNICHARSET & unicharset_in,
tesseract::Tesseract * tesseract,
Pix *  pix,
int  norm_mode,
const TBOX * norm_box,
bool  numeric_mode,
bool  use_body_size,
bool  allow_detailed_fx,
ROW * row,
const BLOCK * block 
)

Definition at line 294 of file pageres.cpp.

                                                                 {
  // norm_mode arrives as a plain int and is interpreted as an engine mode.
  const tesseract::OcrEngineMode engine_mode =
      static_cast<tesseract::OcrEngineMode>(norm_mode);
  tesseract = tess;
  POLY_BLOCK* poly = NULL;
  if (block != NULL) {
    poly = block->poly_block();
  }
  // Words end up empty when every blob has been moved to the rej_blobs
  // list, which seems to happen often in junk. Such words, and anything in
  // a non-text block, get a fake setup instead of real recognition.
  const bool lacks_blobs = engine_mode != tesseract::OEM_CUBE_ONLY &&
                           word->cblob_list()->empty();
  if (lacks_blobs || (poly != NULL && !poly->IsText())) {
    SetupFake(unicharset_in);
    word->set_flag(W_REP_CHAR, false);
    return false;
  }
  ClearResults();
  SetupWordScript(unicharset_in);
  chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
  // Use the row's body size as the x-height only when requested and when
  // the row supplies a positive value; otherwise keep x_height.
  float word_xheight = x_height;
  if (use_body_size && row != NULL && row->body_size() > 0.0f) {
    word_xheight = row->body_size();
  }
  chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
                            word_xheight, baseline_shift, numeric_mode,
                            engine_mode, norm_box, &denorm);
  blob_row = row;
  SetupBasicsFromChoppedWord(unicharset_in);
  SetupBlamerBundle();
  // Allocate a ratings matrix with one row per chopped blob.
  const int blob_total = chopped_word->NumBlobs();
  ratings = new MATRIX(blob_total, kWordrecMaxNumJoinChunks);
  tess_failed = false;
  return true;
}
void WERD_RES::SetupWordScript ( const UNICHARSET & unicharset_in)

Definition at line 375 of file pageres.cpp.

                                                    {
  // Remember the unicharset, then stamp its default script and the two
  // script-related flags onto the word.
  uch_set = &uch;
  const int default_script = uch.default_sid();
  word->set_script_id(default_script);
  const bool has_xheight = uch.script_has_xheight();
  word->set_flag(W_SCRIPT_HAS_XHEIGHT, has_xheight);
  const bool is_latin = default_script == uch.latin_sid();
  word->set_flag(W_SCRIPT_IS_LATIN, is_latin);
}
bool WERD_RES::StatesAllValid ( )

Definition at line 449 of file pageres.cpp.

                              {
  // Checks that the states of raw_choice and of every entry in best_choices
  // sum to the dimension of the ratings matrix. Returns false, after
  // printing a diagnostic, on the first mismatch; returns true otherwise.
  int ratings_dim = ratings->dimension();
  if (raw_choice->TotalOfStates() != ratings_dim) {
    tprintf("raw_choice has total of states = %d vs ratings dim of %d\n",
            raw_choice->TotalOfStates(), ratings_dim);
    return false;
  }
  WERD_CHOICE_IT it(&best_choices);
  // Position of the current choice within best_choices, for diagnostics.
  int index = 0;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
    WERD_CHOICE* choice = it.data();
    if (choice->TotalOfStates() != ratings_dim) {
      // The format has three %d fields: the choice's position, its total of
      // states, and the ratings dimension. All three must be supplied.
      tprintf("Cooked #%d has total of states = %d vs ratings dim of %d\n",
              index, choice->TotalOfStates(), ratings_dim);
      return false;
    }
  }
  return true;
}
UNICHARSET::Direction WERD_RES::SymbolDirection ( int  blob_index) const [inline]

Definition at line 364 of file pageres.h.

                                                            {
    // With no best choice there is nothing to look up.
    if (best_choice == NULL) {
      return UNICHARSET::U_OTHER_NEUTRAL;
    }
    // An index outside the best choice also yields the neutral direction.
    if (blob_index >= best_choice->length() || blob_index < 0) {
      return UNICHARSET::U_OTHER_NEUTRAL;
    }
    return uch_set->get_direction(best_choice->unichar_id(blob_index));
  }
bool WERD_RES::UnicharsInReadingOrder ( ) const [inline]

Definition at line 406 of file pageres.h.


Member Data Documentation

Definition at line 297 of file pageres.h.

Definition at line 219 of file pageres.h.

WERD_CHOICE_LIST WERD_RES::best_choices

Definition at line 227 of file pageres.h.

Definition at line 255 of file pageres.h.

Definition at line 230 of file pageres.h.

Definition at line 184 of file pageres.h.

Definition at line 208 of file pageres.h.

Definition at line 186 of file pageres.h.

Definition at line 205 of file pageres.h.

Definition at line 286 of file pageres.h.

Definition at line 250 of file pageres.h.

Definition at line 296 of file pageres.h.

Definition at line 201 of file pageres.h.

Definition at line 315 of file pageres.h.

Definition at line 190 of file pageres.h.

Definition at line 282 of file pageres.h.

Definition at line 270 of file pageres.h.

Definition at line 288 of file pageres.h.

Definition at line 289 of file pageres.h.

Definition at line 291 of file pageres.h.

Definition at line 290 of file pageres.h.

Definition at line 293 of file pageres.h.

Definition at line 292 of file pageres.h.

Definition at line 285 of file pageres.h.

Definition at line 284 of file pageres.h.

Definition at line 316 of file pageres.h.

Definition at line 215 of file pageres.h.

Definition at line 224 of file pageres.h.

Definition at line 244 of file pageres.h.

Definition at line 271 of file pageres.h.

Definition at line 317 of file pageres.h.

Definition at line 203 of file pageres.h.

Definition at line 283 of file pageres.h.

Definition at line 280 of file pageres.h.

Definition at line 272 of file pageres.h.

Definition at line 281 of file pageres.h.

Definition at line 266 of file pageres.h.

Definition at line 192 of file pageres.h.

Definition at line 294 of file pageres.h.

Definition at line 175 of file pageres.h.

Definition at line 295 of file pageres.h.


The documentation for this class was generated from the following files:
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines