tesseract 3.04.01

ccstruct/boxword.h

Go to the documentation of this file.
00001 
00002 // File:        boxword.h
00003 // Description: Class to represent the bounding boxes of the output.
00004 // Author:      Ray Smith
00005 // Created:     Tue May 25 14:18:14 PDT 2010
00006 //
00007 // (C) Copyright 2010, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_CSTRUCT_BOXWORD_H__
00021 #define TESSERACT_CSTRUCT_BOXWORD_H__
00022 
00023 #include "genericvector.h"
00024 #include "rect.h"
00025 #include "unichar.h"
00026 
00027 class BLOCK;
00028 class DENORM;
00029 struct TWERD;
00030 class UNICHARSET;
00031 class WERD;
00032 class WERD_CHOICE;
00033 class WERD_RES;
00034 
00035 namespace tesseract {
00036 
00037 // Class to hold an array of bounding boxes for an output word and
00038 // the bounding box of the whole word.
00039 class BoxWord {
00040  public:
00041   BoxWord();
00042   explicit BoxWord(const BoxWord& src);
00043   ~BoxWord();
00044 
00045   BoxWord& operator=(const BoxWord& src);
00046 
00047   void CopyFrom(const BoxWord& src);
00048 
00049   // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
00050   // switch back to original image coordinates.
00051   static BoxWord* CopyFromNormalized(TWERD* tessword);
00052 
00053   // Clean up the bounding boxes from the polygonal approximation by
00054   // expanding slightly, then clipping to the blobs from the original_word
00055   // that overlap. If not null, the block provides the inverse rotation.
00056   void ClipToOriginalWord(const BLOCK* block, WERD* original_word);
00057 
00058   // Merges the boxes from start to end, not including end, and deletes
00059   // the boxes between start and end.
00060   void MergeBoxes(int start, int end);
00061 
00062   // Inserts a new box before the given index.
00063   // Recomputes the bounding box.
00064   void InsertBox(int index, const TBOX& box);
00065 
00066   // Changes the box at the given index to the new box.
00067   // Recomputes the bounding box.
00068   void ChangeBox(int index, const TBOX& box);
00069 
00070   // Deletes the box with the given index, and shuffles up the rest.
00071   // Recomputes the bounding box.
00072   void DeleteBox(int index);
00073 
00074   // Deletes all the boxes stored in BoxWord.
00075   void DeleteAllBoxes();
00076 
00077   // This and other putatively are the same, so call the (permanent) callback
00078   // for each blob index where the bounding boxes match.
00079   // The callback is deleted on completion.
00080   void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const;
00081 
00082   const TBOX& bounding_box() const {
00083     return bbox_;
00084   }
00085   int length() const {
00086     return length_;
00087   }
00088   const TBOX& BlobBox(int index) const {
00089     return boxes_[index];
00090   }
00091 
00092  private:
00093   void ComputeBoundingBox();
00094 
00095   TBOX bbox_;
00096   int length_;
00097   GenericVector<TBOX> boxes_;
00098 };
00099 
00100 }  // namespace tesseract.
00101 
00102 
00103 #endif  // TESSERACT_CSTRUCT_BOXWORD_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines