tesseract 3.04.01

ccstruct/seam.cpp

Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
00004  * File:        seam.c  (Formerly seam.c)
00005  * Description:
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Fri Oct 16 14:37:00 1987
00008  * Modified:     Fri May 17 16:30:13 1991 (Mark Seaman) marks@hpgrlt
00009  * Language:     C
00010  * Package:      N/A
00011  * Status:       Reusable Software Component
00012  *
00013  * (c) Copyright 1987, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  *********************************************************************************/
00025 /*----------------------------------------------------------------------
00026               I n c l u d e s
00027 ----------------------------------------------------------------------*/
00028 #include "seam.h"
00029 #include "blobs.h"
00030 #include "tprintf.h"
00031 
00032 /*----------------------------------------------------------------------
00033         Public Function Code
00034 ----------------------------------------------------------------------*/
00035 
00036 // Returns the bounding box of all the points in the seam.
00037 TBOX SEAM::bounding_box() const {
00038   TBOX box(location_.x, location_.y, location_.x, location_.y);
00039   for (int s = 0; s < num_splits_; ++s) {
00040     box += splits_[s].bounding_box();
00041   }
00042   return box;
00043 }
00044 
00045 // Returns true if other can be combined into *this.
00046 bool SEAM::CombineableWith(const SEAM& other, int max_x_dist,
00047                            float max_total_priority) const {
00048   int dist = location_.x - other.location_.x;
00049   if (-max_x_dist < dist && dist < max_x_dist &&
00050       num_splits_ + other.num_splits_ <= kMaxNumSplits &&
00051       priority_ + other.priority_ < max_total_priority &&
00052       !OverlappingSplits(other) && !SharesPosition(other)) {
00053     return true;
00054   } else {
00055     return false;
00056   }
00057 }
00058 
00059 // Combines other into *this. Only works if CombinableWith returned true.
00060 void SEAM::CombineWith(const SEAM& other) {
00061   priority_ += other.priority_;
00062   location_ += other.location_;
00063   location_ /= 2;
00064 
00065   for (int s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s)
00066     splits_[num_splits_++] = other.splits_[s];
00067 }
00068 
00069 // Returns true if the splits in *this SEAM appear OK in the sense that they
00070 // do not cross any outlines and do not chop off any ridiculously small
00071 // pieces.
00072 bool SEAM::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
00073   // TODO(rays) Try testing all the splits. Duplicating original code for now,
00074   // which tested only the first.
00075   return num_splits_ == 0 || splits_[0].IsHealthy(blob, min_points, min_area);
00076 }
00077 
00078 // Computes the widthp_/widthn_ range for all existing SEAMs and for *this
00079 // seam, which is about to be inserted at insert_index. Returns false if
00080 // any of the computations fails, as this indicates an invalid chop.
00081 // widthn_/widthp_ are only changed if modify is true.
00082 bool SEAM::PrepareToInsertSeam(const GenericVector<SEAM*>& seams,
00083                                const GenericVector<TBLOB*>& blobs,
00084                                int insert_index, bool modify) {
00085   for (int s = 0; s < insert_index; ++s) {
00086     if (!seams[s]->FindBlobWidth(blobs, s, modify)) return false;
00087   }
00088   if (!FindBlobWidth(blobs, insert_index, modify)) return false;
00089   for (int s = insert_index; s < seams.size(); ++s) {
00090     if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) return false;
00091   }
00092   return true;
00093 }
00094 
00095 // Computes the widthp_/widthn_ range. Returns false if not all the splits
00096 // are accounted for. widthn_/widthp_ are only changed if modify is true.
00097 bool SEAM::FindBlobWidth(const GenericVector<TBLOB*>& blobs, int index,
00098                          bool modify) {
00099   int num_found = 0;
00100   if (modify) {
00101     widthp_ = 0;
00102     widthn_ = 0;
00103   }
00104   for (int s = 0; s < num_splits_; ++s) {
00105     const SPLIT& split = splits_[s];
00106     bool found_split = split.ContainedByBlob(*blobs[index]);
00107     // Look right.
00108     for (int b = index + 1; !found_split && b < blobs.size(); ++b) {
00109       found_split = split.ContainedByBlob(*blobs[b]);
00110       if (found_split && b - index > widthp_ && modify) widthp_ = b - index;
00111     }
00112     // Look left.
00113     for (int b = index - 1; !found_split && b >= 0; --b) {
00114       found_split = split.ContainedByBlob(*blobs[b]);
00115       if (found_split && index - b > widthn_ && modify) widthn_ = index - b;
00116     }
00117     if (found_split) ++num_found;
00118   }
00119   return num_found == num_splits_;
00120 }
00121 
00122 // Splits this blob into two blobs by applying the splits included in
00123 // *this SEAM
00124 void SEAM::ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const {
00125   for (int s = 0; s < num_splits_; ++s) {
00126     splits_[s].SplitOutlineList(blob->outlines);
00127   }
00128   blob->ComputeBoundingBoxes();
00129 
00130   divide_blobs(blob, other_blob, italic_blob, location_);
00131 
00132   blob->EliminateDuplicateOutlines();
00133   other_blob->EliminateDuplicateOutlines();
00134 
00135   blob->CorrectBlobOrder(other_blob);
00136 }
00137 
00138 // Undoes ApplySeam by removing the seam between these two blobs.
00139 // Produces one blob as a result, and deletes other_blob.
00140 void SEAM::UndoSeam(TBLOB* blob, TBLOB* other_blob) const {
00141   if (blob->outlines == NULL) {
00142     blob->outlines = other_blob->outlines;
00143     other_blob->outlines = NULL;
00144   }
00145 
00146   TESSLINE* outline = blob->outlines;
00147   while (outline->next) outline = outline->next;
00148   outline->next = other_blob->outlines;
00149   other_blob->outlines = NULL;
00150   delete other_blob;
00151 
00152   for (int s = 0; s < num_splits_; ++s) {
00153     splits_[s].UnsplitOutlineList(blob);
00154   }
00155   blob->ComputeBoundingBoxes();
00156   blob->EliminateDuplicateOutlines();
00157 }
00158 
00159 // Prints everything in *this SEAM.
00160 void SEAM::Print(const char* label) const {
00161   tprintf(label);
00162   tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", priority_, location_.x, location_.y,
00163           widthp_, widthn_);
00164   for (int s = 0; s < num_splits_; ++s) {
00165     splits_[s].Print();
00166     if (s + 1 < num_splits_) tprintf(",   ");
00167   }
00168   tprintf("\n");
00169 }
00170 
00171 // Prints a collection of SEAMs.
00172 /* static */
00173 void SEAM::PrintSeams(const char* label, const GenericVector<SEAM*>& seams) {
00174   if (!seams.empty()) {
00175     tprintf("%s\n", label);
00176     for (int x = 0; x < seams.size(); ++x) {
00177       tprintf("%2d:   ", x);
00178       seams[x]->Print("");
00179     }
00180     tprintf("\n");
00181   }
00182 }
00183 
00184 #ifndef GRAPHICS_DISABLED
00185 // Draws the seam in the given window.
00186 void SEAM::Mark(ScrollView* window) const {
00187   for (int s = 0; s < num_splits_; ++s) splits_[s].Mark(window);
00188 }
00189 #endif
00190 
00191 // Break up the blobs in this chain so that they are all independent.
00192 // This operation should undo the affect of join_pieces.
00193 /* static */
00194 void SEAM::BreakPieces(const GenericVector<SEAM*>& seams,
00195                        const GenericVector<TBLOB*>& blobs, int first,
00196                        int last) {
00197   for (int x = first; x < last; ++x) seams[x]->Reveal();
00198 
00199   TESSLINE* outline = blobs[first]->outlines;
00200   int next_blob = first + 1;
00201 
00202   while (outline != NULL && next_blob <= last) {
00203     if (outline->next == blobs[next_blob]->outlines) {
00204       outline->next = NULL;
00205       outline = blobs[next_blob]->outlines;
00206       ++next_blob;
00207     } else {
00208       outline = outline->next;
00209     }
00210   }
00211 }
00212 
00213 // Join a group of base level pieces into a single blob that can then
00214 // be classified.
00215 /* static */
00216 void SEAM::JoinPieces(const GenericVector<SEAM*>& seams,
00217                       const GenericVector<TBLOB*>& blobs, int first, int last) {
00218   TESSLINE* outline = blobs[first]->outlines;
00219   if (!outline)
00220     return;
00221 
00222   for (int x = first; x < last; ++x) {
00223     SEAM *seam = seams[x];
00224     if (x - seam->widthn_ >= first && x + seam->widthp_ < last) seam->Hide();
00225     while (outline->next) outline = outline->next;
00226     outline->next = blobs[x + 1]->outlines;
00227   }
00228 }
00229 
00230 // Hides the seam so the outlines appear not to be cut by it.
00231 void SEAM::Hide() const {
00232   for (int s = 0; s < num_splits_; ++s) {
00233     splits_[s].Hide();
00234   }
00235 }
00236 
00237 // Undoes hide, so the outlines are cut by the seam.
00238 void SEAM::Reveal() const {
00239   for (int s = 0; s < num_splits_; ++s) {
00240     splits_[s].Reveal();
00241   }
00242 }
00243 
00244 // Computes and returns, but does not set, the full priority of *this SEAM.
00245 float SEAM::FullPriority(int xmin, int xmax, double overlap_knob,
00246                          int centered_maxwidth, double center_knob,
00247                          double width_change_knob) const {
00248   if (num_splits_ == 0) return 0.0f;
00249   for (int s = 1; s < num_splits_; ++s) {
00250     splits_[s].SplitOutline();
00251   }
00252   float full_priority =
00253       priority_ +
00254       splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth,
00255                               center_knob, width_change_knob);
00256   for (int s = num_splits_ - 1; s >= 1; --s) {
00257     splits_[s].UnsplitOutlines();
00258   }
00259   return full_priority;
00260 }
00261 
00269 void start_seam_list(TWERD* word, GenericVector<SEAM*>* seam_array) {
00270   seam_array->truncate(0);
00271   TPOINT location;
00272 
00273   for (int b = 1; b < word->NumBlobs(); ++b) {
00274     TBOX bbox = word->blobs[b - 1]->bounding_box();
00275     TBOX nbox = word->blobs[b]->bounding_box();
00276     location.x = (bbox.right() + nbox.left()) / 2;
00277     location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4;
00278     seam_array->push_back(new SEAM(0.0f, location));
00279   }
00280 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines