tesseract 3.04.01

wordrec/findseam.cpp

Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
00004  * File:        findseam.c  (Formerly findseam.c)
00005  * Description:
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Fri Oct 16 14:37:00 1987
00008  * Modified:     Tue Jul 30 15:44:59 1991 (Mark Seaman) marks@hpgrlt
00009  * Language:     C
00010  * Package:      N/A
00011  * Status:       Reusable Software Component
00012  *
00013  * (c) Copyright 1987, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  *********************************************************************************/
00025 /*----------------------------------------------------------------------
00026               I n c l u d e s
00027 ----------------------------------------------------------------------*/
00028 #include "findseam.h"
00029 #include "gradechop.h"
00030 #include "plotedges.h"
00031 #include "outlines.h"
00032 #include "freelist.h"
00033 #include "seam.h"
00034 #include "wordrec.h"
00035 
00036 // Include automatically generated configuration file if running autoconf.
00037 #ifdef HAVE_CONFIG_H
00038 #include "config_auto.h"
00039 #endif
00040 
00041 /*----------------------------------------------------------------------
00042               T y p e s
00043 ----------------------------------------------------------------------*/
00044 #define SPLIT_CLOSENESS        20/* Difference in x value */
00045                                  /* How many to keep */
00046 #define MAX_NUM_SEAMS          150
00047                                  /* How many to keep */
00048 #define MAX_OLD_SEAMS          150
00049 #define NO_FULL_PRIORITY       -1/* Special marker for pri. */
00050                                  /* Evalute right away */
00051 #define BAD_PRIORITY           9999.0
00052 
00053 /*----------------------------------------------------------------------
00054               F u n c t i o n s
00055 ----------------------------------------------------------------------*/
00056 namespace tesseract {
00057 
00058 /**********************************************************************
00059  * add_seam_to_queue
00060  *
00061  * Adds the given new_seam to the seams priority queue, unless it is full
00062  * and the new seam is worse than the worst.
00063  **********************************************************************/
00064 void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam,
00065                                 SeamQueue* seams) {
00066   if (new_seam == NULL) return;
00067   if (chop_debug) {
00068     tprintf("Pushing new seam with priority %g :", new_priority);
00069     new_seam->Print("seam: ");
00070   }
00071   if (seams->size() >= MAX_NUM_SEAMS) {
00072     SeamPair old_pair(0, NULL);
00073     if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) {
00074       if (chop_debug) {
00075         tprintf("Old seam staying with priority %g\n", old_pair.key());
00076       }
00077       delete new_seam;
00078       seams->Push(&old_pair);
00079       return;
00080     } else if (chop_debug) {
00081       tprintf("New seam with priority %g beats old worst seam with %g\n",
00082               new_priority, old_pair.key());
00083     }
00084   }
00085   SeamPair new_pair(new_priority, new_seam);
00086   seams->Push(&new_pair);
00087 }
00088 
00089 
00090 /**********************************************************************
00091  * choose_best_seam
00092  *
00093  * Choose the best seam that can be created by assembling this a
00094  * collection of splits.  A queue of all the possible seams is
00095  * maintained.  Each new split received is placed in that queue with
00096  * its partial priority value.  These values in the seam queue are
00097  * evaluated and combined until a good enough seam is found.  If no
00098  * further good seams are being found then this function returns to the
00099  * caller, who will send more splits.  If this function is called with
00100  * a split of NULL, then no further splits can be supplied by the
00101  * caller.
00102  **********************************************************************/
00103 void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split,
00104                                PRIORITY priority, SEAM **seam_result,
00105                                TBLOB *blob, SeamPile *seam_pile) {
00106   SEAM *seam;
00107   char str[80];
00108   float my_priority;
00109   /* Add seam of split */
00110   my_priority = priority;
00111   if (split != NULL) {
00112     TPOINT split_point = split->point1->pos;
00113     split_point += split->point2->pos;
00114     split_point /= 2;
00115     seam = new SEAM(my_priority, split_point, *split);
00116     if (chop_debug > 1) seam->Print("Partial priority    ");
00117     add_seam_to_queue(my_priority, seam, seam_queue);
00118 
00119     if (my_priority > chop_good_split)
00120       return;
00121   }
00122 
00123   TBOX bbox = blob->bounding_box();
00124   /* Queue loop */
00125   while (!seam_queue->empty()) {
00126     SeamPair seam_pair;
00127     seam_queue->Pop(&seam_pair);
00128     seam = seam_pair.extract_data();
00129     /* Set full priority */
00130     my_priority = seam->FullPriority(bbox.left(), bbox.right(),
00131                                      chop_overlap_knob, chop_centered_maxwidth,
00132                                      chop_center_knob, chop_width_change_knob);
00133     if (chop_debug) {
00134       sprintf (str, "Full my_priority %0.0f,  ", my_priority);
00135       seam->Print(str);
00136     }
00137 
00138     if ((*seam_result == NULL || (*seam_result)->priority() > my_priority) &&
00139         my_priority < chop_ok_split) {
00140       /* No crossing */
00141       if (seam->IsHealthy(*blob, chop_min_outline_points,
00142                           chop_min_outline_area)) {
00143         delete *seam_result;
00144         *seam_result = new SEAM(*seam);
00145         (*seam_result)->set_priority(my_priority);
00146       } else {
00147         delete seam;
00148         seam = NULL;
00149         my_priority = BAD_PRIORITY;
00150       }
00151     }
00152 
00153     if (my_priority < chop_good_split) {
00154       if (seam)
00155         delete seam;
00156       return;                    /* Made good answer */
00157     }
00158 
00159     if (seam) {
00160       /* Combine with others */
00161       if (seam_pile->size() < chop_seam_pile_size) {
00162         combine_seam(*seam_pile, seam, seam_queue);
00163         SeamDecPair pair(seam_pair.key(), seam);
00164         seam_pile->Push(&pair);
00165       } else if (chop_new_seam_pile &&
00166                  seam_pile->size() == chop_seam_pile_size &&
00167                  seam_pile->PeekTop().key() > seam_pair.key()) {
00168         combine_seam(*seam_pile, seam, seam_queue);
00169         SeamDecPair pair;
00170         seam_pile->Pop(&pair);  // pop the worst.
00171         // Replace the seam in pair (deleting the old one) with
00172         // the new seam and score, then push back into the heap.
00173         pair.set_key(seam_pair.key());
00174         pair.set_data(seam);
00175         seam_pile->Push(&pair);
00176       } else {
00177         delete seam;
00178       }
00179     }
00180 
00181     my_priority = seam_queue->empty() ? NO_FULL_PRIORITY
00182                                       : seam_queue->PeekTop().key();
00183     if ((my_priority > chop_ok_split) ||
00184       (my_priority > chop_good_split && split))
00185       return;
00186   }
00187 }
00188 
00189 
00190 /**********************************************************************
00191  * combine_seam
00192  *
00193  * Find other seams to combine with this one.  The new seams that result
00194  * from this union should be added to the seam queue.  The return value
00195  * tells whether or not any additional seams were added to the queue.
00196  **********************************************************************/
00197 void Wordrec::combine_seam(const SeamPile& seam_pile,
00198                            const SEAM* seam, SeamQueue* seam_queue) {
00199   for (int x = 0; x < seam_pile.size(); ++x) {
00200     const SEAM *this_one = seam_pile.get(x).data();
00201     if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) {
00202       SEAM *new_one = new SEAM(*seam);
00203       new_one->CombineWith(*this_one);
00204       if (chop_debug > 1) new_one->Print("Combo priority       ");
00205       add_seam_to_queue(new_one->priority(), new_one, seam_queue);
00206     }
00207   }
00208 }
00209 
00210 /**********************************************************************
00211  * pick_good_seam
00212  *
00213  * Find and return a good seam that will split this blob into two pieces.
00214  * Work from the outlines provided.
00215  **********************************************************************/
00216 SEAM *Wordrec::pick_good_seam(TBLOB *blob) {
00217   SeamPile seam_pile(chop_seam_pile_size);
00218   EDGEPT *points[MAX_NUM_POINTS];
00219   EDGEPT_CLIST new_points;
00220   SEAM *seam = NULL;
00221   TESSLINE *outline;
00222   inT16 num_points = 0;
00223 
00224 #ifndef GRAPHICS_DISABLED
00225   if (chop_debug > 2)
00226     wordrec_display_splits.set_value(true);
00227 
00228   draw_blob_edges(blob);
00229 #endif
00230 
00231   PointHeap point_heap(MAX_NUM_POINTS);
00232   for (outline = blob->outlines; outline; outline = outline->next)
00233     prioritize_points(outline, &point_heap);
00234 
00235   while (!point_heap.empty() && num_points < MAX_NUM_POINTS) {
00236     points[num_points++] = point_heap.PeekTop().data;
00237     point_heap.Pop(NULL);
00238   }
00239 
00240   /* Initialize queue */
00241   SeamQueue seam_queue(MAX_NUM_SEAMS);
00242 
00243   try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
00244   try_vertical_splits(points, num_points, &new_points,
00245                       &seam_queue, &seam_pile, &seam, blob);
00246 
00247   if (seam == NULL) {
00248     choose_best_seam(&seam_queue, NULL, BAD_PRIORITY, &seam, blob, &seam_pile);
00249   } else if (seam->priority() > chop_good_split) {
00250     choose_best_seam(&seam_queue, NULL, seam->priority(), &seam, blob,
00251                      &seam_pile);
00252   }
00253 
00254   EDGEPT_C_IT it(&new_points);
00255   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00256     EDGEPT *inserted_point = it.data();
00257     if (seam == NULL || !seam->UsesPoint(inserted_point)) {
00258       for (outline = blob->outlines; outline; outline = outline->next) {
00259         if (outline->loop == inserted_point) {
00260           outline->loop = outline->loop->next;
00261         }
00262       }
00263       remove_edgept(inserted_point);
00264     }
00265   }
00266 
00267   if (seam) {
00268     if (seam->priority() > chop_ok_split) {
00269       delete seam;
00270       seam = NULL;
00271     }
00272 #ifndef GRAPHICS_DISABLED
00273     else if (wordrec_display_splits) {
00274       seam->Mark(edge_window);
00275       if (chop_debug > 2) {
00276         update_edge_window();
00277         edge_window_wait();
00278       }
00279     }
00280 #endif
00281   }
00282 
00283   if (chop_debug)
00284     wordrec_display_splits.set_value(false);
00285 
00286   return (seam);
00287 }
00288 
00289 
00290 /**********************************************************************
00291  * try_point_pairs
00292  *
00293  * Try all the splits that are produced by pairing critical points
00294  * together.  See if any of them are suitable for use.  Use a seam
00295  * queue and seam pile that have already been initialized and used.
00296  **********************************************************************/
00297 void Wordrec::try_point_pairs(EDGEPT * points[MAX_NUM_POINTS],
00298                               inT16 num_points,
00299                               SeamQueue* seam_queue,
00300                               SeamPile* seam_pile,
00301                               SEAM ** seam,
00302                               TBLOB * blob) {
00303   inT16 x;
00304   inT16 y;
00305   PRIORITY priority;
00306 
00307   for (x = 0; x < num_points; x++) {
00308     for (y = x + 1; y < num_points; y++) {
00309       if (points[y] &&
00310           points[x]->WeightedDistance(*points[y], chop_x_y_weight) <
00311               chop_split_length &&
00312           points[x] != points[y]->next && points[y] != points[x]->next &&
00313           !is_exterior_point(points[x], points[y]) &&
00314           !is_exterior_point(points[y], points[x])) {
00315         SPLIT split(points[x], points[y]);
00316         priority = partial_split_priority(&split);
00317 
00318         choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
00319       }
00320     }
00321   }
00322 }
00323 
00324 
00325 /**********************************************************************
00326  * try_vertical_splits
00327  *
00328  * Try all the splits that are produced by vertical projection to see
00329  * if any of them are suitable for use.  Use a seam queue and seam pile
00330  * that have already been initialized and used.
00331  * Return in new_points a collection of points that were inserted into
00332  * the blob while examining vertical splits and which may safely be
00333  * removed once a seam is chosen if they are not part of the seam.
00334  **********************************************************************/
00335 void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS],
00336                                   inT16 num_points,
00337                                   EDGEPT_CLIST *new_points,
00338                                   SeamQueue* seam_queue,
00339                                   SeamPile* seam_pile,
00340                                   SEAM ** seam,
00341                                   TBLOB * blob) {
00342   EDGEPT *vertical_point = NULL;
00343   inT16 x;
00344   PRIORITY priority;
00345   TESSLINE *outline;
00346 
00347   for (x = 0; x < num_points; x++) {
00348     vertical_point = NULL;
00349     for (outline = blob->outlines; outline; outline = outline->next) {
00350       vertical_projection_point(points[x], outline->loop,
00351                                 &vertical_point, new_points);
00352     }
00353 
00354     if (vertical_point && points[x] != vertical_point->next &&
00355         vertical_point != points[x]->next &&
00356         points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) <
00357             chop_split_length) {
00358       SPLIT split(points[x], vertical_point);
00359       priority = partial_split_priority(&split);
00360       choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
00361     }
00362   }
00363 }
00364 
00365 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines