|
tesseract 3.04.01
|
00001 /* -*-C-*- 00002 ******************************************************************************** 00003 * 00004 * File: findseam.c (Formerly findseam.c) 00005 * Description: 00006 * Author: Mark Seaman, OCR Technology 00007 * Created: Fri Oct 16 14:37:00 1987 00008 * Modified: Tue Jul 30 15:44:59 1991 (Mark Seaman) marks@hpgrlt 00009 * Language: C 00010 * Package: N/A 00011 * Status: Reusable Software Component 00012 * 00013 * (c) Copyright 1987, Hewlett-Packard Company. 00014 ** Licensed under the Apache License, Version 2.0 (the "License"); 00015 ** you may not use this file except in compliance with the License. 00016 ** You may obtain a copy of the License at 00017 ** http://www.apache.org/licenses/LICENSE-2.0 00018 ** Unless required by applicable law or agreed to in writing, software 00019 ** distributed under the License is distributed on an "AS IS" BASIS, 00020 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00021 ** See the License for the specific language governing permissions and 00022 ** limitations under the License. 00023 * 00024 *********************************************************************************/ 00025 /*---------------------------------------------------------------------- 00026 I n c l u d e s 00027 ----------------------------------------------------------------------*/ 00028 #include "findseam.h" 00029 #include "gradechop.h" 00030 #include "plotedges.h" 00031 #include "outlines.h" 00032 #include "freelist.h" 00033 #include "seam.h" 00034 #include "wordrec.h" 00035 00036 // Include automatically generated configuration file if running autoconf. 00037 #ifdef HAVE_CONFIG_H 00038 #include "config_auto.h" 00039 #endif 00040 00041 /*---------------------------------------------------------------------- 00042 T y p e s 00043 ----------------------------------------------------------------------*/ 00044 #define SPLIT_CLOSENESS 20/* Difference in x value */ 00045 /* How many to keep */ 00046 #define MAX_NUM_SEAMS 150 00047 /* How many to keep */ 00048 #define MAX_OLD_SEAMS 150 00049 #define NO_FULL_PRIORITY -1/* Special marker for pri. */ 00050 /* Evalute right away */ 00051 #define BAD_PRIORITY 9999.0 00052 00053 /*---------------------------------------------------------------------- 00054 F u n c t i o n s 00055 ----------------------------------------------------------------------*/ 00056 namespace tesseract { 00057 00058 /********************************************************************** 00059 * add_seam_to_queue 00060 * 00061 * Adds the given new_seam to the seams priority queue, unless it is full 00062 * and the new seam is worse than the worst. 00063 **********************************************************************/ 00064 void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, 00065 SeamQueue* seams) { 00066 if (new_seam == NULL) return; 00067 if (chop_debug) { 00068 tprintf("Pushing new seam with priority %g :", new_priority); 00069 new_seam->Print("seam: "); 00070 } 00071 if (seams->size() >= MAX_NUM_SEAMS) { 00072 SeamPair old_pair(0, NULL); 00073 if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) { 00074 if (chop_debug) { 00075 tprintf("Old seam staying with priority %g\n", old_pair.key()); 00076 } 00077 delete new_seam; 00078 seams->Push(&old_pair); 00079 return; 00080 } else if (chop_debug) { 00081 tprintf("New seam with priority %g beats old worst seam with %g\n", 00082 new_priority, old_pair.key()); 00083 } 00084 } 00085 SeamPair new_pair(new_priority, new_seam); 00086 seams->Push(&new_pair); 00087 } 00088 00089 00090 /********************************************************************** 00091 * choose_best_seam 00092 * 00093 * Choose the best seam that can be created by assembling this a 00094 * collection of splits. A queue of all the possible seams is 00095 * maintained. Each new split received is placed in that queue with 00096 * its partial priority value. These values in the seam queue are 00097 * evaluated and combined until a good enough seam is found. If no 00098 * further good seams are being found then this function returns to the 00099 * caller, who will send more splits. If this function is called with 00100 * a split of NULL, then no further splits can be supplied by the 00101 * caller. 00102 **********************************************************************/ 00103 void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, 00104 PRIORITY priority, SEAM **seam_result, 00105 TBLOB *blob, SeamPile *seam_pile) { 00106 SEAM *seam; 00107 char str[80]; 00108 float my_priority; 00109 /* Add seam of split */ 00110 my_priority = priority; 00111 if (split != NULL) { 00112 TPOINT split_point = split->point1->pos; 00113 split_point += split->point2->pos; 00114 split_point /= 2; 00115 seam = new SEAM(my_priority, split_point, *split); 00116 if (chop_debug > 1) seam->Print("Partial priority "); 00117 add_seam_to_queue(my_priority, seam, seam_queue); 00118 00119 if (my_priority > chop_good_split) 00120 return; 00121 } 00122 00123 TBOX bbox = blob->bounding_box(); 00124 /* Queue loop */ 00125 while (!seam_queue->empty()) { 00126 SeamPair seam_pair; 00127 seam_queue->Pop(&seam_pair); 00128 seam = seam_pair.extract_data(); 00129 /* Set full priority */ 00130 my_priority = seam->FullPriority(bbox.left(), bbox.right(), 00131 chop_overlap_knob, chop_centered_maxwidth, 00132 chop_center_knob, chop_width_change_knob); 00133 if (chop_debug) { 00134 sprintf (str, "Full my_priority %0.0f, ", my_priority); 00135 seam->Print(str); 00136 } 00137 00138 if ((*seam_result == NULL || (*seam_result)->priority() > my_priority) && 00139 my_priority < chop_ok_split) { 00140 /* No crossing */ 00141 if (seam->IsHealthy(*blob, chop_min_outline_points, 00142 chop_min_outline_area)) { 00143 delete *seam_result; 00144 *seam_result = new SEAM(*seam); 00145 (*seam_result)->set_priority(my_priority); 00146 } else { 00147 delete seam; 00148 seam = NULL; 00149 my_priority = BAD_PRIORITY; 00150 } 00151 } 00152 00153 if (my_priority < chop_good_split) { 00154 if (seam) 00155 delete seam; 00156 return; /* Made good answer */ 00157 } 00158 00159 if (seam) { 00160 /* Combine with others */ 00161 if (seam_pile->size() < chop_seam_pile_size) { 00162 combine_seam(*seam_pile, seam, seam_queue); 00163 SeamDecPair pair(seam_pair.key(), seam); 00164 seam_pile->Push(&pair); 00165 } else if (chop_new_seam_pile && 00166 seam_pile->size() == chop_seam_pile_size && 00167 seam_pile->PeekTop().key() > seam_pair.key()) { 00168 combine_seam(*seam_pile, seam, seam_queue); 00169 SeamDecPair pair; 00170 seam_pile->Pop(&pair); // pop the worst. 00171 // Replace the seam in pair (deleting the old one) with 00172 // the new seam and score, then push back into the heap. 00173 pair.set_key(seam_pair.key()); 00174 pair.set_data(seam); 00175 seam_pile->Push(&pair); 00176 } else { 00177 delete seam; 00178 } 00179 } 00180 00181 my_priority = seam_queue->empty() ? NO_FULL_PRIORITY 00182 : seam_queue->PeekTop().key(); 00183 if ((my_priority > chop_ok_split) || 00184 (my_priority > chop_good_split && split)) 00185 return; 00186 } 00187 } 00188 00189 00190 /********************************************************************** 00191 * combine_seam 00192 * 00193 * Find other seams to combine with this one. The new seams that result 00194 * from this union should be added to the seam queue. The return value 00195 * tells whether or not any additional seams were added to the queue. 00196 **********************************************************************/ 00197 void Wordrec::combine_seam(const SeamPile& seam_pile, 00198 const SEAM* seam, SeamQueue* seam_queue) { 00199 for (int x = 0; x < seam_pile.size(); ++x) { 00200 const SEAM *this_one = seam_pile.get(x).data(); 00201 if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) { 00202 SEAM *new_one = new SEAM(*seam); 00203 new_one->CombineWith(*this_one); 00204 if (chop_debug > 1) new_one->Print("Combo priority "); 00205 add_seam_to_queue(new_one->priority(), new_one, seam_queue); 00206 } 00207 } 00208 } 00209 00210 /********************************************************************** 00211 * pick_good_seam 00212 * 00213 * Find and return a good seam that will split this blob into two pieces. 00214 * Work from the outlines provided. 00215 **********************************************************************/ 00216 SEAM *Wordrec::pick_good_seam(TBLOB *blob) { 00217 SeamPile seam_pile(chop_seam_pile_size); 00218 EDGEPT *points[MAX_NUM_POINTS]; 00219 EDGEPT_CLIST new_points; 00220 SEAM *seam = NULL; 00221 TESSLINE *outline; 00222 inT16 num_points = 0; 00223 00224 #ifndef GRAPHICS_DISABLED 00225 if (chop_debug > 2) 00226 wordrec_display_splits.set_value(true); 00227 00228 draw_blob_edges(blob); 00229 #endif 00230 00231 PointHeap point_heap(MAX_NUM_POINTS); 00232 for (outline = blob->outlines; outline; outline = outline->next) 00233 prioritize_points(outline, &point_heap); 00234 00235 while (!point_heap.empty() && num_points < MAX_NUM_POINTS) { 00236 points[num_points++] = point_heap.PeekTop().data; 00237 point_heap.Pop(NULL); 00238 } 00239 00240 /* Initialize queue */ 00241 SeamQueue seam_queue(MAX_NUM_SEAMS); 00242 00243 try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob); 00244 try_vertical_splits(points, num_points, &new_points, 00245 &seam_queue, &seam_pile, &seam, blob); 00246 00247 if (seam == NULL) { 00248 choose_best_seam(&seam_queue, NULL, BAD_PRIORITY, &seam, blob, &seam_pile); 00249 } else if (seam->priority() > chop_good_split) { 00250 choose_best_seam(&seam_queue, NULL, seam->priority(), &seam, blob, 00251 &seam_pile); 00252 } 00253 00254 EDGEPT_C_IT it(&new_points); 00255 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00256 EDGEPT *inserted_point = it.data(); 00257 if (seam == NULL || !seam->UsesPoint(inserted_point)) { 00258 for (outline = blob->outlines; outline; outline = outline->next) { 00259 if (outline->loop == inserted_point) { 00260 outline->loop = outline->loop->next; 00261 } 00262 } 00263 remove_edgept(inserted_point); 00264 } 00265 } 00266 00267 if (seam) { 00268 if (seam->priority() > chop_ok_split) { 00269 delete seam; 00270 seam = NULL; 00271 } 00272 #ifndef GRAPHICS_DISABLED 00273 else if (wordrec_display_splits) { 00274 seam->Mark(edge_window); 00275 if (chop_debug > 2) { 00276 update_edge_window(); 00277 edge_window_wait(); 00278 } 00279 } 00280 #endif 00281 } 00282 00283 if (chop_debug) 00284 wordrec_display_splits.set_value(false); 00285 00286 return (seam); 00287 } 00288 00289 00290 /********************************************************************** 00291 * try_point_pairs 00292 * 00293 * Try all the splits that are produced by pairing critical points 00294 * together. See if any of them are suitable for use. Use a seam 00295 * queue and seam pile that have already been initialized and used. 00296 **********************************************************************/ 00297 void Wordrec::try_point_pairs(EDGEPT * points[MAX_NUM_POINTS], 00298 inT16 num_points, 00299 SeamQueue* seam_queue, 00300 SeamPile* seam_pile, 00301 SEAM ** seam, 00302 TBLOB * blob) { 00303 inT16 x; 00304 inT16 y; 00305 PRIORITY priority; 00306 00307 for (x = 0; x < num_points; x++) { 00308 for (y = x + 1; y < num_points; y++) { 00309 if (points[y] && 00310 points[x]->WeightedDistance(*points[y], chop_x_y_weight) < 00311 chop_split_length && 00312 points[x] != points[y]->next && points[y] != points[x]->next && 00313 !is_exterior_point(points[x], points[y]) && 00314 !is_exterior_point(points[y], points[x])) { 00315 SPLIT split(points[x], points[y]); 00316 priority = partial_split_priority(&split); 00317 00318 choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile); 00319 } 00320 } 00321 } 00322 } 00323 00324 00325 /********************************************************************** 00326 * try_vertical_splits 00327 * 00328 * Try all the splits that are produced by vertical projection to see 00329 * if any of them are suitable for use. Use a seam queue and seam pile 00330 * that have already been initialized and used. 00331 * Return in new_points a collection of points that were inserted into 00332 * the blob while examining vertical splits and which may safely be 00333 * removed once a seam is chosen if they are not part of the seam. 00334 **********************************************************************/ 00335 void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS], 00336 inT16 num_points, 00337 EDGEPT_CLIST *new_points, 00338 SeamQueue* seam_queue, 00339 SeamPile* seam_pile, 00340 SEAM ** seam, 00341 TBLOB * blob) { 00342 EDGEPT *vertical_point = NULL; 00343 inT16 x; 00344 PRIORITY priority; 00345 TESSLINE *outline; 00346 00347 for (x = 0; x < num_points; x++) { 00348 vertical_point = NULL; 00349 for (outline = blob->outlines; outline; outline = outline->next) { 00350 vertical_projection_point(points[x], outline->loop, 00351 &vertical_point, new_points); 00352 } 00353 00354 if (vertical_point && points[x] != vertical_point->next && 00355 vertical_point != points[x]->next && 00356 points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) < 00357 chop_split_length) { 00358 SPLIT split(points[x], vertical_point); 00359 priority = partial_split_priority(&split); 00360 choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile); 00361 } 00362 } 00363 } 00364 00365 }