|
tesseract 3.04.01
|
00001 /* -*-C-*- 00002 ******************************************************************************** 00003 * 00004 * File: split.c (Formerly split.c) 00005 * Description: 00006 * Author: Mark Seaman, OCR Technology 00007 * Created: Fri Oct 16 14:37:00 1987 00008 * Modified: Fri May 17 16:27:49 1991 (Mark Seaman) marks@hpgrlt 00009 * Language: C 00010 * Package: N/A 00011 * Status: Reusable Software Component 00012 * 00013 * (c) Copyright 1987, Hewlett-Packard Company. 00014 ** Licensed under the Apache License, Version 2.0 (the "License"); 00015 ** you may not use this file except in compliance with the License. 00016 ** You may obtain a copy of the License at 00017 ** http://www.apache.org/licenses/LICENSE-2.0 00018 ** Unless required by applicable law or agreed to in writing, software 00019 ** distributed under the License is distributed on an "AS IS" BASIS, 00020 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00021 ** See the License for the specific language governing permissions and 00022 ** limitations under the License. 00023 * 00024 *************************************************************************/ 00025 /*---------------------------------------------------------------------- 00026 I n c l u d e s 00027 ----------------------------------------------------------------------*/ 00028 // Include automatically generated configuration file if running autoconf. 00029 #ifdef HAVE_CONFIG_H 00030 #include "config_auto.h" 00031 #endif 00032 00033 #include "split.h" 00034 #include "coutln.h" 00035 #include "tprintf.h" 00036 00037 #ifdef __UNIX__ 00038 #include <assert.h> 00039 #endif 00040 00041 /*---------------------------------------------------------------------- 00042 V a r i a b l e s 00043 ----------------------------------------------------------------------*/ 00044 // Limit on the amount of penalty for the chop being off-center. 00045 const int kCenterGradeCap = 25; 00046 // Ridiculously large priority for splits that are no use. 00047 const double kBadPriority = 999.0; 00048 00049 BOOL_VAR(wordrec_display_splits, 0, "Display splits"); 00050 00051 // Returns the bounding box of all the points in the split. 00052 TBOX SPLIT::bounding_box() const { 00053 return TBOX( 00054 MIN(point1->pos.x, point2->pos.x), MIN(point1->pos.y, point2->pos.y), 00055 MAX(point1->pos.x, point2->pos.x), MAX(point1->pos.y, point2->pos.y)); 00056 } 00057 00058 // Hides the SPLIT so the outlines appear not to be cut by it. 00059 void SPLIT::Hide() const { 00060 EDGEPT* edgept = point1; 00061 do { 00062 edgept->Hide(); 00063 edgept = edgept->next; 00064 } while (!edgept->EqualPos(*point2) && edgept != point1); 00065 edgept = point2; 00066 do { 00067 edgept->Hide(); 00068 edgept = edgept->next; 00069 } while (!edgept->EqualPos(*point1) && edgept != point2); 00070 } 00071 00072 // Undoes hide, so the outlines are cut by the SPLIT. 00073 void SPLIT::Reveal() const { 00074 EDGEPT* edgept = point1; 00075 do { 00076 edgept->Reveal(); 00077 edgept = edgept->next; 00078 } while (!edgept->EqualPos(*point2) && edgept != point1); 00079 edgept = point2; 00080 do { 00081 edgept->Reveal(); 00082 edgept = edgept->next; 00083 } while (!edgept->EqualPos(*point1) && edgept != point2); 00084 } 00085 00086 // Compute a split priority based on the bounding boxes of the parts. 00087 // The arguments here are config parameters defined in Wordrec. Add chop_ 00088 // to the beginning of the name. 00089 float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, 00090 int centered_maxwidth, double center_knob, 00091 double width_change_knob) const { 00092 TBOX box1 = Box12(); 00093 TBOX box2 = Box21(); 00094 int min_left = MIN(box1.left(), box2.left()); 00095 int max_right = MAX(box1.right(), box2.right()); 00096 if (xmin < min_left && xmax > max_right) return kBadPriority; 00097 00098 float grade = 0.0f; 00099 // grade_overlap. 00100 int width1 = box1.width(); 00101 int width2 = box2.width(); 00102 int min_width = MIN(width1, width2); 00103 int overlap = -box1.x_gap(box2); 00104 if (overlap == min_width) { 00105 grade += 100.0f; // Total overlap. 00106 } else { 00107 if (2 * overlap > min_width) overlap += 2 * overlap - min_width; 00108 if (overlap > 0) grade += overlap_knob * overlap; 00109 } 00110 // grade_center_of_blob. 00111 if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) { 00112 grade += MIN(kCenterGradeCap, center_knob * abs(width1 - width2)); 00113 } 00114 // grade_width_change. 00115 float width_change_grade = 20 - (max_right - min_left - MAX(width1, width2)); 00116 if (width_change_grade > 0.0f) 00117 grade += width_change_grade * width_change_knob; 00118 return grade; 00119 } 00120 00121 // Returns true if *this SPLIT appears OK in the sense that it does not cross 00122 // any outlines and does not chop off any ridiculously small pieces. 00123 bool SPLIT::IsHealthy(const TBLOB& blob, int min_points, int min_area) const { 00124 return !IsLittleChunk(min_points, min_area) && 00125 !blob.SegmentCrossesOutline(point1->pos, point2->pos); 00126 } 00127 00128 // Returns true if the split generates a small chunk in terms of either area 00129 // or number of points. 00130 bool SPLIT::IsLittleChunk(int min_points, int min_area) const { 00131 if (point1->ShortNonCircularSegment(min_points, point2) && 00132 point1->SegmentArea(point2) < min_area) { 00133 return true; 00134 } 00135 if (point2->ShortNonCircularSegment(min_points, point1) && 00136 point2->SegmentArea(point1) < min_area) { 00137 return true; 00138 } 00139 return false; 00140 } 00141 00142 /********************************************************************** 00143 * make_edgept 00144 * 00145 * Create an EDGEPT and hook it into an existing list of edge points. 00146 **********************************************************************/ 00147 EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) { 00148 EDGEPT *this_edgept; 00149 /* Create point */ 00150 this_edgept = new EDGEPT; 00151 this_edgept->pos.x = x; 00152 this_edgept->pos.y = y; 00153 // Now deal with the src_outline steps. 00154 C_OUTLINE* prev_ol = prev->src_outline; 00155 if (prev_ol != NULL && prev->next == next) { 00156 // Compute the fraction of the segment that is being cut. 00157 FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y); 00158 FCOORD target_vec(x - prev->pos.x, y - prev->pos.y); 00159 double cut_fraction = target_vec.length() / segment_vec.length(); 00160 // Get the start and end at the step level. 00161 ICOORD step_start = prev_ol->position_at_index(prev->start_step); 00162 int end_step = prev->start_step + prev->step_count; 00163 int step_length = prev_ol->pathlength(); 00164 ICOORD step_end = prev_ol->position_at_index(end_step % step_length); 00165 ICOORD step_vec = step_end - step_start; 00166 double target_length = step_vec.length() * cut_fraction; 00167 // Find the point on the segment that gives the length nearest to target. 00168 int best_step = prev->start_step; 00169 ICOORD total_step(0, 0); 00170 double best_dist = target_length; 00171 for (int s = prev->start_step; s < end_step; ++s) { 00172 total_step += prev_ol->step(s % step_length); 00173 double dist = fabs(target_length - total_step.length()); 00174 if (dist < best_dist) { 00175 best_dist = dist; 00176 best_step = s + 1; 00177 } 00178 } 00179 // The new point is an intermediate point. 00180 this_edgept->src_outline = prev_ol; 00181 this_edgept->step_count = end_step - best_step; 00182 this_edgept->start_step = best_step % step_length; 00183 prev->step_count = best_step - prev->start_step; 00184 } else { 00185 // The new point is poly only. 00186 this_edgept->src_outline = NULL; 00187 this_edgept->step_count = 0; 00188 this_edgept->start_step = 0; 00189 } 00190 /* Hook it up */ 00191 this_edgept->next = next; 00192 this_edgept->prev = prev; 00193 prev->next = this_edgept; 00194 next->prev = this_edgept; 00195 /* Set up vec entries */ 00196 this_edgept->vec.x = this_edgept->next->pos.x - x; 00197 this_edgept->vec.y = this_edgept->next->pos.y - y; 00198 this_edgept->prev->vec.x = x - this_edgept->prev->pos.x; 00199 this_edgept->prev->vec.y = y - this_edgept->prev->pos.y; 00200 return this_edgept; 00201 } 00202 00203 /********************************************************************** 00204 * remove_edgept 00205 * 00206 * Remove a given EDGEPT from its list and delete it. 00207 **********************************************************************/ 00208 void remove_edgept(EDGEPT *point) { 00209 EDGEPT *prev = point->prev; 00210 EDGEPT *next = point->next; 00211 // Add point's steps onto prev's steps if they are from the same outline. 00212 if (prev->src_outline == point->src_outline && prev->src_outline != NULL) { 00213 prev->step_count += point->step_count; 00214 } 00215 prev->next = next; 00216 next->prev = prev; 00217 prev->vec.x = next->pos.x - prev->pos.x; 00218 prev->vec.y = next->pos.y - prev->pos.y; 00219 delete point; 00220 } 00221 00222 /********************************************************************** 00223 * Print 00224 * 00225 * Shows the coordinates of both points in a split. 00226 **********************************************************************/ 00227 void SPLIT::Print() const { 00228 tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x, 00229 point2->pos.y); 00230 } 00231 00232 #ifndef GRAPHICS_DISABLED 00233 // Draws the split in the given window. 00234 void SPLIT::Mark(ScrollView* window) const { 00235 window->Pen(ScrollView::GREEN); 00236 window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y); 00237 window->UpdateWindow(); 00238 } 00239 #endif 00240 00241 // Creates two outlines out of one by splitting the original one in half. 00242 // Inserts the resulting outlines into the given list. 00243 void SPLIT::SplitOutlineList(TESSLINE* outlines) const { 00244 SplitOutline(); 00245 while (outlines->next != NULL) outlines = outlines->next; 00246 00247 outlines->next = new TESSLINE; 00248 outlines->next->loop = point1; 00249 outlines->next->ComputeBoundingBox(); 00250 00251 outlines = outlines->next; 00252 00253 outlines->next = new TESSLINE; 00254 outlines->next->loop = point2; 00255 outlines->next->ComputeBoundingBox(); 00256 00257 outlines->next->next = NULL; 00258 } 00259 00260 // Makes a split between these two edge points, but does not affect the 00261 // outlines to which they belong. 00262 void SPLIT::SplitOutline() const { 00263 EDGEPT* temp2 = point2->next; 00264 EDGEPT* temp1 = point1->next; 00265 /* Create two new points */ 00266 EDGEPT* new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2); 00267 EDGEPT* new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1); 00268 // point1 and 2 are now cross-over points, so they must have NULL 00269 // src_outlines and give their src_outline information their new 00270 // replacements. 00271 new_point1->src_outline = point1->src_outline; 00272 new_point1->start_step = point1->start_step; 00273 new_point1->step_count = point1->step_count; 00274 new_point2->src_outline = point2->src_outline; 00275 new_point2->start_step = point2->start_step; 00276 new_point2->step_count = point2->step_count; 00277 point1->src_outline = NULL; 00278 point1->start_step = 0; 00279 point1->step_count = 0; 00280 point2->src_outline = NULL; 00281 point2->start_step = 0; 00282 point2->step_count = 0; 00283 } 00284 00285 // Undoes the effect of SplitOutlineList, correcting the outlines for undoing 00286 // the split, but possibly leaving some duplicate outlines. 00287 void SPLIT::UnsplitOutlineList(TBLOB* blob) const { 00288 /* Modify edge points */ 00289 UnsplitOutlines(); 00290 00291 TESSLINE* outline1 = new TESSLINE; 00292 outline1->next = blob->outlines; 00293 blob->outlines = outline1; 00294 outline1->loop = point1; 00295 00296 TESSLINE* outline2 = new TESSLINE; 00297 outline2->next = blob->outlines; 00298 blob->outlines = outline2; 00299 outline2->loop = point2; 00300 } 00301 00302 // Removes the split that was put between these two points. 00303 void SPLIT::UnsplitOutlines() const { 00304 EDGEPT* tmp1 = point1->next; 00305 EDGEPT* tmp2 = point2->next; 00306 00307 tmp1->next->prev = point2; 00308 tmp2->next->prev = point1; 00309 00310 // tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is 00311 // deleted. 00312 point1->next = tmp2->next; 00313 point1->src_outline = tmp2->src_outline; 00314 point1->start_step = tmp2->start_step; 00315 point1->step_count = tmp2->step_count; 00316 // Likewise point2 takes tmp1's place. 00317 point2->next = tmp1->next; 00318 point2->src_outline = tmp1->src_outline; 00319 point2->start_step = tmp1->start_step; 00320 point2->step_count = tmp1->step_count; 00321 00322 delete tmp1; 00323 delete tmp2; 00324 00325 point1->vec.x = point1->next->pos.x - point1->pos.x; 00326 point1->vec.y = point1->next->pos.y - point1->pos.y; 00327 00328 point2->vec.x = point2->next->pos.x - point2->pos.x; 00329 point2->vec.y = point2->next->pos.y - point2->pos.y; 00330 }