|
tesseract 3.04.01
|
00001 /********************************************************************** 00002 * File: polyaprx.cpp (Formerly polygon.c) 00003 * Description: Code for polygonal approximation from old edgeprog. 00004 * Author: Ray Smith 00005 * Created: Thu Nov 25 11:42:04 GMT 1993 00006 * 00007 * (C) Copyright 1993, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include <stdio.h> 00021 #ifdef __UNIX__ 00022 #include <assert.h> 00023 #endif 00024 #define FASTEDGELENGTH 256 00025 #include "polyaprx.h" 00026 #include "params.h" 00027 #include "tprintf.h" 00028 00029 #define EXTERN 00030 00031 EXTERN BOOL_VAR(poly_debug, FALSE, "Debug old poly"); 00032 EXTERN BOOL_VAR(poly_wide_objects_better, TRUE, 00033 "More accurate approx on wide things"); 00034 00035 #define FIXED 4 /*OUTLINE point is fixed */ 00036 00037 #define RUNLENGTH 1 /*length of run */ 00038 00039 #define DIR 2 /*direction of run */ 00040 00041 #define FLAGS 0 00042 00043 #define fixed_dist 20 //really an int_variable 00044 #define approx_dist 15 //really an int_variable 00045 00046 const int par1 = 4500 / (approx_dist * approx_dist); 00047 const int par2 = 6750 / (approx_dist * approx_dist); 00048 00049 00050 /********************************************************************** 00051 * tesspoly_outline 00052 * 00053 * Approximate an outline from chain codes form using the old tess algorithm. 00054 * If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB 00055 * contain pointers to the input C_OUTLINEs that enable higher-resolution 00056 * feature extraction that does not use the polygonal approximation. 00057 **********************************************************************/ 00058 00059 00060 TESSLINE* ApproximateOutline(bool allow_detailed_fx, C_OUTLINE* c_outline) { 00061 TBOX loop_box; // bounding box 00062 inT32 area; // loop area 00063 EDGEPT stack_edgepts[FASTEDGELENGTH]; // converted path 00064 EDGEPT* edgepts = stack_edgepts; 00065 00066 // Use heap memory if the stack buffer is not big enough. 00067 if (c_outline->pathlength() > FASTEDGELENGTH) 00068 edgepts = new EDGEPT[c_outline->pathlength()]; 00069 00070 loop_box = c_outline->bounding_box(); 00071 area = loop_box.height(); 00072 if (!poly_wide_objects_better && loop_box.width() > area) 00073 area = loop_box.width(); 00074 area *= area; 00075 edgesteps_to_edgepts(c_outline, edgepts); 00076 fix2(edgepts, area); 00077 EDGEPT* edgept = poly2(edgepts, area); // 2nd approximation. 00078 EDGEPT* startpt = edgept; 00079 EDGEPT* result = NULL; 00080 EDGEPT* prev_result = NULL; 00081 do { 00082 EDGEPT* new_pt = new EDGEPT; 00083 new_pt->pos = edgept->pos; 00084 new_pt->prev = prev_result; 00085 if (prev_result == NULL) { 00086 result = new_pt; 00087 } else { 00088 prev_result->next = new_pt; 00089 new_pt->prev = prev_result; 00090 } 00091 if (allow_detailed_fx) { 00092 new_pt->src_outline = edgept->src_outline; 00093 new_pt->start_step = edgept->start_step; 00094 new_pt->step_count = edgept->step_count; 00095 } 00096 prev_result = new_pt; 00097 edgept = edgept->next; 00098 } 00099 while (edgept != startpt); 00100 prev_result->next = result; 00101 result->prev = prev_result; 00102 if (edgepts != stack_edgepts) 00103 delete [] edgepts; 00104 return TESSLINE::BuildFromOutlineList(result); 00105 } 00106 00107 00108 /********************************************************************** 00109 * edgesteps_to_edgepts 00110 * 00111 * Convert a C_OUTLINE to EDGEPTs. 00112 **********************************************************************/ 00113 00114 EDGEPT * 00115 edgesteps_to_edgepts ( //convert outline 00116 C_OUTLINE * c_outline, //input 00117 EDGEPT edgepts[] //output is array 00118 ) { 00119 inT32 length; //steps in path 00120 ICOORD pos; //current coords 00121 inT32 stepindex; //current step 00122 inT32 stepinc; //increment 00123 inT32 epindex; //current EDGEPT 00124 inT32 count; //repeated steps 00125 ICOORD vec; //for this 8 step 00126 ICOORD prev_vec; 00127 inT8 epdir; //of this step 00128 DIR128 prevdir; //prvious dir 00129 DIR128 dir; //of this step 00130 00131 pos = c_outline->start_pos (); //start of loop 00132 length = c_outline->pathlength (); 00133 stepindex = 0; 00134 epindex = 0; 00135 prevdir = -1; 00136 count = 0; 00137 int prev_stepindex = 0; 00138 do { 00139 dir = c_outline->step_dir (stepindex); 00140 vec = c_outline->step (stepindex); 00141 if (stepindex < length - 1 00142 && c_outline->step_dir (stepindex + 1) - dir == -32) { 00143 dir += 128 - 16; 00144 vec += c_outline->step (stepindex + 1); 00145 stepinc = 2; 00146 } 00147 else 00148 stepinc = 1; 00149 if (count == 0) { 00150 prevdir = dir; 00151 prev_vec = vec; 00152 } 00153 if (prevdir.get_dir () != dir.get_dir ()) { 00154 edgepts[epindex].pos.x = pos.x (); 00155 edgepts[epindex].pos.y = pos.y (); 00156 prev_vec *= count; 00157 edgepts[epindex].vec.x = prev_vec.x (); 00158 edgepts[epindex].vec.y = prev_vec.y (); 00159 pos += prev_vec; 00160 edgepts[epindex].flags[RUNLENGTH] = count; 00161 edgepts[epindex].prev = &edgepts[epindex - 1]; 00162 edgepts[epindex].flags[FLAGS] = 0; 00163 edgepts[epindex].next = &edgepts[epindex + 1]; 00164 prevdir += 64; 00165 epdir = (DIR128) 0 - prevdir; 00166 epdir >>= 4; 00167 epdir &= 7; 00168 edgepts[epindex].flags[DIR] = epdir; 00169 edgepts[epindex].src_outline = c_outline; 00170 edgepts[epindex].start_step = prev_stepindex; 00171 edgepts[epindex].step_count = stepindex - prev_stepindex; 00172 epindex++; 00173 prevdir = dir; 00174 prev_vec = vec; 00175 count = 1; 00176 prev_stepindex = stepindex; 00177 } 00178 else 00179 count++; 00180 stepindex += stepinc; 00181 } 00182 while (stepindex < length); 00183 edgepts[epindex].pos.x = pos.x (); 00184 edgepts[epindex].pos.y = pos.y (); 00185 prev_vec *= count; 00186 edgepts[epindex].vec.x = prev_vec.x (); 00187 edgepts[epindex].vec.y = prev_vec.y (); 00188 pos += prev_vec; 00189 edgepts[epindex].flags[RUNLENGTH] = count; 00190 edgepts[epindex].flags[FLAGS] = 0; 00191 edgepts[epindex].src_outline = c_outline; 00192 edgepts[epindex].start_step = prev_stepindex; 00193 edgepts[epindex].step_count = stepindex - prev_stepindex; 00194 edgepts[epindex].prev = &edgepts[epindex - 1]; 00195 edgepts[epindex].next = &edgepts[0]; 00196 prevdir += 64; 00197 epdir = (DIR128) 0 - prevdir; 00198 epdir >>= 4; 00199 epdir &= 7; 00200 edgepts[epindex].flags[DIR] = epdir; 00201 edgepts[0].prev = &edgepts[epindex]; 00202 ASSERT_HOST (pos.x () == c_outline->start_pos ().x () 00203 && pos.y () == c_outline->start_pos ().y ()); 00204 return &edgepts[0]; 00205 } 00206 00207 00208 /********************************************************************** 00209 *fix2(start,area) fixes points on the outline according to a trial method* 00210 **********************************************************************/ 00211 00212 //#pragma OPT_LEVEL 1 /*stop compiler bugs*/ 00213 00214 void fix2( //polygonal approx 00215 EDGEPT *start, /*loop to approimate */ 00216 int area) { 00217 EDGEPT *edgept; /*current point */ 00218 EDGEPT *edgept1; 00219 EDGEPT *loopstart; /*modified start of loop */ 00220 EDGEPT *linestart; /*start of line segment */ 00221 int dir1, dir2; /*directions of line */ 00222 int sum1, sum2; /*lengths in dir1,dir2 */ 00223 int stopped; /*completed flag */ 00224 int fixed_count; //no of fixed points 00225 int d01, d12, d23, gapmin; 00226 TPOINT d01vec, d12vec, d23vec; 00227 EDGEPT *edgefix, *startfix; 00228 EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3; 00229 00230 edgept = start; /*start of loop */ 00231 while (((edgept->flags[DIR] - edgept->prev->flags[DIR] + 1) & 7) < 3 00232 && (dir1 = 00233 (edgept->prev->flags[DIR] - edgept->next->flags[DIR]) & 7) != 2 00234 && dir1 != 6) 00235 edgept = edgept->next; /*find suitable start */ 00236 loopstart = edgept; /*remember start */ 00237 00238 stopped = 0; /*not finished yet */ 00239 edgept->flags[FLAGS] |= FIXED; /*fix it */ 00240 do { 00241 linestart = edgept; /*possible start of line */ 00242 dir1 = edgept->flags[DIR]; /*first direction */ 00243 /*length of dir1 */ 00244 sum1 = edgept->flags[RUNLENGTH]; 00245 edgept = edgept->next; 00246 dir2 = edgept->flags[DIR]; /*2nd direction */ 00247 /*length in dir2 */ 00248 sum2 = edgept->flags[RUNLENGTH]; 00249 if (((dir1 - dir2 + 1) & 7) < 3) { 00250 while (edgept->prev->flags[DIR] == edgept->next->flags[DIR]) { 00251 edgept = edgept->next; /*look at next */ 00252 if (edgept->flags[DIR] == dir1) 00253 /*sum lengths */ 00254 sum1 += edgept->flags[RUNLENGTH]; 00255 else 00256 sum2 += edgept->flags[RUNLENGTH]; 00257 } 00258 00259 if (edgept == loopstart) 00260 stopped = 1; /*finished */ 00261 if (sum2 + sum1 > 2 00262 && linestart->prev->flags[DIR] == dir2 00263 && (linestart->prev->flags[RUNLENGTH] > 00264 linestart->flags[RUNLENGTH] || sum2 > sum1)) { 00265 /*start is back one */ 00266 linestart = linestart->prev; 00267 linestart->flags[FLAGS] |= FIXED; 00268 } 00269 00270 if (((edgept->next->flags[DIR] - edgept->flags[DIR] + 1) & 7) >= 3 00271 || (edgept->flags[DIR] == dir1 && sum1 >= sum2) 00272 || ((edgept->prev->flags[RUNLENGTH] < edgept->flags[RUNLENGTH] 00273 || (edgept->flags[DIR] == dir2 && sum2 >= sum1)) 00274 && linestart->next != edgept)) 00275 edgept = edgept->next; 00276 } 00277 /*sharp bend */ 00278 edgept->flags[FLAGS] |= FIXED; 00279 } 00280 /*do whole loop */ 00281 while (edgept != loopstart && !stopped); 00282 00283 edgept = start; 00284 do { 00285 if (((edgept->flags[RUNLENGTH] >= 8) && 00286 (edgept->flags[DIR] != 2) && (edgept->flags[DIR] != 6)) || 00287 ((edgept->flags[RUNLENGTH] >= 8) && 00288 ((edgept->flags[DIR] == 2) || (edgept->flags[DIR] == 6)))) { 00289 edgept->flags[FLAGS] |= FIXED; 00290 edgept1 = edgept->next; 00291 edgept1->flags[FLAGS] |= FIXED; 00292 } 00293 edgept = edgept->next; 00294 } 00295 while (edgept != start); 00296 00297 edgept = start; 00298 do { 00299 /*single fixed step */ 00300 if (edgept->flags[FLAGS] & FIXED && edgept->flags[RUNLENGTH] == 1 00301 /*and neighours free */ 00302 && edgept->next->flags[FLAGS] & FIXED && (edgept->prev->flags[FLAGS] & FIXED) == 0 00303 /*same pair of dirs */ 00304 && (edgept->next->next->flags[FLAGS] & FIXED) == 0 && edgept->prev->flags[DIR] == edgept->next->flags[DIR] && edgept->prev->prev->flags[DIR] == edgept->next->next->flags[DIR] 00305 && ((edgept->prev->flags[DIR] - edgept->flags[DIR] + 1) & 7) < 3) { 00306 /*unfix it */ 00307 edgept->flags[FLAGS] &= ~FIXED; 00308 edgept->next->flags[FLAGS] &= ~FIXED; 00309 } 00310 edgept = edgept->next; /*do all points */ 00311 } 00312 while (edgept != start); /*until finished */ 00313 00314 stopped = 0; 00315 if (area < 450) 00316 area = 450; 00317 00318 gapmin = area * fixed_dist * fixed_dist / 44000; 00319 00320 edgept = start; 00321 fixed_count = 0; 00322 do { 00323 if (edgept->flags[FLAGS] & FIXED) 00324 fixed_count++; 00325 edgept = edgept->next; 00326 } 00327 while (edgept != start); 00328 while ((edgept->flags[FLAGS] & FIXED) == 0) 00329 edgept = edgept->next; 00330 edgefix0 = edgept; 00331 00332 edgept = edgept->next; 00333 while ((edgept->flags[FLAGS] & FIXED) == 0) 00334 edgept = edgept->next; 00335 edgefix1 = edgept; 00336 00337 edgept = edgept->next; 00338 while ((edgept->flags[FLAGS] & FIXED) == 0) 00339 edgept = edgept->next; 00340 edgefix2 = edgept; 00341 00342 edgept = edgept->next; 00343 while ((edgept->flags[FLAGS] & FIXED) == 0) 00344 edgept = edgept->next; 00345 edgefix3 = edgept; 00346 00347 startfix = edgefix2; 00348 00349 do { 00350 if (fixed_count <= 3) 00351 break; //already too few 00352 point_diff (d12vec, edgefix1->pos, edgefix2->pos); 00353 d12 = LENGTH (d12vec); 00354 // TODO(rays) investigate this change: 00355 // Only unfix a point if it is part of a low-curvature section 00356 // of outline and the total angle change of the outlines is 00357 // less than 90 degrees, ie the scalar product is positive. 00358 // if (d12 <= gapmin && SCALAR(edgefix0->vec, edgefix2->vec) > 0) { 00359 if (d12 <= gapmin) { 00360 point_diff (d01vec, edgefix0->pos, edgefix1->pos); 00361 d01 = LENGTH (d01vec); 00362 point_diff (d23vec, edgefix2->pos, edgefix3->pos); 00363 d23 = LENGTH (d23vec); 00364 if (d01 > d23) { 00365 edgefix2->flags[FLAGS] &= ~FIXED; 00366 fixed_count--; 00367 } 00368 else { 00369 edgefix1->flags[FLAGS] &= ~FIXED; 00370 fixed_count--; 00371 edgefix1 = edgefix2; 00372 } 00373 } 00374 else { 00375 edgefix0 = edgefix1; 00376 edgefix1 = edgefix2; 00377 } 00378 edgefix2 = edgefix3; 00379 edgept = edgept->next; 00380 while ((edgept->flags[FLAGS] & FIXED) == 0) { 00381 if (edgept == startfix) 00382 stopped = 1; 00383 edgept = edgept->next; 00384 } 00385 edgefix3 = edgept; 00386 edgefix = edgefix2; 00387 } 00388 while ((edgefix != startfix) && (!stopped)); 00389 } 00390 00391 00392 //#pragma OPT_LEVEL 2 /*stop compiler bugs*/ 00393 00394 /********************************************************************** 00395 *poly2(startpt,area,path) applies a second approximation to the outline 00396 *using the points which have been fixed by the first approximation* 00397 **********************************************************************/ 00398 00399 EDGEPT *poly2( //second poly 00400 EDGEPT *startpt, /*start of loop */ 00401 int area /*area of blob box */ 00402 ) { 00403 EDGEPT *edgept; /*current outline point */ 00404 EDGEPT *loopstart; /*starting point */ 00405 EDGEPT *linestart; /*start of line */ 00406 int edgesum; /*correction count */ 00407 00408 if (area < 1200) 00409 area = 1200; /*minimum value */ 00410 00411 loopstart = NULL; /*not found it yet */ 00412 edgept = startpt; /*start of loop */ 00413 00414 do { 00415 /*current point fixed */ 00416 if (edgept->flags[FLAGS] & FIXED 00417 /*and next not */ 00418 && (edgept->next->flags[FLAGS] & FIXED) == 0) { 00419 loopstart = edgept; /*start of repoly */ 00420 break; 00421 } 00422 edgept = edgept->next; /*next point */ 00423 } 00424 while (edgept != startpt); /*until found or finished */ 00425 00426 if (loopstart == NULL && (startpt->flags[FLAGS] & FIXED) == 0) { 00427 /*fixed start of loop */ 00428 startpt->flags[FLAGS] |= FIXED; 00429 loopstart = startpt; /*or start of loop */ 00430 } 00431 if (loopstart) { 00432 do { 00433 edgept = loopstart; /*first to do */ 00434 do { 00435 linestart = edgept; 00436 edgesum = 0; /*sum of lengths */ 00437 do { 00438 /*sum lengths */ 00439 edgesum += edgept->flags[RUNLENGTH]; 00440 edgept = edgept->next; /*move on */ 00441 } 00442 while ((edgept->flags[FLAGS] & FIXED) == 0 00443 && edgept != loopstart && edgesum < 126); 00444 if (poly_debug) 00445 tprintf 00446 ("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n", 00447 linestart->pos.x, linestart->pos.y, linestart->flags[DIR], 00448 linestart->vec.x, linestart->vec.y, edgesum, edgept->pos.x, 00449 edgept->pos.y); 00450 /*reapproximate */ 00451 cutline(linestart, edgept, area); 00452 00453 while ((edgept->next->flags[FLAGS] & FIXED) 00454 && edgept != loopstart) 00455 edgept = edgept->next; /*look for next non-fixed */ 00456 } 00457 /*do all the loop */ 00458 while (edgept != loopstart); 00459 edgesum = 0; 00460 do { 00461 if (edgept->flags[FLAGS] & FIXED) 00462 edgesum++; 00463 edgept = edgept->next; 00464 } 00465 //count fixed pts 00466 while (edgept != loopstart); 00467 if (edgesum < 3) 00468 area /= 2; //must have 3 pts 00469 } 00470 while (edgesum < 3); 00471 do { 00472 linestart = edgept; 00473 do { 00474 edgept = edgept->next; 00475 } 00476 while ((edgept->flags[FLAGS] & FIXED) == 0); 00477 linestart->next = edgept; 00478 edgept->prev = linestart; 00479 linestart->vec.x = edgept->pos.x - linestart->pos.x; 00480 linestart->vec.y = edgept->pos.y - linestart->pos.y; 00481 } 00482 while (edgept != loopstart); 00483 } 00484 else 00485 edgept = startpt; /*start of loop */ 00486 00487 loopstart = edgept; /*new start */ 00488 return loopstart; /*correct exit */ 00489 } 00490 00491 00492 /********************************************************************** 00493 *cutline(first,last,area) straightens out a line by partitioning 00494 *and joining the ends by a straight line* 00495 **********************************************************************/ 00496 00497 void cutline( //recursive refine 00498 EDGEPT *first, /*ends of line */ 00499 EDGEPT *last, 00500 int area /*area of object */ 00501 ) { 00502 EDGEPT *edge; /*current edge */ 00503 TPOINT vecsum; /*vector sum */ 00504 int vlen; /*approx length of vecsum */ 00505 TPOINT vec; /*accumulated vector */ 00506 EDGEPT *maxpoint; /*worst point */ 00507 int maxperp; /*max deviation */ 00508 int perp; /*perp distance */ 00509 int ptcount; /*no of points */ 00510 int squaresum; /*sum of perps */ 00511 00512 edge = first; /*start of line */ 00513 if (edge->next == last) 00514 return; /*simple line */ 00515 00516 /*vector sum */ 00517 vecsum.x = last->pos.x - edge->pos.x; 00518 vecsum.y = last->pos.y - edge->pos.y; 00519 if (vecsum.x == 0 && vecsum.y == 0) { 00520 /*special case */ 00521 vecsum.x = -edge->prev->vec.x; 00522 vecsum.y = -edge->prev->vec.y; 00523 } 00524 /*absolute value */ 00525 vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x; 00526 if (vecsum.y > vlen) 00527 vlen = vecsum.y; /*maximum */ 00528 else if (-vecsum.y > vlen) 00529 vlen = -vecsum.y; /*absolute value */ 00530 00531 vec.x = edge->vec.x; /*accumulated vector */ 00532 vec.y = edge->vec.y; 00533 maxperp = 0; /*none yet */ 00534 squaresum = ptcount = 0; 00535 edge = edge->next; /*move to actual point */ 00536 maxpoint = edge; /*in case there isn't one */ 00537 do { 00538 perp = CROSS (vec, vecsum); /*get perp distance */ 00539 if (perp != 0) { 00540 perp *= perp; /*squared deviation */ 00541 } 00542 squaresum += perp; /*sum squares */ 00543 ptcount++; /*count points */ 00544 if (poly_debug) 00545 tprintf ("Cutline:Final perp=%d\n", perp); 00546 if (perp > maxperp) { 00547 maxperp = perp; 00548 maxpoint = edge; /*find greatest deviation */ 00549 } 00550 vec.x += edge->vec.x; /*accumulate vectors */ 00551 vec.y += edge->vec.y; 00552 edge = edge->next; 00553 } 00554 while (edge != last); /*test all line */ 00555 00556 perp = LENGTH (vecsum); 00557 ASSERT_HOST (perp != 0); 00558 00559 if (maxperp < 256 * MAX_INT16) { 00560 maxperp <<= 8; 00561 maxperp /= perp; /*true max perp */ 00562 } 00563 else { 00564 maxperp /= perp; 00565 maxperp <<= 8; /*avoid overflow */ 00566 } 00567 if (squaresum < 256 * MAX_INT16) 00568 /*mean squared perp */ 00569 perp = (squaresum << 8) / (perp * ptcount); 00570 else 00571 /*avoid overflow */ 00572 perp = (squaresum / perp << 8) / ptcount; 00573 00574 if (poly_debug) 00575 tprintf ("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n", 00576 area, maxperp / 256.0, maxperp * 200.0 / area, 00577 perp / 256.0, perp * 300.0 / area); 00578 if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) { 00579 maxpoint->flags[FLAGS] |= FIXED; 00580 /*partitions */ 00581 cutline(first, maxpoint, area); 00582 cutline(maxpoint, last, area); 00583 } 00584 }