tesseract 3.04.01

ccstruct/polyaprx.cpp

Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        polyaprx.cpp  (Formerly polygon.c)
00003  * Description: Code for polygonal approximation from old edgeprog.
00004  * Author:      Ray Smith
00005  * Created:     Thu Nov 25 11:42:04 GMT 1993
00006  *
00007  * (C) Copyright 1993, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include          <stdio.h>
00021 #ifdef __UNIX__
00022 #include          <assert.h>
00023 #endif
00024 #define FASTEDGELENGTH    256
00025 #include          "polyaprx.h"
00026 #include          "params.h"
00027 #include          "tprintf.h"
00028 
00029 #define EXTERN
00030 
00031 EXTERN BOOL_VAR(poly_debug, FALSE, "Debug old poly");
00032 EXTERN BOOL_VAR(poly_wide_objects_better, TRUE,
00033                 "More accurate approx on wide things");
00034 
00035 #define FIXED       4            /*OUTLINE point is fixed */
00036 
00037 #define RUNLENGTH     1          /*length of run */
00038 
00039 #define DIR         2            /*direction of run */
00040 
00041 #define FLAGS       0
00042 
00043 #define fixed_dist      20       //really an int_variable
00044 #define approx_dist     15       //really an int_variable
00045 
00046 const int par1 = 4500 / (approx_dist * approx_dist);
00047 const int par2 = 6750 / (approx_dist * approx_dist);
00048 
00049 
00050 /**********************************************************************
00051  * tesspoly_outline
00052  *
00053  * Approximate an outline from chain codes form using the old tess algorithm.
00054  * If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB
00055  * contain pointers to the input C_OUTLINEs that enable higher-resolution
00056  * feature extraction that does not use the polygonal approximation.
00057  **********************************************************************/
00058 
00059 
00060 TESSLINE* ApproximateOutline(bool allow_detailed_fx, C_OUTLINE* c_outline) {
00061   TBOX loop_box;                  // bounding box
00062   inT32 area;                    // loop area
00063   EDGEPT stack_edgepts[FASTEDGELENGTH];  // converted path
00064   EDGEPT* edgepts = stack_edgepts;
00065 
00066   // Use heap memory if the stack buffer is not big enough.
00067   if (c_outline->pathlength() > FASTEDGELENGTH)
00068     edgepts = new EDGEPT[c_outline->pathlength()];
00069 
00070   loop_box = c_outline->bounding_box();
00071   area = loop_box.height();
00072   if (!poly_wide_objects_better && loop_box.width() > area)
00073     area = loop_box.width();
00074   area *= area;
00075   edgesteps_to_edgepts(c_outline, edgepts);
00076   fix2(edgepts, area);
00077   EDGEPT* edgept = poly2(edgepts, area);  // 2nd approximation.
00078   EDGEPT* startpt = edgept;
00079   EDGEPT* result = NULL;
00080   EDGEPT* prev_result = NULL;
00081   do {
00082     EDGEPT* new_pt = new EDGEPT;
00083     new_pt->pos = edgept->pos;
00084     new_pt->prev = prev_result;
00085     if (prev_result == NULL) {
00086       result = new_pt;
00087     } else {
00088       prev_result->next = new_pt;
00089       new_pt->prev = prev_result;
00090     }
00091     if (allow_detailed_fx) {
00092       new_pt->src_outline = edgept->src_outline;
00093       new_pt->start_step = edgept->start_step;
00094       new_pt->step_count = edgept->step_count;
00095     }
00096     prev_result = new_pt;
00097     edgept = edgept->next;
00098   }
00099   while (edgept != startpt);
00100   prev_result->next = result;
00101   result->prev = prev_result;
00102   if (edgepts != stack_edgepts)
00103     delete [] edgepts;
00104   return TESSLINE::BuildFromOutlineList(result);
00105 }
00106 
00107 
00108 /**********************************************************************
00109  * edgesteps_to_edgepts
00110  *
00111  * Convert a C_OUTLINE to EDGEPTs.
00112  **********************************************************************/
00113 
00114 EDGEPT *
00115 edgesteps_to_edgepts (           //convert outline
00116 C_OUTLINE * c_outline,           //input
00117 EDGEPT edgepts[]                 //output is array
00118 ) {
00119   inT32 length;                  //steps in path
00120   ICOORD pos;                    //current coords
00121   inT32 stepindex;               //current step
00122   inT32 stepinc;                 //increment
00123   inT32 epindex;                 //current EDGEPT
00124   inT32 count;                   //repeated steps
00125   ICOORD vec;                    //for this 8 step
00126   ICOORD prev_vec;
00127   inT8 epdir;                    //of this step
00128   DIR128 prevdir;                //prvious dir
00129   DIR128 dir;                    //of this step
00130 
00131   pos = c_outline->start_pos (); //start of loop
00132   length = c_outline->pathlength ();
00133   stepindex = 0;
00134   epindex = 0;
00135   prevdir = -1;
00136   count = 0;
00137   int prev_stepindex = 0;
00138   do {
00139     dir = c_outline->step_dir (stepindex);
00140     vec = c_outline->step (stepindex);
00141     if (stepindex < length - 1
00142     && c_outline->step_dir (stepindex + 1) - dir == -32) {
00143       dir += 128 - 16;
00144       vec += c_outline->step (stepindex + 1);
00145       stepinc = 2;
00146     }
00147     else
00148       stepinc = 1;
00149     if (count == 0) {
00150       prevdir = dir;
00151       prev_vec = vec;
00152     }
00153     if (prevdir.get_dir () != dir.get_dir ()) {
00154       edgepts[epindex].pos.x = pos.x ();
00155       edgepts[epindex].pos.y = pos.y ();
00156       prev_vec *= count;
00157       edgepts[epindex].vec.x = prev_vec.x ();
00158       edgepts[epindex].vec.y = prev_vec.y ();
00159       pos += prev_vec;
00160       edgepts[epindex].flags[RUNLENGTH] = count;
00161       edgepts[epindex].prev = &edgepts[epindex - 1];
00162       edgepts[epindex].flags[FLAGS] = 0;
00163       edgepts[epindex].next = &edgepts[epindex + 1];
00164       prevdir += 64;
00165       epdir = (DIR128) 0 - prevdir;
00166       epdir >>= 4;
00167       epdir &= 7;
00168       edgepts[epindex].flags[DIR] = epdir;
00169       edgepts[epindex].src_outline = c_outline;
00170       edgepts[epindex].start_step = prev_stepindex;
00171       edgepts[epindex].step_count = stepindex - prev_stepindex;
00172       epindex++;
00173       prevdir = dir;
00174       prev_vec = vec;
00175       count = 1;
00176       prev_stepindex = stepindex;
00177     }
00178     else
00179       count++;
00180     stepindex += stepinc;
00181   }
00182   while (stepindex < length);
00183   edgepts[epindex].pos.x = pos.x ();
00184   edgepts[epindex].pos.y = pos.y ();
00185   prev_vec *= count;
00186   edgepts[epindex].vec.x = prev_vec.x ();
00187   edgepts[epindex].vec.y = prev_vec.y ();
00188   pos += prev_vec;
00189   edgepts[epindex].flags[RUNLENGTH] = count;
00190   edgepts[epindex].flags[FLAGS] = 0;
00191   edgepts[epindex].src_outline = c_outline;
00192   edgepts[epindex].start_step = prev_stepindex;
00193   edgepts[epindex].step_count = stepindex - prev_stepindex;
00194   edgepts[epindex].prev = &edgepts[epindex - 1];
00195   edgepts[epindex].next = &edgepts[0];
00196   prevdir += 64;
00197   epdir = (DIR128) 0 - prevdir;
00198   epdir >>= 4;
00199   epdir &= 7;
00200   edgepts[epindex].flags[DIR] = epdir;
00201   edgepts[0].prev = &edgepts[epindex];
00202   ASSERT_HOST (pos.x () == c_outline->start_pos ().x ()
00203     && pos.y () == c_outline->start_pos ().y ());
00204   return &edgepts[0];
00205 }
00206 
00207 
00208 /**********************************************************************
00209  *fix2(start,area) fixes points on the outline according to a trial method*
00210  **********************************************************************/
00211 
00212 //#pragma OPT_LEVEL 1                                                                           /*stop compiler bugs*/
00213 
00214 void fix2(                //polygonal approx
00215           EDGEPT *start,  /*loop to approimate */
00216           int area) {
00217   EDGEPT *edgept;                /*current point */
00218   EDGEPT *edgept1;
00219   EDGEPT *loopstart;             /*modified start of loop */
00220   EDGEPT *linestart;             /*start of line segment */
00221   int dir1, dir2;                /*directions of line */
00222   int sum1, sum2;                /*lengths in dir1,dir2 */
00223   int stopped;                   /*completed flag */
00224   int fixed_count;               //no of fixed points
00225   int d01, d12, d23, gapmin;
00226   TPOINT d01vec, d12vec, d23vec;
00227   EDGEPT *edgefix, *startfix;
00228   EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3;
00229 
00230   edgept = start;                /*start of loop */
00231   while (((edgept->flags[DIR] - edgept->prev->flags[DIR] + 1) & 7) < 3
00232     && (dir1 =
00233     (edgept->prev->flags[DIR] - edgept->next->flags[DIR]) & 7) != 2
00234     && dir1 != 6)
00235     edgept = edgept->next;       /*find suitable start */
00236   loopstart = edgept;            /*remember start */
00237 
00238   stopped = 0;                   /*not finished yet */
00239   edgept->flags[FLAGS] |= FIXED; /*fix it */
00240   do {
00241     linestart = edgept;          /*possible start of line */
00242     dir1 = edgept->flags[DIR];   /*first direction */
00243                                  /*length of dir1 */
00244     sum1 = edgept->flags[RUNLENGTH];
00245     edgept = edgept->next;
00246     dir2 = edgept->flags[DIR];   /*2nd direction */
00247                                  /*length in dir2 */
00248     sum2 = edgept->flags[RUNLENGTH];
00249     if (((dir1 - dir2 + 1) & 7) < 3) {
00250       while (edgept->prev->flags[DIR] == edgept->next->flags[DIR]) {
00251         edgept = edgept->next;   /*look at next */
00252         if (edgept->flags[DIR] == dir1)
00253                                  /*sum lengths */
00254           sum1 += edgept->flags[RUNLENGTH];
00255         else
00256           sum2 += edgept->flags[RUNLENGTH];
00257       }
00258 
00259       if (edgept == loopstart)
00260         stopped = 1;             /*finished */
00261       if (sum2 + sum1 > 2
00262         && linestart->prev->flags[DIR] == dir2
00263         && (linestart->prev->flags[RUNLENGTH] >
00264       linestart->flags[RUNLENGTH] || sum2 > sum1)) {
00265                                  /*start is back one */
00266         linestart = linestart->prev;
00267         linestart->flags[FLAGS] |= FIXED;
00268       }
00269 
00270       if (((edgept->next->flags[DIR] - edgept->flags[DIR] + 1) & 7) >= 3
00271         || (edgept->flags[DIR] == dir1 && sum1 >= sum2)
00272         || ((edgept->prev->flags[RUNLENGTH] < edgept->flags[RUNLENGTH]
00273         || (edgept->flags[DIR] == dir2 && sum2 >= sum1))
00274           && linestart->next != edgept))
00275         edgept = edgept->next;
00276     }
00277                                  /*sharp bend */
00278     edgept->flags[FLAGS] |= FIXED;
00279   }
00280                                  /*do whole loop */
00281   while (edgept != loopstart && !stopped);
00282 
00283   edgept = start;
00284   do {
00285     if (((edgept->flags[RUNLENGTH] >= 8) &&
00286       (edgept->flags[DIR] != 2) && (edgept->flags[DIR] != 6)) ||
00287       ((edgept->flags[RUNLENGTH] >= 8) &&
00288     ((edgept->flags[DIR] == 2) || (edgept->flags[DIR] == 6)))) {
00289       edgept->flags[FLAGS] |= FIXED;
00290       edgept1 = edgept->next;
00291       edgept1->flags[FLAGS] |= FIXED;
00292     }
00293     edgept = edgept->next;
00294   }
00295   while (edgept != start);
00296 
00297   edgept = start;
00298   do {
00299                                  /*single fixed step */
00300     if (edgept->flags[FLAGS] & FIXED && edgept->flags[RUNLENGTH] == 1
00301                                  /*and neighours free */
00302       && edgept->next->flags[FLAGS] & FIXED && (edgept->prev->flags[FLAGS] & FIXED) == 0
00303                                  /*same pair of dirs */
00304       && (edgept->next->next->flags[FLAGS] & FIXED) == 0 && edgept->prev->flags[DIR] == edgept->next->flags[DIR] && edgept->prev->prev->flags[DIR] == edgept->next->next->flags[DIR]
00305     && ((edgept->prev->flags[DIR] - edgept->flags[DIR] + 1) & 7) < 3) {
00306                                  /*unfix it */
00307       edgept->flags[FLAGS] &= ~FIXED;
00308       edgept->next->flags[FLAGS] &= ~FIXED;
00309     }
00310     edgept = edgept->next;       /*do all points */
00311   }
00312   while (edgept != start);       /*until finished */
00313 
00314   stopped = 0;
00315   if (area < 450)
00316     area = 450;
00317 
00318   gapmin = area * fixed_dist * fixed_dist / 44000;
00319 
00320   edgept = start;
00321   fixed_count = 0;
00322   do {
00323     if (edgept->flags[FLAGS] & FIXED)
00324       fixed_count++;
00325     edgept = edgept->next;
00326   }
00327   while (edgept != start);
00328   while ((edgept->flags[FLAGS] & FIXED) == 0)
00329     edgept = edgept->next;
00330   edgefix0 = edgept;
00331 
00332   edgept = edgept->next;
00333   while ((edgept->flags[FLAGS] & FIXED) == 0)
00334     edgept = edgept->next;
00335   edgefix1 = edgept;
00336 
00337   edgept = edgept->next;
00338   while ((edgept->flags[FLAGS] & FIXED) == 0)
00339     edgept = edgept->next;
00340   edgefix2 = edgept;
00341 
00342   edgept = edgept->next;
00343   while ((edgept->flags[FLAGS] & FIXED) == 0)
00344     edgept = edgept->next;
00345   edgefix3 = edgept;
00346 
00347   startfix = edgefix2;
00348 
00349   do {
00350     if (fixed_count <= 3)
00351       break;                     //already too few
00352     point_diff (d12vec, edgefix1->pos, edgefix2->pos);
00353     d12 = LENGTH (d12vec);
00354     // TODO(rays) investigate this change:
00355     // Only unfix a point if it is part of a low-curvature section
00356     // of outline and the total angle change of the outlines is
00357     // less than 90 degrees, ie the scalar product is positive.
00358     // if (d12 <= gapmin && SCALAR(edgefix0->vec, edgefix2->vec) > 0) {
00359     if (d12 <= gapmin) {
00360       point_diff (d01vec, edgefix0->pos, edgefix1->pos);
00361       d01 = LENGTH (d01vec);
00362       point_diff (d23vec, edgefix2->pos, edgefix3->pos);
00363       d23 = LENGTH (d23vec);
00364       if (d01 > d23) {
00365         edgefix2->flags[FLAGS] &= ~FIXED;
00366         fixed_count--;
00367       }
00368       else {
00369         edgefix1->flags[FLAGS] &= ~FIXED;
00370         fixed_count--;
00371         edgefix1 = edgefix2;
00372       }
00373     }
00374     else {
00375       edgefix0 = edgefix1;
00376       edgefix1 = edgefix2;
00377     }
00378     edgefix2 = edgefix3;
00379     edgept = edgept->next;
00380     while ((edgept->flags[FLAGS] & FIXED) == 0) {
00381       if (edgept == startfix)
00382         stopped = 1;
00383       edgept = edgept->next;
00384     }
00385     edgefix3 = edgept;
00386     edgefix = edgefix2;
00387   }
00388   while ((edgefix != startfix) && (!stopped));
00389 }
00390 
00391 
00392 //#pragma OPT_LEVEL 2                                                                           /*stop compiler bugs*/
00393 
00394 /**********************************************************************
00395  *poly2(startpt,area,path) applies a second approximation to the outline
00396  *using the points which have been fixed by the first approximation*
00397  **********************************************************************/
00398 
00399 EDGEPT *poly2(                  //second poly
00400               EDGEPT *startpt,  /*start of loop */
00401               int area          /*area of blob box */
00402              ) {
00403   EDGEPT *edgept;                /*current outline point */
00404   EDGEPT *loopstart;             /*starting point */
00405   EDGEPT *linestart;             /*start of line */
00406   int edgesum;                   /*correction count */
00407 
00408   if (area < 1200)
00409     area = 1200;                 /*minimum value */
00410 
00411   loopstart = NULL;              /*not found it yet */
00412   edgept = startpt;              /*start of loop */
00413 
00414   do {
00415                                  /*current point fixed */
00416     if (edgept->flags[FLAGS] & FIXED
00417                                  /*and next not */
00418     && (edgept->next->flags[FLAGS] & FIXED) == 0) {
00419       loopstart = edgept;        /*start of repoly */
00420       break;
00421     }
00422     edgept = edgept->next;       /*next point */
00423   }
00424   while (edgept != startpt);     /*until found or finished */
00425 
00426   if (loopstart == NULL && (startpt->flags[FLAGS] & FIXED) == 0) {
00427                                  /*fixed start of loop */
00428     startpt->flags[FLAGS] |= FIXED;
00429     loopstart = startpt;         /*or start of loop */
00430   }
00431   if (loopstart) {
00432     do {
00433       edgept = loopstart;        /*first to do */
00434       do {
00435         linestart = edgept;
00436         edgesum = 0;             /*sum of lengths */
00437         do {
00438                                  /*sum lengths */
00439           edgesum += edgept->flags[RUNLENGTH];
00440           edgept = edgept->next; /*move on */
00441         }
00442         while ((edgept->flags[FLAGS] & FIXED) == 0
00443           && edgept != loopstart && edgesum < 126);
00444         if (poly_debug)
00445           tprintf
00446             ("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n",
00447             linestart->pos.x, linestart->pos.y, linestart->flags[DIR],
00448             linestart->vec.x, linestart->vec.y, edgesum, edgept->pos.x,
00449             edgept->pos.y);
00450                                  /*reapproximate */
00451         cutline(linestart, edgept, area);
00452 
00453         while ((edgept->next->flags[FLAGS] & FIXED)
00454           && edgept != loopstart)
00455           edgept = edgept->next; /*look for next non-fixed */
00456       }
00457                                  /*do all the loop */
00458       while (edgept != loopstart);
00459       edgesum = 0;
00460       do {
00461         if (edgept->flags[FLAGS] & FIXED)
00462           edgesum++;
00463         edgept = edgept->next;
00464       }
00465                                  //count fixed pts
00466       while (edgept != loopstart);
00467       if (edgesum < 3)
00468         area /= 2;               //must have 3 pts
00469     }
00470     while (edgesum < 3);
00471     do {
00472       linestart = edgept;
00473       do {
00474         edgept = edgept->next;
00475       }
00476       while ((edgept->flags[FLAGS] & FIXED) == 0);
00477       linestart->next = edgept;
00478       edgept->prev = linestart;
00479       linestart->vec.x = edgept->pos.x - linestart->pos.x;
00480       linestart->vec.y = edgept->pos.y - linestart->pos.y;
00481     }
00482     while (edgept != loopstart);
00483   }
00484   else
00485     edgept = startpt;            /*start of loop */
00486 
00487   loopstart = edgept;            /*new start */
00488   return loopstart;              /*correct exit */
00489 }
00490 
00491 
00492 /**********************************************************************
00493  *cutline(first,last,area) straightens out a line by partitioning
00494  *and joining the ends by a straight line*
00495  **********************************************************************/
00496 
00497 void cutline(                //recursive refine
00498              EDGEPT *first,  /*ends of line */
00499              EDGEPT *last,
00500              int area        /*area of object */
00501             ) {
00502   EDGEPT *edge;                  /*current edge */
00503   TPOINT vecsum;                 /*vector sum */
00504   int vlen;                      /*approx length of vecsum */
00505   TPOINT vec;                    /*accumulated vector */
00506   EDGEPT *maxpoint;              /*worst point */
00507   int maxperp;                   /*max deviation */
00508   int perp;                      /*perp distance */
00509   int ptcount;                   /*no of points */
00510   int squaresum;                 /*sum of perps */
00511 
00512   edge = first;                  /*start of line */
00513   if (edge->next == last)
00514     return;                      /*simple line */
00515 
00516                                  /*vector sum */
00517   vecsum.x = last->pos.x - edge->pos.x;
00518   vecsum.y = last->pos.y - edge->pos.y;
00519   if (vecsum.x == 0 && vecsum.y == 0) {
00520                                  /*special case */
00521     vecsum.x = -edge->prev->vec.x;
00522     vecsum.y = -edge->prev->vec.y;
00523   }
00524                                  /*absolute value */
00525   vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x;
00526   if (vecsum.y > vlen)
00527     vlen = vecsum.y;             /*maximum */
00528   else if (-vecsum.y > vlen)
00529     vlen = -vecsum.y;            /*absolute value */
00530 
00531   vec.x = edge->vec.x;           /*accumulated vector */
00532   vec.y = edge->vec.y;
00533   maxperp = 0;                   /*none yet */
00534   squaresum = ptcount = 0;
00535   edge = edge->next;             /*move to actual point */
00536   maxpoint = edge;               /*in case there isn't one */
00537   do {
00538     perp = CROSS (vec, vecsum);  /*get perp distance */
00539     if (perp != 0) {
00540       perp *= perp;              /*squared deviation */
00541     }
00542     squaresum += perp;           /*sum squares */
00543     ptcount++;                   /*count points */
00544     if (poly_debug)
00545       tprintf ("Cutline:Final perp=%d\n", perp);
00546     if (perp > maxperp) {
00547       maxperp = perp;
00548       maxpoint = edge;           /*find greatest deviation */
00549     }
00550     vec.x += edge->vec.x;        /*accumulate vectors */
00551     vec.y += edge->vec.y;
00552     edge = edge->next;
00553   }
00554   while (edge != last);          /*test all line */
00555 
00556   perp = LENGTH (vecsum);
00557   ASSERT_HOST (perp != 0);
00558 
00559   if (maxperp < 256 * MAX_INT16) {
00560     maxperp <<= 8;
00561     maxperp /= perp;             /*true max perp */
00562   }
00563   else {
00564     maxperp /= perp;
00565     maxperp <<= 8;               /*avoid overflow */
00566   }
00567   if (squaresum < 256 * MAX_INT16)
00568                                  /*mean squared perp */
00569     perp = (squaresum << 8) / (perp * ptcount);
00570   else
00571                                  /*avoid overflow */
00572     perp = (squaresum / perp << 8) / ptcount;
00573 
00574   if (poly_debug)
00575     tprintf ("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n",
00576       area, maxperp / 256.0, maxperp * 200.0 / area,
00577       perp / 256.0, perp * 300.0 / area);
00578   if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) {
00579     maxpoint->flags[FLAGS] |= FIXED;
00580                                  /*partitions */
00581     cutline(first, maxpoint, area);
00582     cutline(maxpoint, last, area);
00583   }
00584 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines