tesseract 3.04.01

ccstruct/split.cpp

Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
00004  * File:        split.c  (Formerly split.c)
00005  * Description:
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Fri Oct 16 14:37:00 1987
00008  * Modified:     Fri May 17 16:27:49 1991 (Mark Seaman) marks@hpgrlt
00009  * Language:     C
00010  * Package:      N/A
00011  * Status:       Reusable Software Component
00012  *
00013  * (c) Copyright 1987, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  *************************************************************************/
00025 /*----------------------------------------------------------------------
00026               I n c l u d e s
00027 ----------------------------------------------------------------------*/
00028 // Include automatically generated configuration file if running autoconf.
00029 #ifdef HAVE_CONFIG_H
00030 #include "config_auto.h"
00031 #endif
00032 
00033 #include "split.h"
00034 #include "coutln.h"
00035 #include "tprintf.h"
00036 
00037 #ifdef __UNIX__
00038 #include <assert.h>
00039 #endif
00040 
00041 /*----------------------------------------------------------------------
00042               V a r i a b l e s
00043 ----------------------------------------------------------------------*/
00044 // Limit on the amount of penalty for the chop being off-center.
00045 const int kCenterGradeCap = 25;
00046 // Ridiculously large priority for splits that are no use.
00047 const double kBadPriority = 999.0;
00048 
00049 BOOL_VAR(wordrec_display_splits, 0, "Display splits");
00050 
00051 // Returns the bounding box of all the points in the split.
00052 TBOX SPLIT::bounding_box() const {
00053   return TBOX(
00054       MIN(point1->pos.x, point2->pos.x), MIN(point1->pos.y, point2->pos.y),
00055       MAX(point1->pos.x, point2->pos.x), MAX(point1->pos.y, point2->pos.y));
00056 }
00057 
00058 // Hides the SPLIT so the outlines appear not to be cut by it.
00059 void SPLIT::Hide() const {
00060   EDGEPT* edgept = point1;
00061   do {
00062     edgept->Hide();
00063     edgept = edgept->next;
00064   } while (!edgept->EqualPos(*point2) && edgept != point1);
00065   edgept = point2;
00066   do {
00067     edgept->Hide();
00068     edgept = edgept->next;
00069   } while (!edgept->EqualPos(*point1) && edgept != point2);
00070 }
00071 
00072 // Undoes hide, so the outlines are cut by the SPLIT.
00073 void SPLIT::Reveal() const {
00074   EDGEPT* edgept = point1;
00075   do {
00076     edgept->Reveal();
00077     edgept = edgept->next;
00078   } while (!edgept->EqualPos(*point2) && edgept != point1);
00079   edgept = point2;
00080   do {
00081     edgept->Reveal();
00082     edgept = edgept->next;
00083   } while (!edgept->EqualPos(*point1) && edgept != point2);
00084 }
00085 
00086 // Compute a split priority based on the bounding boxes of the parts.
00087 // The arguments here are config parameters defined in Wordrec. Add chop_
00088 // to the beginning of the name.
00089 float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob,
00090                           int centered_maxwidth, double center_knob,
00091                           double width_change_knob) const {
00092   TBOX box1 = Box12();
00093   TBOX box2 = Box21();
00094   int min_left = MIN(box1.left(), box2.left());
00095   int max_right = MAX(box1.right(), box2.right());
00096   if (xmin < min_left && xmax > max_right) return kBadPriority;
00097 
00098   float grade = 0.0f;
00099   // grade_overlap.
00100   int width1 = box1.width();
00101   int width2 = box2.width();
00102   int min_width = MIN(width1, width2);
00103   int overlap = -box1.x_gap(box2);
00104   if (overlap == min_width) {
00105     grade += 100.0f;  // Total overlap.
00106   } else {
00107     if (2 * overlap > min_width) overlap += 2 * overlap - min_width;
00108     if (overlap > 0) grade += overlap_knob * overlap;
00109   }
00110   // grade_center_of_blob.
00111   if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) {
00112     grade += MIN(kCenterGradeCap, center_knob * abs(width1 - width2));
00113   }
00114   // grade_width_change.
00115   float width_change_grade = 20 - (max_right - min_left - MAX(width1, width2));
00116   if (width_change_grade > 0.0f)
00117     grade += width_change_grade * width_change_knob;
00118   return grade;
00119 }
00120 
00121 // Returns true if *this SPLIT appears OK in the sense that it does not cross
00122 // any outlines and does not chop off any ridiculously small pieces.
00123 bool SPLIT::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
00124   return !IsLittleChunk(min_points, min_area) &&
00125          !blob.SegmentCrossesOutline(point1->pos, point2->pos);
00126 }
00127 
00128 // Returns true if the split generates a small chunk in terms of either area
00129 // or number of points.
00130 bool SPLIT::IsLittleChunk(int min_points, int min_area) const {
00131   if (point1->ShortNonCircularSegment(min_points, point2) &&
00132       point1->SegmentArea(point2) < min_area) {
00133     return true;
00134   }
00135   if (point2->ShortNonCircularSegment(min_points, point1) &&
00136       point2->SegmentArea(point1) < min_area) {
00137     return true;
00138   }
00139   return false;
00140 }
00141 
00142 /**********************************************************************
00143  * make_edgept
00144  *
00145  * Create an EDGEPT and hook it into an existing list of edge points.
00146  **********************************************************************/
00147 EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) {
00148   EDGEPT *this_edgept;
00149   /* Create point */
00150   this_edgept = new EDGEPT;
00151   this_edgept->pos.x = x;
00152   this_edgept->pos.y = y;
00153   // Now deal with the src_outline steps.
00154   C_OUTLINE* prev_ol = prev->src_outline;
00155   if (prev_ol != NULL && prev->next == next) {
00156     // Compute the fraction of the segment that is being cut.
00157     FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y);
00158     FCOORD target_vec(x - prev->pos.x, y - prev->pos.y);
00159     double cut_fraction = target_vec.length() / segment_vec.length();
00160     // Get the start and end at the step level.
00161     ICOORD step_start = prev_ol->position_at_index(prev->start_step);
00162     int end_step = prev->start_step + prev->step_count;
00163     int step_length = prev_ol->pathlength();
00164     ICOORD step_end = prev_ol->position_at_index(end_step % step_length);
00165     ICOORD step_vec = step_end - step_start;
00166     double target_length = step_vec.length() * cut_fraction;
00167     // Find the point on the segment that gives the length nearest to target.
00168     int best_step = prev->start_step;
00169     ICOORD total_step(0, 0);
00170     double best_dist = target_length;
00171     for (int s = prev->start_step; s < end_step; ++s) {
00172       total_step += prev_ol->step(s % step_length);
00173       double dist = fabs(target_length - total_step.length());
00174       if (dist < best_dist) {
00175         best_dist = dist;
00176         best_step = s + 1;
00177       }
00178     }
00179     // The new point is an intermediate point.
00180     this_edgept->src_outline = prev_ol;
00181     this_edgept->step_count = end_step - best_step;
00182     this_edgept->start_step = best_step % step_length;
00183     prev->step_count = best_step - prev->start_step;
00184   } else {
00185     // The new point is poly only.
00186     this_edgept->src_outline = NULL;
00187     this_edgept->step_count = 0;
00188     this_edgept->start_step = 0;
00189   }
00190   /* Hook it up */
00191   this_edgept->next = next;
00192   this_edgept->prev = prev;
00193   prev->next = this_edgept;
00194   next->prev = this_edgept;
00195   /* Set up vec entries */
00196   this_edgept->vec.x = this_edgept->next->pos.x - x;
00197   this_edgept->vec.y = this_edgept->next->pos.y - y;
00198   this_edgept->prev->vec.x = x - this_edgept->prev->pos.x;
00199   this_edgept->prev->vec.y = y - this_edgept->prev->pos.y;
00200   return this_edgept;
00201 }
00202 
00203 /**********************************************************************
00204  * remove_edgept
00205  *
00206  * Remove a given EDGEPT from its list and delete it.
00207  **********************************************************************/
00208 void remove_edgept(EDGEPT *point) {
00209   EDGEPT *prev = point->prev;
00210   EDGEPT *next = point->next;
00211   // Add point's steps onto prev's steps if they are from the same outline.
00212   if (prev->src_outline == point->src_outline && prev->src_outline != NULL) {
00213     prev->step_count += point->step_count;
00214   }
00215   prev->next = next;
00216   next->prev = prev;
00217   prev->vec.x = next->pos.x - prev->pos.x;
00218   prev->vec.y = next->pos.y - prev->pos.y;
00219   delete point;
00220 }
00221 
00222 /**********************************************************************
00223  * Print
00224  *
00225  * Shows the coordinates of both points in a split.
00226  **********************************************************************/
00227 void SPLIT::Print() const {
00228   tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x,
00229           point2->pos.y);
00230 }
00231 
00232 #ifndef GRAPHICS_DISABLED
00233 // Draws the split in the given window.
00234 void SPLIT::Mark(ScrollView* window) const {
00235   window->Pen(ScrollView::GREEN);
00236   window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y);
00237   window->UpdateWindow();
00238 }
00239 #endif
00240 
00241 // Creates two outlines out of one by splitting the original one in half.
00242 // Inserts the resulting outlines into the given list.
00243 void SPLIT::SplitOutlineList(TESSLINE* outlines) const {
00244   SplitOutline();
00245   while (outlines->next != NULL) outlines = outlines->next;
00246 
00247   outlines->next = new TESSLINE;
00248   outlines->next->loop = point1;
00249   outlines->next->ComputeBoundingBox();
00250 
00251   outlines = outlines->next;
00252 
00253   outlines->next = new TESSLINE;
00254   outlines->next->loop = point2;
00255   outlines->next->ComputeBoundingBox();
00256 
00257   outlines->next->next = NULL;
00258 }
00259 
00260 // Makes a split between these two edge points, but does not affect the
00261 // outlines to which they belong.
00262 void SPLIT::SplitOutline() const {
00263   EDGEPT* temp2 = point2->next;
00264   EDGEPT* temp1 = point1->next;
00265   /* Create two new points */
00266   EDGEPT* new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2);
00267   EDGEPT* new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1);
00268   // point1 and 2 are now cross-over points, so they must have NULL
00269   // src_outlines and give their src_outline information their new
00270   // replacements.
00271   new_point1->src_outline = point1->src_outline;
00272   new_point1->start_step = point1->start_step;
00273   new_point1->step_count = point1->step_count;
00274   new_point2->src_outline = point2->src_outline;
00275   new_point2->start_step = point2->start_step;
00276   new_point2->step_count = point2->step_count;
00277   point1->src_outline = NULL;
00278   point1->start_step = 0;
00279   point1->step_count = 0;
00280   point2->src_outline = NULL;
00281   point2->start_step = 0;
00282   point2->step_count = 0;
00283 }
00284 
00285 // Undoes the effect of SplitOutlineList, correcting the outlines for undoing
00286 // the split, but possibly leaving some duplicate outlines.
00287 void SPLIT::UnsplitOutlineList(TBLOB* blob) const {
00288   /* Modify edge points */
00289   UnsplitOutlines();
00290 
00291   TESSLINE* outline1 = new TESSLINE;
00292   outline1->next = blob->outlines;
00293   blob->outlines = outline1;
00294   outline1->loop = point1;
00295 
00296   TESSLINE* outline2 = new TESSLINE;
00297   outline2->next = blob->outlines;
00298   blob->outlines = outline2;
00299   outline2->loop = point2;
00300 }
00301 
00302 // Removes the split that was put between these two points.
00303 void SPLIT::UnsplitOutlines() const {
00304   EDGEPT* tmp1 = point1->next;
00305   EDGEPT* tmp2 = point2->next;
00306 
00307   tmp1->next->prev = point2;
00308   tmp2->next->prev = point1;
00309 
00310   // tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is
00311   // deleted.
00312   point1->next = tmp2->next;
00313   point1->src_outline = tmp2->src_outline;
00314   point1->start_step = tmp2->start_step;
00315   point1->step_count = tmp2->step_count;
00316   // Likewise point2 takes tmp1's place.
00317   point2->next = tmp1->next;
00318   point2->src_outline = tmp1->src_outline;
00319   point2->start_step = tmp1->start_step;
00320   point2->step_count = tmp1->step_count;
00321 
00322   delete tmp1;
00323   delete tmp2;
00324 
00325   point1->vec.x = point1->next->pos.x - point1->pos.x;
00326   point1->vec.y = point1->next->pos.y - point1->pos.y;
00327 
00328   point2->vec.x = point2->next->pos.x - point2->pos.x;
00329   point2->vec.y = point2->next->pos.y - point2->pos.y;
00330 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines