|
tesseract 3.04.01
|
00001 /* -*-C-*- 00002 ******************************************************************************** 00003 * 00004 * File: blobs.c (Formerly blobs.c) 00005 * Description: Blob definition 00006 * Author: Mark Seaman, OCR Technology 00007 * Created: Fri Oct 27 15:39:52 1989 00008 * Modified: Thu Mar 28 15:33:26 1991 (Mark Seaman) marks@hpgrlt 00009 * Language: C 00010 * Package: N/A 00011 * Status: Experimental (Do Not Distribute) 00012 * 00013 * (c) Copyright 1989, Hewlett-Packard Company. 00014 ** Licensed under the Apache License, Version 2.0 (the "License"); 00015 ** you may not use this file except in compliance with the License. 00016 ** You may obtain a copy of the License at 00017 ** http://www.apache.org/licenses/LICENSE-2.0 00018 ** Unless required by applicable law or agreed to in writing, software 00019 ** distributed under the License is distributed on an "AS IS" BASIS, 00020 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00021 ** See the License for the specific language governing permissions and 00022 ** limitations under the License. 00023 * 00024 *********************************************************************************/ 00025 00026 /*---------------------------------------------------------------------- 00027 I n c l u d e s 00028 ----------------------------------------------------------------------*/ 00029 // Include automatically generated configuration file if running autoconf. 00030 #ifdef HAVE_CONFIG_H 00031 #include "config_auto.h" 00032 #endif 00033 00034 #include "blobs.h" 00035 #include "ccstruct.h" 00036 #include "clst.h" 00037 #include "cutil.h" 00038 #include "emalloc.h" 00039 #include "helpers.h" 00040 #include "linlsq.h" 00041 #include "ndminx.h" 00042 #include "normalis.h" 00043 #include "ocrblock.h" 00044 #include "ocrrow.h" 00045 #include "points.h" 00046 #include "polyaprx.h" 00047 #include "structures.h" 00048 #include "werd.h" 00049 00050 using tesseract::CCStruct; 00051 00052 // A Vector representing the "vertical" direction when measuring the 00053 // divisiblity of blobs into multiple blobs just by separating outlines. 00054 // See divisible_blob below for the use. 00055 const TPOINT kDivisibleVerticalUpright(0, 1); 00056 // A vector representing the "vertical" direction for italic text for use 00057 // when separating outlines. Using it actually deteriorates final accuracy, 00058 // so it is only used for ApplyBoxes chopping to get a better segmentation. 00059 const TPOINT kDivisibleVerticalItalic(1, 5); 00060 00061 /*---------------------------------------------------------------------- 00062 F u n c t i o n s 00063 ----------------------------------------------------------------------*/ 00064 00065 CLISTIZE(EDGEPT); 00066 00067 // Returns true when the two line segments cross each other. 00068 // (Moved from outlines.cpp). 00069 // Finds where the projected lines would cross and then checks to see if the 00070 // point of intersection lies on both of the line segments. If it does 00071 // then these two segments cross. 00072 /* static */ 00073 bool TPOINT::IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0, 00074 const TPOINT& b1) { 00075 int b0a1xb0b1, b0b1xb0a0; 00076 int a1b1xa1a0, a1a0xa1b0; 00077 00078 TPOINT b0a1, b0a0, a1b1, b0b1, a1a0; 00079 00080 b0a1.x = a1.x - b0.x; 00081 b0a0.x = a0.x - b0.x; 00082 a1b1.x = b1.x - a1.x; 00083 b0b1.x = b1.x - b0.x; 00084 a1a0.x = a0.x - a1.x; 00085 b0a1.y = a1.y - b0.y; 00086 b0a0.y = a0.y - b0.y; 00087 a1b1.y = b1.y - a1.y; 00088 b0b1.y = b1.y - b0.y; 00089 a1a0.y = a0.y - a1.y; 00090 00091 b0a1xb0b1 = CROSS(b0a1, b0b1); 00092 b0b1xb0a0 = CROSS(b0b1, b0a0); 00093 a1b1xa1a0 = CROSS(a1b1, a1a0); 00094 // For clarity, we want CROSS(a1a0,a1b0) here but we have b0a1 instead of a1b0 00095 // so use -CROSS(a1b0,b0a1) instead, which is the same. 00096 a1a0xa1b0 = -CROSS(a1a0, b0a1); 00097 00098 return ((b0a1xb0b1 > 0 && b0b1xb0a0 > 0) || 00099 (b0a1xb0b1 < 0 && b0b1xb0a0 < 0)) && 00100 ((a1b1xa1a0 > 0 && a1a0xa1b0 > 0) || (a1b1xa1a0 < 0 && a1a0xa1b0 < 0)); 00101 } 00102 00103 // Consume the circular list of EDGEPTs to make a TESSLINE. 00104 TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) { 00105 TESSLINE* result = new TESSLINE; 00106 result->loop = outline; 00107 if (outline->src_outline != NULL) { 00108 // ASSUMPTION: This function is only ever called from ApproximateOutline 00109 // and therefore either all points have a src_outline or all do not. 00110 // Just as SetupFromPos sets the vectors from the vertices, setup the 00111 // step_count members to indicate the (positive) number of original 00112 // C_OUTLINE steps to the next vertex. 00113 EDGEPT* pt = outline; 00114 do { 00115 pt->step_count = pt->next->start_step - pt->start_step; 00116 if (pt->step_count < 0) 00117 pt->step_count += pt->src_outline->pathlength(); 00118 pt = pt->next; 00119 } while (pt != outline); 00120 } 00121 result->SetupFromPos(); 00122 return result; 00123 } 00124 00125 // Copies the data and the outline, but leaves next untouched. 00126 void TESSLINE::CopyFrom(const TESSLINE& src) { 00127 Clear(); 00128 topleft = src.topleft; 00129 botright = src.botright; 00130 start = src.start; 00131 is_hole = src.is_hole; 00132 if (src.loop != NULL) { 00133 EDGEPT* prevpt = NULL; 00134 EDGEPT* newpt = NULL; 00135 EDGEPT* srcpt = src.loop; 00136 do { 00137 newpt = new EDGEPT(*srcpt); 00138 if (prevpt == NULL) { 00139 loop = newpt; 00140 } else { 00141 newpt->prev = prevpt; 00142 prevpt->next = newpt; 00143 } 00144 prevpt = newpt; 00145 srcpt = srcpt->next; 00146 } while (srcpt != src.loop); 00147 loop->prev = newpt; 00148 newpt->next = loop; 00149 } 00150 } 00151 00152 // Deletes owned data. 00153 void TESSLINE::Clear() { 00154 if (loop == NULL) 00155 return; 00156 00157 EDGEPT* this_edge = loop; 00158 do { 00159 EDGEPT* next_edge = this_edge->next; 00160 delete this_edge; 00161 this_edge = next_edge; 00162 } while (this_edge != loop); 00163 loop = NULL; 00164 } 00165 00166 // Normalize in-place using the DENORM. 00167 void TESSLINE::Normalize(const DENORM& denorm) { 00168 EDGEPT* pt = loop; 00169 do { 00170 denorm.LocalNormTransform(pt->pos, &pt->pos); 00171 pt = pt->next; 00172 } while (pt != loop); 00173 SetupFromPos(); 00174 } 00175 00176 // Rotates by the given rotation in place. 00177 void TESSLINE::Rotate(const FCOORD rot) { 00178 EDGEPT* pt = loop; 00179 do { 00180 int tmp = static_cast<int>(floor(pt->pos.x * rot.x() - 00181 pt->pos.y * rot.y() + 0.5)); 00182 pt->pos.y = static_cast<int>(floor(pt->pos.y * rot.x() + 00183 pt->pos.x * rot.y() + 0.5)); 00184 pt->pos.x = tmp; 00185 pt = pt->next; 00186 } while (pt != loop); 00187 SetupFromPos(); 00188 } 00189 00190 // Moves by the given vec in place. 00191 void TESSLINE::Move(const ICOORD vec) { 00192 EDGEPT* pt = loop; 00193 do { 00194 pt->pos.x += vec.x(); 00195 pt->pos.y += vec.y(); 00196 pt = pt->next; 00197 } while (pt != loop); 00198 SetupFromPos(); 00199 } 00200 00201 // Scales by the given factor in place. 00202 void TESSLINE::Scale(float factor) { 00203 EDGEPT* pt = loop; 00204 do { 00205 pt->pos.x = static_cast<int>(floor(pt->pos.x * factor + 0.5)); 00206 pt->pos.y = static_cast<int>(floor(pt->pos.y * factor + 0.5)); 00207 pt = pt->next; 00208 } while (pt != loop); 00209 SetupFromPos(); 00210 } 00211 00212 // Sets up the start and vec members of the loop from the pos members. 00213 void TESSLINE::SetupFromPos() { 00214 EDGEPT* pt = loop; 00215 do { 00216 pt->vec.x = pt->next->pos.x - pt->pos.x; 00217 pt->vec.y = pt->next->pos.y - pt->pos.y; 00218 pt = pt->next; 00219 } while (pt != loop); 00220 start = pt->pos; 00221 ComputeBoundingBox(); 00222 } 00223 00224 // Recomputes the bounding box from the points in the loop. 00225 void TESSLINE::ComputeBoundingBox() { 00226 int minx = MAX_INT32; 00227 int miny = MAX_INT32; 00228 int maxx = -MAX_INT32; 00229 int maxy = -MAX_INT32; 00230 00231 // Find boundaries. 00232 start = loop->pos; 00233 EDGEPT* this_edge = loop; 00234 do { 00235 if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) { 00236 if (this_edge->pos.x < minx) 00237 minx = this_edge->pos.x; 00238 if (this_edge->pos.y < miny) 00239 miny = this_edge->pos.y; 00240 if (this_edge->pos.x > maxx) 00241 maxx = this_edge->pos.x; 00242 if (this_edge->pos.y > maxy) 00243 maxy = this_edge->pos.y; 00244 } 00245 this_edge = this_edge->next; 00246 } while (this_edge != loop); 00247 // Reset bounds. 00248 topleft.x = minx; 00249 topleft.y = maxy; 00250 botright.x = maxx; 00251 botright.y = miny; 00252 } 00253 00254 // Computes the min and max cross product of the outline points with the 00255 // given vec and returns the results in min_xp and max_xp. Geometrically 00256 // this is the left and right edge of the outline perpendicular to the 00257 // given direction, but to get the distance units correct, you would 00258 // have to divide by the modulus of vec. 00259 void TESSLINE::MinMaxCrossProduct(const TPOINT vec, 00260 int* min_xp, int* max_xp) const { 00261 *min_xp = MAX_INT32; 00262 *max_xp = MIN_INT32; 00263 EDGEPT* this_edge = loop; 00264 do { 00265 if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) { 00266 int product = CROSS(this_edge->pos, vec); 00267 UpdateRange(product, min_xp, max_xp); 00268 } 00269 this_edge = this_edge->next; 00270 } while (this_edge != loop); 00271 } 00272 00273 TBOX TESSLINE::bounding_box() const { 00274 return TBOX(topleft.x, botright.y, botright.x, topleft.y); 00275 } 00276 00277 #ifndef GRAPHICS_DISABLED 00278 void TESSLINE::plot(ScrollView* window, ScrollView::Color color, 00279 ScrollView::Color child_color) { 00280 if (is_hole) 00281 window->Pen(child_color); 00282 else 00283 window->Pen(color); 00284 window->SetCursor(start.x, start.y); 00285 EDGEPT* pt = loop; 00286 do { 00287 bool prev_hidden = pt->IsHidden(); 00288 pt = pt->next; 00289 if (prev_hidden) 00290 window->SetCursor(pt->pos.x, pt->pos.y); 00291 else 00292 window->DrawTo(pt->pos.x, pt->pos.y); 00293 } while (pt != loop); 00294 } 00295 #endif // GRAPHICS_DISABLED 00296 00297 // Returns the first non-hidden EDGEPT that has a different src_outline to 00298 // its predecessor, or, if all the same, the lowest indexed point. 00299 EDGEPT* TESSLINE::FindBestStartPt() const { 00300 EDGEPT* best_start = loop; 00301 int best_step = loop->start_step; 00302 // Iterate the polygon. 00303 EDGEPT* pt = loop; 00304 do { 00305 if (pt->IsHidden()) continue; 00306 if (pt->prev->IsHidden() || pt->prev->src_outline != pt->src_outline) 00307 return pt; // Qualifies as the best. 00308 if (pt->start_step < best_step) { 00309 best_step = pt->start_step; 00310 best_start = pt; 00311 } 00312 } while ((pt = pt->next) != loop); 00313 return best_start; 00314 } 00315 00316 // Iterate the given list of outlines, converting to TESSLINE by polygonal 00317 // approximation and recursively any children, returning the current tail 00318 // of the resulting list of TESSLINEs. 00319 static TESSLINE** ApproximateOutlineList(bool allow_detailed_fx, 00320 C_OUTLINE_LIST* outlines, 00321 bool children, 00322 TESSLINE** tail) { 00323 C_OUTLINE_IT ol_it(outlines); 00324 for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) { 00325 C_OUTLINE* outline = ol_it.data(); 00326 if (outline->pathlength() > 0) { 00327 TESSLINE* tessline = ApproximateOutline(allow_detailed_fx, outline); 00328 tessline->is_hole = children; 00329 *tail = tessline; 00330 tail = &tessline->next; 00331 } 00332 if (!outline->child()->empty()) { 00333 tail = ApproximateOutlineList(allow_detailed_fx, outline->child(), true, 00334 tail); 00335 } 00336 } 00337 return tail; 00338 } 00339 00340 // Factory to build a TBLOB from a C_BLOB with polygonal approximation along 00341 // the way. If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB 00342 // contain pointers to the input C_OUTLINEs that enable higher-resolution 00343 // feature extraction that does not use the polygonal approximation. 00344 TBLOB* TBLOB::PolygonalCopy(bool allow_detailed_fx, C_BLOB* src) { 00345 TBLOB* tblob = new TBLOB; 00346 ApproximateOutlineList(allow_detailed_fx, src->out_list(), false, 00347 &tblob->outlines); 00348 return tblob; 00349 } 00350 00351 // Factory builds a blob with no outlines, but copies the other member data. 00352 TBLOB* TBLOB::ShallowCopy(const TBLOB& src) { 00353 TBLOB* blob = new TBLOB; 00354 blob->denorm_ = src.denorm_; 00355 return blob; 00356 } 00357 00358 // Normalizes the blob for classification only if needed. 00359 // (Normally this means a non-zero classify rotation.) 00360 // If no Normalization is needed, then NULL is returned, and the input blob 00361 // can be used directly. Otherwise a new TBLOB is returned which must be 00362 // deleted after use. 00363 TBLOB* TBLOB::ClassifyNormalizeIfNeeded() const { 00364 TBLOB* rotated_blob = NULL; 00365 // If necessary, copy the blob and rotate it. The rotation is always 00366 // +/- 90 degrees, as 180 was already taken care of. 00367 if (denorm_.block() != NULL && 00368 denorm_.block()->classify_rotation().y() != 0.0) { 00369 TBOX box = bounding_box(); 00370 int x_middle = (box.left() + box.right()) / 2; 00371 int y_middle = (box.top() + box.bottom()) / 2; 00372 rotated_blob = new TBLOB(*this); 00373 const FCOORD& rotation = denorm_.block()->classify_rotation(); 00374 // Move the rotated blob back to the same y-position so that we 00375 // can still distinguish similar glyphs with differeny y-position. 00376 float target_y = kBlnBaselineOffset + 00377 (rotation.y() > 0 ? x_middle - box.left() : box.right() - x_middle); 00378 rotated_blob->Normalize(NULL, &rotation, &denorm_, x_middle, y_middle, 00379 1.0f, 1.0f, 0.0f, target_y, 00380 denorm_.inverse(), denorm_.pix()); 00381 } 00382 return rotated_blob; 00383 } 00384 00385 // Copies the data and the outline, but leaves next untouched. 00386 void TBLOB::CopyFrom(const TBLOB& src) { 00387 Clear(); 00388 TESSLINE* prev_outline = NULL; 00389 for (TESSLINE* srcline = src.outlines; srcline != NULL; 00390 srcline = srcline->next) { 00391 TESSLINE* new_outline = new TESSLINE(*srcline); 00392 if (outlines == NULL) 00393 outlines = new_outline; 00394 else 00395 prev_outline->next = new_outline; 00396 prev_outline = new_outline; 00397 } 00398 denorm_ = src.denorm_; 00399 } 00400 00401 // Deletes owned data. 00402 void TBLOB::Clear() { 00403 for (TESSLINE* next_outline = NULL; outlines != NULL; 00404 outlines = next_outline) { 00405 next_outline = outlines->next; 00406 delete outlines; 00407 } 00408 } 00409 00410 // Sets up the built-in DENORM and normalizes the blob in-place. 00411 // For parameters see DENORM::SetupNormalization, plus the inverse flag for 00412 // this blob and the Pix for the full image. 00413 void TBLOB::Normalize(const BLOCK* block, 00414 const FCOORD* rotation, 00415 const DENORM* predecessor, 00416 float x_origin, float y_origin, 00417 float x_scale, float y_scale, 00418 float final_xshift, float final_yshift, 00419 bool inverse, Pix* pix) { 00420 denorm_.SetupNormalization(block, rotation, predecessor, x_origin, y_origin, 00421 x_scale, y_scale, final_xshift, final_yshift); 00422 denorm_.set_inverse(inverse); 00423 denorm_.set_pix(pix); 00424 // TODO(rays) outline->Normalize is more accurate, but breaks tests due 00425 // the changes it makes. Reinstate this code with a retraining. 00426 // The reason this change is troublesome is that it normalizes for the 00427 // baseline value computed independently at each x-coord. If the baseline 00428 // is not horizontal, this introduces shear into the normalized blob, which 00429 // is useful on the rare occasions that the baseline is really curved, but 00430 // the baselines need to be stabilized the rest of the time. 00431 #if 0 00432 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { 00433 outline->Normalize(denorm_); 00434 } 00435 #else 00436 denorm_.LocalNormBlob(this); 00437 #endif 00438 } 00439 00440 // Rotates by the given rotation in place. 00441 void TBLOB::Rotate(const FCOORD rotation) { 00442 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { 00443 outline->Rotate(rotation); 00444 } 00445 } 00446 00447 // Moves by the given vec in place. 00448 void TBLOB::Move(const ICOORD vec) { 00449 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { 00450 outline->Move(vec); 00451 } 00452 } 00453 00454 // Scales by the given factor in place. 00455 void TBLOB::Scale(float factor) { 00456 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { 00457 outline->Scale(factor); 00458 } 00459 } 00460 00461 // Recomputes the bounding boxes of the outlines. 00462 void TBLOB::ComputeBoundingBoxes() { 00463 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { 00464 outline->ComputeBoundingBox(); 00465 } 00466 } 00467 00468 // Returns the number of outlines. 00469 int TBLOB::NumOutlines() const { 00470 int result = 0; 00471 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) 00472 ++result; 00473 return result; 00474 } 00475 00476 /********************************************************************** 00477 * TBLOB::bounding_box() 00478 * 00479 * Compute the bounding_box of a compound blob, defined to be the 00480 * bounding box of the union of all top-level outlines in the blob. 00481 **********************************************************************/ 00482 TBOX TBLOB::bounding_box() const { 00483 if (outlines == NULL) 00484 return TBOX(0, 0, 0, 0); 00485 TESSLINE *outline = outlines; 00486 TBOX box = outline->bounding_box(); 00487 for (outline = outline->next; outline != NULL; outline = outline->next) { 00488 box += outline->bounding_box(); 00489 } 00490 return box; 00491 } 00492 00493 // Finds and deletes any duplicate outlines in this blob, without deleting 00494 // their EDGEPTs. 00495 void TBLOB::EliminateDuplicateOutlines() { 00496 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { 00497 TESSLINE* last_outline = outline; 00498 for (TESSLINE* other_outline = outline->next; other_outline != NULL; 00499 last_outline = other_outline, other_outline = other_outline->next) { 00500 if (outline->SameBox(*other_outline)) { 00501 last_outline->next = other_outline->next; 00502 // This doesn't leak - the outlines share the EDGEPTs. 00503 other_outline->loop = NULL; 00504 delete other_outline; 00505 other_outline = last_outline; 00506 // If it is part of a cut, then it can't be a hole any more. 00507 outline->is_hole = false; 00508 } 00509 } 00510 } 00511 } 00512 00513 // Swaps the outlines of *this and next if needed to keep the centers in 00514 // increasing x. 00515 void TBLOB::CorrectBlobOrder(TBLOB* next) { 00516 TBOX box = bounding_box(); 00517 TBOX next_box = next->bounding_box(); 00518 if (box.x_middle() > next_box.x_middle()) { 00519 Swap(&outlines, &next->outlines); 00520 } 00521 } 00522 00523 #ifndef GRAPHICS_DISABLED 00524 void TBLOB::plot(ScrollView* window, ScrollView::Color color, 00525 ScrollView::Color child_color) { 00526 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) 00527 outline->plot(window, color, child_color); 00528 } 00529 #endif // GRAPHICS_DISABLED 00530 00531 // Computes the center of mass and second moments for the old baseline and 00532 // 2nd moment normalizations. Returns the outline length. 00533 // The input denorm should be the normalizations that have been applied from 00534 // the image to the current state of this TBLOB. 00535 int TBLOB::ComputeMoments(FCOORD* center, FCOORD* second_moments) const { 00536 // Compute 1st and 2nd moments of the original outline. 00537 LLSQ accumulator; 00538 TBOX box = bounding_box(); 00539 // Iterate the outlines, accumulating edges relative the box.botleft(). 00540 CollectEdges(box, NULL, &accumulator, NULL, NULL); 00541 *center = accumulator.mean_point() + box.botleft(); 00542 // The 2nd moments are just the standard deviation of the point positions. 00543 double x2nd = sqrt(accumulator.x_variance()); 00544 double y2nd = sqrt(accumulator.y_variance()); 00545 if (x2nd < 1.0) x2nd = 1.0; 00546 if (y2nd < 1.0) y2nd = 1.0; 00547 second_moments->set_x(x2nd); 00548 second_moments->set_y(y2nd); 00549 return accumulator.count(); 00550 } 00551 00552 // Computes the precise bounding box of the coords that are generated by 00553 // GetEdgeCoords. This may be different from the bounding box of the polygon. 00554 void TBLOB::GetPreciseBoundingBox(TBOX* precise_box) const { 00555 TBOX box = bounding_box(); 00556 *precise_box = TBOX(); 00557 CollectEdges(box, precise_box, NULL, NULL, NULL); 00558 precise_box->move(box.botleft()); 00559 } 00560 00561 // Adds edges to the given vectors. 00562 // For all the edge steps in all the outlines, or polygonal approximation 00563 // where there are no edge steps, collects the steps into x_coords/y_coords. 00564 // x_coords is a collection of the x-coords of vertical edges for each 00565 // y-coord starting at box.bottom(). 00566 // y_coords is a collection of the y-coords of horizontal edges for each 00567 // x-coord starting at box.left(). 00568 // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. 00569 // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. 00570 void TBLOB::GetEdgeCoords(const TBOX& box, 00571 GenericVector<GenericVector<int> >* x_coords, 00572 GenericVector<GenericVector<int> >* y_coords) const { 00573 GenericVector<int> empty; 00574 x_coords->init_to_size(box.height(), empty); 00575 y_coords->init_to_size(box.width(), empty); 00576 CollectEdges(box, NULL, NULL, x_coords, y_coords); 00577 // Sort the output vectors. 00578 for (int i = 0; i < x_coords->size(); ++i) 00579 (*x_coords)[i].sort(); 00580 for (int i = 0; i < y_coords->size(); ++i) 00581 (*y_coords)[i].sort(); 00582 } 00583 00584 // Accumulates the segment between pt1 and pt2 in the LLSQ, quantizing over 00585 // the integer coordinate grid to properly weight long vectors. 00586 static void SegmentLLSQ(const FCOORD& pt1, const FCOORD& pt2, 00587 LLSQ* accumulator) { 00588 FCOORD step(pt2); 00589 step -= pt1; 00590 int xstart = IntCastRounded(MIN(pt1.x(), pt2.x())); 00591 int xend = IntCastRounded(MAX(pt1.x(), pt2.x())); 00592 int ystart = IntCastRounded(MIN(pt1.y(), pt2.y())); 00593 int yend = IntCastRounded(MAX(pt1.y(), pt2.y())); 00594 if (xstart == xend && ystart == yend) return; // Nothing to do. 00595 double weight = step.length() / (xend - xstart + yend - ystart); 00596 // Compute and save the y-position at the middle of each x-step. 00597 for (int x = xstart; x < xend; ++x) { 00598 double y = pt1.y() + step.y() * (x + 0.5 - pt1.x()) / step.x(); 00599 accumulator->add(x + 0.5, y, weight); 00600 } 00601 // Compute and save the x-position at the middle of each y-step. 00602 for (int y = ystart; y < yend; ++y) { 00603 double x = pt1.x() + step.x() * (y + 0.5 - pt1.y()) / step.y(); 00604 accumulator->add(x, y + 0.5, weight); 00605 } 00606 } 00607 00608 // Adds any edges from a single segment of outline between pt1 and pt2 to 00609 // the x_coords, y_coords vectors. pt1 and pt2 should be relative to the 00610 // bottom-left of the bounding box, hence indices to x_coords, y_coords 00611 // are clipped to ([0,x_limit], [0,y_limit]). 00612 // See GetEdgeCoords above for a description of x_coords, y_coords. 00613 static void SegmentCoords(const FCOORD& pt1, const FCOORD& pt2, 00614 int x_limit, int y_limit, 00615 GenericVector<GenericVector<int> >* x_coords, 00616 GenericVector<GenericVector<int> >* y_coords) { 00617 FCOORD step(pt2); 00618 step -= pt1; 00619 int start = ClipToRange(IntCastRounded(MIN(pt1.x(), pt2.x())), 0, x_limit); 00620 int end = ClipToRange(IntCastRounded(MAX(pt1.x(), pt2.x())), 0, x_limit); 00621 for (int x = start; x < end; ++x) { 00622 int y = IntCastRounded(pt1.y() + step.y() * (x + 0.5 - pt1.x()) / step.x()); 00623 (*y_coords)[x].push_back(y); 00624 } 00625 start = ClipToRange(IntCastRounded(MIN(pt1.y(), pt2.y())), 0, y_limit); 00626 end = ClipToRange(IntCastRounded(MAX(pt1.y(), pt2.y())), 0, y_limit); 00627 for (int y = start; y < end; ++y) { 00628 int x = IntCastRounded(pt1.x() + step.x() * (y + 0.5 - pt1.y()) / step.y()); 00629 (*x_coords)[y].push_back(x); 00630 } 00631 } 00632 00633 // Adds any edges from a single segment of outline between pt1 and pt2 to 00634 // the bbox such that it guarantees to contain anything produced by 00635 // SegmentCoords. 00636 static void SegmentBBox(const FCOORD& pt1, const FCOORD& pt2, TBOX* bbox) { 00637 FCOORD step(pt2); 00638 step -= pt1; 00639 int x1 = IntCastRounded(MIN(pt1.x(), pt2.x())); 00640 int x2 = IntCastRounded(MAX(pt1.x(), pt2.x())); 00641 if (x2 > x1) { 00642 int y1 = IntCastRounded(pt1.y() + step.y() * (x1 + 0.5 - pt1.x()) / 00643 step.x()); 00644 int y2 = IntCastRounded(pt1.y() + step.y() * (x2 - 0.5 - pt1.x()) / 00645 step.x()); 00646 TBOX point(x1, MIN(y1, y2), x2, MAX(y1, y2)); 00647 *bbox += point; 00648 } 00649 int y1 = IntCastRounded(MIN(pt1.y(), pt2.y())); 00650 int y2 = IntCastRounded(MAX(pt1.y(), pt2.y())); 00651 if (y2 > y1) { 00652 int x1 = IntCastRounded(pt1.x() + step.x() * (y1 + 0.5 - pt1.y()) / 00653 step.y()); 00654 int x2 = IntCastRounded(pt1.x() + step.x() * (y2 - 0.5 - pt1.y()) / 00655 step.y()); 00656 TBOX point(MIN(x1, x2), y1, MAX(x1, x2), y2); 00657 *bbox += point; 00658 } 00659 } 00660 00661 // Collects edges into the given bounding box, LLSQ accumulator and/or x_coords, 00662 // y_coords vectors. 00663 // For a description of x_coords/y_coords, see GetEdgeCoords above. 00664 // Startpt to lastpt, inclusive, MUST have the same src_outline member, 00665 // which may be NULL. The vector from lastpt to its next is included in 00666 // the accumulation. Hidden edges should be excluded by the caller. 00667 // The input denorm should be the normalizations that have been applied from 00668 // the image to the current state of the TBLOB from which startpt, lastpt come. 00669 // box is the bounding box of the blob from which the EDGEPTs are taken and 00670 // indices into x_coords, y_coords are offset by box.botleft(). 00671 static void CollectEdgesOfRun(const EDGEPT* startpt, const EDGEPT* lastpt, 00672 const DENORM& denorm, const TBOX& box, 00673 TBOX* bounding_box, 00674 LLSQ* accumulator, 00675 GenericVector<GenericVector<int> > *x_coords, 00676 GenericVector<GenericVector<int> > *y_coords) { 00677 const C_OUTLINE* outline = startpt->src_outline; 00678 int x_limit = box.width() - 1; 00679 int y_limit = box.height() - 1; 00680 if (outline != NULL) { 00681 // Use higher-resolution edge points stored on the outline. 00682 // The outline coordinates may not match the binary image because of the 00683 // rotation for vertical text lines, but the root_denorm IS the matching 00684 // start of the DENORM chain. 00685 const DENORM* root_denorm = denorm.RootDenorm(); 00686 int step_length = outline->pathlength(); 00687 int start_index = startpt->start_step; 00688 // Note that if this run straddles the wrap-around point of the outline, 00689 // that lastpt->start_step may have a lower index than startpt->start_step, 00690 // and we want to use an end_index that allows us to use a positive 00691 // increment, so we add step_length if necessary, but that may be beyond the 00692 // bounds of the outline steps/ due to wrap-around, so we use % step_length 00693 // everywhere, except for start_index. 00694 int end_index = lastpt->start_step + lastpt->step_count; 00695 if (end_index <= start_index) 00696 end_index += step_length; 00697 // pos is the integer coordinates of the binary image steps. 00698 ICOORD pos = outline->position_at_index(start_index); 00699 FCOORD origin(box.left(), box.bottom()); 00700 // f_pos is a floating-point version of pos that offers improved edge 00701 // positioning using greyscale information or smoothing of edge steps. 00702 FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, start_index); 00703 // pos_normed is f_pos after the appropriate normalization, and relative 00704 // to origin. 00705 // prev_normed is the previous value of pos_normed. 00706 FCOORD prev_normed; 00707 denorm.NormTransform(root_denorm, f_pos, &prev_normed); 00708 prev_normed -= origin; 00709 for (int index = start_index; index < end_index; ++index) { 00710 ICOORD step = outline->step(index % step_length); 00711 // Only use the point if its edge strength is positive. This excludes 00712 // points that don't provide useful information, eg 00713 // ___________ 00714 // |___________ 00715 // The vertical step provides only noisy, damaging information, as even 00716 // with a greyscale image, the positioning of the edge there may be a 00717 // fictitious extrapolation, so previous processing has eliminated it. 00718 if (outline->edge_strength_at_index(index % step_length) > 0) { 00719 FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, 00720 index % step_length); 00721 FCOORD pos_normed; 00722 denorm.NormTransform(root_denorm, f_pos, &pos_normed); 00723 pos_normed -= origin; 00724 // Accumulate the information that is selected by the caller. 00725 if (bounding_box != NULL) { 00726 SegmentBBox(pos_normed, prev_normed, bounding_box); 00727 } 00728 if (accumulator != NULL) { 00729 SegmentLLSQ(pos_normed, prev_normed, accumulator); 00730 } 00731 if (x_coords != NULL && y_coords != NULL) { 00732 SegmentCoords(pos_normed, prev_normed, x_limit, y_limit, 00733 x_coords, y_coords); 00734 } 00735 prev_normed = pos_normed; 00736 } 00737 pos += step; 00738 } 00739 } else { 00740 // There is no outline, so we are forced to use the polygonal approximation. 00741 const EDGEPT* endpt = lastpt->next; 00742 const EDGEPT* pt = startpt; 00743 do { 00744 FCOORD next_pos(pt->next->pos.x - box.left(), 00745 pt->next->pos.y - box.bottom()); 00746 FCOORD pos(pt->pos.x - box.left(), pt->pos.y - box.bottom()); 00747 if (bounding_box != NULL) { 00748 SegmentBBox(next_pos, pos, bounding_box); 00749 } 00750 if (accumulator != NULL) { 00751 SegmentLLSQ(next_pos, pos, accumulator); 00752 } 00753 if (x_coords != NULL && y_coords != NULL) { 00754 SegmentCoords(next_pos, pos, x_limit, y_limit, x_coords, y_coords); 00755 } 00756 } while ((pt = pt->next) != endpt); 00757 } 00758 } 00759 00760 // For all the edge steps in all the outlines, or polygonal approximation 00761 // where there are no edge steps, collects the steps into the bounding_box, 00762 // llsq and/or the x_coords/y_coords. Both are used in different kinds of 00763 // normalization. 00764 // For a description of x_coords, y_coords, see GetEdgeCoords above. 00765 void TBLOB::CollectEdges(const TBOX& box, 00766 TBOX* bounding_box, LLSQ* llsq, 00767 GenericVector<GenericVector<int> >* x_coords, 00768 GenericVector<GenericVector<int> >* y_coords) const { 00769 // Iterate the outlines. 00770 for (const TESSLINE* ol = outlines; ol != NULL; ol = ol->next) { 00771 // Iterate the polygon. 00772 EDGEPT* loop_pt = ol->FindBestStartPt(); 00773 EDGEPT* pt = loop_pt; 00774 if (pt == NULL) continue; 00775 do { 00776 if (pt->IsHidden()) continue; 00777 // Find a run of equal src_outline. 00778 EDGEPT* last_pt = pt; 00779 do { 00780 last_pt = last_pt->next; 00781 } while (last_pt != loop_pt && !last_pt->IsHidden() && 00782 last_pt->src_outline == pt->src_outline); 00783 last_pt = last_pt->prev; 00784 CollectEdgesOfRun(pt, last_pt, denorm_, box, 00785 bounding_box, llsq, x_coords, y_coords); 00786 pt = last_pt; 00787 } while ((pt = pt->next) != loop_pt); 00788 } 00789 } 00790 00791 // Factory to build a TWERD from a (C_BLOB) WERD, with polygonal 00792 // approximation along the way. 00793 TWERD* TWERD::PolygonalCopy(bool allow_detailed_fx, WERD* src) { 00794 TWERD* tessword = new TWERD; 00795 tessword->latin_script = src->flag(W_SCRIPT_IS_LATIN); 00796 C_BLOB_IT b_it(src->cblob_list()); 00797 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { 00798 C_BLOB* blob = b_it.data(); 00799 TBLOB* tblob = TBLOB::PolygonalCopy(allow_detailed_fx, blob); 00800 tessword->blobs.push_back(tblob); 00801 } 00802 return tessword; 00803 } 00804 00805 // Baseline normalizes the blobs in-place, recording the normalization in the 00806 // DENORMs in the blobs. 00807 void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, 00808 bool inverse, float x_height, float baseline_shift, 00809 bool numeric_mode, tesseract::OcrEngineMode hint, 00810 const TBOX* norm_box, 00811 DENORM* word_denorm) { 00812 TBOX word_box = bounding_box(); 00813 if (norm_box != NULL) word_box = *norm_box; 00814 float word_middle = (word_box.left() + word_box.right()) / 2.0f; 00815 float input_y_offset = 0.0f; 00816 float final_y_offset = static_cast<float>(kBlnBaselineOffset); 00817 float scale = kBlnXHeight / x_height; 00818 if (hint == tesseract::OEM_CUBE_ONLY || row == NULL) { 00819 word_middle = word_box.left(); 00820 input_y_offset = word_box.bottom(); 00821 final_y_offset = 0.0f; 00822 if (hint == tesseract::OEM_CUBE_ONLY) 00823 scale = 1.0f; 00824 } else { 00825 input_y_offset = row->base_line(word_middle) + baseline_shift; 00826 } 00827 for (int b = 0; b < blobs.size(); ++b) { 00828 TBLOB* blob = blobs[b]; 00829 TBOX blob_box = blob->bounding_box(); 00830 float mid_x = (blob_box.left() + blob_box.right()) / 2.0f; 00831 float baseline = input_y_offset; 00832 float blob_scale = scale; 00833 if (numeric_mode) { 00834 baseline = blob_box.bottom(); 00835 blob_scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()), 00836 scale, scale * 1.5f); 00837 } else if (row != NULL && hint != tesseract::OEM_CUBE_ONLY) { 00838 baseline = row->base_line(mid_x) + baseline_shift; 00839 } 00840 // The image will be 8-bit grey if the input was grey or color. Note that in 00841 // a grey image 0 is black and 255 is white. If the input was binary, then 00842 // the pix will be binary and 0 is white, with 1 being black. 00843 // To tell the difference pixGetDepth() will return 8 or 1. 00844 // The inverse flag will be true iff the word has been determined to be 00845 // white on black, and is independent of whether the pix is 8 bit or 1 bit. 00846 blob->Normalize(block, NULL, NULL, word_middle, baseline, blob_scale, 00847 blob_scale, 0.0f, final_y_offset, inverse, pix); 00848 } 00849 if (word_denorm != NULL) { 00850 word_denorm->SetupNormalization(block, NULL, NULL, word_middle, 00851 input_y_offset, scale, scale, 00852 0.0f, final_y_offset); 00853 word_denorm->set_inverse(inverse); 00854 word_denorm->set_pix(pix); 00855 } 00856 } 00857 00858 // Copies the data and the blobs, but leaves next untouched. 00859 void TWERD::CopyFrom(const TWERD& src) { 00860 Clear(); 00861 latin_script = src.latin_script; 00862 for (int b = 0; b < src.blobs.size(); ++b) { 00863 TBLOB* new_blob = new TBLOB(*src.blobs[b]); 00864 blobs.push_back(new_blob); 00865 } 00866 } 00867 00868 // Deletes owned data. 00869 void TWERD::Clear() { 00870 blobs.delete_data_pointers(); 00871 blobs.clear(); 00872 } 00873 00874 // Recomputes the bounding boxes of the blobs. 00875 void TWERD::ComputeBoundingBoxes() { 00876 for (int b = 0; b < blobs.size(); ++b) { 00877 blobs[b]->ComputeBoundingBoxes(); 00878 } 00879 } 00880 00881 TBOX TWERD::bounding_box() const { 00882 TBOX result; 00883 for (int b = 0; b < blobs.size(); ++b) { 00884 TBOX box = blobs[b]->bounding_box(); 00885 result += box; 00886 } 00887 return result; 00888 } 00889 00890 // Merges the blobs from start to end, not including end, and deletes 00891 // the blobs between start and end. 00892 void TWERD::MergeBlobs(int start, int end) { 00893 if (start >= blobs.size() - 1) return; // Nothing to do. 00894 TESSLINE* outline = blobs[start]->outlines; 00895 for (int i = start + 1; i < end && i < blobs.size(); ++i) { 00896 TBLOB* next_blob = blobs[i]; 00897 // Take the outlines from the next blob. 00898 if (outline == NULL) { 00899 blobs[start]->outlines = next_blob->outlines; 00900 outline = blobs[start]->outlines; 00901 } else { 00902 while (outline->next != NULL) 00903 outline = outline->next; 00904 outline->next = next_blob->outlines; 00905 next_blob->outlines = NULL; 00906 } 00907 // Delete the next blob and move on. 00908 delete next_blob; 00909 blobs[i] = NULL; 00910 } 00911 // Remove dead blobs from the vector. 00912 for (int i = start + 1; i < end && start + 1 < blobs.size(); ++i) { 00913 blobs.remove(start + 1); 00914 } 00915 } 00916 00917 #ifndef GRAPHICS_DISABLED 00918 void TWERD::plot(ScrollView* window) { 00919 ScrollView::Color color = WERD::NextColor(ScrollView::BLACK); 00920 for (int b = 0; b < blobs.size(); ++b) { 00921 blobs[b]->plot(window, color, ScrollView::BROWN); 00922 color = WERD::NextColor(color); 00923 } 00924 } 00925 #endif // GRAPHICS_DISABLED 00926 00927 /********************************************************************** 00928 * divisible_blob 00929 * 00930 * Returns true if the blob contains multiple outlines than can be 00931 * separated using divide_blobs. Sets the location to be used in the 00932 * call to divide_blobs. 00933 **********************************************************************/ 00934 bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location) { 00935 if (blob->outlines == NULL || blob->outlines->next == NULL) 00936 return false; // Need at least 2 outlines for it to be possible. 00937 int max_gap = 0; 00938 TPOINT vertical = italic_blob ? kDivisibleVerticalItalic 00939 : kDivisibleVerticalUpright; 00940 for (TESSLINE* outline1 = blob->outlines; outline1 != NULL; 00941 outline1 = outline1->next) { 00942 if (outline1->is_hole) 00943 continue; // Holes do not count as separable. 00944 TPOINT mid_pt1( 00945 static_cast<inT16>((outline1->topleft.x + outline1->botright.x) / 2), 00946 static_cast<inT16>((outline1->topleft.y + outline1->botright.y) / 2)); 00947 int mid_prod1 = CROSS(mid_pt1, vertical); 00948 int min_prod1, max_prod1; 00949 outline1->MinMaxCrossProduct(vertical, &min_prod1, &max_prod1); 00950 for (TESSLINE* outline2 = outline1->next; outline2 != NULL; 00951 outline2 = outline2->next) { 00952 if (outline2->is_hole) 00953 continue; // Holes do not count as separable. 00954 TPOINT mid_pt2( 00955 static_cast<inT16>((outline2->topleft.x + outline2->botright.x) / 2), 00956 static_cast<inT16>((outline2->topleft.y + outline2->botright.y) / 2)); 00957 int mid_prod2 = CROSS(mid_pt2, vertical); 00958 int min_prod2, max_prod2; 00959 outline2->MinMaxCrossProduct(vertical, &min_prod2, &max_prod2); 00960 int mid_gap = abs(mid_prod2 - mid_prod1); 00961 int overlap = MIN(max_prod1, max_prod2) - MAX(min_prod1, min_prod2); 00962 if (mid_gap - overlap / 4 > max_gap) { 00963 max_gap = mid_gap - overlap / 4; 00964 *location = mid_pt1; 00965 *location += mid_pt2; 00966 *location /= 2; 00967 } 00968 } 00969 } 00970 // Use the y component of the vertical vector as an approximation to its 00971 // length. 00972 return max_gap > vertical.y; 00973 } 00974 00975 /********************************************************************** 00976 * divide_blobs 00977 * 00978 * Create two blobs by grouping the outlines in the appropriate blob. 00979 * The outlines that are beyond the location point are moved to the 00980 * other blob. The ones whose x location is less than that point are 00981 * retained in the original blob. 00982 **********************************************************************/ 00983 void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, 00984 const TPOINT& location) { 00985 TPOINT vertical = italic_blob ? kDivisibleVerticalItalic 00986 : kDivisibleVerticalUpright; 00987 TESSLINE *outline1 = NULL; 00988 TESSLINE *outline2 = NULL; 00989 00990 TESSLINE *outline = blob->outlines; 00991 blob->outlines = NULL; 00992 int location_prod = CROSS(location, vertical); 00993 00994 while (outline != NULL) { 00995 TPOINT mid_pt( 00996 static_cast<inT16>((outline->topleft.x + outline->botright.x) / 2), 00997 static_cast<inT16>((outline->topleft.y + outline->botright.y) / 2)); 00998 int mid_prod = CROSS(mid_pt, vertical); 00999 if (mid_prod < location_prod) { 01000 // Outline is in left blob. 01001 if (outline1) 01002 outline1->next = outline; 01003 else 01004 blob->outlines = outline; 01005 outline1 = outline; 01006 } else { 01007 // Outline is in right blob. 01008 if (outline2) 01009 outline2->next = outline; 01010 else 01011 other_blob->outlines = outline; 01012 outline2 = outline; 01013 } 01014 outline = outline->next; 01015 } 01016 01017 if (outline1) 01018 outline1->next = NULL; 01019 if (outline2) 01020 outline2->next = NULL; 01021 }