|
tesseract 3.04.01
|
00001 /********************************************************************** 00002 * File: cube_page_segmenter.cpp 00003 * Description: Implementation of the Cube Page Segmenter Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2007 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "cube_line_segmenter.h" 00021 #include "ndminx.h" 00022 00023 namespace tesseract { 00024 // constants that worked for Arabic page segmenter 00025 const int CubeLineSegmenter::kLineSepMorphMinHgt = 20; 00026 const int CubeLineSegmenter::kHgtBins = 20; 00027 const double CubeLineSegmenter::kMaxValidLineRatio = 3.2; 00028 const int CubeLineSegmenter::kMaxConnCompHgt = 150; 00029 const int CubeLineSegmenter::kMaxConnCompWid = 500; 00030 const int CubeLineSegmenter::kMaxHorzAspectRatio = 50; 00031 const int CubeLineSegmenter::kMaxVertAspectRatio = 20; 00032 const int CubeLineSegmenter::kMinWid = 2; 00033 const int CubeLineSegmenter::kMinHgt = 2; 00034 const float CubeLineSegmenter::kMinValidLineHgtRatio = 2.5; 00035 00036 CubeLineSegmenter::CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img) { 00037 cntxt_ = cntxt; 00038 orig_img_ = img; 00039 img_ = NULL; 00040 lines_pixa_ = NULL; 00041 init_ = false; 00042 line_cnt_ = 0; 00043 columns_ = NULL; 00044 con_comps_ = NULL; 00045 est_alef_hgt_ = 0.0; 00046 est_dot_hgt_ = 0.0; 00047 } 00048 00049 CubeLineSegmenter::~CubeLineSegmenter() { 00050 if (img_ != NULL) { 00051 pixDestroy(&img_); 00052 img_ = NULL; 00053 } 00054 00055 if (lines_pixa_ != NULL) { 00056 pixaDestroy(&lines_pixa_); 00057 lines_pixa_ = NULL; 00058 } 00059 00060 if (con_comps_ != NULL) { 00061 pixaDestroy(&con_comps_); 00062 con_comps_ = NULL; 00063 } 00064 00065 if (columns_ != NULL) { 00066 pixaaDestroy(&columns_); 00067 columns_ = NULL; 00068 } 00069 } 00070 00071 // compute validity ratio for a line 00072 double CubeLineSegmenter::ValidityRatio(Pix *line_mask_pix, Box *line_box) { 00073 return line_box->h / est_alef_hgt_; 00074 } 00075 00076 // validate line 00077 bool CubeLineSegmenter::ValidLine(Pix *line_mask_pix, Box *line_box) { 00078 double validity_ratio = ValidityRatio(line_mask_pix, line_box); 00079 00080 return validity_ratio < kMaxValidLineRatio; 00081 } 00082 00083 // perform a vertical Closing with the specified threshold 00084 // returning the resulting conn comps as a pixa 00085 Pixa *CubeLineSegmenter::VerticalClosing(Pix *pix, 00086 int threshold, Boxa **boxa) { 00087 char sequence_str[16]; 00088 00089 // do the morphology 00090 sprintf(sequence_str, "c100.%d", threshold); 00091 Pix *morphed_pix = pixMorphCompSequence(pix, sequence_str, 0); 00092 if (morphed_pix == NULL) { 00093 return NULL; 00094 } 00095 00096 // get the resulting lines by computing concomps 00097 Pixa *pixac; 00098 (*boxa) = pixConnComp(morphed_pix, &pixac, 8); 00099 00100 pixDestroy(&morphed_pix); 00101 00102 if ((*boxa) == NULL) { 00103 return NULL; 00104 } 00105 00106 return pixac; 00107 } 00108 00109 // Helper cleans up after CrackLine. 00110 static void CleanupCrackLine(int line_cnt, Pixa **lines_pixa, 00111 Boxa **line_con_comps, 00112 Pixa **line_con_comps_pix) { 00113 for (int line = 0; line < line_cnt; line++) { 00114 if (lines_pixa[line] != NULL) { 00115 pixaDestroy(&lines_pixa[line]); 00116 } 00117 } 00118 00119 delete []lines_pixa; 00120 boxaDestroy(line_con_comps); 00121 pixaDestroy(line_con_comps_pix); 00122 } 00123 00124 // do a desperate attempt at cracking lines 00125 Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix, 00126 Box *cracked_line_box, int line_cnt) { 00127 // create lines pixa array 00128 Pixa **lines_pixa = new Pixa*[line_cnt]; 00129 if (lines_pixa == NULL) { 00130 return NULL; 00131 } 00132 00133 memset(lines_pixa, 0, line_cnt * sizeof(*lines_pixa)); 00134 00135 // compute line conn comps 00136 Pixa *line_con_comps_pix; 00137 Boxa *line_con_comps = ComputeLineConComps(cracked_line_pix, 00138 cracked_line_box, &line_con_comps_pix); 00139 00140 if (line_con_comps == NULL) { 00141 delete []lines_pixa; 00142 return NULL; 00143 } 00144 00145 // assign each conn comp to the a line based on its centroid 00146 for (int con = 0; con < line_con_comps->n; con++) { 00147 Box *con_box = line_con_comps->box[con]; 00148 Pix *con_pix = line_con_comps_pix->pix[con]; 00149 int mid_y = (con_box->y - cracked_line_box->y) + (con_box->h / 2), 00150 line_idx = MIN(line_cnt - 1, 00151 (mid_y * line_cnt / cracked_line_box->h)); 00152 00153 // create the line if it has not been created? 00154 if (lines_pixa[line_idx] == NULL) { 00155 lines_pixa[line_idx] = pixaCreate(line_con_comps->n); 00156 if (lines_pixa[line_idx] == NULL) { 00157 CleanupCrackLine(line_cnt, lines_pixa, &line_con_comps, 00158 &line_con_comps_pix); 00159 return NULL; 00160 } 00161 } 00162 00163 // add the concomp to the line 00164 if (pixaAddPix(lines_pixa[line_idx], con_pix, L_CLONE) != 0 || 00165 pixaAddBox(lines_pixa[line_idx], con_box, L_CLONE)) { 00166 CleanupCrackLine(line_cnt, lines_pixa, &line_con_comps, 00167 &line_con_comps_pix); 00168 return NULL; 00169 } 00170 } 00171 00172 // create the lines pixa 00173 Pixa *lines = pixaCreate(line_cnt); 00174 bool success = true; 00175 00176 // create and check the validity of the lines 00177 for (int line = 0; line < line_cnt; line++) { 00178 Pixa *line_pixa = lines_pixa[line]; 00179 00180 // skip invalid lines 00181 if (line_pixa == NULL) { 00182 continue; 00183 } 00184 00185 // merge the pix, check the validity of the line 00186 // and add it to the lines pixa 00187 Box *line_box; 00188 Pix *line_pix = Pixa2Pix(line_pixa, &line_box); 00189 if (line_pix == NULL || 00190 line_box == NULL || 00191 ValidLine(line_pix, line_box) == false || 00192 pixaAddPix(lines, line_pix, L_INSERT) != 0 || 00193 pixaAddBox(lines, line_box, L_INSERT) != 0) { 00194 if (line_pix != NULL) { 00195 pixDestroy(&line_pix); 00196 } 00197 00198 if (line_box != NULL) { 00199 boxDestroy(&line_box); 00200 } 00201 00202 success = false; 00203 00204 break; 00205 } 00206 } 00207 00208 // cleanup 00209 CleanupCrackLine(line_cnt, lines_pixa, &line_con_comps, 00210 &line_con_comps_pix); 00211 00212 if (success == false) { 00213 pixaDestroy(&lines); 00214 lines = NULL; 00215 } 00216 00217 return lines; 00218 } 00219 00220 // do a desperate attempt at cracking lines 00221 Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix, 00222 Box *cracked_line_box) { 00223 // estimate max line count 00224 int max_line_cnt = static_cast<int>((cracked_line_box->h / 00225 est_alef_hgt_) + 0.5); 00226 if (max_line_cnt < 2) { 00227 return NULL; 00228 } 00229 00230 for (int line_cnt = 2; line_cnt < max_line_cnt; line_cnt++) { 00231 Pixa *lines = CrackLine(cracked_line_pix, cracked_line_box, line_cnt); 00232 if (lines != NULL) { 00233 return lines; 00234 } 00235 } 00236 00237 return NULL; 00238 } 00239 00240 // split a line continuously until valid or fail 00241 Pixa *CubeLineSegmenter::SplitLine(Pix *line_mask_pix, Box *line_box) { 00242 // clone the line mask 00243 Pix *line_pix = pixClone(line_mask_pix); 00244 00245 if (line_pix == NULL) { 00246 return NULL; 00247 } 00248 00249 // AND with the image to get the actual line 00250 pixRasterop(line_pix, 0, 0, line_pix->w, line_pix->h, 00251 PIX_SRC & PIX_DST, img_, line_box->x, line_box->y); 00252 00253 // continue to do rasterop morphology on the line until 00254 // it splits to valid lines or we fail 00255 int morph_hgt = kLineSepMorphMinHgt - 1, 00256 best_threshold = kLineSepMorphMinHgt - 1, 00257 max_valid_portion = 0; 00258 00259 Boxa *boxa; 00260 Pixa *pixac; 00261 00262 do { 00263 pixac = VerticalClosing(line_pix, morph_hgt, &boxa); 00264 00265 // add the box offset to all the lines 00266 // and check for the validity of each 00267 int line, 00268 valid_line_cnt = 0, 00269 valid_portion = 0; 00270 00271 for (line = 0; line < pixac->n; line++) { 00272 boxa->box[line]->x += line_box->x; 00273 boxa->box[line]->y += line_box->y; 00274 00275 if (ValidLine(pixac->pix[line], boxa->box[line]) == true) { 00276 // count valid lines 00277 valid_line_cnt++; 00278 00279 // and the valid portions 00280 valid_portion += boxa->box[line]->h; 00281 } 00282 } 00283 00284 // all the lines are valid 00285 if (valid_line_cnt == pixac->n) { 00286 boxaDestroy(&boxa); 00287 pixDestroy(&line_pix); 00288 return pixac; 00289 } 00290 00291 // a larger valid portion 00292 if (valid_portion > max_valid_portion) { 00293 max_valid_portion = valid_portion; 00294 best_threshold = morph_hgt; 00295 } 00296 00297 boxaDestroy(&boxa); 00298 pixaDestroy(&pixac); 00299 00300 morph_hgt--; 00301 } 00302 while (morph_hgt > 0); 00303 00304 // failed to break into valid lines 00305 // attempt to crack the line 00306 pixac = CrackLine(line_pix, line_box); 00307 if (pixac != NULL) { 00308 pixDestroy(&line_pix); 00309 return pixac; 00310 } 00311 00312 // try to leverage any of the lines 00313 // did the best threshold yield a non zero valid portion 00314 if (max_valid_portion > 0) { 00315 // use this threshold to break lines 00316 pixac = VerticalClosing(line_pix, best_threshold, &boxa); 00317 00318 // add the box offset to all the lines 00319 // and check for the validity of each 00320 for (int line = 0; line < pixac->n; line++) { 00321 boxa->box[line]->x += line_box->x; 00322 boxa->box[line]->y += line_box->y; 00323 00324 // remove invalid lines from the pixa 00325 if (ValidLine(pixac->pix[line], boxa->box[line]) == false) { 00326 pixaRemovePix(pixac, line); 00327 line--; 00328 } 00329 } 00330 00331 boxaDestroy(&boxa); 00332 pixDestroy(&line_pix); 00333 return pixac; 00334 } 00335 00336 // last resort: attempt to crack the line 00337 pixDestroy(&line_pix); 00338 00339 return NULL; 00340 } 00341 00342 // Checks of a line is too small 00343 bool CubeLineSegmenter::SmallLine(Box *line_box) { 00344 return line_box->h <= (kMinValidLineHgtRatio * est_dot_hgt_); 00345 } 00346 00347 // Compute the connected components in a line 00348 Boxa * CubeLineSegmenter::ComputeLineConComps(Pix *line_mask_pix, 00349 Box *line_box, 00350 Pixa **con_comps_pixa) { 00351 // clone the line mask 00352 Pix *line_pix = pixClone(line_mask_pix); 00353 00354 if (line_pix == NULL) { 00355 return NULL; 00356 } 00357 00358 // AND with the image to get the actual line 00359 pixRasterop(line_pix, 0, 0, line_pix->w, line_pix->h, 00360 PIX_SRC & PIX_DST, img_, line_box->x, line_box->y); 00361 00362 // compute the connected components of the line to be merged 00363 Boxa *line_con_comps = pixConnComp(line_pix, con_comps_pixa, 8); 00364 00365 pixDestroy(&line_pix); 00366 00367 // offset boxes by the bbox of the line 00368 for (int con = 0; con < line_con_comps->n; con++) { 00369 line_con_comps->box[con]->x += line_box->x; 00370 line_con_comps->box[con]->y += line_box->y; 00371 } 00372 00373 return line_con_comps; 00374 } 00375 00376 // create a union of two arbitrary pix 00377 Pix *CubeLineSegmenter::PixUnion(Pix *dest_pix, Box *dest_box, 00378 Pix *src_pix, Box *src_box) { 00379 // compute dimensions of union rect 00380 BOX *union_box = boxBoundingRegion(src_box, dest_box); 00381 00382 // create the union pix 00383 Pix *union_pix = pixCreate(union_box->w, union_box->h, src_pix->d); 00384 if (union_pix == NULL) { 00385 return NULL; 00386 } 00387 00388 // blt the src and dest pix 00389 pixRasterop(union_pix, 00390 src_box->x - union_box->x, src_box->y - union_box->y, 00391 src_box->w, src_box->h, PIX_SRC | PIX_DST, src_pix, 0, 0); 00392 00393 pixRasterop(union_pix, 00394 dest_box->x - union_box->x, dest_box->y - union_box->y, 00395 dest_box->w, dest_box->h, PIX_SRC | PIX_DST, dest_pix, 0, 0); 00396 00397 // replace the dest_box 00398 *dest_box = *union_box; 00399 00400 boxDestroy(&union_box); 00401 00402 return union_pix; 00403 } 00404 00405 // create a union of a number of arbitrary pix 00406 Pix *CubeLineSegmenter::Pixa2Pix(Pixa *pixa, Box **dest_box, 00407 int start_pix, int pix_cnt) { 00408 // compute union_box 00409 int min_x = INT_MAX, 00410 max_x = INT_MIN, 00411 min_y = INT_MAX, 00412 max_y = INT_MIN; 00413 00414 for (int pix_idx = start_pix; pix_idx < (start_pix + pix_cnt); pix_idx++) { 00415 Box *pix_box = pixa->boxa->box[pix_idx]; 00416 00417 UpdateRange(pix_box->x, pix_box->x + pix_box->w, &min_x, &max_x); 00418 UpdateRange(pix_box->y, pix_box->y + pix_box->h, &min_y, &max_y); 00419 } 00420 00421 (*dest_box) = boxCreate(min_x, min_y, max_x - min_x, max_y - min_y); 00422 if ((*dest_box) == NULL) { 00423 return NULL; 00424 } 00425 00426 // create the union pix 00427 Pix *union_pix = pixCreate((*dest_box)->w, (*dest_box)->h, img_->d); 00428 if (union_pix == NULL) { 00429 boxDestroy(dest_box); 00430 return NULL; 00431 } 00432 00433 // create a pix corresponding to the union of all pixs 00434 // blt the src and dest pix 00435 for (int pix_idx = start_pix; pix_idx < (start_pix + pix_cnt); pix_idx++) { 00436 Box *pix_box = pixa->boxa->box[pix_idx]; 00437 Pix *con_pix = pixa->pix[pix_idx]; 00438 00439 pixRasterop(union_pix, 00440 pix_box->x - (*dest_box)->x, pix_box->y - (*dest_box)->y, 00441 pix_box->w, pix_box->h, PIX_SRC | PIX_DST, con_pix, 0, 0); 00442 } 00443 00444 return union_pix; 00445 } 00446 00447 // create a union of a number of arbitrary pix 00448 Pix *CubeLineSegmenter::Pixa2Pix(Pixa *pixa, Box **dest_box) { 00449 return Pixa2Pix(pixa, dest_box, 0, pixa->n); 00450 } 00451 00452 // merges a number of lines into one line given a bounding box and a mask 00453 bool CubeLineSegmenter::MergeLine(Pix *line_mask_pix, Box *line_box, 00454 Pixa *lines, Boxaa *lines_con_comps) { 00455 // compute the connected components of the lines to be merged 00456 Pixa *small_con_comps_pix; 00457 Boxa *small_line_con_comps = ComputeLineConComps(line_mask_pix, 00458 line_box, &small_con_comps_pix); 00459 00460 if (small_line_con_comps == NULL) { 00461 return false; 00462 } 00463 00464 // for each connected component 00465 for (int con = 0; con < small_line_con_comps->n; con++) { 00466 Box *small_con_comp_box = small_line_con_comps->box[con]; 00467 int best_line = -1, 00468 best_dist = INT_MAX, 00469 small_box_right = small_con_comp_box->x + small_con_comp_box->w, 00470 small_box_bottom = small_con_comp_box->y + small_con_comp_box->h; 00471 00472 // for each valid line 00473 for (int line = 0; line < lines->n; line++) { 00474 if (SmallLine(lines->boxa->box[line]) == true) { 00475 continue; 00476 } 00477 00478 // for all the connected components in the line 00479 Boxa *line_con_comps = lines_con_comps->boxa[line]; 00480 00481 for (int lcon = 0; lcon < line_con_comps->n; lcon++) { 00482 Box *con_comp_box = line_con_comps->box[lcon]; 00483 int xdist, 00484 ydist, 00485 box_right = con_comp_box->x + con_comp_box->w, 00486 box_bottom = con_comp_box->y + con_comp_box->h; 00487 00488 xdist = MAX(small_con_comp_box->x, con_comp_box->x) - 00489 MIN(small_box_right, box_right); 00490 00491 ydist = MAX(small_con_comp_box->y, con_comp_box->y) - 00492 MIN(small_box_bottom, box_bottom); 00493 00494 // if there is an overlap in x-direction 00495 if (xdist <= 0) { 00496 if (best_line == -1 || ydist < best_dist) { 00497 best_dist = ydist; 00498 best_line = line; 00499 } 00500 } 00501 } 00502 } 00503 00504 // if the distance is too big, do not merged 00505 if (best_line != -1 && best_dist < est_alef_hgt_) { 00506 // add the pix to the best line 00507 Pix *new_line = PixUnion(lines->pix[best_line], 00508 lines->boxa->box[best_line], 00509 small_con_comps_pix->pix[con], small_con_comp_box); 00510 00511 if (new_line == NULL) { 00512 return false; 00513 } 00514 00515 pixDestroy(&lines->pix[best_line]); 00516 lines->pix[best_line] = new_line; 00517 } 00518 } 00519 00520 pixaDestroy(&small_con_comps_pix); 00521 boxaDestroy(&small_line_con_comps); 00522 00523 return true; 00524 } 00525 00526 // Creates new set of lines from the computed columns 00527 bool CubeLineSegmenter::AddLines(Pixa *lines) { 00528 // create an array that will hold the bounding boxes 00529 // of the concomps belonging to each line 00530 Boxaa *lines_con_comps = boxaaCreate(lines->n); 00531 if (lines_con_comps == NULL) { 00532 return false; 00533 } 00534 00535 for (int line = 0; line < lines->n; line++) { 00536 // if the line is not valid 00537 if (ValidLine(lines->pix[line], lines->boxa->box[line]) == false) { 00538 // split it 00539 Pixa *split_lines = SplitLine(lines->pix[line], 00540 lines->boxa->box[line]); 00541 00542 // remove the old line 00543 if (pixaRemovePix(lines, line) != 0) { 00544 return false; 00545 } 00546 00547 line--; 00548 00549 if (split_lines == NULL) { 00550 continue; 00551 } 00552 00553 // add the split lines instead and move the pointer 00554 for (int s_line = 0; s_line < split_lines->n; s_line++) { 00555 Pix *sp_line = pixaGetPix(split_lines, s_line, L_CLONE); 00556 Box *sp_box = boxaGetBox(split_lines->boxa, s_line, L_CLONE); 00557 00558 if (sp_line == NULL || sp_box == NULL) { 00559 return false; 00560 } 00561 00562 // insert the new line 00563 if (pixaInsertPix(lines, ++line, sp_line, sp_box) != 0) { 00564 return false; 00565 } 00566 } 00567 00568 // remove the split lines 00569 pixaDestroy(&split_lines); 00570 } 00571 } 00572 00573 // compute the concomps bboxes of each line 00574 for (int line = 0; line < lines->n; line++) { 00575 Boxa *line_con_comps = ComputeLineConComps(lines->pix[line], 00576 lines->boxa->box[line], NULL); 00577 00578 if (line_con_comps == NULL) { 00579 return false; 00580 } 00581 00582 // insert it into the boxaa array 00583 if (boxaaAddBoxa(lines_con_comps, line_con_comps, L_INSERT) != 0) { 00584 return false; 00585 } 00586 } 00587 00588 // post process the lines: 00589 // merge the contents of "small" lines info legitimate lines 00590 for (int line = 0; line < lines->n; line++) { 00591 // a small line detected 00592 if (SmallLine(lines->boxa->box[line]) == true) { 00593 // merge its components to one of the valid lines 00594 if (MergeLine(lines->pix[line], lines->boxa->box[line], 00595 lines, lines_con_comps) == true) { 00596 // remove the small line 00597 if (pixaRemovePix(lines, line) != 0) { 00598 return false; 00599 } 00600 00601 if (boxaaRemoveBoxa(lines_con_comps, line) != 0) { 00602 return false; 00603 } 00604 00605 line--; 00606 } 00607 } 00608 } 00609 00610 boxaaDestroy(&lines_con_comps); 00611 00612 // add the pix masks 00613 if (pixaaAddPixa(columns_, lines, L_INSERT) != 0) { 00614 return false; 00615 } 00616 00617 return true; 00618 } 00619 00620 // Index the specific pixa using RTL reading order 00621 int *CubeLineSegmenter::IndexRTL(Pixa *pixa) { 00622 int *pix_index = new int[pixa->n]; 00623 if (pix_index == NULL) { 00624 return NULL; 00625 } 00626 00627 for (int pix = 0; pix < pixa->n; pix++) { 00628 pix_index[pix] = pix; 00629 } 00630 00631 for (int ipix = 0; ipix < pixa->n; ipix++) { 00632 for (int jpix = ipix + 1; jpix < pixa->n; jpix++) { 00633 Box *ipix_box = pixa->boxa->box[pix_index[ipix]], 00634 *jpix_box = pixa->boxa->box[pix_index[jpix]]; 00635 00636 // swap? 00637 if ((ipix_box->x + ipix_box->w) < (jpix_box->x + jpix_box->w)) { 00638 int temp = pix_index[ipix]; 00639 pix_index[ipix] = pix_index[jpix]; 00640 pix_index[jpix] = temp; 00641 } 00642 } 00643 } 00644 00645 return pix_index; 00646 } 00647 00648 // Performs line segmentation 00649 bool CubeLineSegmenter::LineSegment() { 00650 // Use full image morphology to find columns 00651 // This only works for simple layouts where each column 00652 // of text extends the full height of the input image. 00653 Pix *pix_temp1 = pixMorphCompSequence(img_, "c5.500", 0); 00654 if (pix_temp1 == NULL) { 00655 return false; 00656 } 00657 00658 // Mask with a single component over each column 00659 Pixa *pixam; 00660 Boxa *boxa = pixConnComp(pix_temp1, &pixam, 8); 00661 00662 if (boxa == NULL) { 00663 return false; 00664 } 00665 00666 int init_morph_min_hgt = kLineSepMorphMinHgt; 00667 char sequence_str[16]; 00668 sprintf(sequence_str, "c100.%d", init_morph_min_hgt); 00669 00670 // Use selective region-based morphology to get the textline mask. 00671 Pixa *pixad = pixaMorphSequenceByRegion(img_, pixam, sequence_str, 0, 0); 00672 if (pixad == NULL) { 00673 return false; 00674 } 00675 00676 // for all columns 00677 int col_cnt = boxaGetCount(boxa); 00678 00679 // create columns 00680 columns_ = pixaaCreate(col_cnt); 00681 if (columns_ == NULL) { 00682 return false; 00683 } 00684 00685 // index columns based on readind order (RTL) 00686 int *col_order = IndexRTL(pixad); 00687 if (col_order == NULL) { 00688 return false; 00689 } 00690 00691 line_cnt_ = 0; 00692 00693 for (int col_idx = 0; col_idx < col_cnt; col_idx++) { 00694 int col = col_order[col_idx]; 00695 00696 // get the pix and box corresponding to the column 00697 Pix *pixt3 = pixaGetPix(pixad, col, L_CLONE); 00698 if (pixt3 == NULL) { 00699 delete []col_order; 00700 return false; 00701 } 00702 00703 Box *col_box = pixad->boxa->box[col]; 00704 00705 Pixa *pixac; 00706 Boxa *boxa2 = pixConnComp(pixt3, &pixac, 8); 00707 if (boxa2 == NULL) { 00708 delete []col_order; 00709 return false; 00710 } 00711 00712 // offset the boxes by the column box 00713 for (int line = 0; line < pixac->n; line++) { 00714 pixac->boxa->box[line]->x += col_box->x; 00715 pixac->boxa->box[line]->y += col_box->y; 00716 } 00717 00718 // add the lines 00719 if (AddLines(pixac) == true) { 00720 if (pixaaAddBox(columns_, col_box, L_CLONE) != 0) { 00721 delete []col_order; 00722 return false; 00723 } 00724 } 00725 00726 pixDestroy(&pixt3); 00727 boxaDestroy(&boxa2); 00728 00729 line_cnt_ += columns_->pixa[col_idx]->n; 00730 } 00731 00732 pixaDestroy(&pixam); 00733 pixaDestroy(&pixad); 00734 boxaDestroy(&boxa); 00735 00736 delete []col_order; 00737 pixDestroy(&pix_temp1); 00738 00739 return true; 00740 } 00741 00742 // Estimate the parameters of the font(s) used in the page 00743 bool CubeLineSegmenter::EstimateFontParams() { 00744 int hgt_hist[kHgtBins]; 00745 int max_hgt; 00746 double mean_hgt; 00747 00748 // init hgt histogram of concomps 00749 memset(hgt_hist, 0, sizeof(hgt_hist)); 00750 00751 // compute max hgt 00752 max_hgt = 0; 00753 00754 for (int con = 0; con < con_comps_->n; con++) { 00755 // skip conn comps that are too long or too wide 00756 if (con_comps_->boxa->box[con]->h > kMaxConnCompHgt || 00757 con_comps_->boxa->box[con]->w > kMaxConnCompWid) { 00758 continue; 00759 } 00760 00761 max_hgt = MAX(max_hgt, con_comps_->boxa->box[con]->h); 00762 } 00763 00764 if (max_hgt <= 0) { 00765 return false; 00766 } 00767 00768 // init hgt histogram of concomps 00769 memset(hgt_hist, 0, sizeof(hgt_hist)); 00770 00771 // compute histogram 00772 mean_hgt = 0.0; 00773 for (int con = 0; con < con_comps_->n; con++) { 00774 // skip conn comps that are too long or too wide 00775 if (con_comps_->boxa->box[con]->h > kMaxConnCompHgt || 00776 con_comps_->boxa->box[con]->w > kMaxConnCompWid) { 00777 continue; 00778 } 00779 00780 int bin = static_cast<int>(kHgtBins * con_comps_->boxa->box[con]->h / 00781 max_hgt); 00782 bin = MIN(bin, kHgtBins - 1); 00783 hgt_hist[bin]++; 00784 mean_hgt += con_comps_->boxa->box[con]->h; 00785 } 00786 00787 mean_hgt /= con_comps_->n; 00788 00789 // find the top 2 bins 00790 int idx[kHgtBins]; 00791 00792 for (int bin = 0; bin < kHgtBins; bin++) { 00793 idx[bin] = bin; 00794 } 00795 00796 for (int ibin = 0; ibin < 2; ibin++) { 00797 for (int jbin = ibin + 1; jbin < kHgtBins; jbin++) { 00798 if (hgt_hist[idx[ibin]] < hgt_hist[idx[jbin]]) { 00799 int swap = idx[ibin]; 00800 idx[ibin] = idx[jbin]; 00801 idx[jbin] = swap; 00802 } 00803 } 00804 } 00805 00806 // emperically, we found out that the 2 highest freq bins correspond 00807 // respectively to the dot and alef 00808 est_dot_hgt_ = (1.0 * (idx[0] + 1) * max_hgt / kHgtBins); 00809 est_alef_hgt_ = (1.0 * (idx[1] + 1) * max_hgt / kHgtBins); 00810 00811 // as a sanity check the dot hgt must be significanly lower than alef 00812 if (est_alef_hgt_ < (est_dot_hgt_ * 2)) { 00813 // use max_hgt to estimate instead 00814 est_alef_hgt_ = mean_hgt * 1.5; 00815 est_dot_hgt_ = est_alef_hgt_ / 5.0; 00816 } 00817 00818 est_alef_hgt_ = MAX(est_alef_hgt_, est_dot_hgt_ * 4.0); 00819 00820 return true; 00821 } 00822 00823 // clean up the image 00824 Pix *CubeLineSegmenter::CleanUp(Pix *orig_img) { 00825 // get rid of long horizontal lines 00826 Pix *pix_temp0 = pixMorphCompSequence(orig_img, "o300.2", 0); 00827 pixXor(pix_temp0, pix_temp0, orig_img); 00828 00829 // get rid of long vertical lines 00830 Pix *pix_temp1 = pixMorphCompSequence(pix_temp0, "o2.300", 0); 00831 pixXor(pix_temp1, pix_temp1, pix_temp0); 00832 00833 pixDestroy(&pix_temp0); 00834 00835 // detect connected components 00836 Pixa *con_comps; 00837 Boxa *boxa = pixConnComp(pix_temp1, &con_comps, 8); 00838 if (boxa == NULL) { 00839 return NULL; 00840 } 00841 00842 // detect and remove suspicious conn comps 00843 for (int con = 0; con < con_comps->n; con++) { 00844 Box *box = boxa->box[con]; 00845 00846 // remove if suspc. conn comp 00847 if ((box->w > (box->h * kMaxHorzAspectRatio)) || 00848 (box->h > (box->w * kMaxVertAspectRatio)) || 00849 (box->w < kMinWid && box->h < kMinHgt)) { 00850 pixRasterop(pix_temp1, box->x, box->y, box->w, box->h, 00851 PIX_SRC ^ PIX_DST, con_comps->pix[con], 0, 0); 00852 } 00853 } 00854 00855 pixaDestroy(&con_comps); 00856 boxaDestroy(&boxa); 00857 00858 return pix_temp1; 00859 } 00860 00861 // Init the page segmenter 00862 bool CubeLineSegmenter::Init() { 00863 if (init_ == true) { 00864 return true; 00865 } 00866 00867 if (orig_img_ == NULL) { 00868 return false; 00869 } 00870 00871 // call the internal line segmentation 00872 return FindLines(); 00873 } 00874 00875 // return the pix mask and box of a specific line 00876 Pix *CubeLineSegmenter::Line(int line, Box **line_box) { 00877 if (init_ == false && Init() == false) { 00878 return NULL; 00879 } 00880 00881 if (line < 0 || line >= line_cnt_) { 00882 return NULL; 00883 } 00884 00885 (*line_box) = lines_pixa_->boxa->box[line]; 00886 return lines_pixa_->pix[line]; 00887 } 00888 00889 // Implements a basic rudimentary layout analysis based on Leptonica 00890 // works OK for Arabic. For other languages, the function TesseractPageAnalysis 00891 // should be called instead. 00892 bool CubeLineSegmenter::FindLines() { 00893 // convert the image to gray scale if necessary 00894 Pix *gray_scale_img = NULL; 00895 if (orig_img_->d != 2 && orig_img_->d != 8) { 00896 gray_scale_img = pixConvertTo8(orig_img_, false); 00897 if (gray_scale_img == NULL) { 00898 return false; 00899 } 00900 } else { 00901 gray_scale_img = orig_img_; 00902 } 00903 00904 // threshold image 00905 Pix *thresholded_img; 00906 thresholded_img = pixThresholdToBinary(gray_scale_img, 128); 00907 // free the gray scale image if necessary 00908 if (gray_scale_img != orig_img_) { 00909 pixDestroy(&gray_scale_img); 00910 } 00911 // bail-out if thresholding failed 00912 if (thresholded_img == NULL) { 00913 return false; 00914 } 00915 00916 // deskew 00917 Pix *deskew_img = pixDeskew(thresholded_img, 2); 00918 if (deskew_img == NULL) { 00919 return false; 00920 } 00921 00922 pixDestroy(&thresholded_img); 00923 00924 img_ = CleanUp(deskew_img); 00925 pixDestroy(&deskew_img); 00926 if (img_ == NULL) { 00927 return false; 00928 } 00929 00930 pixDestroy(&deskew_img); 00931 00932 // compute connected components 00933 Boxa *boxa = pixConnComp(img_, &con_comps_, 8); 00934 if (boxa == NULL) { 00935 return false; 00936 } 00937 00938 boxaDestroy(&boxa); 00939 00940 // estimate dot and alef hgts 00941 if (EstimateFontParams() == false) { 00942 return false; 00943 } 00944 00945 // perform line segmentation 00946 if (LineSegment() == false) { 00947 return false; 00948 } 00949 00950 // success 00951 init_ = true; 00952 return true; 00953 } 00954 00955 }