|
tesseract 3.04.01
|
00001 /********************************************************************** 00002 * File: rejctmap.cpp (Formerly rejmap.c) 00003 * Description: REJ and REJMAP class functions. 00004 * Author: Phil Cheatle 00005 * Created: Thu Jun 9 13:46:38 BST 1994 00006 * 00007 * (C) Copyright 1994, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "host.h" 00021 #include "rejctmap.h" 00022 #include "params.h" 00023 00024 BOOL8 REJ::perm_rejected() { //Is char perm reject? 00025 return (flag (R_TESS_FAILURE) || 00026 flag (R_SMALL_XHT) || 00027 flag (R_EDGE_CHAR) || 00028 flag (R_1IL_CONFLICT) || 00029 flag (R_POSTNN_1IL) || 00030 flag (R_REJ_CBLOB) || 00031 flag (R_BAD_REPETITION) || flag (R_MM_REJECT)); 00032 } 00033 00034 00035 BOOL8 REJ::rej_before_nn_accept() { 00036 return flag (R_POOR_MATCH) || 00037 flag (R_NOT_TESS_ACCEPTED) || 00038 flag (R_CONTAINS_BLANKS) || flag (R_BAD_PERMUTER); 00039 } 00040 00041 00042 BOOL8 REJ::rej_between_nn_and_mm() { 00043 return flag (R_HYPHEN) || 00044 flag (R_DUBIOUS) || 00045 flag (R_NO_ALPHANUMS) || flag (R_MOSTLY_REJ) || flag (R_XHT_FIXUP); 00046 } 00047 00048 00049 BOOL8 REJ::rej_between_mm_and_quality_accept() { 00050 return flag (R_BAD_QUALITY); 00051 } 00052 00053 00054 BOOL8 REJ::rej_between_quality_and_minimal_rej_accept() { 00055 return flag (R_DOC_REJ) || 00056 flag (R_BLOCK_REJ) || flag (R_ROW_REJ) || flag (R_UNLV_REJ); 00057 } 00058 00059 00060 BOOL8 REJ::rej_before_mm_accept() { 00061 return rej_between_nn_and_mm () || 00062 (rej_before_nn_accept () && 00063 !flag (R_NN_ACCEPT) && !flag (R_HYPHEN_ACCEPT)); 00064 } 00065 00066 00067 BOOL8 REJ::rej_before_quality_accept() { 00068 return rej_between_mm_and_quality_accept () || 00069 (!flag (R_MM_ACCEPT) && rej_before_mm_accept ()); 00070 } 00071 00072 00073 BOOL8 REJ::rejected() { //Is char rejected? 00074 if (flag (R_MINIMAL_REJ_ACCEPT)) 00075 return FALSE; 00076 else 00077 return (perm_rejected () || 00078 rej_between_quality_and_minimal_rej_accept () || 00079 (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ())); 00080 } 00081 00082 00083 BOOL8 REJ::accept_if_good_quality() { //potential rej? 00084 return (rejected () && 00085 !perm_rejected () && 00086 flag (R_BAD_PERMUTER) && 00087 !flag (R_POOR_MATCH) && 00088 !flag (R_NOT_TESS_ACCEPTED) && 00089 !flag (R_CONTAINS_BLANKS) && 00090 (!rej_between_nn_and_mm () && 00091 !rej_between_mm_and_quality_accept () && 00092 !rej_between_quality_and_minimal_rej_accept ())); 00093 } 00094 00095 00096 void REJ::setrej_tess_failure() { //Tess generated blank 00097 set_flag(R_TESS_FAILURE); 00098 } 00099 00100 00101 void REJ::setrej_small_xht() { //Small xht char/wd 00102 set_flag(R_SMALL_XHT); 00103 } 00104 00105 00106 void REJ::setrej_edge_char() { //Close to image edge 00107 set_flag(R_EDGE_CHAR); 00108 } 00109 00110 00111 void REJ::setrej_1Il_conflict() { //Initial reject map 00112 set_flag(R_1IL_CONFLICT); 00113 } 00114 00115 00116 void REJ::setrej_postNN_1Il() { //1Il after NN 00117 set_flag(R_POSTNN_1IL); 00118 } 00119 00120 00121 void REJ::setrej_rej_cblob() { //Insert duff blob 00122 set_flag(R_REJ_CBLOB); 00123 } 00124 00125 00126 void REJ::setrej_mm_reject() { //Matrix matcher 00127 set_flag(R_MM_REJECT); 00128 } 00129 00130 00131 void REJ::setrej_bad_repetition() { //Odd repeated char 00132 set_flag(R_BAD_REPETITION); 00133 } 00134 00135 00136 void REJ::setrej_poor_match() { //Failed Rays heuristic 00137 set_flag(R_POOR_MATCH); 00138 } 00139 00140 00141 void REJ::setrej_not_tess_accepted() { 00142 //TEMP reject_word 00143 set_flag(R_NOT_TESS_ACCEPTED); 00144 } 00145 00146 00147 void REJ::setrej_contains_blanks() { 00148 //TEMP reject_word 00149 set_flag(R_CONTAINS_BLANKS); 00150 } 00151 00152 00153 void REJ::setrej_bad_permuter() { //POTENTIAL reject_word 00154 set_flag(R_BAD_PERMUTER); 00155 } 00156 00157 00158 void REJ::setrej_hyphen() { //PostNN dubious hyphen or . 00159 set_flag(R_HYPHEN); 00160 } 00161 00162 00163 void REJ::setrej_dubious() { //PostNN dubious limit 00164 set_flag(R_DUBIOUS); 00165 } 00166 00167 00168 void REJ::setrej_no_alphanums() { //TEMP reject_word 00169 set_flag(R_NO_ALPHANUMS); 00170 } 00171 00172 00173 void REJ::setrej_mostly_rej() { //TEMP reject_word 00174 set_flag(R_MOSTLY_REJ); 00175 } 00176 00177 00178 void REJ::setrej_xht_fixup() { //xht fixup 00179 set_flag(R_XHT_FIXUP); 00180 } 00181 00182 00183 void REJ::setrej_bad_quality() { //TEMP reject_word 00184 set_flag(R_BAD_QUALITY); 00185 } 00186 00187 00188 void REJ::setrej_doc_rej() { //TEMP reject_word 00189 set_flag(R_DOC_REJ); 00190 } 00191 00192 00193 void REJ::setrej_block_rej() { //TEMP reject_word 00194 set_flag(R_BLOCK_REJ); 00195 } 00196 00197 00198 void REJ::setrej_row_rej() { //TEMP reject_word 00199 set_flag(R_ROW_REJ); 00200 } 00201 00202 00203 void REJ::setrej_unlv_rej() { //TEMP reject_word 00204 set_flag(R_UNLV_REJ); 00205 } 00206 00207 00208 void REJ::setrej_hyphen_accept() { //NN Flipped a char 00209 set_flag(R_HYPHEN_ACCEPT); 00210 } 00211 00212 00213 void REJ::setrej_nn_accept() { //NN Flipped a char 00214 set_flag(R_NN_ACCEPT); 00215 } 00216 00217 00218 void REJ::setrej_mm_accept() { //Matrix matcher 00219 set_flag(R_MM_ACCEPT); 00220 } 00221 00222 00223 void REJ::setrej_quality_accept() { //Quality flip a char 00224 set_flag(R_QUALITY_ACCEPT); 00225 } 00226 00227 00228 void REJ::setrej_minimal_rej_accept() { 00229 //Accept all except blank 00230 set_flag(R_MINIMAL_REJ_ACCEPT); 00231 } 00232 00233 00234 void REJ::full_print(FILE *fp) { 00235 fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F"); 00236 fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F"); 00237 fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F"); 00238 fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F"); 00239 fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F"); 00240 fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F"); 00241 fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F"); 00242 fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F"); 00243 fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? "T" : "F"); 00244 fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n", 00245 flag (R_NOT_TESS_ACCEPTED) ? "T" : "F"); 00246 fprintf (fp, "R_CONTAINS_BLANKS: %s\n", 00247 flag (R_CONTAINS_BLANKS) ? "T" : "F"); 00248 fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F"); 00249 fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F"); 00250 fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F"); 00251 fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F"); 00252 fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F"); 00253 fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F"); 00254 fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F"); 00255 fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F"); 00256 fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F"); 00257 fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F"); 00258 fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F"); 00259 fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F"); 00260 fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F"); 00261 fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F"); 00262 fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F"); 00263 fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n", 00264 flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F"); 00265 } 00266 00267 00268 //The REJMAP class has been hacked to use alloc_struct instead of new []. 00269 //This is to reduce memory fragmentation only as it is rather kludgy. 00270 //alloc_struct by-passes the call to the contsructor of REJ on each 00271 //array element. Although the constructor is empty, the BITS16 members 00272 //do have a constructor which sets all the flags to 0. The memset 00273 //replaces this functionality. 00274 00275 REJMAP::REJMAP( //classwise copy 00276 const REJMAP &source) { 00277 REJ *to; 00278 REJ *from = source.ptr; 00279 int i; 00280 00281 len = source.length (); 00282 00283 if (len > 0) { 00284 ptr = (REJ *) alloc_struct (len * sizeof (REJ), "REJ"); 00285 to = ptr; 00286 for (i = 0; i < len; i++) { 00287 *to = *from; 00288 to++; 00289 from++; 00290 } 00291 } 00292 else 00293 ptr = NULL; 00294 } 00295 00296 00297 REJMAP & REJMAP::operator= ( //assign REJMAP 00298 const REJMAP & source //from this 00299 ) { 00300 REJ * 00301 to; 00302 REJ * 00303 from = source.ptr; 00304 int 00305 i; 00306 00307 initialise (source.len); 00308 to = ptr; 00309 for (i = 0; i < len; i++) { 00310 *to = *from; 00311 to++; 00312 from++; 00313 } 00314 return *this; 00315 } 00316 00317 00318 void REJMAP::initialise( //Redefine map 00319 inT16 length) { 00320 if (ptr != NULL) 00321 free_struct (ptr, len * sizeof (REJ), "REJ"); 00322 len = length; 00323 if (len > 0) 00324 ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"), 00325 0, len * sizeof (REJ)); 00326 else 00327 ptr = NULL; 00328 } 00329 00330 00331 inT16 REJMAP::accept_count() { //How many accepted? 00332 int i; 00333 inT16 count = 0; 00334 00335 for (i = 0; i < len; i++) { 00336 if (ptr[i].accepted ()) 00337 count++; 00338 } 00339 return count; 00340 } 00341 00342 00343 BOOL8 REJMAP::recoverable_rejects() { //Any non perm rejs? 00344 int i; 00345 00346 for (i = 0; i < len; i++) { 00347 if (ptr[i].recoverable ()) 00348 return TRUE; 00349 } 00350 return FALSE; 00351 } 00352 00353 00354 BOOL8 REJMAP::quality_recoverable_rejects() { //Any potential rejs? 00355 int i; 00356 00357 for (i = 0; i < len; i++) { 00358 if (ptr[i].accept_if_good_quality ()) 00359 return TRUE; 00360 } 00361 return FALSE; 00362 } 00363 00364 00365 void REJMAP::remove_pos( //Cut out an element 00366 inT16 pos //element to remove 00367 ) { 00368 REJ *new_ptr; //new, smaller map 00369 int i; 00370 00371 ASSERT_HOST (pos >= 0); 00372 ASSERT_HOST (pos < len); 00373 ASSERT_HOST (len > 0); 00374 00375 len--; 00376 if (len > 0) 00377 new_ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"), 00378 0, len * sizeof (REJ)); 00379 else 00380 new_ptr = NULL; 00381 00382 for (i = 0; i < pos; i++) 00383 new_ptr[i] = ptr[i]; //copy pre pos 00384 00385 for (; pos < len; pos++) 00386 new_ptr[pos] = ptr[pos + 1]; //copy post pos 00387 00388 //delete old map 00389 free_struct (ptr, (len + 1) * sizeof (REJ), "REJ"); 00390 ptr = new_ptr; 00391 } 00392 00393 00394 void REJMAP::print(FILE *fp) { 00395 int i; 00396 char buff[512]; 00397 00398 for (i = 0; i < len; i++) { 00399 buff[i] = ptr[i].display_char (); 00400 } 00401 buff[i] = '\0'; 00402 fprintf (fp, "\"%s\"", buff); 00403 } 00404 00405 00406 void REJMAP::full_print(FILE *fp) { 00407 int i; 00408 00409 for (i = 0; i < len; i++) { 00410 ptr[i].full_print (fp); 00411 fprintf (fp, "\n"); 00412 } 00413 } 00414 00415 00416 void REJMAP::rej_word_small_xht() { //Reject whole word 00417 int i; 00418 00419 for (i = 0; i < len; i++) { 00420 ptr[i].setrej_small_xht (); 00421 } 00422 } 00423 00424 00425 void REJMAP::rej_word_tess_failure() { //Reject whole word 00426 int i; 00427 00428 for (i = 0; i < len; i++) { 00429 ptr[i].setrej_tess_failure (); 00430 } 00431 } 00432 00433 00434 void REJMAP::rej_word_not_tess_accepted() { //Reject whole word 00435 int i; 00436 00437 for (i = 0; i < len; i++) { 00438 if (ptr[i].accepted()) ptr[i].setrej_not_tess_accepted(); 00439 } 00440 } 00441 00442 00443 void REJMAP::rej_word_contains_blanks() { //Reject whole word 00444 int i; 00445 00446 for (i = 0; i < len; i++) { 00447 if (ptr[i].accepted()) ptr[i].setrej_contains_blanks(); 00448 } 00449 } 00450 00451 00452 void REJMAP::rej_word_bad_permuter() { //Reject whole word 00453 int i; 00454 00455 for (i = 0; i < len; i++) { 00456 if (ptr[i].accepted()) ptr[i].setrej_bad_permuter (); 00457 } 00458 } 00459 00460 00461 void REJMAP::rej_word_xht_fixup() { //Reject whole word 00462 int i; 00463 00464 for (i = 0; i < len; i++) { 00465 if (ptr[i].accepted()) ptr[i].setrej_xht_fixup(); 00466 } 00467 } 00468 00469 00470 void REJMAP::rej_word_no_alphanums() { //Reject whole word 00471 int i; 00472 00473 for (i = 0; i < len; i++) { 00474 if (ptr[i].accepted()) ptr[i].setrej_no_alphanums(); 00475 } 00476 } 00477 00478 00479 void REJMAP::rej_word_mostly_rej() { //Reject whole word 00480 int i; 00481 00482 for (i = 0; i < len; i++) { 00483 if (ptr[i].accepted()) ptr[i].setrej_mostly_rej(); 00484 } 00485 } 00486 00487 00488 void REJMAP::rej_word_bad_quality() { //Reject whole word 00489 int i; 00490 00491 for (i = 0; i < len; i++) { 00492 if (ptr[i].accepted()) ptr[i].setrej_bad_quality(); 00493 } 00494 } 00495 00496 00497 void REJMAP::rej_word_doc_rej() { //Reject whole word 00498 int i; 00499 00500 for (i = 0; i < len; i++) { 00501 if (ptr[i].accepted()) ptr[i].setrej_doc_rej(); 00502 } 00503 } 00504 00505 00506 void REJMAP::rej_word_block_rej() { //Reject whole word 00507 int i; 00508 00509 for (i = 0; i < len; i++) { 00510 if (ptr[i].accepted()) ptr[i].setrej_block_rej(); 00511 } 00512 } 00513 00514 00515 void REJMAP::rej_word_row_rej() { //Reject whole word 00516 int i; 00517 00518 for (i = 0; i < len; i++) { 00519 if (ptr[i].accepted()) ptr[i].setrej_row_rej(); 00520 } 00521 }