tesseract 3.04.01

ccstruct/rejctmap.cpp

Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        rejctmap.cpp  (Formerly rejmap.c)
00003  * Description: REJ and REJMAP class functions.
00004  * Author:              Phil Cheatle
00005  * Created:             Thu Jun  9 13:46:38 BST 1994
00006  *
00007  * (C) Copyright 1994, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include          "host.h"
00021 #include          "rejctmap.h"
00022 #include          "params.h"
00023 
00024 BOOL8 REJ::perm_rejected() {  //Is char perm reject?
00025   return (flag (R_TESS_FAILURE) ||
00026     flag (R_SMALL_XHT) ||
00027     flag (R_EDGE_CHAR) ||
00028     flag (R_1IL_CONFLICT) ||
00029     flag (R_POSTNN_1IL) ||
00030     flag (R_REJ_CBLOB) ||
00031     flag (R_BAD_REPETITION) || flag (R_MM_REJECT));
00032 }
00033 
00034 
00035 BOOL8 REJ::rej_before_nn_accept() {
00036   return flag (R_POOR_MATCH) ||
00037     flag (R_NOT_TESS_ACCEPTED) ||
00038     flag (R_CONTAINS_BLANKS) || flag (R_BAD_PERMUTER);
00039 }
00040 
00041 
00042 BOOL8 REJ::rej_between_nn_and_mm() {
00043   return flag (R_HYPHEN) ||
00044     flag (R_DUBIOUS) ||
00045     flag (R_NO_ALPHANUMS) || flag (R_MOSTLY_REJ) || flag (R_XHT_FIXUP);
00046 }
00047 
00048 
00049 BOOL8 REJ::rej_between_mm_and_quality_accept() {
00050   return flag (R_BAD_QUALITY);
00051 }
00052 
00053 
00054 BOOL8 REJ::rej_between_quality_and_minimal_rej_accept() {
00055   return flag (R_DOC_REJ) ||
00056     flag (R_BLOCK_REJ) || flag (R_ROW_REJ) || flag (R_UNLV_REJ);
00057 }
00058 
00059 
00060 BOOL8 REJ::rej_before_mm_accept() {
00061   return rej_between_nn_and_mm () ||
00062     (rej_before_nn_accept () &&
00063     !flag (R_NN_ACCEPT) && !flag (R_HYPHEN_ACCEPT));
00064 }
00065 
00066 
00067 BOOL8 REJ::rej_before_quality_accept() {
00068   return rej_between_mm_and_quality_accept () ||
00069     (!flag (R_MM_ACCEPT) && rej_before_mm_accept ());
00070 }
00071 
00072 
00073 BOOL8 REJ::rejected() {  //Is char rejected?
00074   if (flag (R_MINIMAL_REJ_ACCEPT))
00075     return FALSE;
00076   else
00077     return (perm_rejected () ||
00078       rej_between_quality_and_minimal_rej_accept () ||
00079       (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ()));
00080 }
00081 
00082 
00083 BOOL8 REJ::accept_if_good_quality() {  //potential rej?
00084   return (rejected () &&
00085     !perm_rejected () &&
00086     flag (R_BAD_PERMUTER) &&
00087     !flag (R_POOR_MATCH) &&
00088     !flag (R_NOT_TESS_ACCEPTED) &&
00089     !flag (R_CONTAINS_BLANKS) &&
00090     (!rej_between_nn_and_mm () &&
00091      !rej_between_mm_and_quality_accept () &&
00092      !rej_between_quality_and_minimal_rej_accept ()));
00093 }
00094 
00095 
00096 void REJ::setrej_tess_failure() {  //Tess generated blank
00097   set_flag(R_TESS_FAILURE);
00098 }
00099 
00100 
00101 void REJ::setrej_small_xht() {  //Small xht char/wd
00102   set_flag(R_SMALL_XHT);
00103 }
00104 
00105 
00106 void REJ::setrej_edge_char() {  //Close to image edge
00107   set_flag(R_EDGE_CHAR);
00108 }
00109 
00110 
00111 void REJ::setrej_1Il_conflict() {  //Initial reject map
00112   set_flag(R_1IL_CONFLICT);
00113 }
00114 
00115 
00116 void REJ::setrej_postNN_1Il() {  //1Il after NN
00117   set_flag(R_POSTNN_1IL);
00118 }
00119 
00120 
00121 void REJ::setrej_rej_cblob() {  //Insert duff blob
00122   set_flag(R_REJ_CBLOB);
00123 }
00124 
00125 
00126 void REJ::setrej_mm_reject() {  //Matrix matcher
00127   set_flag(R_MM_REJECT);
00128 }
00129 
00130 
00131 void REJ::setrej_bad_repetition() {  //Odd repeated char
00132   set_flag(R_BAD_REPETITION);
00133 }
00134 
00135 
00136 void REJ::setrej_poor_match() {  //Failed Rays heuristic
00137   set_flag(R_POOR_MATCH);
00138 }
00139 
00140 
00141 void REJ::setrej_not_tess_accepted() {
00142                                  //TEMP reject_word
00143   set_flag(R_NOT_TESS_ACCEPTED);
00144 }
00145 
00146 
00147 void REJ::setrej_contains_blanks() {
00148                                  //TEMP reject_word
00149   set_flag(R_CONTAINS_BLANKS);
00150 }
00151 
00152 
00153 void REJ::setrej_bad_permuter() {  //POTENTIAL reject_word
00154   set_flag(R_BAD_PERMUTER);
00155 }
00156 
00157 
00158 void REJ::setrej_hyphen() {  //PostNN dubious hyphen or .
00159   set_flag(R_HYPHEN);
00160 }
00161 
00162 
00163 void REJ::setrej_dubious() {  //PostNN dubious limit
00164   set_flag(R_DUBIOUS);
00165 }
00166 
00167 
00168 void REJ::setrej_no_alphanums() {  //TEMP reject_word
00169   set_flag(R_NO_ALPHANUMS);
00170 }
00171 
00172 
00173 void REJ::setrej_mostly_rej() {  //TEMP reject_word
00174   set_flag(R_MOSTLY_REJ);
00175 }
00176 
00177 
00178 void REJ::setrej_xht_fixup() {  //xht fixup
00179   set_flag(R_XHT_FIXUP);
00180 }
00181 
00182 
00183 void REJ::setrej_bad_quality() {  //TEMP reject_word
00184   set_flag(R_BAD_QUALITY);
00185 }
00186 
00187 
00188 void REJ::setrej_doc_rej() {  //TEMP reject_word
00189   set_flag(R_DOC_REJ);
00190 }
00191 
00192 
00193 void REJ::setrej_block_rej() {  //TEMP reject_word
00194   set_flag(R_BLOCK_REJ);
00195 }
00196 
00197 
00198 void REJ::setrej_row_rej() {  //TEMP reject_word
00199   set_flag(R_ROW_REJ);
00200 }
00201 
00202 
00203 void REJ::setrej_unlv_rej() {  //TEMP reject_word
00204   set_flag(R_UNLV_REJ);
00205 }
00206 
00207 
00208 void REJ::setrej_hyphen_accept() {  //NN Flipped a char
00209   set_flag(R_HYPHEN_ACCEPT);
00210 }
00211 
00212 
00213 void REJ::setrej_nn_accept() {  //NN Flipped a char
00214   set_flag(R_NN_ACCEPT);
00215 }
00216 
00217 
00218 void REJ::setrej_mm_accept() {  //Matrix matcher
00219   set_flag(R_MM_ACCEPT);
00220 }
00221 
00222 
00223 void REJ::setrej_quality_accept() {  //Quality flip a char
00224   set_flag(R_QUALITY_ACCEPT);
00225 }
00226 
00227 
00228 void REJ::setrej_minimal_rej_accept() {
00229                                  //Accept all except blank
00230   set_flag(R_MINIMAL_REJ_ACCEPT);
00231 }
00232 
00233 
00234 void REJ::full_print(FILE *fp) {
00235   fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F");
00236   fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F");
00237   fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F");
00238   fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F");
00239   fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F");
00240   fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F");
00241   fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F");
00242   fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F");
00243   fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? "T" : "F");
00244   fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n",
00245     flag (R_NOT_TESS_ACCEPTED) ? "T" : "F");
00246   fprintf (fp, "R_CONTAINS_BLANKS: %s\n",
00247     flag (R_CONTAINS_BLANKS) ? "T" : "F");
00248   fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F");
00249   fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F");
00250   fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F");
00251   fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F");
00252   fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F");
00253   fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F");
00254   fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F");
00255   fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F");
00256   fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F");
00257   fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F");
00258   fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F");
00259   fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F");
00260   fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F");
00261   fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F");
00262   fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F");
00263   fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n",
00264     flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F");
00265 }
00266 
00267 
00268 //The REJMAP class has been hacked to use alloc_struct instead of new [].
00269 //This is to reduce memory fragmentation only as it is rather kludgy.
00270 //alloc_struct bypasses the call to the constructor of REJ on each
00271 //array element. Although the constructor is empty, the BITS16 members
00272 //do have a constructor which sets all the flags to 0. The memset
00273 //replaces this functionality.
00274 
00275 REJMAP::REJMAP(  //classwise copy
00276                const REJMAP &source) {
00277   REJ *to;
00278   REJ *from = source.ptr;
00279   int i;
00280 
00281   len = source.length ();
00282 
00283   if (len > 0) {
00284     ptr = (REJ *) alloc_struct (len * sizeof (REJ), "REJ");
00285     to = ptr;
00286     for (i = 0; i < len; i++) {
00287       *to = *from;
00288       to++;
00289       from++;
00290     }
00291   }
00292   else
00293     ptr = NULL;
00294 }
00295 
00296 
00297 REJMAP & REJMAP::operator= (     //assign REJMAP
00298 const REJMAP & source            //from this
00299 ) {
00300   REJ *
00301     to;
00302   REJ *
00303     from = source.ptr;
00304   int
00305     i;
00306 
00307   initialise (source.len);
00308   to = ptr;
00309   for (i = 0; i < len; i++) {
00310     *to = *from;
00311     to++;
00312     from++;
00313   }
00314   return *this;
00315 }
00316 
00317 
00318 void REJMAP::initialise(  //Redefine map
00319                         inT16 length) {
00320   if (ptr != NULL)
00321     free_struct (ptr, len * sizeof (REJ), "REJ");
00322   len = length;
00323   if (len > 0)
00324     ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"),
00325       0, len * sizeof (REJ));
00326   else
00327     ptr = NULL;
00328 }
00329 
00330 
00331 inT16 REJMAP::accept_count() {  //How many accepted?
00332   int i;
00333   inT16 count = 0;
00334 
00335   for (i = 0; i < len; i++) {
00336     if (ptr[i].accepted ())
00337       count++;
00338   }
00339   return count;
00340 }
00341 
00342 
00343 BOOL8 REJMAP::recoverable_rejects() {  //Any non perm rejs?
00344   int i;
00345 
00346   for (i = 0; i < len; i++) {
00347     if (ptr[i].recoverable ())
00348       return TRUE;
00349   }
00350   return FALSE;
00351 }
00352 
00353 
00354 BOOL8 REJMAP::quality_recoverable_rejects() {  //Any potential rejs?
00355   int i;
00356 
00357   for (i = 0; i < len; i++) {
00358     if (ptr[i].accept_if_good_quality ())
00359       return TRUE;
00360   }
00361   return FALSE;
00362 }
00363 
00364 
00365 void REJMAP::remove_pos(           //Cut out an element
00366                         inT16 pos  //element to remove
00367                        ) {
00368   REJ *new_ptr;                  //new, smaller map
00369   int i;
00370 
00371   ASSERT_HOST (pos >= 0);
00372   ASSERT_HOST (pos < len);
00373   ASSERT_HOST (len > 0);
00374 
00375   len--;
00376   if (len > 0)
00377     new_ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"),
00378       0, len * sizeof (REJ));
00379   else
00380     new_ptr = NULL;
00381 
00382   for (i = 0; i < pos; i++)
00383     new_ptr[i] = ptr[i];         //copy pre pos
00384 
00385   for (; pos < len; pos++)
00386     new_ptr[pos] = ptr[pos + 1]; //copy post pos
00387 
00388                                  //delete old map
00389   free_struct (ptr, (len + 1) * sizeof (REJ), "REJ");
00390   ptr = new_ptr;
00391 }
00392 
00393 
00394 void REJMAP::print(FILE *fp) {
00395   int i;
00396   char buff[512];
00397 
00398   for (i = 0; i < len; i++) {
00399     buff[i] = ptr[i].display_char ();
00400   }
00401   buff[i] = '\0';
00402   fprintf (fp, "\"%s\"", buff);
00403 }
00404 
00405 
00406 void REJMAP::full_print(FILE *fp) {
00407   int i;
00408 
00409   for (i = 0; i < len; i++) {
00410     ptr[i].full_print (fp);
00411     fprintf (fp, "\n");
00412   }
00413 }
00414 
00415 
00416 void REJMAP::rej_word_small_xht() {  //Reject whole word
00417   int i;
00418 
00419   for (i = 0; i < len; i++) {
00420     ptr[i].setrej_small_xht ();
00421   }
00422 }
00423 
00424 
00425 void REJMAP::rej_word_tess_failure() {  //Reject whole word
00426   int i;
00427 
00428   for (i = 0; i < len; i++) {
00429     ptr[i].setrej_tess_failure ();
00430   }
00431 }
00432 
00433 
00434 void REJMAP::rej_word_not_tess_accepted() {  //Reject whole word
00435   int i;
00436 
00437   for (i = 0; i < len; i++) {
00438     if (ptr[i].accepted()) ptr[i].setrej_not_tess_accepted();
00439   }
00440 }
00441 
00442 
00443 void REJMAP::rej_word_contains_blanks() {  //Reject whole word
00444   int i;
00445 
00446   for (i = 0; i < len; i++) {
00447     if (ptr[i].accepted()) ptr[i].setrej_contains_blanks();
00448   }
00449 }
00450 
00451 
00452 void REJMAP::rej_word_bad_permuter() {  //Reject whole word
00453   int i;
00454 
00455   for (i = 0; i < len; i++) {
00456     if (ptr[i].accepted()) ptr[i].setrej_bad_permuter ();
00457   }
00458 }
00459 
00460 
00461 void REJMAP::rej_word_xht_fixup() {  //Reject whole word
00462   int i;
00463 
00464   for (i = 0; i < len; i++) {
00465     if (ptr[i].accepted()) ptr[i].setrej_xht_fixup();
00466   }
00467 }
00468 
00469 
00470 void REJMAP::rej_word_no_alphanums() {  //Reject whole word
00471   int i;
00472 
00473   for (i = 0; i < len; i++) {
00474     if (ptr[i].accepted()) ptr[i].setrej_no_alphanums();
00475   }
00476 }
00477 
00478 
00479 void REJMAP::rej_word_mostly_rej() {  //Reject whole word
00480   int i;
00481 
00482   for (i = 0; i < len; i++) {
00483     if (ptr[i].accepted()) ptr[i].setrej_mostly_rej();
00484   }
00485 }
00486 
00487 
00488 void REJMAP::rej_word_bad_quality() {  //Reject whole word
00489   int i;
00490 
00491   for (i = 0; i < len; i++) {
00492     if (ptr[i].accepted()) ptr[i].setrej_bad_quality();
00493   }
00494 }
00495 
00496 
00497 void REJMAP::rej_word_doc_rej() {  //Reject whole word
00498   int i;
00499 
00500   for (i = 0; i < len; i++) {
00501     if (ptr[i].accepted()) ptr[i].setrej_doc_rej();
00502   }
00503 }
00504 
00505 
00506 void REJMAP::rej_word_block_rej() {  //Reject whole word
00507   int i;
00508 
00509   for (i = 0; i < len; i++) {
00510     if (ptr[i].accepted()) ptr[i].setrej_block_rej();
00511   }
00512 }
00513 
00514 
00515 void REJMAP::rej_word_row_rej() {  //Reject whole word
00516   int i;
00517 
00518   for (i = 0; i < len; i++) {
00519     if (ptr[i].accepted()) ptr[i].setrej_row_rej();
00520   }
00521 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines