|
tesseract 3.04.01
|
00001 /********************************************************************** 00002 * File: adaptions.cpp (Formerly adaptions.c) 00003 * Description: Functions used to adapt to blobs already confidently 00004 * identified 00005 * Author: Chris Newton 00006 * Created: Thu Oct 7 10:17:28 BST 1993 00007 * 00008 * (C) Copyright 1992, Hewlett-Packard Ltd. 00009 ** Licensed under the Apache License, Version 2.0 (the "License"); 00010 ** you may not use this file except in compliance with the License. 00011 ** You may obtain a copy of the License at 00012 ** http://www.apache.org/licenses/LICENSE-2.0 00013 ** Unless required by applicable law or agreed to in writing, software 00014 ** distributed under the License is distributed on an "AS IS" BASIS, 00015 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 ** See the License for the specific language governing permissions and 00017 ** limitations under the License. 00018 * 00019 **********************************************************************/ 00020 00021 #ifdef _MSC_VER 00022 #pragma warning(disable:4244) // Conversion warnings 00023 #pragma warning(disable:4305) // int/float warnings 00024 #endif 00025 00026 #ifdef __UNIX__ 00027 #include <assert.h> 00028 #endif 00029 #include <ctype.h> 00030 #include <string.h> 00031 #include "tessbox.h" 00032 #include "tessvars.h" 00033 #include "memry.h" 00034 #include "reject.h" 00035 #include "control.h" 00036 #include "stopper.h" 00037 #include "tesseractclass.h" 00038 00039 // Include automatically generated configuration file if running autoconf. 00040 #ifdef HAVE_CONFIG_H 00041 #include "config_auto.h" 00042 #endif 00043 00044 namespace tesseract { 00045 BOOL8 Tesseract::word_adaptable( //should we adapt? 00046 WERD_RES *word, 00047 uinT16 mode) { 00048 if (tessedit_adaption_debug) { 00049 tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n", 00050 word->best_choice == NULL ? "" : 00051 word->best_choice->unichar_string().string(), 00052 word->best_choice->rating(), word->best_choice->certainty()); 00053 } 00054 00055 BOOL8 status = FALSE; 00056 BITS16 flags(mode); 00057 00058 enum MODES 00059 { 00060 ADAPTABLE_WERD, 00061 ACCEPTABLE_WERD, 00062 CHECK_DAWGS, 00063 CHECK_SPACES, 00064 CHECK_ONE_ELL_CONFLICT, 00065 CHECK_AMBIG_WERD 00066 }; 00067 00068 /* 00069 0: NO adaption 00070 */ 00071 if (mode == 0) { 00072 if (tessedit_adaption_debug) tprintf("adaption disabled\n"); 00073 return FALSE; 00074 } 00075 00076 if (flags.bit (ADAPTABLE_WERD)) { 00077 status |= word->tess_would_adapt; // result of Classify::AdaptableWord() 00078 if (tessedit_adaption_debug && !status) { 00079 tprintf("tess_would_adapt bit is false\n"); 00080 } 00081 } 00082 00083 if (flags.bit (ACCEPTABLE_WERD)) { 00084 status |= word->tess_accepted; 00085 if (tessedit_adaption_debug && !status) { 00086 tprintf("tess_accepted bit is false\n"); 00087 } 00088 } 00089 00090 if (!status) { // If not set then 00091 return FALSE; // ignore other checks 00092 } 00093 00094 if (flags.bit (CHECK_DAWGS) && 00095 (word->best_choice->permuter () != SYSTEM_DAWG_PERM) && 00096 (word->best_choice->permuter () != FREQ_DAWG_PERM) && 00097 (word->best_choice->permuter () != USER_DAWG_PERM) && 00098 (word->best_choice->permuter () != NUMBER_PERM)) { 00099 if (tessedit_adaption_debug) tprintf("word not in dawgs\n"); 00100 return FALSE; 00101 } 00102 00103 if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, FALSE)) { 00104 if (tessedit_adaption_debug) tprintf("word has ell conflict\n"); 00105 return FALSE; 00106 } 00107 00108 if (flags.bit (CHECK_SPACES) && 00109 (strchr(word->best_choice->unichar_string().string(), ' ') != NULL)) { 00110 if (tessedit_adaption_debug) tprintf("word contains spaces\n"); 00111 return FALSE; 00112 } 00113 00114 if (flags.bit (CHECK_AMBIG_WERD) && 00115 word->best_choice->dangerous_ambig_found()) { 00116 if (tessedit_adaption_debug) tprintf("word is ambiguous\n"); 00117 return FALSE; 00118 } 00119 00120 if (tessedit_adaption_debug) { 00121 tprintf("returning status %d\n", status); 00122 } 00123 return status; 00124 } 00125 00126 } // namespace tesseract