|
tesseract 3.04.01
|
/**********************************************************************
 * File: strngs.h (Formerly strings.h)
 * Description: STRING class definition.
 * Author: Ray Smith
 * Created: Fri Feb 15 09:15:01 GMT 1991
 *
 * (C) Copyright 1991, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#ifndef STRNGS_H
#define STRNGS_H

#include <stdio.h>
#include <string.h>
#include "platform.h"
#include "memry.h"

namespace tesseract {
class TFile;
}  // namespace tesseract.

// STRING_IS_PROTECTED means that string[index] = X is invalid
// because you have to go through strings interface to modify it.
// This allows the string to ensure internal integrity and maintain
// its own string length. Unfortunately this is not possible because
// STRINGS are used as direct-manipulation data buffers for things
// like length arrays and many places cast away the const on string()
// to mutate the string. Turning this off means that internally we
// cannot assume we know the strlen.
00040 #define STRING_IS_PROTECTED 0 00041 00042 template <typename T> class GenericVector; 00043 00044 class TESS_API STRING 00045 { 00046 public: 00047 STRING(); 00048 STRING(const STRING &string); 00049 STRING(const char *string); 00050 STRING(const char *data, int length); 00051 ~STRING (); 00052 00053 // Writes to the given file. Returns false in case of error. 00054 bool Serialize(FILE* fp) const; 00055 // Reads from the given file. Returns false in case of error. 00056 // If swap is true, assumes a big/little-endian swap is needed. 00057 bool DeSerialize(bool swap, FILE* fp); 00058 // Writes to the given file. Returns false in case of error. 00059 bool Serialize(tesseract::TFile* fp) const; 00060 // Reads from the given file. Returns false in case of error. 00061 // If swap is true, assumes a big/little-endian swap is needed. 00062 bool DeSerialize(bool swap, tesseract::TFile* fp); 00063 00064 BOOL8 contains(const char c) const; 00065 inT32 length() const; 00066 inT32 size() const { return length(); } 00067 const char *string() const; 00068 const char *c_str() const; 00069 00070 inline char* strdup() const { 00071 inT32 len = length() + 1; 00072 return strncpy(new char[len], GetCStr(), len); 00073 } 00074 00075 #if STRING_IS_PROTECTED 00076 const char &operator[] (inT32 index) const; 00077 // len is number of chars in s to insert starting at index in this string 00078 void insert_range(inT32 index, const char*s, int len); 00079 void erase_range(inT32 index, int len); 00080 #else 00081 char &operator[] (inT32 index) const; 00082 #endif 00083 void split(const char c, GenericVector<STRING> *splited); 00084 void truncate_at(inT32 index); 00085 00086 BOOL8 operator== (const STRING & string) const; 00087 BOOL8 operator!= (const STRING & string) const; 00088 BOOL8 operator!= (const char *string) const; 00089 00090 STRING & operator= (const char *string); 00091 STRING & operator= (const STRING & string); 00092 00093 STRING operator+ (const STRING & string) const; 
00094 STRING operator+ (const char ch) const; 00095 00096 STRING & operator+= (const char *string); 00097 STRING & operator+= (const STRING & string); 00098 STRING & operator+= (const char ch); 00099 00100 // Assignment for strings which are not null-terminated. 00101 void assign(const char *cstr, int len); 00102 00103 // Appends the given string and int (as a %d) to this. 00104 // += cannot be used for ints as there as a char += operator that would 00105 // be ambiguous, and ints usually need a string before or between them 00106 // anyway. 00107 void add_str_int(const char* str, int number); 00108 // Appends the given string and double (as a %.8g) to this. 00109 void add_str_double(const char* str, double number); 00110 00111 // ensure capacity but keep pointer encapsulated 00112 inline void ensure(inT32 min_capacity) { ensure_cstr(min_capacity); } 00113 00114 private: 00115 typedef struct STRING_HEADER { 00116 // How much space was allocated in the string buffer for char data. 00117 int capacity_; 00118 00119 // used_ is how much of the capacity is currently being used, 00120 // including a '\0' terminator. 00121 // 00122 // If used_ is 0 then string is NULL (not even the '\0') 00123 // else if used_ > 0 then it is strlen() + 1 (because it includes '\0') 00124 // else strlen is >= 0 (not NULL) but needs to be computed. 00125 // this condition is set when encapsulation is violated because 00126 // an API returned a mutable string. 00127 // 00128 // capacity_ - used_ = excess capacity that the string can grow 00129 // without reallocating 00130 mutable int used_; 00131 } STRING_HEADER; 00132 00133 // To preserve the behavior of the old serialization, we only have space 00134 // for one pointer in this structure. So we are embedding a data structure 00135 // at the start of the storage that will hold additional state variables, 00136 // then storing the actual string contents immediately after. 
00137 STRING_HEADER* data_; 00138 00139 // returns the header part of the storage 00140 inline STRING_HEADER* GetHeader() { 00141 return data_; 00142 } 00143 inline const STRING_HEADER* GetHeader() const { 00144 return data_; 00145 } 00146 00147 // returns the string data part of storage 00148 inline char* GetCStr() { 00149 return ((char *)data_) + sizeof(STRING_HEADER); 00150 }; 00151 00152 inline const char* GetCStr() const { 00153 return ((const char *)data_) + sizeof(STRING_HEADER); 00154 }; 00155 inline bool InvariantOk() const { 00156 #if STRING_IS_PROTECTED 00157 return (GetHeader()->used_ == 0) ? 00158 (string() == NULL) : (GetHeader()->used_ == (strlen(string()) + 1)); 00159 #else 00160 return true; 00161 #endif 00162 } 00163 00164 // Ensure string has requested capacity as optimization 00165 // to avoid unnecessary reallocations. 00166 // The return value is a cstr buffer with at least requested capacity 00167 char* ensure_cstr(inT32 min_capacity); 00168 00169 void FixHeader() const; // make used_ non-negative, even if const 00170 00171 char* AllocData(int used, int capacity); 00172 void DiscardData(); 00173 }; 00174 #endif