tesseract 3.04.01

ccutil/strngs.h

Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        strngs.h  (Formerly strings.h)
00003  * Description: STRING class definition.
00004  * Author:                                      Ray Smith
00005  * Created:                                     Fri Feb 15 09:15:01 GMT 1991
00006  *
00007  * (C) Copyright 1991, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #ifndef           STRNGS_H
00021 #define           STRNGS_H
00022 
00023 #include          <stdio.h>
00024 #include          <string.h>
00025 #include          "platform.h"
00026 #include          "memry.h"
00027 
00028 namespace tesseract {
00029 class TFile;  // Forward declaration: this header only refers to TFile through pointers.
00030 }  // namespace tesseract.
00031 
00032 // STRING_IS_PROTECTED means that string[index] = X is invalid
00033 // because you have to go through the STRING interface to modify it.
00034 // This allows the string to ensure internal integrity and maintain
00035 // its own string length. Unfortunately this is not possible because
00036 // STRINGs are used as direct-manipulation data buffers for things
00037 // like length arrays, and many places cast away the const on string()
00038 // to mutate the string. Turning this off means that internally we
00039 // cannot assume we know the strlen.
00040 #define STRING_IS_PROTECTED  0
00041 
00042 template <typename T> class GenericVector;  // Forward declaration: only used as GenericVector<STRING>* by STRING::split().
00043 
00044 class TESS_API STRING  // Character string whose capacity/length bookkeeping lives in a header stored ahead of the text.
00045 {
00046   public:
00047     STRING();  // Default constructor.
00048     STRING(const STRING &string);  // Copy constructor.
00049     STRING(const char *string);  // Not explicit, so a const char* converts to STRING implicitly.
00050     STRING(const char *data, int length);  // Takes a char pointer plus an explicit length.
00051     ~STRING ();
00052 
00053     // Writes to the given file. Returns false in case of error.
00054     bool Serialize(FILE* fp) const;
00055     // Reads from the given file. Returns false in case of error.
00056     // If swap is true, assumes a big/little-endian swap is needed.
00057     bool DeSerialize(bool swap, FILE* fp);
00058     // Writes to the given file. Returns false in case of error.
00059     bool Serialize(tesseract::TFile* fp) const;
00060     // Reads from the given file. Returns false in case of error.
00061     // If swap is true, assumes a big/little-endian swap is needed.
00062     bool DeSerialize(bool swap, tesseract::TFile* fp);
00063 
00064     BOOL8 contains(const char c) const;  // presumably true when c occurs in the string -- confirm in the implementation
00065     inT32 length() const;  // presumably the character count without the terminator (strdup() below adds 1) -- confirm
00066     inT32 size() const { return length(); }  // Same value as length().
00067     const char *string() const;  // Read-only char pointer; InvariantOk() below expects NULL here when used_ is 0.
00068     const char *c_str() const;  // NOTE(review): looks like a synonym for string() -- confirm in the implementation
00069 
00070     inline char* strdup() const {  // Returns a copy in a buffer obtained with new[]; the caller must delete[] it.
00071      inT32 len = length() + 1;  // One extra char, presumably for the '\0' terminator.
00072      return strncpy(new char[len], GetCStr(), len);  // strncpy returns its destination, i.e. the new buffer.
00073     }
00074 
00075 #if STRING_IS_PROTECTED
00076     const char &operator[] (inT32 index) const;  // Protected build: element access is read-only.
00077     // len is number of chars in s to insert starting at index in this string
00078     void insert_range(inT32 index, const char*s, int len);
00079     void erase_range(inT32 index, int len);
00080 #else
00081     char &operator[] (inT32 index) const;  // Unprotected build: yields a mutable char reference even on a const STRING.
00082 #endif
00083     void split(const char c, GenericVector<STRING> *splited);  // presumably splits on c and stores the pieces in *splited -- confirm
00084     void truncate_at(inT32 index);  // presumably shortens the string so it ends at index -- confirm
00085 
00086     BOOL8 operator== (const STRING & string) const;
00087     BOOL8 operator!= (const STRING & string) const;
00088     BOOL8 operator!= (const char *string) const;  // NOTE(review): no operator==(const char*) counterpart is declared here.
00089 
00090     STRING & operator= (const char *string);
00091     STRING & operator= (const STRING & string);
00092 
00093     STRING operator+ (const STRING & string) const;
00094     STRING operator+ (const char ch) const;
00095 
00096     STRING & operator+= (const char *string);
00097     STRING & operator+= (const STRING & string);
00098     STRING & operator+= (const char ch);
00099 
00100     // Assignment for strings which are not null-terminated.
00101     void assign(const char *cstr, int len);
00102 
00103     // Appends the given string and int (as a %d) to this.
00104     // += cannot be used for ints as there is a char += operator that would
00105     // be ambiguous, and ints usually need a string before or between them
00106     // anyway.
00107     void add_str_int(const char* str, int number);
00108     // Appends the given string and double (as a %.8g) to this.
00109     void add_str_double(const char* str, double number);
00110 
00111     // ensure capacity but keep pointer encapsulated
00112     inline void ensure(inT32 min_capacity) { ensure_cstr(min_capacity); }  // Discards the buffer pointer that ensure_cstr() returns.
00113 
00114   private:
00115     typedef struct STRING_HEADER {  // Bookkeeping record placed at the start of the storage that data_ points to.
00116       // How much space was allocated in the string buffer for char data.
00117       int capacity_;
00118 
00119       // used_ is how much of the capacity is currently being used,
00120       // including a '\0' terminator.
00121       //
00122       // If used_ is 0 then string is NULL (not even the '\0')
00123       // else if used_ > 0 then it is strlen() + 1 (because it includes '\0')
00124       // else (used_ < 0) strlen is >= 0 (not NULL) but needs to be computed.
00125       //      this condition is set when encapsulation is violated because
00126       //      an API returned a mutable string.
00127       //
00128       // capacity_ - used_ = excess capacity that the string can grow
00129       //                     without reallocating
00130       mutable int used_;  // mutable so that FixHeader() can repair it on a const STRING.
00131     } STRING_HEADER;
00132 
00133     // To preserve the behavior of the old serialization, we only have space
00134     // for one pointer in this structure. So we are embedding a data structure
00135     // at the start of the storage that will hold additional state variables,
00136     // then storing the actual string contents immediately after.
00137     STRING_HEADER* data_;  // The only data member declared here: points at the header, with the chars following it.
00138 
00139     // returns the header part of the storage
00140     inline STRING_HEADER* GetHeader() {
00141       return data_;
00142     }
00143     inline const STRING_HEADER* GetHeader() const {
00144       return data_;
00145     }
00146 
00147     // returns the string data part of storage
00148     inline char* GetCStr() {
00149       return ((char *)data_) + sizeof(STRING_HEADER);  // Skip over the header to reach the first character.
00150     };  // NOTE(review): the ';' after the function body is redundant.
00151 
00152     inline const char* GetCStr() const {
00153       return ((const char *)data_) + sizeof(STRING_HEADER);  // Const overload of the same header skip.
00154     };  // NOTE(review): the ';' after the function body is redundant.
00155     inline bool InvariantOk() const {  // Performs a real check only when STRING_IS_PROTECTED is nonzero; otherwise always true.
00156 #if STRING_IS_PROTECTED
00157       return (GetHeader()->used_ == 0) ?
00158         (string() == NULL) : (GetHeader()->used_ == (strlen(string()) + 1));  // NOTE(review): compares int used_ with a size_t.
00159 #else
00160       return true;
00161 #endif
00162     }
00163 
00164     // Ensure string has requested capacity as optimization
00165     // to avoid unnecessary reallocations.
00166     // The return value is a cstr buffer with at least requested capacity
00167     char* ensure_cstr(inT32 min_capacity);
00168 
00169     void FixHeader() const;  // make used_ non-negative, even if const
00170 
00171     char* AllocData(int used, int capacity);  // presumably allocates header + capacity chars and returns the char area -- confirm
00172     void DiscardData();  // presumably releases the storage behind data_ -- confirm
00173 };
00174 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines