32 #ifndef _QORE_ENCODING_H 34 #define _QORE_ENCODING_H 42 #include <qore/QoreThreadLock.h> 71 struct qore_encoding_private;
// Presumably the maximum character width in bytes for the encoding
// (cf. getMaxCharWidth()) - TODO confirm against the implementation.
96 unsigned char maxwidth;
// Pointer to the qore_encoding_private struct, which is only forward-declared
// in the visible part of this header.
98 qore_encoding_private* priv;
// Destructor. Declared DLLLOCAL, whereas the accessors are DLLEXPORT;
// presumably it is not exported from the library - confirm against the macro definitions.
103 DLLLOCAL ~QoreEncoding();
// Returns the string code (for example UTF-8) for the encoding.
167 DLLEXPORT
const char*
getCode()
const;
// Returns the description for the encoding.
170 DLLEXPORT
const char*
getDesc()
const;
// Returns the unicode code point for the given character.
// p must point to a complete character; presumably exactly one character
// is read - confirm against the implementation.
189 DLLLOCAL
unsigned getUnicode(
const char* p)
const;
// Map from a C-string key to a mutable QoreEncoding pointer, ordered by the
// ltcstrcase comparator (presumably a case-insensitive C-string compare - confirm).
193 typedef std::map<const char*, QoreEncoding*, ltcstrcase> encoding_map_t;
// Same key type and comparator, but the mapped encodings are const.
194 typedef std::map<const char*, const QoreEncoding*, ltcstrcase> const_encoding_map_t;
// Static map of type encoding_map_t; presumably the known encodings keyed by
// name - confirm against the implementation.
203 DLLLOCAL
static encoding_map_t emap;
// Static map of type const_encoding_map_t; presumably alias name to encoding
// (cf. addAlias(), showAliases()) - confirm against the implementation.
204 DLLLOCAL
static const_encoding_map_t amap;
// Takes an encoding name and returns a pointer to a const QoreEncoding.
// NOTE(review): the name suggests a lookup that does not take a lock itself,
// so the caller would need to hold it - confirm against the implementation.
208 DLLLOCAL
static const QoreEncoding* findUnlocked(
const char* name);
// Presumably registers alias as an additional name for the encoding qcs
// - confirm against the implementation.
212 DLLEXPORT
static void addAlias(
const QoreEncoding* qcs,
const char* alias);
// Takes an encoding name and returns a pointer to a const QoreEncoding.
// Presumably creates the encoding when no match is found, as the name
// suggests - confirm against the implementation.
215 DLLEXPORT
static const QoreEncoding* findCreate(
const char* name);
// Presumably prints the known encodings; the output destination is not
// visible here - confirm against the implementation.
221 DLLEXPORT
static void showEncodings();
// Presumably prints the known encoding aliases; the output destination is
// not visible here - confirm against the implementation.
224 DLLEXPORT
static void showAliases();
// Initialization entry point taking a C string def.
// NOTE(review): def presumably names the default encoding (cf. QCS_DEFAULT)
// - confirm against the implementation.
229 DLLLOCAL
static void init(
const char* def);
// Destructor. Declared DLLLOCAL; presumably not exported from the library
// - confirm against the macro definitions.
231 DLLLOCAL ~QoreEncodingManager();
266 #endif // _QORE_ENCODING_H
qore_size_t(* mbcs_end_t)(const char *str, const char *end, qore_size_t num_chars, bool &invalid)
for multi-byte character set encodings: gives the number of bytes for the number of chars in the string or up to the end of the string
Definition: QoreEncoding.h:55
DLLEXPORT const QoreEncoding * QCS_UTF8
UTF-8 multi-byte encoding (only UTF-8 and UTF-16* are multi-byte encodings)
DLLEXPORT qore_size_t getLength(const char *p, const char *end, bool &invalid) const
gives the length of the string in characters
defines string encoding functions in Qore
Definition: QoreEncoding.h:85
DLLEXPORT const QoreEncoding * QCS_ISO_8859_8
Hebrew character set.
DLLEXPORT const QoreEncoding * QCS_ISO_8859_1
latin-1, Western European encoding
DLLEXPORT const QoreEncoding * QCS_UTF16
UTF-16 (only UTF-8 and UTF-16* are multi-byte encodings) - do not use; use UTF-8 instead.
DLLEXPORT const QoreEncoding * QCS_DEFAULT
the default encoding for the Qore library
DLLEXPORT qore_size_t getCharLen(const char *p, qore_size_t valid_len) const
gives the number of total bytes for the character given one or more characters
DLLEXPORT int getMaxCharWidth() const
returns the maximum character width in bytes for the encoding
qore_size_t(* mbcs_charlen_t)(const char *str, qore_size_t valid_len)
for multi-byte encodings: gives the number of total bytes for the character given one or more characters
Definition: QoreEncoding.h:65
DLLLOCAL unsigned getUnicode(const char *p) const
returns the unicode code point for the given character, must be a complete character and only one character
DLLEXPORT qore_size_t getByteLen(const char *p, const char *end, qore_size_t c, bool &invalid) const
gives the number of bytes for the number of chars in the string or up to the end of the string ...
DLLEXPORT const QoreEncoding * QCS_ISO_8859_2
latin-2, Central European encoding
DLLEXPORT const QoreEncoding * QCS_ISO_8859_11
Thai character set.
manages encodings in Qore
Definition: QoreEncoding.h:201
DLLEXPORT const QoreEncoding * QCS_ISO_8859_3
latin-3, Southern European character set
size_t qore_size_t
used for sizes (same range as a pointer)
Definition: common.h:71
DLLEXPORT const QoreEncoding * QCS_ISO_8859_4
latin-4, Northern European character set
DLLEXPORT const QoreEncoding * QCS_USASCII
ASCII encoding
qore_size_t(* mbcs_pos_t)(const char *str, const char *ptr, bool &invalid)
for multi-byte character set encodings: gives the character position of the ptr
Definition: QoreEncoding.h:58
unsigned(* mbcs_get_unicode_t)(const char *p)
for multi-byte non-ASCII-compatible character encodings: returns the unicode code point for the given character
Definition: QoreEncoding.h:68
DLLEXPORT const char * getDesc() const
returns the description for the encoding
DLLEXPORT const QoreEncoding * QCS_ISO_8859_10
latin-6, Nordic character set
Qore's string type supported by the QoreEncoding class.
Definition: QoreString.h:82
DLLEXPORT const QoreEncoding * QCS_KOI8_U
Ukrainian: Kod Obmena Informatsiey, 8 bit.
DLLEXPORT QoreEncodingManager QEM
the QoreEncodingManager object
DLLEXPORT const QoreEncoding * QCS_ISO_8859_9
latin-5, Turkish character set
DLLEXPORT const QoreEncoding * QCS_UTF16BE
UTF-16BE (only UTF-8 and UTF-16* are multi-byte encodings) - do not use; use UTF-8 instead...
DLLEXPORT const QoreEncoding * QCS_KOI7
Russian: Kod Obmena Informatsiey, 7 bit characters.
DLLEXPORT bool isAsciiCompat() const
returns true if the character encoding is backwards-compatible with ASCII
qore_size_t(* mbcs_length_t)(const char *str, const char *end, bool &invalid)
for multi-byte character set encodings: gives the length of the string in characters ...
Definition: QoreEncoding.h:52
container for holding Qore-language exception information and also for registering a "thread_exit" call
Definition: ExceptionSink.h:43
DLLEXPORT const QoreEncoding * QCS_ISO_8859_14
latin-8, Celtic character set
DLLEXPORT const QoreEncoding * QCS_ISO_8859_6
Arabic character set.
DLLEXPORT unsigned getMinCharWidth() const
returns the minimum character width in bytes for the encoding
DLLEXPORT const QoreEncoding * QCS_ISO_8859_5
Cyrillic character set.
DLLEXPORT qore_size_t getCharPos(const char *p, const char *end, bool &invalid) const
gives the character position (number of characters) starting from the first pointer to the second ...
provides a mutually-exclusive thread lock
Definition: QoreThreadLock.h:49
DLLEXPORT const QoreEncoding * QCS_UTF16LE
UTF-16LE (only UTF-8 and UTF-16* are multi-byte encodings) - do not use; use UTF-8 instead...
DLLEXPORT const QoreEncoding * QCS_ISO_8859_16
latin-10, Southeast European character set
DLLEXPORT const char * getCode() const
returns the string code (ex: "UTF-8") for the encoding
DLLEXPORT bool isMultiByte() const
returns true if the encoding is a multi-byte encoding
DLLEXPORT const QoreEncoding * QCS_ISO_8859_15
latin-9, Western European with euro symbol
DLLEXPORT const QoreEncoding * QCS_KOI8_R
Russian: Kod Obmena Informatsiey, 8 bit.
DLLEXPORT const QoreEncoding * QCS_ISO_8859_7
Greek character set.
DLLEXPORT const QoreEncoding * QCS_ISO_8859_13
latin-7, Baltic rim character set