-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Access Unicode character database
--   
--   <tt>unicode-data</tt> provides Haskell APIs to efficiently access the
--   Unicode character database. Performance is the primary goal in the
--   design of this package.
--   
--   The Haskell data structures are generated programmatically from the
--   Unicode character database (UCD) files. The latest Unicode version
--   supported by this library is 13.0.0.
@package unicode-data
@version 0.1.0.1


-- | Fast, static bitmap lookup utilities
module Unicode.Internal.Bits

-- | <tt>lookupBit64 addr index</tt> looks up the bit stored at bit index
--   <tt>index</tt> using a bitmap starting at the address <tt>addr</tt>.
--   Looks up the 64-bit word containing the bit and then the bit in that
--   word. The caller must make sure that the 64-bit word at the byte
--   address addr + (index / 64) * 8 is legally accessible memory.
lookupBit64 :: Addr# -> Int -> Bool


module Unicode.Internal.Char.DerivedCoreProperties
isUppercase :: Char -> Bool
isLowercase :: Char -> Bool
isAlphabetic :: Char -> Bool


-- | Case and case mapping related functions.
module Unicode.Char.Case

-- | Returns <a>True</a> for lower-case letters.
--   
--   <pre>
--   isLower c == Data.Char.isLower c
--   </pre>
isLower :: Char -> Bool

-- | Returns <a>True</a> for upper-case or title-case letters. Title case
--   is used by a small number of letter ligatures like the
--   single-character form of <i>Lj</i>.
--   
--   <pre>
--   isUpper c == Data.Char.isUpper c
--   </pre>
isUpper :: Char -> Bool


module Unicode.Internal.Char.PropList
isWhite_Space :: Char -> Bool


module Unicode.Internal.Char.UnicodeData.CombiningClass
combiningClass :: Char -> Int
isCombining :: Char -> Bool


module Unicode.Internal.Char.UnicodeData.Compositions
compose :: Char -> Char -> Maybe Char
composeStarters :: Char -> Char -> Maybe Char
isSecondStarter :: Char -> Bool


module Unicode.Internal.Char.UnicodeData.Decomposable
isDecomposable :: Char -> Bool


module Unicode.Internal.Char.UnicodeData.DecomposableK
isDecomposable :: Char -> Bool


module Unicode.Internal.Char.UnicodeData.Decompositions
decompose :: Char -> [Char]


module Unicode.Internal.Char.UnicodeData.DecompositionsK2
decompose :: Char -> [Char]


module Unicode.Internal.Char.UnicodeData.DecompositionsK
decompose :: Char -> [Char]


-- | Fast division by known constants.
--   
--   Division by a constant can be replaced by a double-word
--   multiplication. Roughly speaking, instead of dividing by x, multiply
--   by 2^64/x, obtaining 128-bit-long product, and take upper 64 bits. The
--   peculiar details can be found in Hacker's Delight, Ch. 10.
--   
--   Even GHC 8.10 does not provide a primitive for a signed double-word
--   multiplication, but since our applications do not involve negative
--   integers, we convert <a>Int</a> to <a>Word</a> and use
--   <a>timesWord#</a>.
--   
--   Textbook unsigned division by 21 or 28 becomes involved when an
--   argument is allowed to take the full range of <a>Word</a> up to 2^64.
--   Luckily, in our case the argument was cast from <a>Int</a>, so we
--   can guarantee that it is below 2^63.
module Unicode.Internal.Division

-- | Input must be non-negative.
--   
--   Instead of division by 21, we compute
--   floor(floor((2^68+17)/21 * n) / 2^68) = floor((2^68+17)/21 * n/2^68) =
--   floor(n/21 + (n/2^63 * 17/32)/21) = floor(n/21),
--   because n/2^63 * 17/32 &lt; 1.
quotRem21 :: Int -> (Int, Int)

-- | Input must be non-negative.
--   
--   Instead of division by 28, we compute
--   floor(floor((2^65+3)/7 * n) / 2^67) = floor((2^65+3)/7 * n/2^67) =
--   floor(n/28 + (n/2^63 * 3/4)/28) = floor(n/28),
--   because n/2^63 * 3/4 &lt; 1.
quotRem28 :: Int -> (Int, Int)


-- | General character property related functions.
module Unicode.Char.General

-- | Returns <a>True</a> for alphabetic Unicode characters (lower-case,
--   upper-case and title-case letters, plus letters of caseless scripts
--   and modifier letters).
--   
--   <pre>
--   isLetter c == Data.Char.isLetter c
--   </pre>
isLetter :: Char -> Bool

-- | Returns <a>True</a> for any whitespace characters, and the control
--   characters <tt>\t</tt>, <tt>\n</tt>, <tt>\r</tt>, <tt>\f</tt>,
--   <tt>\v</tt>.
--   
--   <pre>
--   isSpace c == Data.Char.isSpace c
--   </pre>
isSpace :: Char -> Bool

-- | Determine whether a character is a jamo L, V or T character.
isJamo :: Char -> Bool

-- | Number of vowel and trailing consonant jamo combinations, i.e. the
--   number of precomposed Hangul syllables per leading consonant.
--   
--   <pre>
--   jamoNCount = jamoVCount * jamoTCount
--   </pre>
jamoNCount :: Int

-- | First leading consonant jamo.
jamoLFirst :: Int

-- | Given a Unicode character, if it is a leading jamo, return its index
--   in the list of leading jamo consonants, otherwise return
--   <a>Nothing</a>.
jamoLIndex :: Char -> Maybe Int

-- | Last leading consonant jamo.
jamoLLast :: Int

-- | First vowel jamo.
jamoVFirst :: Int

-- | Total count of vowel jamo.
jamoVCount :: Int

-- | Given a Unicode character, if it is a vowel jamo, return its index in
--   the list of vowel jamo, otherwise return <a>Nothing</a>.
jamoVIndex :: Char -> Maybe Int

-- | Last vowel jamo.
jamoVLast :: Int

-- | The first trailing consonant jamo.
--   
--   Note that <a>jamoTFirst</a> does not represent a valid T; it
--   represents a missing T, i.e. LV without a T. See comments under
--   <a>jamoTIndex</a>.
jamoTFirst :: Int

-- | Total count of trailing consonant jamo.
jamoTCount :: Int

-- | Given a Unicode character, if it is a trailing jamo consonant, return
--   its index in the list of trailing jamo consonants, otherwise return
--   <a>Nothing</a>.
--   
--   Note that index 0 is not a valid index for a trailing consonant. Index
--   0 corresponds to an LV syllable, without a T. See "Hangul Syllable
--   Decomposition" in the Conformance chapter of the Unicode standard for
--   more details.
jamoTIndex :: Char -> Maybe Int

-- | Last trailing consonant jamo.
jamoTLast :: Int

-- | Codepoint of the first precomposed Hangul syllable.
hangulFirst :: Int

-- | Codepoint of the last precomposed Hangul syllable.
hangulLast :: Int

-- | Determine if the given character is a precomposed Hangul syllable.
isHangul :: Char -> Bool

-- | Determine if the given character is a Hangul LV syllable.
isHangulLV :: Char -> Bool


-- | Low level Unicode database functions to facilitate Unicode
--   normalization.
--   
--   For more information on Unicode normalization please refer to the
--   following sections of the <a>Unicode standard</a>:
--   
--   <ul>
--   <li>2 General Structure<ul><li>2.3 Compatibility
--   Characters</li><li>2.12 Equivalent Sequences</li></ul></li>
--   <li>3 Conformance<ul><li>3.6 Combination</li><li>3.7
--   Decomposition</li><li>3.11 Normalization Forms</li><li>3.12 Conjoining
--   Jamo Behavior</li></ul></li>
--   <li>4 Character Properties<ul><li>4.3 Combining Classes</li></ul></li>
--   <li><a>Unicode® Standard Annex #15 - Unicode Normalization
--   Forms</a></li>
--   <li><a>Unicode® Standard Annex #44 - Unicode Character
--   Database</a></li>
--   </ul>
module Unicode.Char.Normalization

-- | Returns <a>True</a> if a character is a combining character.
isCombining :: Char -> Bool

-- | Returns the combining class of a character.
combiningClass :: Char -> Int

-- | Return <a>True</a> if a starter character may combine with some
--   preceding starter character.
isCombiningStarter :: Char -> Bool

-- | Compose a starter character (combining class 0) with a combining
--   character (non-zero combining class). Returns the composed character
--   if the starter combines with the combining character, returns
--   <a>Nothing</a> otherwise.
compose :: Char -> Char -> Maybe Char

-- | Compose a starter character with another starter character. Returns
--   the composed character if the two starters combine, returns
--   <a>Nothing</a> otherwise.
composeStarters :: Char -> Char -> Maybe Char

-- | Whether we are decomposing in canonical or compatibility mode.
data DecomposeMode
Canonical :: DecomposeMode
Kompat :: DecomposeMode

-- | Given a non-Hangul character determine if the character is
--   decomposable. Note that in case of compatibility decompositions, a
--   character may decompose into a single compatibility character.
isDecomposable :: DecomposeMode -> Char -> Bool

-- | Decompose a non-Hangul character into its canonical or compatibility
--   decompositions. Note that the resulting characters may further
--   decompose.
decompose :: DecomposeMode -> Char -> [Char]

-- | Decompose a Hangul syllable into its corresponding Jamo characters.
decomposeHangul :: Char -> (Char, Char, Char)


-- | This module provides APIs to access the Unicode character database
--   (UCD) corresponding to <a>Unicode Standard version 13.0.0</a>.
--   
--   This module re-exports several sub-modules under it. The sub-module
--   structure under <a>Char</a> is largely based on the <a>"Property Index
--   by Scope of Use" in Unicode® Standard Annex #44</a>.
--   
--   The <tt>Unicode.Char.*</tt> modules in turn depend on
--   <tt>Unicode.Internal.Char.*</tt> modules which are programmatically
--   generated from the Unicode standard's Unicode character database
--   files. The module structure under <tt>Unicode.Internal.Char</tt> is
--   largely based on the UCD text file names from which the properties are
--   generated.
--   
--   For the original UCD files used in this code please refer to the
--   <tt>UCD</tt> section on the Unicode standard page. See
--   <a>https://www.unicode.org/reports/tr44/</a> to understand the
--   contents and the format of the Unicode database files.
module Unicode.Char
