Class Utils

java.lang.Object
org.languagetool.tagging.ga.Utils

public class Utils extends Object
  • Field Details

    • guesses

      private static final List<Utils.SuffixGuess> guesses
    • MATHEMATICAL_BOLD_CAPITAL_A

      private static final int MATHEMATICAL_BOLD_CAPITAL_A
      See Also:
    • MATHEMATICAL_BOLD_CAPITAL_Z

      private static final int MATHEMATICAL_BOLD_CAPITAL_Z
      See Also:
    • MATHEMATICAL_BOLD_SMALL_A

      private static final int MATHEMATICAL_BOLD_SMALL_A
      See Also:
    • MATHEMATICAL_BOLD_SMALL_Z

      private static final int MATHEMATICAL_BOLD_SMALL_Z
      See Also:
    • MATHEMATICAL_ITALIC_CAPITAL_A

      private static final int MATHEMATICAL_ITALIC_CAPITAL_A
      See Also:
    • MATHEMATICAL_ITALIC_CAPITAL_Z

      private static final int MATHEMATICAL_ITALIC_CAPITAL_Z
      See Also:
    • MATHEMATICAL_ITALIC_SMALL_A

      private static final int MATHEMATICAL_ITALIC_SMALL_A
      See Also:
    • MATHEMATICAL_ITALIC_SMALL_Z

      private static final int MATHEMATICAL_ITALIC_SMALL_Z
      See Also:
    • MATHEMATICAL_BOLD_ITALIC_CAPITAL_A

      private static final int MATHEMATICAL_BOLD_ITALIC_CAPITAL_A
      See Also:
    • MATHEMATICAL_BOLD_ITALIC_CAPITAL_Z

      private static final int MATHEMATICAL_BOLD_ITALIC_CAPITAL_Z
      See Also:
    • MATHEMATICAL_BOLD_ITALIC_SMALL_A

      private static final int MATHEMATICAL_BOLD_ITALIC_SMALL_A
      See Also:
    • MATHEMATICAL_BOLD_ITALIC_SMALL_Z

      private static final int MATHEMATICAL_BOLD_ITALIC_SMALL_Z
      See Also:
    • MATHEMATICAL_SCRIPT_CAPITAL_A

      private static final int MATHEMATICAL_SCRIPT_CAPITAL_A
      See Also:
    • MATHEMATICAL_SCRIPT_CAPITAL_Z

      private static final int MATHEMATICAL_SCRIPT_CAPITAL_Z
      See Also:
    • MATHEMATICAL_SCRIPT_SMALL_A

      private static final int MATHEMATICAL_SCRIPT_SMALL_A
      See Also:
    • MATHEMATICAL_SCRIPT_SMALL_Z

      private static final int MATHEMATICAL_SCRIPT_SMALL_Z
      See Also:
    • MATHEMATICAL_BOLD_SCRIPT_CAPITAL_A

      private static final int MATHEMATICAL_BOLD_SCRIPT_CAPITAL_A
      See Also:
    • MATHEMATICAL_BOLD_SCRIPT_CAPITAL_Z

      private static final int MATHEMATICAL_BOLD_SCRIPT_CAPITAL_Z
      See Also:
    • MATHEMATICAL_BOLD_SCRIPT_SMALL_A

      private static final int MATHEMATICAL_BOLD_SCRIPT_SMALL_A
      See Also:
    • MATHEMATICAL_BOLD_SCRIPT_SMALL_Z

      private static final int MATHEMATICAL_BOLD_SCRIPT_SMALL_Z
      See Also:
    • MATHEMATICAL_FRAKTUR_CAPITAL_A

      private static final int MATHEMATICAL_FRAKTUR_CAPITAL_A
      See Also:
    • MATHEMATICAL_FRAKTUR_CAPITAL_Z

      private static final int MATHEMATICAL_FRAKTUR_CAPITAL_Z
      See Also:
    • MATHEMATICAL_FRAKTUR_SMALL_A

      private static final int MATHEMATICAL_FRAKTUR_SMALL_A
      See Also:
    • MATHEMATICAL_FRAKTUR_SMALL_Z

      private static final int MATHEMATICAL_FRAKTUR_SMALL_Z
      See Also:
    • MATHEMATICAL_DOUBLESTRUCK_CAPITAL_A

      private static final int MATHEMATICAL_DOUBLESTRUCK_CAPITAL_A
      See Also:
    • MATHEMATICAL_DOUBLESTRUCK_CAPITAL_Z

      private static final int MATHEMATICAL_DOUBLESTRUCK_CAPITAL_Z
      See Also:
    • MATHEMATICAL_DOUBLESTRUCK_SMALL_A

      private static final int MATHEMATICAL_DOUBLESTRUCK_SMALL_A
      See Also:
    • MATHEMATICAL_DOUBLESTRUCK_SMALL_Z

      private static final int MATHEMATICAL_DOUBLESTRUCK_SMALL_Z
      See Also:
    • MATHEMATICAL_BOLD_FRAKTUR_CAPITAL_A

      private static final int MATHEMATICAL_BOLD_FRAKTUR_CAPITAL_A
      See Also:
    • MATHEMATICAL_BOLD_FRAKTUR_CAPITAL_Z

      private static final int MATHEMATICAL_BOLD_FRAKTUR_CAPITAL_Z
      See Also:
    • MATHEMATICAL_BOLD_FRAKTUR_SMALL_A

      private static final int MATHEMATICAL_BOLD_FRAKTUR_SMALL_A
      See Also:
    • MATHEMATICAL_BOLD_FRAKTUR_SMALL_Z

      private static final int MATHEMATICAL_BOLD_FRAKTUR_SMALL_Z
      See Also:
    • MATHEMATICAL_SANSSERIF_CAPITAL_A

      private static final int MATHEMATICAL_SANSSERIF_CAPITAL_A
      See Also:
    • MATHEMATICAL_SANSSERIF_CAPITAL_Z

      private static final int MATHEMATICAL_SANSSERIF_CAPITAL_Z
      See Also:
    • MATHEMATICAL_SANSSERIF_SMALL_A

      private static final int MATHEMATICAL_SANSSERIF_SMALL_A
      See Also:
    • MATHEMATICAL_SANSSERIF_SMALL_Z

      private static final int MATHEMATICAL_SANSSERIF_SMALL_Z
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_CAPITAL_A

      private static final int MATHEMATICAL_SANSSERIF_BOLD_CAPITAL_A
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_CAPITAL_Z

      private static final int MATHEMATICAL_SANSSERIF_BOLD_CAPITAL_Z
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_SMALL_A

      private static final int MATHEMATICAL_SANSSERIF_BOLD_SMALL_A
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_SMALL_Z

      private static final int MATHEMATICAL_SANSSERIF_BOLD_SMALL_Z
      See Also:
    • MATHEMATICAL_SANSSERIF_ITALIC_CAPITAL_A

      private static final int MATHEMATICAL_SANSSERIF_ITALIC_CAPITAL_A
      See Also:
    • MATHEMATICAL_SANSSERIF_ITALIC_CAPITAL_Z

      private static final int MATHEMATICAL_SANSSERIF_ITALIC_CAPITAL_Z
      See Also:
    • MATHEMATICAL_SANSSERIF_ITALIC_SMALL_A

      private static final int MATHEMATICAL_SANSSERIF_ITALIC_SMALL_A
      See Also:
    • MATHEMATICAL_SANSSERIF_ITALIC_SMALL_Z

      private static final int MATHEMATICAL_SANSSERIF_ITALIC_SMALL_Z
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_ITALIC_CAPITAL_A

      private static final int MATHEMATICAL_SANSSERIF_BOLD_ITALIC_CAPITAL_A
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_ITALIC_CAPITAL_Z

      private static final int MATHEMATICAL_SANSSERIF_BOLD_ITALIC_CAPITAL_Z
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_ITALIC_SMALL_A

      private static final int MATHEMATICAL_SANSSERIF_BOLD_ITALIC_SMALL_A
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_ITALIC_SMALL_Z

      private static final int MATHEMATICAL_SANSSERIF_BOLD_ITALIC_SMALL_Z
      See Also:
    • MATHEMATICAL_MONOSPACE_CAPITAL_A

      private static final int MATHEMATICAL_MONOSPACE_CAPITAL_A
      See Also:
    • MATHEMATICAL_MONOSPACE_CAPITAL_Z

      private static final int MATHEMATICAL_MONOSPACE_CAPITAL_Z
      See Also:
    • MATHEMATICAL_MONOSPACE_SMALL_A

      private static final int MATHEMATICAL_MONOSPACE_SMALL_A
      See Also:
    • MATHEMATICAL_MONOSPACE_SMALL_Z

      private static final int MATHEMATICAL_MONOSPACE_SMALL_Z
      See Also:
    • MATHEMATICAL_ITALIC_SMALL_DOTLESS_I

      private static final int MATHEMATICAL_ITALIC_SMALL_DOTLESS_I
      See Also:
    • MATHEMATICAL_ITALIC_SMALL_DOTLESS_J

      private static final int MATHEMATICAL_ITALIC_SMALL_DOTLESS_J
      See Also:
    • MATHEMATICAL_BOLD_CAPITAL_ALPHA

      private static final int MATHEMATICAL_BOLD_CAPITAL_ALPHA
      See Also:
    • MATHEMATICAL_BOLD_CAPITAL_OMEGA

      private static final int MATHEMATICAL_BOLD_CAPITAL_OMEGA
      See Also:
    • MATHEMATICAL_BOLD_SMALL_ALPHA

      private static final int MATHEMATICAL_BOLD_SMALL_ALPHA
      See Also:
    • MATHEMATICAL_BOLD_SMALL_OMEGA

      private static final int MATHEMATICAL_BOLD_SMALL_OMEGA
      See Also:
    • MATHEMATICAL_ITALIC_CAPITAL_ALPHA

      private static final int MATHEMATICAL_ITALIC_CAPITAL_ALPHA
      See Also:
    • MATHEMATICAL_ITALIC_CAPITAL_OMEGA

      private static final int MATHEMATICAL_ITALIC_CAPITAL_OMEGA
      See Also:
    • MATHEMATICAL_ITALIC_SMALL_ALPHA

      private static final int MATHEMATICAL_ITALIC_SMALL_ALPHA
      See Also:
    • MATHEMATICAL_ITALIC_SMALL_OMEGA

      private static final int MATHEMATICAL_ITALIC_SMALL_OMEGA
      See Also:
    • MATHEMATICAL_BOLD_ITALIC_CAPITAL_ALPHA

      private static final int MATHEMATICAL_BOLD_ITALIC_CAPITAL_ALPHA
      See Also:
    • MATHEMATICAL_BOLD_ITALIC_CAPITAL_OMEGA

      private static final int MATHEMATICAL_BOLD_ITALIC_CAPITAL_OMEGA
      See Also:
    • MATHEMATICAL_BOLD_ITALIC_SMALL_ALPHA

      private static final int MATHEMATICAL_BOLD_ITALIC_SMALL_ALPHA
      See Also:
    • MATHEMATICAL_BOLD_ITALIC_SMALL_OMEGA

      private static final int MATHEMATICAL_BOLD_ITALIC_SMALL_OMEGA
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_CAPITAL_ALPHA

      private static final int MATHEMATICAL_SANSSERIF_BOLD_CAPITAL_ALPHA
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_CAPITAL_OMEGA

      private static final int MATHEMATICAL_SANSSERIF_BOLD_CAPITAL_OMEGA
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_SMALL_ALPHA

      private static final int MATHEMATICAL_SANSSERIF_BOLD_SMALL_ALPHA
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_SMALL_OMEGA

      private static final int MATHEMATICAL_SANSSERIF_BOLD_SMALL_OMEGA
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_ITALIC_CAPITAL_ALPHA

      private static final int MATHEMATICAL_SANSSERIF_BOLD_ITALIC_CAPITAL_ALPHA
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_ITALIC_CAPITAL_OMEGA

      private static final int MATHEMATICAL_SANSSERIF_BOLD_ITALIC_CAPITAL_OMEGA
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_ITALIC_SMALL_ALPHA

      private static final int MATHEMATICAL_SANSSERIF_BOLD_ITALIC_SMALL_ALPHA
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_ITALIC_SMALL_OMEGA

      private static final int MATHEMATICAL_SANSSERIF_BOLD_ITALIC_SMALL_OMEGA
      See Also:
    • MATHEMATICAL_BOLD_DIGIT_ZERO

      private static final int MATHEMATICAL_BOLD_DIGIT_ZERO
      See Also:
    • MATHEMATICAL_BOLD_DIGIT_NINE

      private static final int MATHEMATICAL_BOLD_DIGIT_NINE
      See Also:
    • MATHEMATICAL_DOUBLESTRUCK_DIGIT_ZERO

      private static final int MATHEMATICAL_DOUBLESTRUCK_DIGIT_ZERO
      See Also:
    • MATHEMATICAL_DOUBLESTRUCK_DIGIT_NINE

      private static final int MATHEMATICAL_DOUBLESTRUCK_DIGIT_NINE
      See Also:
    • MATHEMATICAL_SANSSERIF_DIGIT_ZERO

      private static final int MATHEMATICAL_SANSSERIF_DIGIT_ZERO
      See Also:
    • MATHEMATICAL_SANSSERIF_DIGIT_NINE

      private static final int MATHEMATICAL_SANSSERIF_DIGIT_NINE
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_DIGIT_ZERO

      private static final int MATHEMATICAL_SANSSERIF_BOLD_DIGIT_ZERO
      See Also:
    • MATHEMATICAL_SANSSERIF_BOLD_DIGIT_NINE

      private static final int MATHEMATICAL_SANSSERIF_BOLD_DIGIT_NINE
      See Also:
    • MATHEMATICAL_MONOSPACE_DIGIT_ZERO

      private static final int MATHEMATICAL_MONOSPACE_DIGIT_ZERO
      See Also:
    • MATHEMATICAL_MONOSPACE_DIGIT_NINE

      private static final int MATHEMATICAL_MONOSPACE_DIGIT_NINE
      See Also:
    • CAPITAL_A

      private static final int CAPITAL_A
      See Also:
    • SMALL_A

      private static final int SMALL_A
      See Also:
    • CAPITAL_ALPHA

      private static final int CAPITAL_ALPHA
      See Also:
    • SMALL_ALPHA

      private static final int SMALL_ALPHA
      See Also:
    • DIGIT_ZERO

      private static final int DIGIT_ZERO
      See Also:
  • Constructor Details

    • Utils

      public Utils()
  • Method Details

    • fixSuffix

      public static Retaggable fixSuffix(String in)
    • morphWord

      public static List<Retaggable> morphWord(String in)
    • demutate

      public static Retaggable demutate(String in)
    • unEclipse

      public static String unEclipse(String in)
    • unLenite

      public static String unLenite(String in)
      Attempts to unlenite a string (see lenite(String)). Deliberately does not check whether the first character is one that ought to be lenited (this can be checked in XML rules).
    • unLeniteDefiniteS

      public static String unLeniteDefiniteS(String in)
      Removes lenition from a word beginning with 's', following the definite article; as an exception to conventional lenition, this is a 't' prefix. The standard representation is a lowercase 't', regardless of the case of the word; this function additionally checks for incorrect (e.g., capital 'T') and prestandard (e.g., hyphenated 't-') versions.
      Parameters:
      in - The written form
      Returns:
      The form with lenition removed
    • unEclipseF

      public static String unEclipseF(String in)
    • unEclipseChar

      public static String unEclipseChar(String in, char first, char second)
      Helper to uneclipse single-letter consonant eclipsis (i.e., not bhfear or n-éin), handling miscapitalised eclipsed words: Gcarr -> Carr, etc.
      Parameters:
      in - string to uneclipse
      first - first (eclipsis) character
      second - second character; first character of the word proper
      Returns:
      String with uneclipsed word or null if no match
    • isUpperVowel

      public static boolean isUpperVowel(char c)
    • isLowerVowel

      public static boolean isLowerVowel(char c)
    • isVowel

      public static boolean isVowel(char c)
    • isUpperLenitable

      public static boolean isUpperLenitable(char c)
    • isLowerLenitable

      public static boolean isLowerLenitable(char c)
    • isSLenitable

      public static boolean isSLenitable(char c)
      The (non-definite) lenited form of 's', 'sh', is pronounced like 'h' in English; words beginning with 's' can only have lenition applied if the following letter would be easily pronounced after this sound. This function checks whether that second letter is one of them.
      Parameters:
      c - The second letter of a word beginning with 's'
      Returns:
      true if the word can be lenited
    • lenite

      public static String lenite(String in)
      Lenites a word. ("Lenition" in Irish grammar is an initial mutation, historically related to phonetic lenition; its written representation is an 'h' after the initial consonant. In this context, to "lenite" is to apply lenition.)
      Parameters:
      in - word form to be lenited
      Returns:
      lenited form, or unmodified string if it cannot be lenited
    • eclipse

      public static String eclipse(String in)
      Eclipses a word. ("Eclipsis" in Irish grammar is an initial mutation, represented as a prefix to the word. For consonants, the prefix replaces the pronunciation of the initial letter, i.e., 'f' is eclipsed with the prefix 'bh' - 'focal' becomes 'bhfocal' - but only 'bh' (not 'f') is pronounced. For vowels, an initial 'n' is added: hyphenated before a lowercase word, lowercased but not hyphenated before an uppercase or titlecase word. In this context, to "eclipse" is to apply eclipsis.)
      Parameters:
      in - word form to be eclipsed
      Returns:
      eclipsed form, or unmodified string if it cannot be eclipsed
    • toLowerCaseIrish

      public static String toLowerCaseIrish(String s)
      Case folding in Irish is non-trivial: initial mutations that prefix the word are always written in lowercase; 'n' and 't' are written with a hyphen before a lowercase vowel. Converting to uppercase is impossible without a dictionary: unlike 'n' and 't' (and unlike Scots Gaelic), 'h' is not written hyphenated as 'h' was not traditionally a 'letter', per se, but was used to indicate phonetic changes: in modern Irish, there are enough words that begin with 'h' that converting to uppercase is impossible without a dictionary.
      Parameters:
      s - the word to lowercase
      Returns:
      lowercased word
    • startsWithUppercase

      public static boolean startsWithUppercase(String s)
      Equivalent of StringTools.startsWithUppercase(String), adapted for Irish case folding oddities.
      Parameters:
      s - String to check
      Returns:
      true if string starts with uppercase, taking into account initial mutations which must remain lowercase.
    • isAllUppercase

      public static boolean isAllUppercase(String s)
    • startsWithMutatedUppercase

      private static boolean startsWithMutatedUppercase(String s)
    • isUpperPonc

      private static boolean isUpperPonc(char c)
    • isLowerPonc

      private static boolean isLowerPonc(char c)
    • isPonc

      public static boolean isPonc(char c)
      Check if the character is dotted ('ponc' in Irish)
      Parameters:
      c - the character to check
      Returns:
      true if the character is dotted, false otherwise
    • containsPonc

      public static boolean containsPonc(String s)
    • unPonc

      private static char unPonc(char c)
    • unPonc

      public static String unPonc(String s)
      Converts pre-standard lenition to modern: converts dotted (= ponc) letters to the equivalent undotted letter, followed by 'h'.
      Parameters:
      s - string to convert
      Returns:
      converted string
    • isAllMathsChars

      public static boolean isAllMathsChars(String s)
    • isAllHalfWidthChars

      public static boolean isAllHalfWidthChars(String s)
    • halfwidthLatinToLatin

      public static String halfwidthLatinToLatin(String s)
    • getMathsChar

      private static char getMathsChar(char c)
    • getMathsChar

      private static char getMathsChar(char c, boolean normaliseGreek, boolean normaliseDigits)
    • simplifyMathematical

      public static String simplifyMathematical(String s)
    • simplifyMathematical

      public static String simplifyMathematical(String s, boolean normaliseGreek, boolean normaliseDigits)
    • greekLookalikes

      private static char greekLookalikes(char c)
    • greekToLatin

      public static String greekToLatin(String s)
    • hasMixedGreekAndLatin

      public static boolean hasMixedGreekAndLatin(String s)
    • hasMixedGreekAndCyrillic

      public static boolean hasMixedGreekAndCyrillic(String s)