Package org.languagetool.tagging.uk
Class CompoundTagger
java.lang.Object
org.languagetool.tagging.uk.CompoundTagger
Tags hyphenated compound words dynamically by analyzing each part.
- Since:
- 3.0
-
Field Summary
FieldsModifier and TypeFieldDescriptionprivate static final Patternprivate static final Patternprivate static final Stringprivate static final Stringprivate final CompoundDebugLoggerprivate final Localeprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Pattern(package private) static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Patternprivate static final Stringprivate static final Stringprivate static final Patternprivate final UkrainianTaggerprivate final WordTaggerprivate static final Pattern -
Constructor Summary
Constructors
Constructor | Description
CompoundTagger(UkrainianTagger ukrainianTagger, WordTagger wordTagger, Locale conversionLocale)
Method Summary
Modifier and Type | Method | Description
private List<AnalyzedToken> addPluralNvTokens(String word, List<AnalyzedToken> rightAnalyzedTokens, String addTag)
private void addPluralNvTokens(String word, List<AnalyzedToken> newAnalyzedTokens, String rightPosTag, String addTag)
private String capitalize(String word)
private static String collapseStretch(String word)
private List<AnalyzedToken> doGuessCompoundTag(String word)
private List<AnalyzedToken> doGuessMultiHyphens(String word, int firstDashIdx, int dashIdx)
private List<AnalyzedToken> doGuessTwoHyphens(String word, int firstDashIdx, int dashIdx)
private static String dropExtra(…) [name restored from Method Details; parameters not shown]
private static boolean equalParts(String lemma)
(package private) Set<AnalyzedToken> generateEntities(String word)
private static List<AnalyzedToken> generateTokensWithRighInflected(String word, String leftWord, List<AnalyzedToken> rightAnalyzedTokens, String posTagStart, String addTag, Pattern dropTag)
private String getAgreedPosTag(String leftPosTag, String rightPosTag, boolean leftNv, String word)
private String getNumAgreedPosTag(String leftPosTag, String rightPosTag, boolean leftNv)
private static List<AnalyzedToken> getNvPrefixLatWithAdjMatch(String word, List<AnalyzedToken> analyzedTokens, String leftWord)
private static List<AnalyzedToken> getNvPrefixNounMatch(String word, List<AnalyzedToken> analyzedTokens, String leftWord, String extraTag)
private String getTryPrefix(String rightWord)
guessCompoundTag(String word) [modifier and return type not shown]
(package private) List<AnalyzedToken> guessOtherTags(String word)
private List<AnalyzedToken> guessOtherTagsInternal(String word)
private boolean isJuniorSenior(AnalyzedToken leftAnalyzedToken, AnalyzedToken rightAnalyzedToken)
private static boolean isMinMax(…) [name restored from Method Details; parameters not shown]
private static boolean isPlural(…) [name restored from Method Details; parameters not shown]
private static boolean isSameAnimStatus(String leftPosTag, String rightPosTag)
private List<AnalyzedToken> matchDigitCompound(String word, String leftWord, String rightWord)
private List<AnalyzedToken> matchNumberedProperNoun(String word, String leftWord, String rightWord)
private List<AnalyzedToken> numrAdjMatch(String word, List<AnalyzedToken> analyzedTokens, String leftWord)
private List<AnalyzedToken> oAdjMatch(String word, List<AnalyzedToken> analyzedTokens, String leftWord)
private static String oToYj(…) [name restored from Method Details; parameters not shown]
private List<AnalyzedToken> poAdvMatch(String word, List<AnalyzedToken> analyzedTokens, String adjTag)
private static String stripPerfImperf(String leftPosTag)
private List<TaggedWord> tagAsIsAndWithLowerCase(String leftWord)
(package private) List<TaggedWord> tagBothCases(String leftWord, Pattern posTagMatcher)
private List<TaggedWord> tagEitherCase(String word)
private List<AnalyzedToken> tagMatch(String word, List<AnalyzedToken> leftAnalyzedTokens, List<AnalyzedToken> rightAnalyzedTokens)
private String tryAnimInanim(String leftPosTag, String rightPosTag, String leftLemma, String rightLemma, boolean leftNv, boolean rightNv, String word)
private List<AnalyzedToken> tryOWithAdj(String word, String leftWord, List<AnalyzedToken> rightAnalyzedTokens)
-
Field Details
-
TAG_ANIM
- See Also:
-
TAG_INANIM
- See Also:
-
EXTRA_TAGS
-
EXTRA_TAGS_DROP
-
EXTRA_TAGS_DROP_NONINFL
-
NOUN_SING_V_ROD_REGEX
-
SING_REGEX_F
-
O_ADJ_PATTERN
-
NUMR_ADJ_PATTERN
-
DASH_PREFIX_LAT_PATTERN
-
YEAR_NUMBER
-
NOUN_PREFIX_NUMBER
-
NOUN_WITH_INTERVAL_PREFIX_NUMBER
-
NOUN_SUFFIX_NUMBER_LETTER
-
ADJ_PREFIX_NUMBER
-
REQ_NUM_DVA_PATTERN
-
REQ_NUM_DESYAT_PATTERN
-
REQ_NUM_STO_PATTERN
-
INTJ_PATTERN
-
NONINFL_PATTERN
-
UKR_LETTERS_PATTERN
-
GEO_V_NAZ
-
FNAME
-
LNAME_V_NAZ
-
LNAME_V_ROD
-
NAME
-
PROP_V_NAZ
-
MNP_NAZ_REGEX
-
MNP_ZNA_REGEX
-
MNP_ROD_REGEX
-
stdNounTagRegex
-
dashPrefixes
-
leftMasterSet
-
numberedEntities
-
rightPartsWithLeftTagMap
-
followerSet
-
dashPrefixesInvalid
-
noDashPrefixes2019
-
noDashPrefixes
-
ADJ_TAG_FOR_PO_ADV_MIS
- See Also:
-
ADJ_TAG_FOR_PO_ADV_NAZ
- See Also:
-
PREFIX_NO_DASH_POSTAG_PATTERN
-
LEFT_O_ADJ
-
LEFT_O_ADJ_INVALID
-
LEFT_O_ADJ_INVALID_PATTERN
-
WORDS_WITH_YEAR
-
WORDS_WITH_NUM
-
NAME_SUFFIX
-
BAD_SUFFIX
-
SKY_PATTERN
-
SKYI_PATTERN
-
wordTagger
-
conversionLocale
-
ukrainianTagger
-
compoundDebugLogger
-
ABBR_PATTERN
-
STRETCH_PATTERN
-
-
Constructor Details
-
CompoundTagger
CompoundTagger(UkrainianTagger ukrainianTagger, WordTagger wordTagger, Locale conversionLocale)
-
-
Method Details
-
guessCompoundTag
-
doGuessCompoundTag
-
addPluralNvTokens
private List<AnalyzedToken> addPluralNvTokens(String word, List<AnalyzedToken> rightAnalyzedTokens, String addTag) -
addPluralNvTokens
private void addPluralNvTokens(String word, List<AnalyzedToken> newAnalyzedTokens, String rightPosTag, String addTag) -
equalParts
-
tagEitherCase
-
tryOWithAdj
private List<AnalyzedToken> tryOWithAdj(String word, String leftWord, List<AnalyzedToken> rightAnalyzedTokens) -
doGuessMultiHyphens
-
collapseStretch
-
doGuessTwoHyphens
-
generateTokensWithRighInflected
private static List<AnalyzedToken> generateTokensWithRighInflected(String word, String leftWord, List<AnalyzedToken> rightAnalyzedTokens, String posTagStart, String addTag, Pattern dropTag) -
matchNumberedProperNoun
-
generateEntities
-
matchDigitCompound
-
getTryPrefix
-
dropExtra
-
tagMatch
@Nullable private List<AnalyzedToken> tagMatch(String word, List<AnalyzedToken> leftAnalyzedTokens, List<AnalyzedToken> rightAnalyzedTokens) -
stripPerfImperf
-
isJuniorSenior
-
getNumAgreedPosTag
-
getAgreedPosTag
-
isMinMax
-
tryAnimInanim
-
isSameAnimStatus
-
isPlural
-
oAdjMatch
@Nullable private List<AnalyzedToken> oAdjMatch(String word, List<AnalyzedToken> analyzedTokens, String leftWord) -
numrAdjMatch
@Nullable private List<AnalyzedToken> numrAdjMatch(String word, List<AnalyzedToken> analyzedTokens, String leftWord) -
oToYj
-
getNvPrefixNounMatch
@Nullable private static List<AnalyzedToken> getNvPrefixNounMatch(String word, List<AnalyzedToken> analyzedTokens, String leftWord, String extraTag) -
getNvPrefixLatWithAdjMatch
@Nullable private static List<AnalyzedToken> getNvPrefixLatWithAdjMatch(String word, List<AnalyzedToken> analyzedTokens, String leftWord) -
poAdvMatch
@Nullable private List<AnalyzedToken> poAdvMatch(String word, List<AnalyzedToken> analyzedTokens, String adjTag) -
capitalize
-
tagBothCases
-
tagAsIsAndWithLowerCase
-
guessOtherTags
-
guessOtherTagsInternal
-