public class ErrorReportingTokenizer extends Tokenizer
| Modifier and Type | Field and Description |
|---|---|
private boolean |
alreadyComplainedAboutNonAscii
Used together with
nonAsciiProhibited. |
private boolean |
alreadyWarnedAboutPrivateUseCharacters
Keeps track of PUA warnings.
|
private int |
col
The current column number in the current resource being tokenized.
|
private int |
colPrev |
private XmlViolationPolicy |
contentNonXmlCharPolicy
The policy for non-space non-XML characters.
|
private java.util.HashMap<java.lang.String,java.lang.String> |
errorProfileMap |
private int |
line
The current line number in the current resource being parsed.
|
private int |
linePrev |
private boolean |
nextCharOnNewLine |
private char |
prev |
private static int |
SURROGATE_OFFSET
Magic value for UTF-16 operations.
|
private int |
transitionBaseOffset |
private TransitionHandler |
transitionHandler |
AFTER_ATTRIBUTE_NAME, AFTER_ATTRIBUTE_VALUE_QUOTED, AFTER_DOCTYPE_NAME, AFTER_DOCTYPE_PUBLIC_IDENTIFIER, AFTER_DOCTYPE_PUBLIC_KEYWORD, AFTER_DOCTYPE_SYSTEM_IDENTIFIER, AFTER_DOCTYPE_SYSTEM_KEYWORD, ampersandLocation, ATTRIBUTE_NAME, ATTRIBUTE_VALUE_DOUBLE_QUOTED, ATTRIBUTE_VALUE_SINGLE_QUOTED, ATTRIBUTE_VALUE_UNQUOTED, attributeName, BEFORE_ATTRIBUTE_NAME, BEFORE_ATTRIBUTE_VALUE, BEFORE_DOCTYPE_NAME, BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, BOGUS_COMMENT, BOGUS_COMMENT_HYPHEN, BOGUS_DOCTYPE, CDATA_RSQB, CDATA_RSQB_RSQB, CDATA_SECTION, CDATA_START, CHARACTER_REFERENCE_HILO_LOOKUP, CHARACTER_REFERENCE_TAIL, CLOSE_TAG_OPEN, COMMENT, COMMENT_END, COMMENT_END_BANG, COMMENT_END_DASH, COMMENT_START, COMMENT_START_DASH, confident, CONSUME_CHARACTER_REFERENCE, CONSUME_NCR, cstart, DATA, DECIMAL_NRC_LOOP, DOCTYPE, DOCTYPE_NAME, DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, DOCTYPE_UBLIC, DOCTYPE_YSTEM, encodingDeclarationHandler, endTag, endTagExpectation, errorHandler, HANDLE_NCR_VALUE, HANDLE_NCR_VALUE_RECONSUME, HEX_NCR_LOOP, html4, index, lastCR, MARKUP_DECLARATION_HYPHEN, MARKUP_DECLARATION_OCTYPE, MARKUP_DECLARATION_OPEN, NON_DATA_END_TAG_NAME, PLAINTEXT, PROCESSING_INSTRUCTION, PROCESSING_INSTRUCTION_QUESTION_MARK, RAWTEXT, RAWTEXT_RCDATA_LESS_THAN_SIGN, RCDATA, SCRIPT_DATA, SCRIPT_DATA_DOUBLE_ESCAPE_END, SCRIPT_DATA_DOUBLE_ESCAPE_START, SCRIPT_DATA_DOUBLE_ESCAPED, SCRIPT_DATA_DOUBLE_ESCAPED_DASH, SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, SCRIPT_DATA_ESCAPE_START, SCRIPT_DATA_ESCAPE_START_DASH, SCRIPT_DATA_ESCAPED, SCRIPT_DATA_ESCAPED_DASH, SCRIPT_DATA_ESCAPED_DASH_DASH, SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, SCRIPT_DATA_LESS_THAN_SIGN, SELF_CLOSING_START_TAG, stateSave, TAG_NAME, TAG_OPEN, tokenHandler, value
| Constructor and Description |
|---|
ErrorReportingTokenizer(TokenHandler tokenHandler) |
ErrorReportingTokenizer(TokenHandler tokenHandler,
boolean newAttributesEachTime) |
| Modifier and Type | Method and Description |
|---|---|
protected char |
checkChar(char[] buf,
int pos) |
private void |
complainAboutNonAscii() |
protected void |
errAstralNonCharacter(int ch) |
protected void |
errAttributeValueMissing() |
protected void |
errBadCharAfterLt(char c) |
protected void |
errBadCharBeforeAttributeNameOrNull(char c) |
protected void |
errBogusComment() |
protected void |
errBogusDoctype() |
protected void |
errCharRefLacksSemicolon() |
protected void |
errConsecutiveHyphens() |
protected void |
errDuplicateAttribute() |
protected void |
errEofAfterLt() |
protected void |
errEofInAttributeName() |
protected void |
errEofInAttributeValue() |
protected void |
errEofInComment() |
protected void |
errEofInDoctype() |
protected void |
errEofInEndTag() |
protected void |
errEofInPublicId() |
protected void |
errEofInSystemId() |
protected void |
errEofInTagName() |
protected void |
errEofWithoutGt() |
protected void |
errEqualsSignBeforeAttributeName() |
protected void |
errExpectedPublicId() |
protected void |
errExpectedSystemId() |
protected void |
errGarbageAfterLtSlash() |
protected void |
errGtInPublicId() |
protected void |
errGtInSystemId() |
protected void |
errHtml4LtSlashInRcdata(char folded) |
protected void |
errHtml4NonNameInUnquotedAttribute(char c) |
protected void |
errHtml4XmlVoidSyntax() |
protected void |
errHyphenHyphenBang() |
protected void |
errLtGt() |
protected void |
errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c) |
protected void |
errLtSlashGt() |
protected void |
errMissingSpaceBeforeDoctypeName() |
protected void |
errNamelessDoctype() |
protected void |
errNcrControlChar() |
protected char |
errNcrControlChar(char ch) |
protected void |
errNcrCr() |
protected void |
errNcrInC1Range() |
protected char |
errNcrNonCharacter(char ch) |
protected void |
errNcrOutOfRange() |
protected void |
errNcrSurrogate() |
protected void |
errNcrUnassigned() |
protected void |
errNcrZero() |
protected void |
errNoDigitsInNCR() |
protected void |
errNoNamedCharacterMatch() |
protected void |
errNoSpaceBetweenAttributes() |
protected void |
errNoSpaceBetweenDoctypePublicKeywordAndQuote() |
protected void |
errNoSpaceBetweenDoctypeSystemKeywordAndQuote() |
protected void |
errNoSpaceBetweenPublicAndSystemIds() |
protected void |
errNotSemicolonTerminated() |
protected void |
errPrematureEndOfComment() |
protected void |
errProcessingInstruction() |
protected void |
errQuoteBeforeAttributeName(char c) |
protected void |
errQuoteOrLtInAttributeNameOrNull(char c) |
protected void |
errSlashNotFollowedByGt() |
protected void |
errUnescapedAmpersandInterpretedAsCharacterReference() |
protected void |
errUnquotedAttributeValOrNull(char c) |
protected void |
errWarnLtSlashInRcdata() |
protected void |
flushChars(char[] buf,
int pos)
Flushes coalesced character tokens.
|
int |
getCol()
Returns the col.
|
int |
getColumnNumber() |
int |
getLine()
Returns the line.
|
int |
getLineNumber() |
boolean |
isAlreadyComplainedAboutNonAscii()
Returns the alreadyComplainedAboutNonAscii.
|
private boolean |
isAstralPrivateUse(int c)
Tells if the argument is an astral PUA character.
|
boolean |
isNextCharOnNewLine()
Returns the nextCharOnNewLine.
|
private boolean |
isPrivateUse(char c)
Tells if the argument is a BMP PUA character.
|
protected void |
maybeErrAttributesOnEndTag(HtmlAttributes attrs) |
protected void |
maybeErrSlashInEndTag(boolean selfClosing) |
protected void |
maybeWarnPrivateUse(char ch) |
protected void |
maybeWarnPrivateUseAstral() |
void |
note(java.lang.String profile,
java.lang.String message)
Reports on an event based on profile selected.
|
protected void |
noteAttributeWithoutValue() |
protected void |
noteUnquotedAttributeValue() |
void |
setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy)
Sets the contentNonXmlCharPolicy.
|
void |
setErrorProfile(java.util.HashMap<java.lang.String,java.lang.String> errorProfileMap)
Sets the errorProfile.
|
void |
setTransitionBaseOffset(int offset)
Sets an offset to be added to the position reported to
TransitionHandler. |
void |
setTransitionHandler(TransitionHandler transitionHandler)
Sets the transitionHandler.
|
protected void |
silentCarriageReturn() |
protected void |
silentLineFeed() |
protected void |
startErrorReporting() |
private java.lang.String |
toUPlusString(int c) |
protected int |
transition(int from,
int to,
boolean reconsume,
int pos) |
private void |
warnAboutPrivateUseChar()
Emits a warning about private use characters if the warning has not been
emitted yet.
|
becomeConfident, destructor, emptyAttributes, end, eof, err, errTreeBuilder, fatal, getErrorHandler, getPublicId, getSystemId, initializeWithoutStarting, initLocation, internalEncodingDeclaration, isInDataState, isMappingLangToXmlLang, isPrevCR, loadState, notifyAboutMetaBoundary, requestSuspension, resetToDataState, setCommentPolicy, setContentSpacePolicy, setEncodingDeclarationHandler, setErrorHandler, setHtml4ModeCompatibleWithXhtml1Schemata, setInterner, setLineNumber, setMappingLangToXmlLang, setNamePolicy, setStateAndEndTagExpectation, setStateAndEndTagExpectation, setXmlnsPolicy, start, strBufToString, tokenizeBuffer, turnOnAdditionalHtml4Errors, warn
private static final int SURROGATE_OFFSET
private XmlViolationPolicy contentNonXmlCharPolicy
private boolean alreadyComplainedAboutNonAscii
Used together with nonAsciiProhibited.
private boolean alreadyWarnedAboutPrivateUseCharacters
private int line
private int linePrev
private int col
private int colPrev
private boolean nextCharOnNewLine
private char prev
private java.util.HashMap<java.lang.String,java.lang.String> errorProfileMap
private TransitionHandler transitionHandler
private int transitionBaseOffset
public ErrorReportingTokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime)
Parameters:
tokenHandler -
newAttributesEachTime -
public ErrorReportingTokenizer(TokenHandler tokenHandler)
Parameters:
tokenHandler -
public int getLineNumber()
Specified by:
getLineNumber in interface org.xml.sax.Locator
Overrides:
getLineNumber in class Tokenizer
See Also:
Locator.getLineNumber()
public int getColumnNumber()
Specified by:
getColumnNumber in interface org.xml.sax.Locator
Overrides:
getColumnNumber in class Tokenizer
See Also:
Locator.getColumnNumber()
public void setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy)
Overrides:
setContentNonXmlCharPolicy in class Tokenizer
Parameters:
contentNonXmlCharPolicy - the contentNonXmlCharPolicy to set
public void setErrorProfile(java.util.HashMap<java.lang.String,java.lang.String> errorProfileMap)
Parameters:
errorProfile -
public void note(java.lang.String profile,
java.lang.String message)
throws org.xml.sax.SAXException
Parameters:
profile - the profile this message belongs to
message - the message itself
Throws:
org.xml.sax.SAXException
protected void startErrorReporting()
throws org.xml.sax.SAXException
startErrorReporting in class Tokenizerorg.xml.sax.SAXExceptionprotected void silentCarriageReturn()
silentCarriageReturn in class Tokenizerprotected void silentLineFeed()
silentLineFeed in class Tokenizerpublic int getLine()
public boolean isNextCharOnNewLine()
isNextCharOnNewLine in class Tokenizerprivate void complainAboutNonAscii()
throws org.xml.sax.SAXException
org.xml.sax.SAXExceptionpublic boolean isAlreadyComplainedAboutNonAscii()
Overrides:
isAlreadyComplainedAboutNonAscii in class Tokenizer
protected void flushChars(char[] buf,
int pos)
throws org.xml.sax.SAXException
Overrides:
flushChars in class Tokenizer
Parameters:
buf - TODO
pos - TODO
Throws:
org.xml.sax.SAXException
protected char checkChar(char[] buf,
int pos)
throws org.xml.sax.SAXException
protected int transition(int from,
int to,
boolean reconsume,
int pos)
throws org.xml.sax.SAXException
Overrides:
transition in class Tokenizer
Throws:
org.xml.sax.SAXException
See Also:
Tokenizer.transition(int, int, boolean, int)
private java.lang.String toUPlusString(int c)
private void warnAboutPrivateUseChar()
throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException
private boolean isPrivateUse(char c)
Parameters:
c - the UTF-16 code unit to check
Returns:
true if PUA character
private boolean isAstralPrivateUse(int c)
Parameters:
c - the code point to check
Returns:
true if astral private use
protected void errGarbageAfterLtSlash()
throws org.xml.sax.SAXException
errGarbageAfterLtSlash in class Tokenizerorg.xml.sax.SAXExceptionprotected void errLtSlashGt()
throws org.xml.sax.SAXException
errLtSlashGt in class Tokenizerorg.xml.sax.SAXExceptionprotected void errWarnLtSlashInRcdata()
throws org.xml.sax.SAXException
errWarnLtSlashInRcdata in class Tokenizerorg.xml.sax.SAXExceptionprotected void errHtml4LtSlashInRcdata(char folded)
throws org.xml.sax.SAXException
errHtml4LtSlashInRcdata in class Tokenizerorg.xml.sax.SAXExceptionprotected void errCharRefLacksSemicolon()
throws org.xml.sax.SAXException
errCharRefLacksSemicolon in class Tokenizerorg.xml.sax.SAXExceptionprotected void errNoDigitsInNCR()
throws org.xml.sax.SAXException
errNoDigitsInNCR in class Tokenizerorg.xml.sax.SAXExceptionprotected void errGtInSystemId()
throws org.xml.sax.SAXException
errGtInSystemId in class Tokenizerorg.xml.sax.SAXExceptionprotected void errGtInPublicId()
throws org.xml.sax.SAXException
errGtInPublicId in class Tokenizerorg.xml.sax.SAXExceptionprotected void errNamelessDoctype()
throws org.xml.sax.SAXException
errNamelessDoctype in class Tokenizerorg.xml.sax.SAXExceptionprotected void errConsecutiveHyphens()
throws org.xml.sax.SAXException
errConsecutiveHyphens in class Tokenizerorg.xml.sax.SAXExceptionprotected void errPrematureEndOfComment()
throws org.xml.sax.SAXException
errPrematureEndOfComment in class Tokenizerorg.xml.sax.SAXExceptionprotected void errBogusComment()
throws org.xml.sax.SAXException
errBogusComment in class Tokenizerorg.xml.sax.SAXExceptionprotected void errUnquotedAttributeValOrNull(char c)
throws org.xml.sax.SAXException
errUnquotedAttributeValOrNull in class Tokenizerorg.xml.sax.SAXExceptionprotected void errSlashNotFollowedByGt()
throws org.xml.sax.SAXException
errSlashNotFollowedByGt in class Tokenizerorg.xml.sax.SAXExceptionprotected void errHtml4XmlVoidSyntax()
throws org.xml.sax.SAXException
errHtml4XmlVoidSyntax in class Tokenizerorg.xml.sax.SAXExceptionprotected void errNoSpaceBetweenAttributes()
throws org.xml.sax.SAXException
errNoSpaceBetweenAttributes in class Tokenizerorg.xml.sax.SAXExceptionprotected void errHtml4NonNameInUnquotedAttribute(char c)
throws org.xml.sax.SAXException
errHtml4NonNameInUnquotedAttribute in class Tokenizerorg.xml.sax.SAXExceptionprotected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
throws org.xml.sax.SAXException
errLtOrEqualsOrGraveInUnquotedAttributeOrNull in class Tokenizerorg.xml.sax.SAXExceptionprotected void errAttributeValueMissing()
throws org.xml.sax.SAXException
errAttributeValueMissing in class Tokenizerorg.xml.sax.SAXExceptionprotected void errBadCharBeforeAttributeNameOrNull(char c)
throws org.xml.sax.SAXException
errBadCharBeforeAttributeNameOrNull in class Tokenizerorg.xml.sax.SAXExceptionprotected void errEqualsSignBeforeAttributeName()
throws org.xml.sax.SAXException
errEqualsSignBeforeAttributeName in class Tokenizerorg.xml.sax.SAXExceptionprotected void errBadCharAfterLt(char c)
throws org.xml.sax.SAXException
errBadCharAfterLt in class Tokenizerorg.xml.sax.SAXExceptionprotected void errLtGt()
throws org.xml.sax.SAXException
protected void errProcessingInstruction()
throws org.xml.sax.SAXException
errProcessingInstruction in class Tokenizerorg.xml.sax.SAXExceptionprotected void errUnescapedAmpersandInterpretedAsCharacterReference()
throws org.xml.sax.SAXException
errUnescapedAmpersandInterpretedAsCharacterReference in class Tokenizerorg.xml.sax.SAXExceptionprotected void errNotSemicolonTerminated()
throws org.xml.sax.SAXException
errNotSemicolonTerminated in class Tokenizerorg.xml.sax.SAXExceptionprotected void errNoNamedCharacterMatch()
throws org.xml.sax.SAXException
errNoNamedCharacterMatch in class Tokenizerorg.xml.sax.SAXExceptionprotected void errQuoteBeforeAttributeName(char c)
throws org.xml.sax.SAXException
errQuoteBeforeAttributeName in class Tokenizerorg.xml.sax.SAXExceptionprotected void errQuoteOrLtInAttributeNameOrNull(char c)
throws org.xml.sax.SAXException
errQuoteOrLtInAttributeNameOrNull in class Tokenizerorg.xml.sax.SAXExceptionprotected void errExpectedPublicId()
throws org.xml.sax.SAXException
errExpectedPublicId in class Tokenizerorg.xml.sax.SAXExceptionprotected void errBogusDoctype()
throws org.xml.sax.SAXException
errBogusDoctype in class Tokenizerorg.xml.sax.SAXExceptionprotected void maybeWarnPrivateUseAstral()
throws org.xml.sax.SAXException
maybeWarnPrivateUseAstral in class Tokenizerorg.xml.sax.SAXExceptionprotected void maybeWarnPrivateUse(char ch)
throws org.xml.sax.SAXException
maybeWarnPrivateUse in class Tokenizerorg.xml.sax.SAXExceptionprotected void maybeErrAttributesOnEndTag(HtmlAttributes attrs) throws org.xml.sax.SAXException
maybeErrAttributesOnEndTag in class Tokenizerorg.xml.sax.SAXExceptionprotected void maybeErrSlashInEndTag(boolean selfClosing)
throws org.xml.sax.SAXException
maybeErrSlashInEndTag in class Tokenizerorg.xml.sax.SAXExceptionprotected char errNcrNonCharacter(char ch)
throws org.xml.sax.SAXException
errNcrNonCharacter in class Tokenizerorg.xml.sax.SAXExceptionprotected void errAstralNonCharacter(int ch)
throws org.xml.sax.SAXException
Overrides:
errAstralNonCharacter in class Tokenizer
Throws:
org.xml.sax.SAXException
See Also:
Tokenizer.errAstralNonCharacter(int)
protected void errNcrSurrogate()
throws org.xml.sax.SAXException
errNcrSurrogate in class Tokenizerorg.xml.sax.SAXExceptionprotected char errNcrControlChar(char ch)
throws org.xml.sax.SAXException
errNcrControlChar in class Tokenizerorg.xml.sax.SAXExceptionprotected void errNcrCr()
throws org.xml.sax.SAXException
protected void errNcrInC1Range()
throws org.xml.sax.SAXException
errNcrInC1Range in class Tokenizerorg.xml.sax.SAXExceptionprotected void errEofInPublicId()
throws org.xml.sax.SAXException
errEofInPublicId in class Tokenizerorg.xml.sax.SAXExceptionprotected void errEofInComment()
throws org.xml.sax.SAXException
errEofInComment in class Tokenizerorg.xml.sax.SAXExceptionprotected void errEofInDoctype()
throws org.xml.sax.SAXException
errEofInDoctype in class Tokenizerorg.xml.sax.SAXExceptionprotected void errEofInAttributeValue()
throws org.xml.sax.SAXException
errEofInAttributeValue in class Tokenizerorg.xml.sax.SAXExceptionprotected void errEofInAttributeName()
throws org.xml.sax.SAXException
errEofInAttributeName in class Tokenizerorg.xml.sax.SAXExceptionprotected void errEofWithoutGt()
throws org.xml.sax.SAXException
errEofWithoutGt in class Tokenizerorg.xml.sax.SAXExceptionprotected void errEofInTagName()
throws org.xml.sax.SAXException
errEofInTagName in class Tokenizerorg.xml.sax.SAXExceptionprotected void errEofInEndTag()
throws org.xml.sax.SAXException
errEofInEndTag in class Tokenizerorg.xml.sax.SAXExceptionprotected void errEofAfterLt()
throws org.xml.sax.SAXException
errEofAfterLt in class Tokenizerorg.xml.sax.SAXExceptionprotected void errNcrOutOfRange()
throws org.xml.sax.SAXException
errNcrOutOfRange in class Tokenizerorg.xml.sax.SAXExceptionprotected void errNcrUnassigned()
throws org.xml.sax.SAXException
errNcrUnassigned in class Tokenizerorg.xml.sax.SAXExceptionprotected void errDuplicateAttribute()
throws org.xml.sax.SAXException
errDuplicateAttribute in class Tokenizerorg.xml.sax.SAXExceptionprotected void errEofInSystemId()
throws org.xml.sax.SAXException
errEofInSystemId in class Tokenizerorg.xml.sax.SAXExceptionprotected void errExpectedSystemId()
throws org.xml.sax.SAXException
errExpectedSystemId in class Tokenizerorg.xml.sax.SAXExceptionprotected void errMissingSpaceBeforeDoctypeName()
throws org.xml.sax.SAXException
errMissingSpaceBeforeDoctypeName in class Tokenizerorg.xml.sax.SAXExceptionprotected void errHyphenHyphenBang()
throws org.xml.sax.SAXException
errHyphenHyphenBang in class Tokenizerorg.xml.sax.SAXExceptionprotected void errNcrControlChar()
throws org.xml.sax.SAXException
errNcrControlChar in class Tokenizerorg.xml.sax.SAXExceptionprotected void errNcrZero()
throws org.xml.sax.SAXException
errNcrZero in class Tokenizerorg.xml.sax.SAXExceptionprotected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
throws org.xml.sax.SAXException
errNoSpaceBetweenDoctypeSystemKeywordAndQuote in class Tokenizerorg.xml.sax.SAXExceptionprotected void errNoSpaceBetweenPublicAndSystemIds()
throws org.xml.sax.SAXException
errNoSpaceBetweenPublicAndSystemIds in class Tokenizerorg.xml.sax.SAXExceptionprotected void errNoSpaceBetweenDoctypePublicKeywordAndQuote()
throws org.xml.sax.SAXException
errNoSpaceBetweenDoctypePublicKeywordAndQuote in class Tokenizerorg.xml.sax.SAXExceptionprotected void noteAttributeWithoutValue()
throws org.xml.sax.SAXException
noteAttributeWithoutValue in class Tokenizerorg.xml.sax.SAXExceptionprotected void noteUnquotedAttributeValue()
throws org.xml.sax.SAXException
noteUnquotedAttributeValue in class Tokenizerorg.xml.sax.SAXExceptionpublic void setTransitionHandler(TransitionHandler transitionHandler)
Parameters:
transitionHandler - the transitionHandler to set
public void setTransitionBaseOffset(int offset)
Sets an offset to be added to the position reported to TransitionHandler.
Overrides:
setTransitionBaseOffset in class Tokenizer
Parameters:
offset - the offset