# regenerate parser.rb using `tt parser.treetop`
module Asciidoctor
module PDF
module FormattedText
grammar Markup
rule text
# First rule of the grammar; it simply delegates to the complex rule.
complex
end
rule complex
  # Any sequence (possibly empty) of text runs, elements, and character
  # references, kept in the order they appear in the input.
  (cdata / element / charref)* {
    # Returns an Array holding the content of each matched child node.
    def content
      elements.map(&:content)
    end
  }
end
rule element
  # An element is either a void element or a start tag, nested content, and an end tag.
  # strict tag matching (costs a minor toll)
  # void_element / start_tag complex end_tag &{|seq| seq[0].name == seq[2].name } {
  void_element / start_tag complex end_tag {
    # NOTE content only applies to non-void elements (second part of rule)
    # Returns a Hash describing the element: its name (as a Symbol), the
    # attributes reported by the start tag, and the content of the nested nodes.
    def content
      start_node, body_node = elements
      {
        type: :element,
        name: start_node.name.to_sym,
        attributes: start_node.attributes,
        pcdata: body_node.content
      }
    end
  }
end
rule void_element
  # A tag with no end tag; the slash before the closing bracket is optional
  # and may be preceded by spaces.
  '<' void_tag_name attributes (spaces? '/')? '>' {
    # Returns a Hash describing the element; it carries no pcdata entry.
    def content
      _open, tag_node, attrs_node = elements
      { type: :element, name: tag_node.text_value.to_sym, attributes: attrs_node.content }
    end
  }
end
rule start_tag
  # Opening tag of a paired element: a tag name followed by zero or more attributes.
  '<' tag_name attributes '>' {
    # Returns the tag name as a String.
    def name
      tag_node = elements[1]
      tag_node.text_value
    end

    # Returns the content of the attributes node.
    def attributes
      attrs_node = elements[2]
      attrs_node.content
    end
  }
end
rule tag_name
# Names accepted for paired tags (used by both start_tag and end_tag);
# the alternatives are tried in the order listed.
# QUESTION faster to do regex?
# QUESTION can we cut stuff we aren't using? what about supporting hr?
#'a' / 'b' / 'code' / 'color' / 'del' / 'em' / 'font' / 'i' / 'img' / 'link' / 'span' / 'strikethrough' / 'strong' / 'sub' / 'sup' / 'u'
'a' / 'code' / 'color' / 'del' / 'em' / 'font' / 'span' / 'strong' / 'sub' / 'sup'
end
rule void_tag_name
# Names accepted by void_element, i.e. tags that are matched without an end tag.
'br' / 'img'
end
rule attributes
  # Zero or more attributes, each introduced by at least one space.
  attribute* {
    # Returns a Hash mapping each attribute name (as a Symbol) to its value.
    # When a name is repeated, the last occurrence wins.
    def content
      elements.each_with_object({}) do |attr_node, attrs|
        attr_name, attr_val = attr_node.content
        attrs[attr_name.to_sym] = attr_val
      end
    end
  }
end
rule attribute
  # A single name="value" pair: the name is lowercase letters and underscores,
  # and the value is any run of characters that contains no double quote.
  spaces [a-z_]+ '=' '"' [^"]* '"' {
    # Returns a two-element Array holding the name and the value, both as Strings.
    def content
      elements.values_at(1, 4).map(&:text_value)
    end
  }
end
rule end_tag
  # Closing tag of a paired element.
  '</' tag_name '>' {
    # Returns the tag name as a String.
    def name
      tag_node = elements[1]
      tag_node.text_value
    end
  }
end
rule cdata
# A run of one or more characters other than < and &, which introduce
# elements and character references respectively.
[^<&]+ {
# Returns a Hash describing a text node whose value is the matched text.
def content
{ type: :text, value: text_value }
end
}
end
rule charref
  # A character reference in decimal, hexadecimal, or named form.
  '&' ('#' character_decimal / '#x' character_hex / character_name) ';' {
    # Returns a Hash describing the reference. A named reference yields a
    # Symbol value, a decimal reference an Integer, and a hexadecimal
    # reference the String of hex digits as written.
    def content
      ref_data = elements[1]
      if ref_data.terminal?
        # a terminal node means the character_name alternative matched
        ref_type = :name
        ref_value = ref_data.text_value.to_sym
      else
        prefix, digits = ref_data.elements
        if prefix.text_value == '#'
          ref_type = :decimal
          ref_value = digits.text_value.to_i
        else
          ref_type = :hex
          ref_value = digits.text_value
        end
      end
      { type: :charref, reference_type: ref_type, value: ref_value }
    end
  }
end
rule character_decimal
# Digits of a decimal character reference: between 2 and 6 characters in the range 0-9.
# NOTE 6 decimals only supported in Asciidoctor 1.5.5 and up
[0-9] 2..6
end
rule character_hex
# Digits of a hexadecimal character reference: between 2 and 5 characters
# from 0-9 and lowercase a-f (uppercase A-F is not matched).
# NOTE 5 hexadecimals only supported in Asciidoctor 1.5.5 and up
[0-9a-f] 2..5
end
rule character_name
# Names accepted in a named character reference; any other name does not match.
'amp' / 'apos' / 'gt' / 'lt' / 'nbsp' / 'quot'
end
rule spaces
# One or more literal space characters (tabs and newlines are not matched).
' '+
end
end end end end