// Fantom

 

//
// Copyright (c) 2008, Brian Frank and Andy Frank
// Licensed under the Academic Free License version 3.0
//
// History:
//   10 Aug 08  Brian Frank  Creation
//

using fwt
using syntax

**
** Parser is responsible for tokenizing a document line
** into syntax color coding.
**
** A single Parser instance is configured once from a Doc (its options
** and syntax rules) and then reused for every line via `parseLine`.
** All per-line scanning state (text, pos, cur, peek, commentNesting,
** opens, closes) is reset by `init` at the start of each parse.
**
internal class Parser
{
//////////////////////////////////////////////////////////////////////////
// Constructor
//////////////////////////////////////////////////////////////////////////

  **
  ** Construct with the document whose options and syntax
  ** rules drive the tokenization.
  **
  new make(Doc doc)
  {
    options  = doc.options
    rules    = doc.rules
    brackets = rules.brackets

    // build keyword map, and also a prefix map
    // of the first two characters for fast yes/no
    keywords = Str:Bool[:] { def = false }
    keywordPrefixes = Int:Bool[:] { def = false }
    if (rules.keywords != null)
    {
      rules.keywords.each |Str k|
      {
        keywords[k] = true

        // the prefix index is keyed on two chars; a keyword shorter
        // than that has no prefix (and k[1] would raise IndexErr)
        if (k.size >= 2)
          keywordPrefixes[k[0].shiftl(16).or(k[1])] = true
      }
    }

    // single line comments
    comments = Matcher[,]
    rules.comments?.each |Str s| { comments.add(toMatcher(s)) }

    // block comments
    commentStart = toMatcher(rules.blockCommentStart)
    commentEnd   = toMatcher(rules.blockCommentEnd)
    commentOpen  = BlockOpen(this, rules.blockCommentStart, [0, options.comment].ro)

    // str literals
    strs = StrMatch[,]
    if (rules.strs != null)
      rules.strs.each |SyntaxStr s| { strs.add(toStrMatch(s)) }
  }

//////////////////////////////////////////////////////////////////////////
// Tokenize
//////////////////////////////////////////////////////////////////////////

  **
  ** Parse the specified line text into a new Line instance.
  ** If close is non-null, then we are reparsing the line with
  ** the knowledge that the start of the line is inside a
  ** multi-line block: the block's styling override is applied
  ** first and scanning resumes at 'close.pos'.
  **
  ** Returns a FatLine when the line opens or closes a block or
  ** changes the comment nesting, otherwise a plain Line.  If any
  ** Err is raised while parsing, the stack trace is dumped and the
  ** whole line is returned styled as plain text.
  **
  Line parseLine(Str text, BlockClose? close := null)
  {
    try
    {
      if (options.convertTabsToSpaces)
        text = convertTabsToSpaces(text, options.tabSpacing)

      init(text)

      styling := Obj[,]
      if (close != null)
      {
        styling.addAll(close.stylingOverride)
        consumeN(close.pos)
      }

      parseStyling(styling)

      if (!needFatLine) return Line { it.text = text; it.styling = styling }
      return FatLine
      {
        it.text = text
        it.styling = styling
        it.commentNesting = this.commentNesting
        it.opens = this.opens
        it.closeBlocks = this.closes
      }
    }
    catch (Err e)
    {
      e.trace
      return Line { it.text = text; it.styling = [0, options.text] }
    }
  }

  **
  ** Replace each tab in text with enough spaces to reach the
  ** next multiple of ts columns.  Text without tabs is returned
  ** as is.
  **
  internal static Str convertTabsToSpaces(Str text, Int ts)
  {
    if (!text.contains("\t")) return text
    s := StrBuf()
    text.each |Int ch|
    {
      // pad relative to the expanded output, not the input index
      if (ch == '\t')
        s.add(Str.spaces(ts - (s.size%ts)))
      else
        s.addChar(ch)
    }
    return s.toStr
  }

  **
  ** Scan tokens until end of line, appending a (pos, style)
  ** pair to styling for each change of style.
  **
  private Void parseStyling(Obj[] styling)
  {
    while (cur != 0)
    {
      p := pos
      tok := next
      switch (tok)
      {
        case Token.bracket: addStyle(styling, p, options.bracket)
        case Token.keyword: addStyle(styling, p, options.keyword)
        case Token.literal: addStyle(styling, p, options.literal)
        case Token.comment: addStyle(styling, p, options.comment)
        default:            addStyle(styling, p, options.text)
      }
    }
  }

  **
  ** Append a style run starting at pos, unless the most
  ** recently added style is the very same instance.
  **
  private Void addStyle(Obj[] styling, Int pos, RichTextStyle style)
  {
    if (styling.last === style) return
    styling.add(pos).add(style)
  }

  **
  ** Return if the line just parsed carries multi-line block
  ** state and therefore must be represented as a FatLine.
  **
  private Bool needFatLine()
  {
    return commentNesting != 0 || opens != null || closes != null
  }

//////////////////////////////////////////////////////////////////////////
// Access
//////////////////////////////////////////////////////////////////////////

  **
  ** Return the next token.  Checks are made in this order:
  ** end-of-line comment, block comment start/end, str literal,
  ** bracket, keyword (via two char prefix), identifier, and
  ** finally a single symbol char.
  **
  private Token next()
  {
    // check for end-of-line comments
    for (i:=0; i<comments.size; ++i)
    {
      if (comments[i].isMatch)
      {
        // rest of the line is comment; force end of line
        cur = 0
        return Token.comment
      }
    }

    // check for block comments
    if (commentStart.isMatch) return blockComment
    if (commentEnd.isMatch)   return blockComment

    // check for str literals
    for (i:=0; i<strs.size; ++i)
      if (strs[i].start.isMatch) return strLiteral(strs[i])

    // brackets
    if (brackets.containsChar(cur))
    {
      consume
      return Token.bracket
    }

    // number
    /*
    if (cur === '-' && peek.isDigit) consume
    if (cur.isDigit) return number
    */

    // identifier which might be keyword
    if (keywordPrefixes[cur.shiftl(16).or(peek)])
    {
      start := pos
      consume
      consume
      while (cur.isAlphaNum || cur == '_') consume
      word := text[start..<pos]
      if (keywords[word]) return Token.keyword
      return Token.text
    }

    // tokenize an identifier in one big swoop
    if (cur.isAlpha)
    {
      while (cur.isAlphaNum || cur == '_') consume
      return Token.text
    }

    // consume symbols one at a time
    consume
    return Token.text
  }

  **
  ** Parse number literal:
  **   123
  **   2.6f
  **   2.6e-5f
  **   0xab_12
  **   2.5sec
  **
  ** Note: the only call site in 'next' is currently commented out.
  **
  private Token number()
  {
    while (true)
    {
      if (cur.isAlphaNum || cur == '_') { consume; continue }
      if (cur == '.' && peek.isDigit) { consume; continue }
      if (peek == '-' && (cur == 'e' || cur == 'E')) { consume; consume; continue }
      break
    }
    return Token.literal
  }

  **
  ** Parse str literal.  Consumes the start delimiter and scans
  ** for an unescaped end delimiter.  If end of line is reached
  ** first and the literal is multi-line, then the line is marked
  ** as opening the literal's block.
  **
  private Token strLiteral(StrMatch s)
  {
    s.start.consume
    while (cur != 0)
    {
      if (s.end.isMatch && countEscapes(s.escape).isEven)
      {
        s.end.consume
        return Token.literal
      }
      consume
    }
    if (s.multiLine) opens = s.blockOpen
    return Token.literal
  }

  **
  ** Count the number of consecutive escape chars immediately
  ** preceding the current char.  Returns zero when the current
  ** char is at the start of the line.
  **
  private Int countEscapes(Int esc)
  {
    n := 0
    i := pos - 1

    // keep the index inside the line: a negative Str index wraps
    // around to the end of the text, which would count trailing
    // escape chars as if they preceded column zero
    while (i >= 0 && i < text.size && text[i] == esc) { n++; i-- }
    return n
  }

  **
  ** Block comment to end token or end of line,
  ** keep track of nesting.  If the line ends while still
  ** nested, the line is marked as opening the comment block.
  **
  Token blockComment()
  {
    thisNesting := 0
    while (cur != 0)
    {
      if (commentStart.isMatch)
      {
        commentStart.consume
        commentNesting++
        thisNesting++
      }

      if (commentEnd.isMatch)
      {
        commentEnd.consume
        commentNesting--
        thisNesting--
      }

      // balanced (or closing more than opened on this line)
      if (thisNesting <= 0) return Token.comment
      consume
    }
    opens = commentOpen
    return Token.comment
  }

//////////////////////////////////////////////////////////////////////////
// Matching Functions
//////////////////////////////////////////////////////////////////////////

  **
  ** Build the start/end matchers and block handle
  ** for a str literal rule.
  **
  StrMatch toStrMatch(SyntaxStr s)
  {
    return StrMatch
    {
      start     = toMatcher(s.delimiter, s.escape)
      end       = toMatcher(s.delimiterEnd ?: s.delimiter, s.escape)
      escape    = s.escape
      multiLine = s.multiLine
      blockOpen = BlockOpen(this, s.delimiter, [0, options.literal].ro)
    }
  }

  **
  ** Build a matcher for the given token (trimmed).  A null or
  ** empty token yields a matcher which never matches.  Tokens of
  ** one or two chars honor the escape char when esc > 0; longer
  ** tokens ignore it.
  **
  Matcher toMatcher(Str? tok, Int esc := 0)
  {
    tok = tok?.trim ?: ""
    switch (tok.size)
    {
      case 0:
        return Matcher(0, |->Bool| { noMatch }, |->| {})
      case 1:
        if (esc > 0)
          return Matcher(1, |->Bool| { match1Esc(tok[0], esc) }, |->| { consume })
        else
          return Matcher(1, |->Bool| { match1(tok[0]) }, |->| { consume })
      case 2:
        if (esc > 0)
          return Matcher(2, |->Bool| { match2Esc(tok[0], tok[1], esc) }, |->| { consume; consume })
        else
          return Matcher(2, |->Bool| { match2(tok[0], tok[1]) }, |->| { consume; consume })
      default:
        return Matcher(tok.size, |->Bool| { matchN(tok) }, |->| { consumeN(tok.size) })
    }
  }

  ** Never matches (used for missing tokens).
  Bool noMatch() { return false }

  ** Match a single char against cur.
  Bool match1(Int ch1) { return cur == ch1 }

  ** Match two chars against cur and peek.
  Bool match2(Int ch1, Int ch2) { return cur == ch1 && peek == ch2 }

  ** Match a single char preceded by an even number of escape chars.
  Bool match1Esc(Int ch1, Int esc) { return cur == ch1 && countEscapes(esc).isEven }

  ** Match two chars preceded by an even number of escape chars.
  Bool match2Esc(Int ch1, Int ch2, Int esc) { return cur == ch1 && peek == ch2 && countEscapes(esc).isEven }

  **
  ** Match three or more chars starting at the current position.
  **
  Bool matchN(Str chars) // assume no escape for 3 or more
  {
    try
    {
      if (cur != chars[0] || peek != chars[1]) return false

      // not enough text left on the line to hold the whole token
      if (pos + chars.size > text.size) return false

      for (i:=2; i<chars.size; ++i) if (chars[i] != text[pos+i]) return false
      return true
    }
    catch (Err e)
    {
      return false
    }
  }

////////////////////////////////////////////////////////////////
// Consume
////////////////////////////////////////////////////////////////

  **
  ** Initialize state to parse specified line.  The cur and peek
  ** chars default to a space when the text is too short to
  ** supply them.
  **
  private Void init(Str text)
  {
    this.text = text
    cur = peek = ' '
    if (text.size > 0) cur  = text[0]
    if (text.size > 1) peek = text[1]
    pos = commentNesting = 0
    opens = null
    closes = null
    checkCloses
  }

  **
  ** Consume the cur char and advance to next char
  ** in buffer and update cur/peek fields.  Once the end of the
  ** text is passed, peek (and subsequently cur) becomes zero.
  **
  private Void consume()
  {
    cur = peek
    pos++
    if (pos+1 < text.size)
      peek = text[pos+1] // next peek is cur+1
    else
      peek = 0
    checkCloses
  }

  **
  ** Consume n characters
  **
  Void consumeN(Int n)
  {
    n.times { consume }
  }

  **
  ** Consume remaining characters
  **
  Void consumeRest()
  {
    consumeN(text.size-pos)
  }

  **
  ** Check if the current token is a match for closing
  ** multi-line blocks.  If so add it to our closes list
  ** along with the position just past the closing token.
  **
  private Void checkCloses()
  {
    strs.each |StrMatch m|
    {
      if (m.multiLine && m.end.isMatch)
      {
        if (closes == null) closes = Block[,]
        closes.add(BlockClose(m.blockOpen, pos+m.end.size))
      }
    }

    if (commentEnd.isMatch)
    {
      if (closes == null) closes = Block[,]
      closes.add(BlockClose(commentOpen, pos+commentEnd.size))
    }
  }

//////////////////////////////////////////////////////////////////////////
// Fields
//////////////////////////////////////////////////////////////////////////

  internal TextEditorOptions options // configured options
  internal SyntaxRules rules       // syntax rules for current document

  private Str brackets             // str of bracket symbols
  private Int:Bool keywordPrefixes // first two letter of keywords
  private Str:Bool keywords        // keywords
  private Matcher[] comments       // matchers for eol comments
  private Matcher commentStart     // matcher to check comment start
  private Matcher commentEnd       // matcher to check comment end
  private BlockOpen commentOpen    // open handle for block comments
  private StrMatch[] strs          // matchers for str literals

  private Str text := ""           // line being parsed
  private Int pos                  // index into text for cur
  private Int cur                  // current char
  private Int peek                 // next char

  private Int commentNesting       // levels of block comments opened/closed
  private Block? opens             // if current line opens block
  private Block[]? closes          // if current line closes block
}


**************************************************************************
** Token
**************************************************************************

** Token represents a string of color coded chars.  Parser maps
** each token to the matching style in its configured options;
** any token without a dedicated case is styled as plain text.
internal enum class Token
{
  text,          // plain text: identifiers and symbols
  bracket,       // one of the chars in the rules' bracket string
  keyword,       // identifier found in the rules' keyword list
  literal,       // str (or number) literal
  preprocessor,  // no dedicated case in Parser's style switch; styled as text
  comment        // end-of-line or block comment
}

**************************************************************************
** Matcher
**************************************************************************

** Matcher pairs a test function with a consume function for one
** specific token.  The test reports if the token starts at the
** parser's current character; consume advances past it.
internal class Matcher
{
  ** Construct with token size, match function, and consume function.
  new make(Int sz, |->Bool| m, |->| c)
  {
    this.size        = sz
    this.matchFunc   = m
    this.consumeFunc = c
  }

  ** Invoke the match function and return its result.
  Bool isMatch()
  {
    return matchFunc.call
  }

  ** Invoke the consume function.
  Void consume()
  {
    consumeFunc.call
  }

  ** Number of chars in the token (zero for a never-matching matcher).
  const Int size

  ** Function which tests for the token.
  |->Bool| matchFunc

  ** Function which advances past the token.
  |->| consumeFunc
}

**************************************************************************
** StrMatch
**************************************************************************

** StrMatch handles matching the start and end
** delimiter and managing multi-line string blocks.
** Instances are populated by Parser.toStrMatch, one per
** str rule in the syntax rules.
internal class StrMatch
{
  Matcher? start         // matcher for the opening delimiter
  Matcher? end           // matcher for the closing delimiter (same as opening if the rule has none)
  Int escape             // escape char taken from the rule
  Bool multiLine         // if the literal may span lines
  BlockOpen? blockOpen   // shared open handle used when a multi-line literal is left open
}

**************************************************************************
** BlockOpen
**************************************************************************

** BlockOpen implements the Block interface when we detect
** that a block comment or multi-line string is opened.  BlockOpens
** are reused by the entire parser (see commentOpen and StrMatch).
** They are paired with BlockCloses.
internal class BlockOpen : Block
{
  ** Construct with owning parser, the opening delimiter used as
  ** display name (may be null), and the styling to apply to a
  ** line which starts inside this block.
  new make(Parser p, Str? n, Obj[] s) { parser = p; name = n; stylingOverride = s }

  ** An open block never closes anything; always throws Err.
  override Line? closes(Line line, Block open) { throw Err("illegal state") }

  ** Return the block name.  The name is nullable (a syntax with no
  ** block comment start token passes null), so fall back to "null"
  ** rather than failing the non-null coercion of the return value.
  override Str toStr() { return name ?: "null" }

  ** Styling applied when a line is reparsed from inside this block.
  override Obj[]? stylingOverride

  ** Parser which created this block; used by BlockClose to reparse.
  Parser parser { private set }

  ** Opening delimiter, or null if none is configured.
  const Str? name
}

**************************************************************************
** BlockClose
**************************************************************************

** A BlockClose is recorded for every potential closing token of a
** block comment or multi-line string found on a line.  Instances
** are allocated per line and cache the reparse of that line, while
** sharing the BlockOpen they pair with to keep memory use down.
internal class BlockClose : Block
{
  ** Construct with the open block this close pairs with and the
  ** position in the line just past the closing token.
  new make(BlockOpen open, Int pos)
  {
    this.open = open
    this.pos  = pos
  }

  ** Styling override is shared with the paired open block.
  override Obj[]? stylingOverride() { return open.stylingOverride }

  ** If the given open block is the one we pair with, return the
  ** line reparsed as starting inside that block (parsed once, then
  ** cached).  Otherwise return null.
  override Line? closes(Line line, Block open)
  {
    if (open !== this.open) return null

    // reuse the earlier reparse when we have one
    if (cachedLineOnClose != null) return cachedLineOnClose

    // open is identical to this.open here, so use the typed field
    cachedLineOnClose = this.open.parser.parseLine(line.text, this)
    return cachedLineOnClose
  }

  ** Open block name and close position.
  override Str toStr() { return "$open.name:$pos" }

  ** Open block this close is paired with.
  BlockOpen open { private set }

  ** Position in the line just past the closing token.
  const Int pos

  ** Cached result of reparsing the line for this close.
  Line? cachedLineOnClose
}