Fantom

 

//
// Copyright (c) 2011, Brian Frank and Andy Frank
// Licensed under the Academic Free License version 3.0
//
// History:
//    9 May 11  Brian Frank  Creation
//

**
** Tokenizer for fanr query language.
** See `docFanr::Queries` for details and formal grammar.
**
internal class Tokenizer
{

//////////////////////////////////////////////////////////////////////////
// Constructor
//////////////////////////////////////////////////////////////////////////

  **
  ** Create a tokenizer over the given query string.  The two
  ** initial `consume` calls prime the lookahead so that `cur`
  ** holds the first char of the input and `peek` the second
  ** (zero is used for any position past the end of the input).
  **
  new make(Str input)
  {
    this.input = input
    this.index = -1
    this.tok   = Token.eof
    consume
    consume
  }

//////////////////////////////////////////////////////////////////////////
// Tokenizing
//////////////////////////////////////////////////////////////////////////

  **
  ** Advance to the next token.  The token type is stored in `tok`
  ** (and returned); its literal value or identifier, if it has one,
  ** is stored in `val`.  Returns `Token.eof` at end of input.
  **
  Token next()
  {
    // every token starts with no value
    val = null

    // leading whitespace is not significant
    while (cur.isSpace) consume

    // dispatch on the first char of the token
    if (cur.isDigit)                    tok = num()
    else if (cur == '"' || cur == '\'') tok = str()
    else if (cur.isAlpha || cur == '*') tok = word()
    else                                tok = symbol()
    return tok
  }

//////////////////////////////////////////////////////////////////////////
// Token Productions
//////////////////////////////////////////////////////////////////////////

  **
  ** Identifier or identifier pattern: a run of alphanumerics,
  ** underscores and stars.  The text is stored in `val`; the token
  ** is `idPattern` if at least one star was seen, otherwise `id`.
  **
  private Token word()
  {
    buf := StrBuf()
    hasStar := false
    while (true)
    {
      ch := cur
      if (ch == '*') hasStar = true
      else if (!ch.isAlphaNum && ch != '_') break
      buf.addChar(ch)
      consume
    }
    val = buf.toStr
    return hasStar ? Token.idPattern : Token.id
  }

  **
  ** Numeric literal: Int, Date or Version.  Underscores are
  ** accepted as separators and dropped from the literal.  Dashes
  ** are only accepted before any dot has been seen, and dots only
  ** before any dash has been seen.  Exactly two dashes makes a
  ** Date, one or more dots makes a Version, anything else is
  ** parsed as an Int.
  **
  private Token num()
  {
    // first char is already known to belong to the token
    text := StrBuf()
    text.addChar(cur)
    consume

    numDashes := 0
    numDots   := 0
    while (true)
    {
      ch := cur
      if (ch == '-')
      {
        if (numDots > 0) break
        numDashes++
      }
      else if (ch == '.')
      {
        if (numDashes > 0) break
        numDots++
      }
      else if (ch == '_')
      {
        // separator: skip it without adding to the literal
        consume
        continue
      }
      else if (!ch.isDigit) break

      text.addChar(ch)
      consume
    }
    literal := text.toStr

    // Date
    if (numDashes == 2)
    {
      val = Date.fromStr(literal)
      return Token.date
    }

    // Version
    if (numDots > 0)
    {
      val = Version.fromStr(literal)
      return Token.version
    }

    // Int
    val = Int.fromStr(literal)
    return Token.int
  }

  **
  ** String literal delimited by whichever quote char (single or
  ** double) opened it.  Escape sequences are decoded; an unescaped
  ** '$' or hitting end of input before the closing quote is an error.
  ** The decoded text is stored in `val`.
  **
  private Token str()
  {
    delim := cur
    consume // opening quote

    buf := StrBuf()
    while (cur != delim)
    {
      ch := cur
      if (ch == '$') throw err("String interpolation not supported")
      if (ch == 0) throw err("Unexpected end of str")
      if (ch == '\\')
      {
        buf.addChar(escape())
      }
      else
      {
        consume
        buf.addChar(ch)
      }
    }
    consume // closing quote

    val = buf.toStr
    return Token.str
  }

  **
  ** Decode one escape sequence.  On entry `cur` is the backslash;
  ** on successful return the whole sequence has been consumed and
  ** the char it denotes is returned.
  **
  private Int escape()
  {
    // skip the backslash
    consume

    // \uxxxx: exactly four hex digits
    if (cur == 'u')
    {
      consume
      code  := 0
      valid := true
      for (i := 0; i < 4; ++i)
      {
        // all four chars are consumed before validity is reported
        nibble := cur.fromDigit(16)
        consume
        if (nibble == null) valid = false
        else code = code.shiftl(4).or(nibble)
      }
      if (!valid) throw err("Invalid hex value for \\uxxxx")
      return code
    }

    // single char escapes
    ch := 0
    switch (cur)
    {
      case 'b':  ch = '\b'
      case 'f':  ch = '\f'
      case 'n':  ch = '\n'
      case 'r':  ch = '\r'
      case 't':  ch = '\t'
      case '"':  ch = '"'
      case '$':  ch = '$'
      case '\'': ch = '\''
      case '`':  ch = '`'
      case '\\': ch = '\\'
      default:   throw err("Invalid escape sequence")
    }
    consume
    return ch
  }

  **
  ** Symbol token: end of input, punctuation, or a comparison
  ** operator.  Throws ParseErr for any char (or char pair) which
  ** does not form a known symbol.
  **
  private Token symbol()
  {
    c := cur
    consume

    // symbols which are fully determined by their first char
    switch (c)
    {
      case 0:    return Token.eof
      case ',':  return Token.comma
      case '-':  return Token.minus
      case '+':  return Token.plus
      case '.':  return Token.dot
      case '\r': throw err("Carriage return \\r not allowed in source")
    }

    if (cur == '=')
    {
      // two char operators ending in '='
      Token? op := null
      switch (c)
      {
        case '<': op = Token.ltEq
        case '=': op = Token.eq
        case '>': op = Token.gtEq
        case '!': op = Token.notEq
        case '~': op = Token.like
      }
      if (op != null)
      {
        consume
        return op
      }
    }
    else if (c == '<') return Token.lt
    else if (c == '>') return Token.gt

    // includes '=', '!' and '~' when not followed by '='
    throw err("Unexpected symbol: " + c.toChar + " (0x" + c.toHex + ")")
  }

//////////////////////////////////////////////////////////////////////////
// Error Handling
//////////////////////////////////////////////////////////////////////////

  **
  ** Build (but do not throw) a ParseErr whose message is the
  ** given text followed by the complete input being tokenized.
  **
  ParseErr err(Str msg) { return ParseErr.make("$msg: $input") }

//////////////////////////////////////////////////////////////////////////
// Char Reads
//////////////////////////////////////////////////////////////////////////

  **
  ** Shift the lookahead forward by one char: `peek` becomes `cur`
  ** and the next char of the input (or zero when past the end)
  ** becomes `peek`.
  **
  private Void consume()
  {
    cur = peek
    index++
    peek = (index < input.size) ? input[index] : 0
  }

//////////////////////////////////////////////////////////////////////////
// Fields
//////////////////////////////////////////////////////////////////////////

  Token tok          // type of the current token
  Obj? val           // literal value or identifier of the current token, if any
  private Str input  // query being tokenized
  private Int index  // position in input that peek was read from
  private Int cur    // current char, zero at end of input
  private Int peek   // char after cur, zero at end of input
}

**************************************************************************
** Token
**************************************************************************

**
** Token types produced by `Tokenizer`.  Every token has a display
** symbol; the comparison operators additionally carry the `QueryOp`
** they correspond to.
**
internal enum class Token
{

  // identifiers/literals
  id        ("identifier"),
  idPattern ("identifier pattern"),
  int       ("Int"),
  date      ("Date"),
  version   ("Version"),
  str       ("Str"),

  // punctuation
  dot       ("."),
  comma     (","),
  minus     ("-"),
  plus      ("+"),

  // comparison operators
  eq        ("==", QueryOp.eq),
  notEq     ("!=", QueryOp.notEq),
  like      ("~=", QueryOp.like),
  lt        ("<",  QueryOp.lt),
  ltEq      ("<=", QueryOp.ltEq),
  gt        (">",  QueryOp.gt),
  gtEq      (">=", QueryOp.gtEq),

  // end of input
  eof       ("eof");

  ** Construct with the display symbol and, for comparison
  ** operators only, the matching query operator.
  private new make(Str s, QueryOp? q := null) { symbol = s; queryOp = q}

  ** String representation is the display symbol.
  override Str toStr() { symbol }

  ** Is this one of the literal value tokens: int, str, date or version.
  Bool isScalar() { this === int || this === str || this === date || this === version }

  ** Display text for this token, also returned by `toStr`.
  const Str symbol

  ** Query operator for comparison tokens, null for all other tokens.
  const QueryOp? queryOp
}