123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533 |
- package parse
- import (
- "bufio"
- "bytes"
- "fmt"
- "github.com/yuin/gopher-lua/ast"
- "io"
- "reflect"
- "strconv"
- "strings"
- )
const (
	// EOF is the sentinel character value returned by the scanner once the
	// input is exhausted. It is also stored in Pos.Line to mark end of input.
	EOF = -1

	// whitespace1 is a bit set of the characters skipped between tokens on
	// the same line: bit N is set when the byte with value N is whitespace.
	// It covers tab, carriage return and space.
	whitespace1 = 1<<'\t' | 1<<'\r' | 1<<' '

	// whitespace2 is whitespace1 extended with the line feed character.
	whitespace2 = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '
)
- type Error struct {
- Pos ast.Position
- Message string
- Token string
- }
- func (e *Error) Error() string {
- pos := e.Pos
- if pos.Line == EOF {
- return fmt.Sprintf("%v at EOF: %s\n", pos.Source, e.Message)
- } else {
- return fmt.Sprintf("%v line:%d(column:%d) near '%v': %s\n", pos.Source, pos.Line, pos.Column, e.Token, e.Message)
- }
- }
// writeChar appends c to buf as a single byte. Only the low eight bits of c
// are kept, so EOF (-1) is written as 0xFF.
func writeChar(buf *bytes.Buffer, c int) {
	b := byte(c)
	buf.WriteByte(b)
}
// isDecimal reports whether ch is an ASCII decimal digit ('0' through '9').
func isDecimal(ch int) bool {
	return ch >= '0' && ch <= '9'
}
- func isIdent(ch int, pos int) bool {
- return ch == '_' || 'A' <= ch && ch <= 'Z' || 'a' <= ch && ch <= 'z' || isDecimal(ch) && pos > 0
- }
// isDigit reports whether ch is an ASCII hexadecimal digit: '0'-'9', 'a'-'f'
// or 'A'-'F'.
func isDigit(ch int) bool {
	switch {
	case ch >= '0' && ch <= '9',
		ch >= 'a' && ch <= 'f',
		ch >= 'A' && ch <= 'F':
		return true
	}
	return false
}
- type Scanner struct {
- Pos ast.Position
- reader *bufio.Reader
- }
- func NewScanner(reader io.Reader, source string) *Scanner {
- return &Scanner{
- Pos: ast.Position{source, 1, 0},
- reader: bufio.NewReaderSize(reader, 4096),
- }
- }
- func (sc *Scanner) Error(tok string, msg string) *Error { return &Error{sc.Pos, msg, tok} }
- func (sc *Scanner) TokenError(tok ast.Token, msg string) *Error { return &Error{tok.Pos, msg, tok.Str} }
- func (sc *Scanner) readNext() int {
- ch, err := sc.reader.ReadByte()
- if err == io.EOF {
- return EOF
- }
- return int(ch)
- }
- func (sc *Scanner) Newline(ch int) {
- if ch < 0 {
- return
- }
- sc.Pos.Line += 1
- sc.Pos.Column = 0
- next := sc.Peek()
- if ch == '\n' && next == '\r' || ch == '\r' && next == '\n' {
- sc.reader.ReadByte()
- }
- }
- func (sc *Scanner) Next() int {
- ch := sc.readNext()
- switch ch {
- case '\n', '\r':
- sc.Newline(ch)
- ch = int('\n')
- case EOF:
- sc.Pos.Line = EOF
- sc.Pos.Column = 0
- default:
- sc.Pos.Column++
- }
- return ch
- }
- func (sc *Scanner) Peek() int {
- ch := sc.readNext()
- if ch != EOF {
- sc.reader.UnreadByte()
- }
- return ch
- }
- func (sc *Scanner) skipWhiteSpace(whitespace int64) int {
- ch := sc.Next()
- for ; whitespace&(1<<uint(ch)) != 0; ch = sc.Next() {
- }
- return ch
- }
- func (sc *Scanner) skipComments(ch int) error {
- // multiline comment
- if sc.Peek() == '[' {
- ch = sc.Next()
- if sc.Peek() == '[' || sc.Peek() == '=' {
- var buf bytes.Buffer
- if err := sc.scanMultilineString(sc.Next(), &buf); err != nil {
- return sc.Error(buf.String(), "invalid multiline comment")
- }
- return nil
- }
- }
- for {
- if ch == '\n' || ch == '\r' || ch < 0 {
- break
- }
- ch = sc.Next()
- }
- return nil
- }
- func (sc *Scanner) scanIdent(ch int, buf *bytes.Buffer) error {
- writeChar(buf, ch)
- for isIdent(sc.Peek(), 1) {
- writeChar(buf, sc.Next())
- }
- return nil
- }
- func (sc *Scanner) scanDecimal(ch int, buf *bytes.Buffer) error {
- writeChar(buf, ch)
- for isDecimal(sc.Peek()) {
- writeChar(buf, sc.Next())
- }
- return nil
- }
- func (sc *Scanner) scanNumber(ch int, buf *bytes.Buffer) error {
- if ch == '0' { // octal
- if sc.Peek() == 'x' || sc.Peek() == 'X' {
- writeChar(buf, ch)
- writeChar(buf, sc.Next())
- hasvalue := false
- for isDigit(sc.Peek()) {
- writeChar(buf, sc.Next())
- hasvalue = true
- }
- if !hasvalue {
- return sc.Error(buf.String(), "illegal hexadecimal number")
- }
- return nil
- } else if sc.Peek() != '.' && isDecimal(sc.Peek()) {
- ch = sc.Next()
- }
- }
- sc.scanDecimal(ch, buf)
- if sc.Peek() == '.' {
- sc.scanDecimal(sc.Next(), buf)
- }
- if ch = sc.Peek(); ch == 'e' || ch == 'E' {
- writeChar(buf, sc.Next())
- if ch = sc.Peek(); ch == '-' || ch == '+' {
- writeChar(buf, sc.Next())
- }
- sc.scanDecimal(sc.Next(), buf)
- }
- return nil
- }
- func (sc *Scanner) scanString(quote int, buf *bytes.Buffer) error {
- ch := sc.Next()
- for ch != quote {
- if ch == '\n' || ch == '\r' || ch < 0 {
- return sc.Error(buf.String(), "unterminated string")
- }
- if ch == '\\' {
- if err := sc.scanEscape(ch, buf); err != nil {
- return err
- }
- } else {
- writeChar(buf, ch)
- }
- ch = sc.Next()
- }
- return nil
- }
- func (sc *Scanner) scanEscape(ch int, buf *bytes.Buffer) error {
- ch = sc.Next()
- switch ch {
- case 'a':
- buf.WriteByte('\a')
- case 'b':
- buf.WriteByte('\b')
- case 'f':
- buf.WriteByte('\f')
- case 'n':
- buf.WriteByte('\n')
- case 'r':
- buf.WriteByte('\r')
- case 't':
- buf.WriteByte('\t')
- case 'v':
- buf.WriteByte('\v')
- case '\\':
- buf.WriteByte('\\')
- case '"':
- buf.WriteByte('"')
- case '\'':
- buf.WriteByte('\'')
- case '\n':
- buf.WriteByte('\n')
- case '\r':
- buf.WriteByte('\n')
- sc.Newline('\r')
- default:
- if '0' <= ch && ch <= '9' {
- bytes := []byte{byte(ch)}
- for i := 0; i < 2 && isDecimal(sc.Peek()); i++ {
- bytes = append(bytes, byte(sc.Next()))
- }
- val, _ := strconv.ParseInt(string(bytes), 10, 32)
- writeChar(buf, int(val))
- } else {
- buf.WriteByte('\\')
- writeChar(buf, ch)
- return sc.Error(buf.String(), "Invalid escape sequence")
- }
- }
- return nil
- }
- func (sc *Scanner) countSep(ch int) (int, int) {
- count := 0
- for ; ch == '='; count = count + 1 {
- ch = sc.Next()
- }
- return count, ch
- }
- func (sc *Scanner) scanMultilineString(ch int, buf *bytes.Buffer) error {
- var count1, count2 int
- count1, ch = sc.countSep(ch)
- if ch != '[' {
- return sc.Error(string(ch), "invalid multiline string")
- }
- ch = sc.Next()
- if ch == '\n' || ch == '\r' {
- ch = sc.Next()
- }
- for {
- if ch < 0 {
- return sc.Error(buf.String(), "unterminated multiline string")
- } else if ch == ']' {
- count2, ch = sc.countSep(sc.Next())
- if count1 == count2 && ch == ']' {
- goto finally
- }
- buf.WriteByte(']')
- buf.WriteString(strings.Repeat("=", count2))
- continue
- }
- writeChar(buf, ch)
- ch = sc.Next()
- }
- finally:
- return nil
- }
- var reservedWords = map[string]int{
- "and": TAnd, "break": TBreak, "do": TDo, "else": TElse, "elseif": TElseIf,
- "end": TEnd, "false": TFalse, "for": TFor, "function": TFunction,
- "if": TIf, "in": TIn, "local": TLocal, "nil": TNil, "not": TNot, "or": TOr,
- "return": TReturn, "repeat": TRepeat, "then": TThen, "true": TTrue,
- "until": TUntil, "while": TWhile}
- func (sc *Scanner) Scan(lexer *Lexer) (ast.Token, error) {
- redo:
- var err error
- tok := ast.Token{}
- newline := false
- ch := sc.skipWhiteSpace(whitespace1)
- if ch == '\n' || ch == '\r' {
- newline = true
- ch = sc.skipWhiteSpace(whitespace2)
- }
- if ch == '(' {
- lexer.PNewLine = newline
- }
- var _buf bytes.Buffer
- buf := &_buf
- tok.Pos = sc.Pos
- switch {
- case isIdent(ch, 0):
- tok.Type = TIdent
- err = sc.scanIdent(ch, buf)
- tok.Str = buf.String()
- if err != nil {
- goto finally
- }
- if typ, ok := reservedWords[tok.Str]; ok {
- tok.Type = typ
- }
- case isDecimal(ch):
- tok.Type = TNumber
- err = sc.scanNumber(ch, buf)
- tok.Str = buf.String()
- default:
- switch ch {
- case EOF:
- tok.Type = EOF
- case '-':
- if sc.Peek() == '-' {
- err = sc.skipComments(sc.Next())
- if err != nil {
- goto finally
- }
- goto redo
- } else {
- tok.Type = ch
- tok.Str = string(ch)
- }
- case '"', '\'':
- tok.Type = TString
- err = sc.scanString(ch, buf)
- tok.Str = buf.String()
- case '[':
- if c := sc.Peek(); c == '[' || c == '=' {
- tok.Type = TString
- err = sc.scanMultilineString(sc.Next(), buf)
- tok.Str = buf.String()
- } else {
- tok.Type = ch
- tok.Str = string(ch)
- }
- case '=':
- if sc.Peek() == '=' {
- tok.Type = TEqeq
- tok.Str = "=="
- sc.Next()
- } else {
- tok.Type = ch
- tok.Str = string(ch)
- }
- case '~':
- if sc.Peek() == '=' {
- tok.Type = TNeq
- tok.Str = "~="
- sc.Next()
- } else {
- err = sc.Error("~", "Invalid '~' token")
- }
- case '<':
- if sc.Peek() == '=' {
- tok.Type = TLte
- tok.Str = "<="
- sc.Next()
- } else {
- tok.Type = ch
- tok.Str = string(ch)
- }
- case '>':
- if sc.Peek() == '=' {
- tok.Type = TGte
- tok.Str = ">="
- sc.Next()
- } else {
- tok.Type = ch
- tok.Str = string(ch)
- }
- case '.':
- ch2 := sc.Peek()
- switch {
- case isDecimal(ch2):
- tok.Type = TNumber
- err = sc.scanNumber(ch, buf)
- tok.Str = buf.String()
- case ch2 == '.':
- writeChar(buf, ch)
- writeChar(buf, sc.Next())
- if sc.Peek() == '.' {
- writeChar(buf, sc.Next())
- tok.Type = T3Comma
- } else {
- tok.Type = T2Comma
- }
- default:
- tok.Type = '.'
- }
- tok.Str = buf.String()
- case '+', '*', '/', '%', '^', '#', '(', ')', '{', '}', ']', ';', ':', ',':
- tok.Type = ch
- tok.Str = string(ch)
- default:
- writeChar(buf, ch)
- err = sc.Error(buf.String(), "Invalid token")
- goto finally
- }
- }
- finally:
- tok.Name = TokenName(int(tok.Type))
- return tok, err
- }
- // yacc interface {{{
- type Lexer struct {
- scanner *Scanner
- Stmts []ast.Stmt
- PNewLine bool
- Token ast.Token
- }
- func (lx *Lexer) Lex(lval *yySymType) int {
- tok, err := lx.scanner.Scan(lx)
- if err != nil {
- panic(err)
- }
- if tok.Type < 0 {
- return 0
- }
- lval.token = tok
- lx.Token = tok
- return int(tok.Type)
- }
- func (lx *Lexer) Error(message string) {
- panic(lx.scanner.Error(lx.Token.Str, message))
- }
- func (lx *Lexer) TokenError(tok ast.Token, message string) {
- panic(lx.scanner.TokenError(tok, message))
- }
- func Parse(reader io.Reader, name string) (chunk []ast.Stmt, err error) {
- lexer := &Lexer{NewScanner(reader, name), nil, false, ast.Token{Str: ""}}
- chunk = nil
- defer func() {
- if e := recover(); e != nil {
- err, _ = e.(error)
- }
- }()
- yyParse(lexer)
- chunk = lexer.Stmts
- return
- }
- // }}}
- // Dump {{{
// isInlineDumpNode reports whether rv is printed on the same line as its
// field name. Structs, slices, interfaces and pointers are not; every other
// kind is.
func isInlineDumpNode(rv reflect.Value) bool {
	switch rv.Kind() {
	case reflect.Struct, reflect.Slice, reflect.Interface, reflect.Ptr:
		return false
	default:
		return true
	}
}

// dump renders node as indented text. level is the nesting depth and s is
// the string repeated once per level to build the indentation.
//
//   - A nil interface or nil pointer is rendered as "<nil>".
//   - A slice is rendered as its elements, one per line, or "<empty>".
//   - A pointer to a struct is rendered as "- Node$<TypeName>" followed by
//     its fields; fields whose name contains "Base" are omitted. A struct
//     with no remaining fields is rendered as "<empty>", and one with a
//     single inline field is rendered on one line.
//   - A pointer to anything else is rendered as the value it points to.
//   - Any other value is rendered with fmt.Sprint.
//
// Nil pointers and pointers to non-struct values used to cause a panic.
func dump(node interface{}, level int, s string) string {
	indent := strings.Repeat(s, level)
	rt := reflect.TypeOf(node)
	if rt == nil {
		return indent + "<nil>"
	}
	rv := reflect.ValueOf(node)

	var lines []string
	switch rt.Kind() {
	case reflect.Slice:
		if rv.Len() == 0 {
			return indent + "<empty>"
		}
		for i := 0; i < rv.Len(); i++ {
			lines = append(lines, dump(rv.Index(i).Interface(), level, s))
		}

	case reflect.Ptr:
		if rv.IsNil() {
			return indent + "<nil>"
		}
		elem, elemType := rv.Elem(), rt.Elem()
		if elemType.Kind() != reflect.Struct {
			// Field access below is only valid for structs.
			return dump(elem.Interface(), level, s)
		}

		// Collect the indices of the fields that are shown.
		var fields []int
		for i := 0; i < elemType.NumField(); i++ {
			if strings.Contains(elemType.Field(i).Name, "Base") {
				continue
			}
			fields = append(fields, i)
		}

		switch {
		case len(fields) == 0:
			return indent + "<empty>"
		case len(fields) == 1 && isInlineDumpNode(elem.Field(fields[0])):
			only := elem.Field(fields[0])
			lines = append(lines, indent+"- Node$"+elemType.Name()+": "+dump(only.Interface(), 0, s))
		default:
			lines = append(lines, indent+"- Node$"+elemType.Name())
			fieldIndent := strings.Repeat(s, level+1)
			for _, i := range fields {
				name := elemType.Field(i).Name
				value := elem.Field(i)
				if isInlineDumpNode(value) {
					lines = append(lines, fieldIndent+name+": "+dump(value.Interface(), 0, s))
				} else {
					lines = append(lines, fieldIndent+name+": ", dump(value.Interface(), level+2, s))
				}
			}
		}

	default:
		lines = append(lines, indent+fmt.Sprint(node))
	}
	return strings.Join(lines, "\n")
}
- func Dump(chunk []ast.Stmt) string {
- return dump(chunk, 0, " ")
- }
// }}}
|