xgo/stdlib/json/decode.go
Daniel 2cde0eaeea
json module faster implementation (#173)
* json module faster implementation

* add some decoding error test
2019-04-06 05:25:23 -07:00

374 lines
7.1 KiB
Go

// A modified version of Go's JSON implementation.
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"strconv"
"unicode"
"unicode/utf16"
"unicode/utf8"
"github.com/d5/tengo/objects"
)
// Decode parses the JSON-encoded data and returns the result object.
func Decode(data []byte) (objects.Object, error) {
var d decodeState
err := checkValid(data, &d.scan)
if err != nil {
return nil, err
}
d.init(data)
d.scan.reset()
d.scanWhile(scanSkipSpace)
return d.value()
}
// decodeState represents the state while decoding a JSON value.
type decodeState struct {
data []byte
off int // next read offset in data
opcode int // last read result
scan scanner
}
// readIndex returns the position of the last byte read.
func (d *decodeState) readIndex() int {
return d.off - 1
}
const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?"
func (d *decodeState) init(data []byte) *decodeState {
d.data = data
d.off = 0
return d
}
// scanNext processes the byte at d.data[d.off].
func (d *decodeState) scanNext() {
if d.off < len(d.data) {
d.opcode = d.scan.step(&d.scan, d.data[d.off])
d.off++
} else {
d.opcode = d.scan.eof()
d.off = len(d.data) + 1 // mark processed EOF with len+1
}
}
// scanWhile processes bytes in d.data[d.off:] until it
// receives a scan code not equal to op.
func (d *decodeState) scanWhile(op int) {
s, data, i := &d.scan, d.data, d.off
for i < len(data) {
newOp := s.step(s, data[i])
i++
if newOp != op {
d.opcode = newOp
d.off = i
return
}
}
d.off = len(data) + 1 // mark processed EOF with len+1
d.opcode = d.scan.eof()
}
func (d *decodeState) value() (objects.Object, error) {
switch d.opcode {
default:
panic(phasePanicMsg)
case scanBeginArray:
o, err := d.array()
if err != nil {
return nil, err
}
d.scanNext()
return o, nil
case scanBeginObject:
o, err := d.object()
if err != nil {
return nil, err
}
d.scanNext()
return o, nil
case scanBeginLiteral:
return d.literal()
}
}
func (d *decodeState) array() (objects.Object, error) {
var arr []objects.Object
for {
// Look ahead for ] - can only happen on first iteration.
d.scanWhile(scanSkipSpace)
if d.opcode == scanEndArray {
break
}
o, err := d.value()
if err != nil {
return nil, err
}
arr = append(arr, o)
// Next token must be , or ].
if d.opcode == scanSkipSpace {
d.scanWhile(scanSkipSpace)
}
if d.opcode == scanEndArray {
break
}
if d.opcode != scanArrayValue {
panic(phasePanicMsg)
}
}
return &objects.Array{Value: arr}, nil
}
func (d *decodeState) object() (objects.Object, error) {
m := make(map[string]objects.Object)
for {
// Read opening " of string key or closing }.
d.scanWhile(scanSkipSpace)
if d.opcode == scanEndObject {
// closing } - can only happen on first iteration.
break
}
if d.opcode != scanBeginLiteral {
panic(phasePanicMsg)
}
// Read string key.
start := d.readIndex()
d.scanWhile(scanContinue)
item := d.data[start:d.readIndex()]
key, ok := unquote(item)
if !ok {
panic(phasePanicMsg)
}
// Read : before value.
if d.opcode == scanSkipSpace {
d.scanWhile(scanSkipSpace)
}
if d.opcode != scanObjectKey {
panic(phasePanicMsg)
}
d.scanWhile(scanSkipSpace)
// Read value.
o, err := d.value()
if err != nil {
return nil, err
}
m[key] = o
// Next token must be , or }.
if d.opcode == scanSkipSpace {
d.scanWhile(scanSkipSpace)
}
if d.opcode == scanEndObject {
break
}
if d.opcode != scanObjectValue {
panic(phasePanicMsg)
}
}
return &objects.Map{Value: m}, nil
}
func (d *decodeState) literal() (objects.Object, error) {
// All bytes inside literal return scanContinue op code.
start := d.readIndex()
d.scanWhile(scanContinue)
item := d.data[start:d.readIndex()]
switch c := item[0]; c {
case 'n': // null
return objects.UndefinedValue, nil
case 't', 'f': // true, false
if c == 't' {
return objects.TrueValue, nil
}
return objects.FalseValue, nil
case '"': // string
s, ok := unquote(item)
if !ok {
panic(phasePanicMsg)
}
return &objects.String{Value: s}, nil
default: // number
if c != '-' && (c < '0' || c > '9') {
panic(phasePanicMsg)
}
n, _ := strconv.ParseFloat(string(item), 10)
return &objects.Float{Value: n}, nil
}
}
// getu4 decodes \uXXXX from the beginning of s, returning the hex value,
// or it returns -1.
func getu4(s []byte) rune {
if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
return -1
}
var r rune
for _, c := range s[2:6] {
switch {
case '0' <= c && c <= '9':
c = c - '0'
case 'a' <= c && c <= 'f':
c = c - 'a' + 10
case 'A' <= c && c <= 'F':
c = c - 'A' + 10
default:
return -1
}
r = r*16 + rune(c)
}
return r
}
// unquote converts a quoted JSON string literal s into an actual string t.
// The rules are different than for Go, so cannot use strconv.Unquote.
func unquote(s []byte) (t string, ok bool) {
s, ok = unquoteBytes(s)
t = string(s)
return
}
func unquoteBytes(s []byte) (t []byte, ok bool) {
if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
return
}
s = s[1 : len(s)-1]
// Check for unusual characters. If there are none,
// then no unquoting is needed, so return a slice of the
// original bytes.
r := 0
for r < len(s) {
c := s[r]
if c == '\\' || c == '"' || c < ' ' {
break
}
if c < utf8.RuneSelf {
r++
continue
}
rr, size := utf8.DecodeRune(s[r:])
if rr == utf8.RuneError && size == 1 {
break
}
r += size
}
if r == len(s) {
return s, true
}
b := make([]byte, len(s)+2*utf8.UTFMax)
w := copy(b, s[0:r])
for r < len(s) {
// Out of room? Can only happen if s is full of
// malformed UTF-8 and we're replacing each
// byte with RuneError.
if w >= len(b)-2*utf8.UTFMax {
nb := make([]byte, (len(b)+utf8.UTFMax)*2)
copy(nb, b[0:w])
b = nb
}
switch c := s[r]; {
case c == '\\':
r++
if r >= len(s) {
return
}
switch s[r] {
default:
return
case '"', '\\', '/', '\'':
b[w] = s[r]
r++
w++
case 'b':
b[w] = '\b'
r++
w++
case 'f':
b[w] = '\f'
r++
w++
case 'n':
b[w] = '\n'
r++
w++
case 'r':
b[w] = '\r'
r++
w++
case 't':
b[w] = '\t'
r++
w++
case 'u':
r--
rr := getu4(s[r:])
if rr < 0 {
return
}
r += 6
if utf16.IsSurrogate(rr) {
rr1 := getu4(s[r:])
if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
// A valid pair; consume.
r += 6
w += utf8.EncodeRune(b[w:], dec)
break
}
// Invalid surrogate; fall back to replacement rune.
rr = unicode.ReplacementChar
}
w += utf8.EncodeRune(b[w:], rr)
}
// Quote, control characters are invalid.
case c == '"', c < ' ':
return
// ASCII
case c < utf8.RuneSelf:
b[w] = c
r++
w++
// Coerce to well-formed UTF-8.
default:
rr, size := utf8.DecodeRune(s[r:])
r += size
w += utf8.EncodeRune(b[w:], rr)
}
}
return b[0:w], true
}