48308d45d9
* fix json decode
366 lines
7.2 KiB
Go
366 lines
7.2 KiB
Go
// A modified version of Go's JSON implementation.
|
|
|
|
// Copyright 2010 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package json
|
|
|
|
import (
|
|
"strconv"
|
|
"unicode"
|
|
"unicode/utf16"
|
|
"unicode/utf8"
|
|
|
|
"github.com/d5/tengo/v2"
|
|
)
|
|
|
|
// Decode parses the JSON-encoded data and returns the result object.
|
|
func Decode(data []byte) (tengo.Object, error) {
|
|
var d decodeState
|
|
err := checkValid(data, &d.scan)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
d.init(data)
|
|
d.scan.reset()
|
|
d.scanWhile(scanSkipSpace)
|
|
return d.value()
|
|
}
|
|
|
|
// decodeState represents the state while decoding a JSON value.
|
|
type decodeState struct {
|
|
data []byte
|
|
off int // next read offset in data
|
|
opcode int // last read result
|
|
scan scanner
|
|
}
|
|
|
|
// readIndex returns the position of the last byte read.
|
|
func (d *decodeState) readIndex() int {
|
|
return d.off - 1
|
|
}
|
|
|
|
// phasePanicMsg is the message the decoder panics with when the scanner
// yields an opcode, or a string literal fails to unquote, at a point where
// the decoder does not expect it.
const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?"
|
|
|
|
func (d *decodeState) init(data []byte) *decodeState {
|
|
d.data = data
|
|
d.off = 0
|
|
return d
|
|
}
|
|
|
|
// scanNext processes the byte at d.data[d.off].
|
|
func (d *decodeState) scanNext() {
|
|
if d.off < len(d.data) {
|
|
d.opcode = d.scan.step(&d.scan, d.data[d.off])
|
|
d.off++
|
|
} else {
|
|
d.opcode = d.scan.eof()
|
|
d.off = len(d.data) + 1 // mark processed EOF with len+1
|
|
}
|
|
}
|
|
|
|
// scanWhile processes bytes in d.data[d.off:] until it
|
|
// receives a scan code not equal to op.
|
|
func (d *decodeState) scanWhile(op int) (isFloat bool) {
|
|
s, data, i := &d.scan, d.data, d.off
|
|
for i < len(data) {
|
|
if data[i] == '.' || data[i] == 'e' || data[i] == 'E' {
|
|
isFloat = true
|
|
}
|
|
newOp := s.step(s, data[i])
|
|
i++
|
|
if newOp != op {
|
|
d.opcode = newOp
|
|
d.off = i
|
|
return
|
|
}
|
|
}
|
|
|
|
d.off = len(data) + 1 // mark processed EOF with len+1
|
|
d.opcode = d.scan.eof()
|
|
return
|
|
}
|
|
|
|
func (d *decodeState) value() (tengo.Object, error) {
|
|
switch d.opcode {
|
|
default:
|
|
panic(phasePanicMsg)
|
|
case scanBeginArray:
|
|
o, err := d.array()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
d.scanNext()
|
|
return o, nil
|
|
case scanBeginObject:
|
|
o, err := d.object()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
d.scanNext()
|
|
return o, nil
|
|
case scanBeginLiteral:
|
|
return d.literal()
|
|
}
|
|
}
|
|
|
|
func (d *decodeState) array() (tengo.Object, error) {
|
|
var arr []tengo.Object
|
|
for {
|
|
// Look ahead for ] - can only happen on first iteration.
|
|
d.scanWhile(scanSkipSpace)
|
|
if d.opcode == scanEndArray {
|
|
break
|
|
}
|
|
o, err := d.value()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
arr = append(arr, o)
|
|
|
|
// Next token must be , or ].
|
|
if d.opcode == scanSkipSpace {
|
|
d.scanWhile(scanSkipSpace)
|
|
}
|
|
if d.opcode == scanEndArray {
|
|
break
|
|
}
|
|
if d.opcode != scanArrayValue {
|
|
panic(phasePanicMsg)
|
|
}
|
|
}
|
|
return &tengo.Array{Value: arr}, nil
|
|
}
|
|
|
|
func (d *decodeState) object() (tengo.Object, error) {
|
|
m := make(map[string]tengo.Object)
|
|
for {
|
|
// Read opening " of string key or closing }.
|
|
d.scanWhile(scanSkipSpace)
|
|
if d.opcode == scanEndObject {
|
|
// closing } - can only happen on first iteration.
|
|
break
|
|
}
|
|
if d.opcode != scanBeginLiteral {
|
|
panic(phasePanicMsg)
|
|
}
|
|
|
|
// Read string key.
|
|
start := d.readIndex()
|
|
d.scanWhile(scanContinue)
|
|
item := d.data[start:d.readIndex()]
|
|
key, ok := unquote(item)
|
|
if !ok {
|
|
panic(phasePanicMsg)
|
|
}
|
|
|
|
// Read : before value.
|
|
if d.opcode == scanSkipSpace {
|
|
d.scanWhile(scanSkipSpace)
|
|
}
|
|
if d.opcode != scanObjectKey {
|
|
panic(phasePanicMsg)
|
|
}
|
|
d.scanWhile(scanSkipSpace)
|
|
|
|
// Read value.
|
|
o, err := d.value()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
m[key] = o
|
|
|
|
// Next token must be , or }.
|
|
if d.opcode == scanSkipSpace {
|
|
d.scanWhile(scanSkipSpace)
|
|
}
|
|
if d.opcode == scanEndObject {
|
|
break
|
|
}
|
|
if d.opcode != scanObjectValue {
|
|
panic(phasePanicMsg)
|
|
}
|
|
}
|
|
return &tengo.Map{Value: m}, nil
|
|
}
|
|
|
|
func (d *decodeState) literal() (tengo.Object, error) {
|
|
// All bytes inside literal return scanContinue op code.
|
|
start := d.readIndex()
|
|
isFloat := d.scanWhile(scanContinue)
|
|
|
|
item := d.data[start:d.readIndex()]
|
|
|
|
switch c := item[0]; c {
|
|
case 'n': // null
|
|
return tengo.UndefinedValue, nil
|
|
|
|
case 't', 'f': // true, false
|
|
if c == 't' {
|
|
return tengo.TrueValue, nil
|
|
}
|
|
return tengo.FalseValue, nil
|
|
|
|
case '"': // string
|
|
s, ok := unquote(item)
|
|
if !ok {
|
|
panic(phasePanicMsg)
|
|
}
|
|
return &tengo.String{Value: s}, nil
|
|
|
|
default: // number
|
|
if c != '-' && (c < '0' || c > '9') {
|
|
panic(phasePanicMsg)
|
|
}
|
|
if isFloat {
|
|
n, _ := strconv.ParseFloat(string(item), 10)
|
|
return &tengo.Float{Value: n}, nil
|
|
}
|
|
n, _ := strconv.ParseInt(string(item), 10, 64)
|
|
return &tengo.Int{Value: n}, nil
|
|
}
|
|
}
|
|
|
|
// getu4 reads a \uXXXX escape from the start of s and returns the value of
// its four hexadecimal digits. It returns -1 if s is shorter than six bytes,
// does not begin with `\u`, or contains a non-hex character among the four
// digits. Bytes after the sixth are ignored.
func getu4(s []byte) rune {
	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
		return -1
	}

	var val rune
	for i := 2; i < 6; i++ {
		var digit byte
		switch b := s[i]; {
		case b >= '0' && b <= '9':
			digit = b - '0'
		case b >= 'a' && b <= 'f':
			digit = b - 'a' + 10
		case b >= 'A' && b <= 'F':
			digit = b - 'A' + 10
		default:
			return -1
		}
		// Shift in the next hex digit (equivalent to val*16 + digit).
		val = val<<4 | rune(digit)
	}
	return val
}
|
|
|
|
// unquote converts a quoted JSON string literal s into an actual string t.
|
|
// The rules are different than for Go, so cannot use strconv.Unquote.
|
|
func unquote(s []byte) (t string, ok bool) {
|
|
s, ok = unquoteBytes(s)
|
|
t = string(s)
|
|
return
|
|
}
|
|
|
|
func unquoteBytes(s []byte) (t []byte, ok bool) {
|
|
if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
|
|
return
|
|
}
|
|
s = s[1 : len(s)-1]
|
|
|
|
// Check for unusual characters. If there are none, then no unquoting is
|
|
// needed, so return a slice of the original bytes.
|
|
r := 0
|
|
for r < len(s) {
|
|
c := s[r]
|
|
if c == '\\' || c == '"' || c < ' ' {
|
|
break
|
|
}
|
|
if c < utf8.RuneSelf {
|
|
r++
|
|
continue
|
|
}
|
|
rr, size := utf8.DecodeRune(s[r:])
|
|
if rr == utf8.RuneError && size == 1 {
|
|
break
|
|
}
|
|
r += size
|
|
}
|
|
if r == len(s) {
|
|
return s, true
|
|
}
|
|
|
|
b := make([]byte, len(s)+2*utf8.UTFMax)
|
|
w := copy(b, s[0:r])
|
|
for r < len(s) {
|
|
// Out of room? Can only happen if s is full of
|
|
// malformed UTF-8 and we're replacing each
|
|
// byte with RuneError.
|
|
if w >= len(b)-2*utf8.UTFMax {
|
|
nb := make([]byte, (len(b)+utf8.UTFMax)*2)
|
|
copy(nb, b[0:w])
|
|
b = nb
|
|
}
|
|
switch c := s[r]; {
|
|
case c == '\\':
|
|
r++
|
|
if r >= len(s) {
|
|
return
|
|
}
|
|
switch s[r] {
|
|
default:
|
|
return
|
|
case '"', '\\', '/', '\'':
|
|
b[w] = s[r]
|
|
r++
|
|
w++
|
|
case 'b':
|
|
b[w] = '\b'
|
|
r++
|
|
w++
|
|
case 'f':
|
|
b[w] = '\f'
|
|
r++
|
|
w++
|
|
case 'n':
|
|
b[w] = '\n'
|
|
r++
|
|
w++
|
|
case 'r':
|
|
b[w] = '\r'
|
|
r++
|
|
w++
|
|
case 't':
|
|
b[w] = '\t'
|
|
r++
|
|
w++
|
|
case 'u':
|
|
r--
|
|
rr := getu4(s[r:])
|
|
if rr < 0 {
|
|
return
|
|
}
|
|
r += 6
|
|
if utf16.IsSurrogate(rr) {
|
|
rr1 := getu4(s[r:])
|
|
dec := utf16.DecodeRune(rr, rr1)
|
|
if dec != unicode.ReplacementChar {
|
|
// A valid pair; consume.
|
|
r += 6
|
|
w += utf8.EncodeRune(b[w:], dec)
|
|
break
|
|
}
|
|
// Invalid surrogate; fall back to replacement rune.
|
|
rr = unicode.ReplacementChar
|
|
}
|
|
w += utf8.EncodeRune(b[w:], rr)
|
|
}
|
|
// Quote, control characters are invalid.
|
|
case c == '"', c < ' ':
|
|
return
|
|
// ASCII
|
|
case c < utf8.RuneSelf:
|
|
b[w] = c
|
|
r++
|
|
w++
|
|
// Coerce to well-formed UTF-8.
|
|
default:
|
|
rr, size := utf8.DecodeRune(s[r:])
|
|
r += size
|
|
w += utf8.EncodeRune(b[w:], rr)
|
|
}
|
|
}
|
|
return b[0:w], true
|
|
}
|