123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358 |
- // A modified version of Go's JSON implementation.
- // Copyright 2010 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package json
- import (
- "strconv"
- "unicode"
- "unicode/utf16"
- "unicode/utf8"
- "github.com/d5/tengo/v2"
- )
- // Decode parses the JSON-encoded data and returns the result object.
- func Decode(data []byte) (tengo.Object, error) {
- var d decodeState
- err := checkValid(data, &d.scan)
- if err != nil {
- return nil, err
- }
- d.init(data)
- d.scan.reset()
- d.scanWhile(scanSkipSpace)
- return d.value()
- }
- // decodeState represents the state while decoding a JSON value.
- type decodeState struct {
- data []byte
- off int // next read offset in data
- opcode int // last read result
- scan scanner
- }
- // readIndex returns the position of the last byte read.
- func (d *decodeState) readIndex() int {
- return d.off - 1
- }
// phasePanicMsg is the panic value raised when the decoder sees a scan code
// or literal that should be impossible at that point of the decoding pass.
const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?"
- func (d *decodeState) init(data []byte) *decodeState {
- d.data = data
- d.off = 0
- return d
- }
- // scanNext processes the byte at d.data[d.off].
- func (d *decodeState) scanNext() {
- if d.off < len(d.data) {
- d.opcode = d.scan.step(&d.scan, d.data[d.off])
- d.off++
- } else {
- d.opcode = d.scan.eof()
- d.off = len(d.data) + 1 // mark processed EOF with len+1
- }
- }
- // scanWhile processes bytes in d.data[d.off:] until it
- // receives a scan code not equal to op.
- func (d *decodeState) scanWhile(op int) {
- s, data, i := &d.scan, d.data, d.off
- for i < len(data) {
- newOp := s.step(s, data[i])
- i++
- if newOp != op {
- d.opcode = newOp
- d.off = i
- return
- }
- }
- d.off = len(data) + 1 // mark processed EOF with len+1
- d.opcode = d.scan.eof()
- }
- func (d *decodeState) value() (tengo.Object, error) {
- switch d.opcode {
- default:
- panic(phasePanicMsg)
- case scanBeginArray:
- o, err := d.array()
- if err != nil {
- return nil, err
- }
- d.scanNext()
- return o, nil
- case scanBeginObject:
- o, err := d.object()
- if err != nil {
- return nil, err
- }
- d.scanNext()
- return o, nil
- case scanBeginLiteral:
- return d.literal()
- }
- }
- func (d *decodeState) array() (tengo.Object, error) {
- var arr []tengo.Object
- for {
- // Look ahead for ] - can only happen on first iteration.
- d.scanWhile(scanSkipSpace)
- if d.opcode == scanEndArray {
- break
- }
- o, err := d.value()
- if err != nil {
- return nil, err
- }
- arr = append(arr, o)
- // Next token must be , or ].
- if d.opcode == scanSkipSpace {
- d.scanWhile(scanSkipSpace)
- }
- if d.opcode == scanEndArray {
- break
- }
- if d.opcode != scanArrayValue {
- panic(phasePanicMsg)
- }
- }
- return &tengo.Array{Value: arr}, nil
- }
- func (d *decodeState) object() (tengo.Object, error) {
- m := make(map[string]tengo.Object)
- for {
- // Read opening " of string key or closing }.
- d.scanWhile(scanSkipSpace)
- if d.opcode == scanEndObject {
- // closing } - can only happen on first iteration.
- break
- }
- if d.opcode != scanBeginLiteral {
- panic(phasePanicMsg)
- }
- // Read string key.
- start := d.readIndex()
- d.scanWhile(scanContinue)
- item := d.data[start:d.readIndex()]
- key, ok := unquote(item)
- if !ok {
- panic(phasePanicMsg)
- }
- // Read : before value.
- if d.opcode == scanSkipSpace {
- d.scanWhile(scanSkipSpace)
- }
- if d.opcode != scanObjectKey {
- panic(phasePanicMsg)
- }
- d.scanWhile(scanSkipSpace)
- // Read value.
- o, err := d.value()
- if err != nil {
- return nil, err
- }
- m[key] = o
- // Next token must be , or }.
- if d.opcode == scanSkipSpace {
- d.scanWhile(scanSkipSpace)
- }
- if d.opcode == scanEndObject {
- break
- }
- if d.opcode != scanObjectValue {
- panic(phasePanicMsg)
- }
- }
- return &tengo.Map{Value: m}, nil
- }
- func (d *decodeState) literal() (tengo.Object, error) {
- // All bytes inside literal return scanContinue op code.
- start := d.readIndex()
- d.scanWhile(scanContinue)
- item := d.data[start:d.readIndex()]
- switch c := item[0]; c {
- case 'n': // null
- return tengo.UndefinedValue, nil
- case 't', 'f': // true, false
- if c == 't' {
- return tengo.TrueValue, nil
- }
- return tengo.FalseValue, nil
- case '"': // string
- s, ok := unquote(item)
- if !ok {
- panic(phasePanicMsg)
- }
- return &tengo.String{Value: s}, nil
- default: // number
- if c != '-' && (c < '0' || c > '9') {
- panic(phasePanicMsg)
- }
- n, _ := strconv.ParseFloat(string(item), 10)
- return tengo.Float{Value: n}, nil
- }
- }
// getu4 decodes a \uXXXX escape at the beginning of s and returns the value
// of its four hex digits. It returns -1 if s is shorter than six bytes,
// does not start with \u, or contains a byte that is not a hex digit.
func getu4(s []byte) rune {
	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
		return -1
	}

	var val rune
	for i := 2; i < 6; i++ {
		var nibble byte
		switch ch := s[i]; {
		case ch >= '0' && ch <= '9':
			nibble = ch - '0'
		case ch >= 'a' && ch <= 'f':
			nibble = ch - 'a' + 10
		case ch >= 'A' && ch <= 'F':
			nibble = ch - 'A' + 10
		default:
			return -1
		}
		// Each digit contributes four bits, most significant first.
		val = val<<4 | rune(nibble)
	}
	return val
}
- // unquote converts a quoted JSON string literal s into an actual string t.
- // The rules are different than for Go, so cannot use strconv.Unquote.
- func unquote(s []byte) (t string, ok bool) {
- s, ok = unquoteBytes(s)
- t = string(s)
- return
- }
- func unquoteBytes(s []byte) (t []byte, ok bool) {
- if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
- return
- }
- s = s[1 : len(s)-1]
- // Check for unusual characters. If there are none, then no unquoting is
- // needed, so return a slice of the original bytes.
- r := 0
- for r < len(s) {
- c := s[r]
- if c == '\\' || c == '"' || c < ' ' {
- break
- }
- if c < utf8.RuneSelf {
- r++
- continue
- }
- rr, size := utf8.DecodeRune(s[r:])
- if rr == utf8.RuneError && size == 1 {
- break
- }
- r += size
- }
- if r == len(s) {
- return s, true
- }
- b := make([]byte, len(s)+2*utf8.UTFMax)
- w := copy(b, s[0:r])
- for r < len(s) {
- // Out of room? Can only happen if s is full of
- // malformed UTF-8 and we're replacing each
- // byte with RuneError.
- if w >= len(b)-2*utf8.UTFMax {
- nb := make([]byte, (len(b)+utf8.UTFMax)*2)
- copy(nb, b[0:w])
- b = nb
- }
- switch c := s[r]; {
- case c == '\\':
- r++
- if r >= len(s) {
- return
- }
- switch s[r] {
- default:
- return
- case '"', '\\', '/', '\'':
- b[w] = s[r]
- r++
- w++
- case 'b':
- b[w] = '\b'
- r++
- w++
- case 'f':
- b[w] = '\f'
- r++
- w++
- case 'n':
- b[w] = '\n'
- r++
- w++
- case 'r':
- b[w] = '\r'
- r++
- w++
- case 't':
- b[w] = '\t'
- r++
- w++
- case 'u':
- r--
- rr := getu4(s[r:])
- if rr < 0 {
- return
- }
- r += 6
- if utf16.IsSurrogate(rr) {
- rr1 := getu4(s[r:])
- dec := utf16.DecodeRune(rr, rr1)
- if dec != unicode.ReplacementChar {
- // A valid pair; consume.
- r += 6
- w += utf8.EncodeRune(b[w:], dec)
- break
- }
- // Invalid surrogate; fall back to replacement rune.
- rr = unicode.ReplacementChar
- }
- w += utf8.EncodeRune(b[w:], rr)
- }
- // Quote, control characters are invalid.
- case c == '"', c < ' ':
- return
- // ASCII
- case c < utf8.RuneSelf:
- b[w] = c
- r++
- w++
- // Coerce to well-formed UTF-8.
- default:
- rr, size := utf8.DecodeRune(s[r:])
- r += size
- w += utf8.EncodeRune(b[w:], rr)
- }
- }
- return b[0:w], true
- }
|