json module faster implementation (#173)

* json module faster implementation

* add some decoding error test
This commit is contained in:
Daniel 2019-04-06 05:25:23 -07:00 committed by GitHub
parent 17a50b7c50
commit 2cde0eaeea
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 1327 additions and 54 deletions

View file

@ -6,5 +6,20 @@ json := import("json")
## Functions ## Functions
- `parse(v)`: Parses the JSON string and returns an object. - `decode(b string/bytes) => object`: Parses the JSON string and returns an object.
- `stringify(v)`: Returns the JSON string representation of the object. - `encode(o object) => bytes`: Returns the JSON string (bytes) of the object. Unlike Go's JSON package, this function does not HTML-escape texts, but, one can use `html_escape` function if needed.
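- `indent(b string/bytes, prefix string, indent string) => bytes`: Returns an indented form of input JSON bytes string. Each output line begins with `prefix`, followed by one copy of `indent` per nesting level.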
- `html_escape(b string/bytes) => bytes`: Returns an HTML-safe form of input JSON bytes string.
## Examples
```golang
json := import("json")
encoded := json.encode({a: 1, b: [2, 3, 4]}) // JSON-encoded bytes string
indented := json.indent(encoded, "", "  ") // indented form (no prefix, two-space indent)
html_safe := json.html_escape(encoded) // HTML escaped form
decoded := json.decode(encoded) // {a: 1, b: [2, 3, 4]}
```

View file

@ -1,35 +1,38 @@
package stdlib package stdlib
import ( import (
"encoding/json" "bytes"
gojson "encoding/json"
"github.com/d5/tengo"
"github.com/d5/tengo/objects" "github.com/d5/tengo/objects"
"github.com/d5/tengo/stdlib/json"
) )
var jsonModule = map[string]objects.Object{ var jsonModule = map[string]objects.Object{
"parse": &objects.UserFunction{Name: "parse", Value: jsonParse}, "decode": &objects.UserFunction{Name: "decode", Value: jsonDecode},
"stringify": &objects.UserFunction{Name: "stringify", Value: jsonStringify}, "encode": &objects.UserFunction{Name: "encode", Value: jsonEncode},
"indent": &objects.UserFunction{Name: "indent", Value: jsonIndent},
"html_escape": &objects.UserFunction{Name: "html_escape", Value: jsonHTMLEscape},
} }
func jsonParse(args ...objects.Object) (ret objects.Object, err error) { func jsonDecode(args ...objects.Object) (ret objects.Object, err error) {
if len(args) != 1 { if len(args) != 1 {
return nil, objects.ErrWrongNumArguments return nil, objects.ErrWrongNumArguments
} }
var target interface{}
switch o := args[0].(type) { switch o := args[0].(type) {
case *objects.Bytes: case *objects.Bytes:
err := json.Unmarshal(o.Value, &target) v, err := json.Decode(o.Value)
if err != nil { if err != nil {
return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil
} }
return v, nil
case *objects.String: case *objects.String:
err := json.Unmarshal([]byte(o.Value), &target) v, err := json.Decode([]byte(o.Value))
if err != nil { if err != nil {
return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil
} }
return v, nil
default: default:
return nil, objects.ErrInvalidArgumentType{ return nil, objects.ErrInvalidArgumentType{
Name: "first", Name: "first",
@ -37,33 +40,87 @@ func jsonParse(args ...objects.Object) (ret objects.Object, err error) {
Found: args[0].TypeName(), Found: args[0].TypeName(),
} }
} }
res, err := objects.FromInterface(target)
if err != nil {
return nil, err
}
return res, nil
} }
func jsonStringify(args ...objects.Object) (ret objects.Object, err error) { func jsonEncode(args ...objects.Object) (ret objects.Object, err error) {
if len(args) != 1 { if len(args) != 1 {
return nil, objects.ErrWrongNumArguments return nil, objects.ErrWrongNumArguments
} }
v := objects.ToInterface(args[0]) b, err := json.Encode(args[0])
if vErr, isErr := v.(error); isErr {
v = vErr.Error()
}
res, err := json.Marshal(v)
if err != nil { if err != nil {
return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil
} }
if len(res) > tengo.MaxBytesLen { return &objects.Bytes{Value: b}, nil
return nil, objects.ErrBytesLimit }
func jsonIndent(args ...objects.Object) (ret objects.Object, err error) {
if len(args) != 3 {
return nil, objects.ErrWrongNumArguments
} }
return &objects.String{Value: string(res)}, nil prefix, ok := objects.ToString(args[1])
if !ok {
return nil, objects.ErrInvalidArgumentType{
Name: "prefix",
Expected: "string(compatible)",
Found: args[1].TypeName(),
}
}
indent, ok := objects.ToString(args[2])
if !ok {
return nil, objects.ErrInvalidArgumentType{
Name: "indent",
Expected: "string(compatible)",
Found: args[2].TypeName(),
}
}
switch o := args[0].(type) {
case *objects.Bytes:
var dst bytes.Buffer
err := gojson.Indent(&dst, o.Value, prefix, indent)
if err != nil {
return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil
}
return &objects.Bytes{Value: dst.Bytes()}, nil
case *objects.String:
var dst bytes.Buffer
err := gojson.Indent(&dst, []byte(o.Value), prefix, indent)
if err != nil {
return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil
}
return &objects.Bytes{Value: dst.Bytes()}, nil
default:
return nil, objects.ErrInvalidArgumentType{
Name: "first",
Expected: "bytes/string",
Found: args[0].TypeName(),
}
}
}
func jsonHTMLEscape(args ...objects.Object) (ret objects.Object, err error) {
if len(args) != 1 {
return nil, objects.ErrWrongNumArguments
}
switch o := args[0].(type) {
case *objects.Bytes:
var dst bytes.Buffer
gojson.HTMLEscape(&dst, o.Value)
return &objects.Bytes{Value: dst.Bytes()}, nil
case *objects.String:
var dst bytes.Buffer
gojson.HTMLEscape(&dst, []byte(o.Value))
return &objects.Bytes{Value: dst.Bytes()}, nil
default:
return nil, objects.ErrInvalidArgumentType{
Name: "first",
Expected: "bytes/string",
Found: args[0].TypeName(),
}
}
} }

374
stdlib/json/decode.go Normal file
View file

@ -0,0 +1,374 @@
// A modified version of Go's JSON implementation.
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"strconv"
"unicode"
"unicode/utf16"
"unicode/utf8"
"github.com/d5/tengo/objects"
)
// Decode validates data as a single JSON document and converts it into the
// corresponding object. If the validation pass reports a syntax error, that
// error is returned and no decoding is attempted.
func Decode(data []byte) (objects.Object, error) {
	var state decodeState
	if err := checkValid(data, &state.scan); err != nil {
		return nil, err
	}

	state.init(data)
	state.scan.reset()
	// Step over leading whitespace so that opcode describes the first token.
	state.scanWhile(scanSkipSpace)
	return state.value()
}
// decodeState represents the state while decoding a JSON value.
// It pairs the input bytes with the scanner that is stepped over them.
type decodeState struct {
	data   []byte  // input being decoded; set by init
	off    int     // next read offset in data
	opcode int     // last read result
	scan   scanner // scanner stepped by scanNext and scanWhile
}
// readIndex reports the offset of the byte consumed most recently, which is
// one position before the next read offset.
func (d *decodeState) readIndex() int {
	last := d.off - 1
	return last
}
// phasePanicMsg is the panic message used when the decoder meets a scanner
// opcode or token it does not expect at that point.
const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?"
// init points the decoder at data and rewinds the read offset to the start.
// It returns the receiver.
func (d *decodeState) init(data []byte) *decodeState {
	d.off, d.data = 0, data
	return d
}
// scanNext feeds the byte at d.data[d.off] to the scanner and records the
// resulting opcode. Once the input is exhausted it records the scanner's
// end-of-input result instead.
func (d *decodeState) scanNext() {
	if d.off >= len(d.data) {
		d.opcode = d.scan.eof()
		d.off = len(d.data) + 1 // mark processed EOF with len+1
		return
	}
	c := d.data[d.off]
	d.opcode = d.scan.step(&d.scan, c)
	d.off++
}
// scanWhile steps the scanner over d.data[d.off:] for as long as it keeps
// returning op. It stops on the first different opcode, leaving d.off just
// past the byte that produced it. If the input runs out first, the scanner's
// end-of-input result is recorded.
func (d *decodeState) scanWhile(op int) {
	sc := &d.scan
	for pos := d.off; pos < len(d.data); pos++ {
		if got := sc.step(sc, d.data[pos]); got != op {
			d.opcode = got
			d.off = pos + 1
			return
		}
	}

	d.off = len(d.data) + 1 // mark processed EOF with len+1
	d.opcode = sc.eof()
}
// value decodes the JSON value whose first token is described by d.opcode.
// After an array or object it advances the scanner by one more byte; a
// literal is handed to d.literal as is. Any other opcode panics with
// phasePanicMsg.
func (d *decodeState) value() (objects.Object, error) {
	var (
		result objects.Object
		err    error
	)

	switch d.opcode {
	case scanBeginLiteral:
		return d.literal()
	case scanBeginArray:
		result, err = d.array()
	case scanBeginObject:
		result, err = d.object()
	default:
		panic(phasePanicMsg)
	}

	if err != nil {
		return nil, err
	}
	d.scanNext()
	return result, nil
}
// array decodes the elements of a JSON array whose opening bracket has
// already been consumed and returns them as an *objects.Array.
func (d *decodeState) array() (objects.Object, error) {
	var elems []objects.Object

	for done := false; !done; {
		// Look ahead for the closing bracket; it can only show up here
		// before the first element.
		d.scanWhile(scanSkipSpace)
		if d.opcode == scanEndArray {
			break
		}

		elem, err := d.value()
		if err != nil {
			return nil, err
		}
		elems = append(elems, elem)

		// The element must be followed by a comma or the closing bracket.
		if d.opcode == scanSkipSpace {
			d.scanWhile(scanSkipSpace)
		}
		switch d.opcode {
		case scanEndArray:
			done = true
		case scanArrayValue:
			// comma: go around for the next element
		default:
			panic(phasePanicMsg)
		}
	}

	return &objects.Array{Value: elems}, nil
}
// object decodes the members of a JSON object whose opening brace has already
// been consumed and returns them as an *objects.Map. When a key occurs more
// than once, the value decoded last is the one kept.
func (d *decodeState) object() (objects.Object, error) {
	members := make(map[string]objects.Object)

	for done := false; !done; {
		// Expect the opening quote of a key, or the closing brace, which
		// can only show up here before the first member.
		d.scanWhile(scanSkipSpace)
		if d.opcode == scanEndObject {
			break
		}
		if d.opcode != scanBeginLiteral {
			panic(phasePanicMsg)
		}

		// Key string.
		keyStart := d.readIndex()
		d.scanWhile(scanContinue)
		name, ok := unquote(d.data[keyStart:d.readIndex()])
		if !ok {
			panic(phasePanicMsg)
		}

		// Colon separating key and value.
		if d.opcode == scanSkipSpace {
			d.scanWhile(scanSkipSpace)
		}
		if d.opcode != scanObjectKey {
			panic(phasePanicMsg)
		}
		d.scanWhile(scanSkipSpace)

		// Member value.
		val, err := d.value()
		if err != nil {
			return nil, err
		}
		members[name] = val

		// The member must be followed by a comma or the closing brace.
		if d.opcode == scanSkipSpace {
			d.scanWhile(scanSkipSpace)
		}
		switch d.opcode {
		case scanEndObject:
			done = true
		case scanObjectValue:
			// comma: go around for the next member
		default:
			panic(phasePanicMsg)
		}
	}

	return &objects.Map{Value: members}, nil
}
// literal decodes the literal (null, true, false, string or number) whose
// first byte was just consumed by the scanner and returns the matching object.
//
// JSON null becomes objects.UndefinedValue, the booleans become
// objects.TrueValue and objects.FalseValue, a string becomes *objects.String
// and every number becomes *objects.Float.
//
// A number that strconv.ParseFloat cannot represent as a float64 (for example
// 1e999) is reported through the returned error rather than being silently
// decoded to an infinity.
func (d *decodeState) literal() (objects.Object, error) {
	// All bytes inside literal return scanContinue op code.
	start := d.readIndex()
	d.scanWhile(scanContinue)
	item := d.data[start:d.readIndex()]

	switch c := item[0]; c {
	case 'n': // null
		return objects.UndefinedValue, nil
	case 't': // true
		return objects.TrueValue, nil
	case 'f': // false
		return objects.FalseValue, nil
	case '"': // string
		s, ok := unquote(item)
		if !ok {
			panic(phasePanicMsg)
		}
		return &objects.String{Value: s}, nil
	default: // number
		if c != '-' && (c < '0' || c > '9') {
			panic(phasePanicMsg)
		}
		// bitSize must be 32 or 64; the scanner has already validated the
		// syntax, so the only failure left is a value out of float64 range.
		n, err := strconv.ParseFloat(string(item), 64)
		if err != nil {
			return nil, err
		}
		return &objects.Float{Value: n}, nil
	}
}
// getu4 reads a \uXXXX escape from the start of s and returns the value of
// its four hexadecimal digits. It returns -1 when s is shorter than six
// bytes, does not begin with `\u`, or contains a non-hexadecimal digit.
func getu4(s []byte) rune {
	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
		return -1
	}

	var code rune
	for i := 2; i < 6; i++ {
		digit := s[i]
		var nibble byte
		if digit >= '0' && digit <= '9' {
			nibble = digit - '0'
		} else if digit >= 'a' && digit <= 'f' {
			nibble = digit - 'a' + 10
		} else if digit >= 'A' && digit <= 'F' {
			nibble = digit - 'A' + 10
		} else {
			return -1
		}
		code = code<<4 | rune(nibble)
	}
	return code
}
// unquote turns the quoted JSON string literal s into the Go string it
// denotes. JSON's quoting rules differ from Go's, which is why
// strconv.Unquote is not used. ok is false when s is not a valid literal.
func unquote(s []byte) (t string, ok bool) {
	raw, valid := unquoteBytes(s)
	return string(raw), valid
}
// unquoteBytes converts the quoted JSON string literal s into the bytes it
// denotes. It returns (nil, false) when s is not wrapped in double quotes or
// contains an invalid escape, a raw double quote or a raw control character.
//
// When the literal needs no unescaping, the result is a sub-slice of s, not a
// copy. Otherwise the result is written into a freshly allocated buffer.
func unquoteBytes(s []byte) (t []byte, ok bool) {
	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
		return
	}
	s = s[1 : len(s)-1]
	// Check for unusual characters. If there are none,
	// then no unquoting is needed, so return a slice of the
	// original bytes.
	r := 0
	for r < len(s) {
		c := s[r]
		if c == '\\' || c == '"' || c < ' ' {
			break
		}
		if c < utf8.RuneSelf {
			r++
			continue
		}
		rr, size := utf8.DecodeRune(s[r:])
		if rr == utf8.RuneError && size == 1 {
			// Malformed UTF-8: leave it to the slow path below.
			break
		}
		r += size
	}
	if r == len(s) {
		return s, true
	}
	// Slow path: r is the read index into s, w the write index into b.
	// The bytes before r were already verified and are copied verbatim.
	b := make([]byte, len(s)+2*utf8.UTFMax)
	w := copy(b, s[0:r])
	for r < len(s) {
		// Out of room? Can only happen if s is full of
		// malformed UTF-8 and we're replacing each
		// byte with RuneError.
		if w >= len(b)-2*utf8.UTFMax {
			nb := make([]byte, (len(b)+utf8.UTFMax)*2)
			copy(nb, b[0:w])
			b = nb
		}
		switch c := s[r]; {
		case c == '\\':
			r++
			if r >= len(s) {
				// Backslash with nothing after it.
				return
			}
			switch s[r] {
			default:
				return
			case '"', '\\', '/', '\'':
				b[w] = s[r]
				r++
				w++
			case 'b':
				b[w] = '\b'
				r++
				w++
			case 'f':
				b[w] = '\f'
				r++
				w++
			case 'n':
				b[w] = '\n'
				r++
				w++
			case 'r':
				b[w] = '\r'
				r++
				w++
			case 't':
				b[w] = '\t'
				r++
				w++
			case 'u':
				// Step back onto the backslash: getu4 expects the whole
				// six-byte \uXXXX sequence.
				r--
				rr := getu4(s[r:])
				if rr < 0 {
					return
				}
				r += 6
				if utf16.IsSurrogate(rr) {
					// A surrogate is only meaningful as part of a pair, so
					// try to read the second half from the next escape.
					rr1 := getu4(s[r:])
					if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
						// A valid pair; consume.
						r += 6
						w += utf8.EncodeRune(b[w:], dec)
						break
					}
					// Invalid surrogate; fall back to replacement rune.
					rr = unicode.ReplacementChar
				}
				w += utf8.EncodeRune(b[w:], rr)
			}
		// Quote, control characters are invalid.
		case c == '"', c < ' ':
			return
		// ASCII
		case c < utf8.RuneSelf:
			b[w] = c
			r++
			w++
		// Coerce to well-formed UTF-8.
		default:
			rr, size := utf8.DecodeRune(s[r:])
			r += size
			w += utf8.EncodeRune(b[w:], rr)
		}
	}
	return b[0:w], true
}

147
stdlib/json/encode.go Normal file
View file

@ -0,0 +1,147 @@
// A modified version of Go's JSON implementation.
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"encoding/base64"
"errors"
"math"
"strconv"
"github.com/d5/tengo/objects"
)
// Encode returns the JSON encoding of the object.
//
// Arrays and maps (mutable or immutable) are encoded recursively. Map members
// are written in Go map iteration order, which is unspecified. Bytes are
// written as a base64 string, Char and Int as integers, Undefined as null,
// and Time through its MarshalJSON method.
//
// Strings and map keys are written with JSON escaping, so the output stays
// valid JSON even when they contain control characters or malformed UTF-8.
// No HTML escaping is applied.
//
// An error is returned for a Float that is NaN or infinite, and for any error
// reported while encoding a nested value.
//
// NOTE(review): an object of any other type contributes no bytes at all,
// which leaves a gap (for example `[1,]`) when it sits inside an array or
// map. That behaviour is inherited unchanged.
func Encode(o objects.Object) ([]byte, error) {
	var b []byte

	switch o := o.(type) {
	case *objects.Array:
		return encodeElements(o.Value)
	case *objects.ImmutableArray:
		return encodeElements(o.Value)
	case *objects.Map:
		return encodeMembers(o.Value)
	case *objects.ImmutableMap:
		return encodeMembers(o.Value)
	case *objects.Bool:
		b = strconv.AppendBool(b, !o.IsFalsy())
	case *objects.Bytes:
		// Encode straight into the output, between the two quotes.
		n := base64.StdEncoding.EncodedLen(len(o.Value))
		b = make([]byte, n+2)
		b[0] = '"'
		base64.StdEncoding.Encode(b[1:n+1], o.Value)
		b[n+1] = '"'
	case *objects.Char:
		b = strconv.AppendInt(b, int64(o.Value), 10)
	case *objects.Float:
		if math.IsInf(o.Value, 0) || math.IsNaN(o.Value) {
			return nil, errors.New("unsupported float value")
		}
		b = appendJSONFloat(b, o.Value)
	case *objects.Int:
		b = strconv.AppendInt(b, o.Value, 10)
	case *objects.String:
		b = appendJSONString(b, o.Value)
	case *objects.Time:
		y, err := o.Value.MarshalJSON()
		if err != nil {
			return nil, err
		}
		b = append(b, y...)
	case *objects.Undefined:
		b = append(b, "null"...)
	default:
		// unknown type: ignore
	}

	return b, nil
}

// encodeElements encodes elems as a JSON array.
func encodeElements(elems []objects.Object) ([]byte, error) {
	b := []byte{'['}
	for i, elem := range elems {
		if i > 0 {
			b = append(b, ',')
		}
		eb, err := Encode(elem)
		if err != nil {
			return nil, err
		}
		b = append(b, eb...)
	}
	return append(b, ']'), nil
}

// encodeMembers encodes members as a JSON object.
func encodeMembers(members map[string]objects.Object) ([]byte, error) {
	b := []byte{'{'}
	first := true
	for key, value := range members {
		if !first {
			b = append(b, ',')
		}
		first = false

		b = appendJSONString(b, key)
		b = append(b, ':')
		eb, err := Encode(value)
		if err != nil {
			return nil, err
		}
		b = append(b, eb...)
	}
	return append(b, '}'), nil
}

// appendJSONFloat appends the JSON form of the finite number f to dst,
// formatted as if by ES6 number-to-string conversion.
func appendJSONFloat(dst []byte, f float64) []byte {
	format := byte('f')
	if abs := math.Abs(f); abs != 0 && (abs < 1e-6 || abs >= 1e21) {
		format = 'e'
	}

	start := len(dst)
	dst = strconv.AppendFloat(dst, f, format, -1, 64)
	if format == 'e' {
		// clean up e-09 to e-9
		n := len(dst)
		if n-start >= 4 && dst[n-4] == 'e' && dst[n-3] == '-' && dst[n-2] == '0' {
			dst[n-2] = dst[n-1]
			dst = dst[:n-1]
		}
	}
	return dst
}

// appendJSONString appends s to dst as a JSON string literal.
//
// Only escapes defined by JSON are produced: `\"`, `\\`, `\n`, `\r`, `\t`
// and `\uXXXX`. Bytes that are not valid UTF-8 are written as `\ufffd`.
// U+2028 and U+2029 are escaped as well so the output can be embedded in
// JavaScript source.
func appendJSONString(dst []byte, s string) []byte {
	const hexDigits = "0123456789abcdef"

	dst = append(dst, '"')
	// Ranging over a string yields U+FFFD for every malformed byte.
	for _, r := range s {
		switch {
		case r == '"' || r == '\\':
			dst = append(dst, '\\', byte(r))
		case r == '\n':
			dst = append(dst, '\\', 'n')
		case r == '\r':
			dst = append(dst, '\\', 'r')
		case r == '\t':
			dst = append(dst, '\\', 't')
		case r < 0x20:
			dst = append(dst, '\\', 'u', '0', '0', hexDigits[r>>4], hexDigits[r&0xf])
		case r == '\ufffd':
			dst = append(dst, `\ufffd`...)
		case r == '\u2028' || r == '\u2029':
			dst = append(dst, `\u202`...)
			dst = append(dst, hexDigits[r&0xf])
		default:
			dst = append(dst, string(r)...)
		}
	}
	return append(dst, '"')
}

109
stdlib/json/json_test.go Normal file
View file

@ -0,0 +1,109 @@
package json_test
import (
gojson "encoding/json"
"testing"
"github.com/d5/tengo/assert"
"github.com/d5/tengo/objects"
"github.com/d5/tengo/stdlib/json"
)
// ARR is shorthand for a generic slice, used to write array test values.
type ARR = []interface{}

// MAP is shorthand for a string-keyed generic map, used to write object test values.
type MAP = map[string]interface{}
func TestJSON(t *testing.T) {
testJSONEncodeDecode(t, nil)
testJSONEncodeDecode(t, 0)
testJSONEncodeDecode(t, 1)
testJSONEncodeDecode(t, -1)
testJSONEncodeDecode(t, 1984)
testJSONEncodeDecode(t, -1984)
testJSONEncodeDecode(t, 0.0)
testJSONEncodeDecode(t, 1.0)
testJSONEncodeDecode(t, -1.0)
testJSONEncodeDecode(t, 19.84)
testJSONEncodeDecode(t, -19.84)
testJSONEncodeDecode(t, "")
testJSONEncodeDecode(t, "foo")
testJSONEncodeDecode(t, "foo bar")
testJSONEncodeDecode(t, "foo \"bar\"")
testJSONEncodeDecode(t, true)
testJSONEncodeDecode(t, false)
testJSONEncodeDecode(t, ARR{})
testJSONEncodeDecode(t, ARR{0})
testJSONEncodeDecode(t, ARR{false})
testJSONEncodeDecode(t, ARR{1, 2, 3, "four", false})
testJSONEncodeDecode(t, ARR{1, 2, 3, "four", false, MAP{"a": 0, "b": "bee", "bool": true}})
testJSONEncodeDecode(t, MAP{})
testJSONEncodeDecode(t, MAP{"a": 0})
testJSONEncodeDecode(t, MAP{"a": 0, "b": "bee"})
testJSONEncodeDecode(t, MAP{"a": 0, "b": "bee", "bool": true})
testJSONEncodeDecode(t, MAP{"a": 0, "b": "bee", "arr": ARR{1, 2, 3, "four"}})
testJSONEncodeDecode(t, MAP{"a": 0, "b": "bee", "arr": ARR{1, 2, 3, MAP{"a": false, "b": 109.4}}})
}
// TestDecode checks that a set of malformed documents is rejected by Decode.
func TestDecode(t *testing.T) {
	malformed := []string{
		`{`,
		`}`,
		`{}a`,
		`{{}`,
		`{}}`,
		`[`,
		`]`,
		`[]a`,
		`[[]`,
		`[]]`,
		`"`,
		`"abc`,
		`abc"`,
		`.123`,
		`123.`,
		`1.2.3`,
		`'a'`,
		`true, false`,
		`{"a:"b"}`,
		`{a":"b"}`,
		`{"a":"b":"c"}`,
	}

	for _, input := range malformed {
		testDecodeError(t, input)
	}
}
// testDecodeError asserts that decoding input produces an error.
func testDecodeError(t *testing.T, input string) {
	raw := []byte(input)
	_, decodeErr := json.Decode(raw)
	assert.Error(t, decodeErr)
}
// testJSONEncodeDecode converts v to an object, encodes it, decodes the
// result, and checks that the decoded object marshals (via encoding/json) to
// the same bytes as v itself. It reports whether every step succeeded.
func testJSONEncodeDecode(t *testing.T, v interface{}) bool {
	obj, err := objects.FromInterface(v)
	if ok := assert.NoError(t, err); !ok {
		return false
	}

	encoded, err := json.Encode(obj)
	if ok := assert.NoError(t, err); !ok {
		return false
	}

	decoded, err := json.Decode(encoded)
	if ok := assert.NoError(t, err, string(encoded)); !ok {
		return false
	}

	// Compare through encoding/json so both sides are in the same form.
	want, err := gojson.Marshal(v)
	if ok := assert.NoError(t, err); !ok {
		return false
	}

	got, err := gojson.Marshal(objects.ToInterface(decoded))
	if ok := assert.NoError(t, err); !ok {
		return false
	}

	return assert.Equal(t, want, got)
}

559
stdlib/json/scanner.go Normal file
View file

@ -0,0 +1,559 @@
// A modified version of Go's JSON implementation.
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import "strconv"
// checkValid resets scan and steps it over every byte of data, counting the
// bytes as it goes. It returns the scanner's error as soon as a step, or the
// final end-of-input check, reports scanError; otherwise it returns nil.
func checkValid(data []byte, scan *scanner) error {
	scan.reset()

	for i := 0; i < len(data); i++ {
		scan.bytes++
		if op := scan.step(scan, data[i]); op == scanError {
			return scan.err
		}
	}

	if op := scan.eof(); op == scanError {
		return scan.err
	}
	return nil
}
// A SyntaxError is a description of a JSON syntax error.
type SyntaxError struct {
	msg    string // description of error
	Offset int64  // error occurred after reading Offset bytes
}

// Error returns the description of the syntax error.
func (e *SyntaxError) Error() string { return e.msg }
// A scanner is a JSON scanning state machine.
// Callers call scan.reset() and then pass bytes in one at a time
// by calling scan.step(&scan, c) for each byte.
// The return value, referred to as an opcode, tells the
// caller about significant parsing events like beginning
// and ending literals, objects, and arrays, so that the
// caller can follow along if it wishes.
// The return value scanEnd indicates that a single top-level
// JSON value has been completed, *before* the byte that
// just got passed in. (The indication must be delayed in order
// to recognize the end of numbers: is 123 a whole value or
// the beginning of 12345e+6?).
type scanner struct {
	// The step is a func to be called to execute the next transition.
	// Also tried using an integer constant and a single func
	// with a switch, but using the func directly was 10% faster
	// on a 64-bit Mac Mini, and it's nicer to read.
	step func(*scanner, byte) int
	// Reached end of top-level value.
	endTop bool
	// Stack of what we're in the middle of - array values, object keys, object values.
	parseState []int
	// Error that happened, if any.
	err error
	// Total bytes consumed; incremented by checkValid and used as the
	// Offset of any SyntaxError the scanner creates.
	bytes int64
}
// These values are returned by the state transition functions
// assigned to scanner.step and the method scanner.eof.
// They give details about the current state of the scan that
// callers might be interested to know about.
// It is okay to ignore the return value of any particular
// call to scanner.step: if one call returns scanError,
// every subsequent call will return scanError too.
const (
	// Continue.
	scanContinue     = iota // uninteresting byte
	scanBeginLiteral        // end implied by next result != scanContinue
	scanBeginObject         // begin object
	scanObjectKey           // just finished object key (string)
	scanObjectValue         // just finished non-last object value
	scanEndObject           // end object (implies scanObjectValue if possible)
	scanBeginArray          // begin array
	scanArrayValue          // just finished array value
	scanEndArray            // end array (implies scanArrayValue if possible)
	scanSkipSpace           // space byte; can skip; known to be last "continue" result
	// Stop.
	scanEnd   // top-level value ended *before* this byte; known to be first "stop" result
	scanError // hit an error, scanner.err.
)
// These values are stored in the parseState stack.
// They give the current state of a composite value
// being scanned. If the parser is inside a nested value
// the parseState describes the nested state, outermost at entry 0.
// New entries are pushed when `{` or `[` is read and popped on the
// matching closing byte.
const (
	parseObjectKey   = iota // parsing object key (before colon)
	parseObjectValue        // parsing object value (after colon)
	parseArrayValue         // parsing array value
)
// reset returns the scanner to its initial state: no error, top-level value
// not yet finished, an empty parse stack, and stateBeginValue as the next
// step. It must be called before the first call to s.step.
func (s *scanner) reset() {
	s.err = nil
	s.endTop = false
	s.parseState = s.parseState[:0]
	s.step = stateBeginValue
}
// eof informs the scanner that the input has ended and returns a scan status
// in the same way s.step does: scanEnd when a complete top-level value has
// been seen, scanError otherwise.
func (s *scanner) eof() int {
	switch {
	case s.err != nil:
		return scanError
	case s.endTop:
		return scanEnd
	}

	// Feed one space so that a value still pending termination (such as a
	// number) gets the chance to finish.
	s.step(s, ' ')
	if s.endTop {
		return scanEnd
	}

	if s.err == nil {
		s.err = &SyntaxError{msg: "unexpected end of JSON input", Offset: s.bytes}
	}
	return scanError
}
// pushParseState adds parse state p to the top of the parse stack.
func (s *scanner) pushParseState(p int) {
	stack := append(s.parseState, p)
	s.parseState = stack
}
// popParseState removes the top entry of the parse stack and selects the next
// step: stateEndValue while an enclosing composite remains, or stateEndTop
// (also setting endTop) once the stack is empty.
func (s *scanner) popParseState() {
	remaining := len(s.parseState) - 1
	s.parseState = s.parseState[:remaining]

	if remaining > 0 {
		s.step = stateEndValue
		return
	}
	s.step = stateEndTop
	s.endTop = true
}
// isSpace reports whether c is a space, tab, carriage return or newline.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
// stateBeginValueOrEmpty is the state after reading `[`: the next
// non-space byte either closes the array or begins its first value.
func stateBeginValueOrEmpty(s *scanner, c byte) int {
	switch {
	case c <= ' ' && isSpace(c):
		return scanSkipSpace
	case c == ']':
		return stateEndValue(s, c)
	default:
		return stateBeginValue(s, c)
	}
}
// stateBeginValue is the state at the beginning of the input, and wherever
// else a value is expected. It dispatches on the first byte of the value.
func stateBeginValue(s *scanner, c byte) int {
	if c <= ' ' && isSpace(c) {
		return scanSkipSpace
	}

	// Composite values push a parse state; every other valid first byte
	// begins a literal and only selects the follow-up state.
	var next func(*scanner, byte) int
	switch {
	case c == '{':
		s.step = stateBeginStringOrEmpty
		s.pushParseState(parseObjectKey)
		return scanBeginObject
	case c == '[':
		s.step = stateBeginValueOrEmpty
		s.pushParseState(parseArrayValue)
		return scanBeginArray
	case c == '"':
		next = stateInString
	case c == '-':
		next = stateNeg
	case c == '0': // beginning of 0.123
		next = state0
	case '1' <= c && c <= '9': // beginning of 1234.5
		next = state1
	case c == 't': // beginning of true
		next = stateT
	case c == 'f': // beginning of false
		next = stateF
	case c == 'n': // beginning of null
		next = stateN
	default:
		return s.error(c, "looking for beginning of value")
	}

	s.step = next
	return scanBeginLiteral
}
// stateBeginStringOrEmpty is the state after reading `{`: the next
// non-space byte either closes the object or begins its first key.
func stateBeginStringOrEmpty(s *scanner, c byte) int {
	if c <= ' ' && isSpace(c) {
		return scanSkipSpace
	}
	if c != '}' {
		return stateBeginString(s, c)
	}

	// Empty object: mark the top entry as "after a value" so that
	// stateEndValue accepts the closing brace.
	s.parseState[len(s.parseState)-1] = parseObjectValue
	return stateEndValue(s, c)
}
// stateBeginString is the state after reading `{"key": value,`: only the
// opening quote of the next key (after optional space) is acceptable.
func stateBeginString(s *scanner, c byte) int {
	switch {
	case c <= ' ' && isSpace(c):
		return scanSkipSpace
	case c == '"':
		s.step = stateInString
		return scanBeginLiteral
	}
	return s.error(c, "looking for beginning of object key string")
}
// stateEndValue is the state after completing a value, such as after reading
// `{}` or `true` or `["x"`. What may follow depends on the enclosing
// composite, found on top of the parse stack.
func stateEndValue(s *scanner, c byte) int {
	depth := len(s.parseState)
	if depth == 0 {
		// Completed top-level before the current byte.
		s.step = stateEndTop
		s.endTop = true
		return stateEndTop(s, c)
	}
	if c <= ' ' && isSpace(c) {
		s.step = stateEndValue
		return scanSkipSpace
	}

	top := &s.parseState[depth-1]
	switch *top {
	case parseObjectKey:
		if c != ':' {
			return s.error(c, "after object key")
		}
		*top = parseObjectValue
		s.step = stateBeginValue
		return scanObjectKey

	case parseObjectValue:
		switch c {
		case ',':
			*top = parseObjectKey
			s.step = stateBeginString
			return scanObjectValue
		case '}':
			s.popParseState()
			return scanEndObject
		}
		return s.error(c, "after object key:value pair")

	case parseArrayValue:
		switch c {
		case ',':
			s.step = stateBeginValue
			return scanArrayValue
		case ']':
			s.popParseState()
			return scanEndArray
		}
		return s.error(c, "after array element")
	}

	return s.error(c, "")
}
// stateEndTop is the state after finishing the top-level value, such as after
// reading `{}` or `[1,2,3]`. Only space may follow. A non-space byte records
// an error, which is reported on the next call, while this call still
// returns scanEnd.
func stateEndTop(s *scanner, c byte) int {
	if isSpace(c) {
		return scanEnd
	}
	s.error(c, "after top-level value")
	return scanEnd
}
// stateInString is the state after reading `"`, that is, inside a string.
func stateInString(s *scanner, c byte) int {
	switch {
	case c == '"':
		s.step = stateEndValue
	case c == '\\':
		s.step = stateInStringEsc
	case c < 0x20:
		return s.error(c, "in string literal")
	}
	return scanContinue
}
// stateInStringEsc is the state after reading `"\` during a quoted string.
func stateInStringEsc(s *scanner, c byte) int {
	if c == 'u' {
		s.step = stateInStringEscU
		return scanContinue
	}
	switch c {
	case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
		s.step = stateInString
		return scanContinue
	}
	return s.error(c, "in string escape code")
}
// stateInStringEscU is the state after reading `"\u` during a quoted string;
// it expects the first of four hexadecimal digits.
func stateInStringEscU(s *scanner, c byte) int {
	switch {
	case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
		s.step = stateInStringEscU1
		return scanContinue
	}
	return s.error(c, "in \\u hexadecimal character escape")
}
// stateInStringEscU1 is the state after reading `"\u1` during a quoted
// string; it expects the second of four hexadecimal digits.
func stateInStringEscU1(s *scanner, c byte) int {
	switch {
	case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
		s.step = stateInStringEscU12
		return scanContinue
	}
	return s.error(c, "in \\u hexadecimal character escape")
}
// stateInStringEscU12 is the state after reading `"\u12` during a quoted
// string; it expects the third of four hexadecimal digits.
func stateInStringEscU12(s *scanner, c byte) int {
	switch {
	case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
		s.step = stateInStringEscU123
		return scanContinue
	}
	return s.error(c, "in \\u hexadecimal character escape")
}
// stateInStringEscU123 is the state after reading `"\u123` during a quoted
// string; it expects the last hexadecimal digit and then resumes the string.
func stateInStringEscU123(s *scanner, c byte) int {
	switch {
	case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
		s.step = stateInString
		return scanContinue
	}
	return s.error(c, "in \\u hexadecimal character escape")
}
// stateNeg is the state after reading `-` during a number; a digit must
// follow.
func stateNeg(s *scanner, c byte) int {
	switch {
	case c == '0':
		s.step = state0
	case '1' <= c && c <= '9':
		s.step = state1
	default:
		return s.error(c, "in numeric literal")
	}
	return scanContinue
}
// state1 is the state after reading a non-zero integer during a number, such
// as after reading `1` or `100` but not `0`. Further digits extend the
// integer; anything else is handled as in state0.
func state1(s *scanner, c byte) int {
	if c < '0' || c > '9' {
		return state0(s, c)
	}
	s.step = state1
	return scanContinue
}
// state0 is the state after reading `0` during a number: a fraction or an
// exponent may follow, otherwise the number is complete.
func state0(s *scanner, c byte) int {
	switch c {
	case '.':
		s.step = stateDot
	case 'e', 'E':
		s.step = stateE
	default:
		return stateEndValue(s, c)
	}
	return scanContinue
}
// stateDot is the state after reading the integer and decimal point in a
// number, such as after reading `1.`. At least one digit must follow.
func stateDot(s *scanner, c byte) int {
	if c < '0' || c > '9' {
		return s.error(c, "after decimal point in numeric literal")
	}
	s.step = stateDot0
	return scanContinue
}
// stateDot0 is the state after reading the integer, decimal point, and
// subsequent digits of a number, such as after reading `3.14`.
func stateDot0(s *scanner, c byte) int {
	switch {
	case '0' <= c && c <= '9':
		return scanContinue
	case c == 'e' || c == 'E':
		s.step = stateE
		return scanContinue
	}
	return stateEndValue(s, c)
}
// stateE is the state after reading the mantissa and e in a number, such as
// after reading `314e` or `0.314e`. A sign is optional here.
func stateE(s *scanner, c byte) int {
	if c != '+' && c != '-' {
		return stateESign(s, c)
	}
	s.step = stateESign
	return scanContinue
}
// stateESign is the state after reading the mantissa, e, and sign in a
// number, such as after reading `314e-` or `0.314e+`. A digit must follow.
func stateESign(s *scanner, c byte) int {
	if c < '0' || c > '9' {
		return s.error(c, "in exponent of numeric literal")
	}
	s.step = stateE0
	return scanContinue
}
// stateE0 is the state after reading the mantissa, e, optional sign, and at
// least one digit of the exponent in a number, such as after reading
// `314e-2` or `0.314e+1` or `3.14e0`.
func stateE0(s *scanner, c byte) int {
	if c < '0' || c > '9' {
		return stateEndValue(s, c)
	}
	return scanContinue
}
// stateT is the state after reading `t`; only `r` may follow.
func stateT(s *scanner, c byte) int {
	if c != 'r' {
		return s.error(c, "in literal true (expecting 'r')")
	}
	s.step = stateTr
	return scanContinue
}
// stateTr is the state after reading `tr`; only `u` may follow.
func stateTr(s *scanner, c byte) int {
	if c != 'u' {
		return s.error(c, "in literal true (expecting 'u')")
	}
	s.step = stateTru
	return scanContinue
}
// stateTru is the state after reading `tru`; `e` completes the literal.
func stateTru(s *scanner, c byte) int {
	if c != 'e' {
		return s.error(c, "in literal true (expecting 'e')")
	}
	s.step = stateEndValue
	return scanContinue
}
// stateF is the state after reading `f`; only `a` may follow.
func stateF(s *scanner, c byte) int {
	if c != 'a' {
		return s.error(c, "in literal false (expecting 'a')")
	}
	s.step = stateFa
	return scanContinue
}
// stateFa is the state after reading `fa`; only `l` may follow.
func stateFa(s *scanner, c byte) int {
	if c != 'l' {
		return s.error(c, "in literal false (expecting 'l')")
	}
	s.step = stateFal
	return scanContinue
}
// stateFal is the state after reading `fal`; only `s` may follow.
func stateFal(s *scanner, c byte) int {
	if c != 's' {
		return s.error(c, "in literal false (expecting 's')")
	}
	s.step = stateFals
	return scanContinue
}
// stateFals is the state after reading `fals`; `e` completes the literal.
func stateFals(s *scanner, c byte) int {
	if c != 'e' {
		return s.error(c, "in literal false (expecting 'e')")
	}
	s.step = stateEndValue
	return scanContinue
}
// stateN is the state after reading `n`; only `u` may follow.
func stateN(s *scanner, c byte) int {
	if c != 'u' {
		return s.error(c, "in literal null (expecting 'u')")
	}
	s.step = stateNu
	return scanContinue
}
// stateNu is the state after reading `nu`; only `l` may follow.
func stateNu(s *scanner, c byte) int {
	if c != 'l' {
		return s.error(c, "in literal null (expecting 'l')")
	}
	s.step = stateNul
	return scanContinue
}
// stateNul is the state after reading `nul`; `l` completes the literal.
func stateNul(s *scanner, c byte) int {
	if c != 'l' {
		return s.error(c, "in literal null (expecting 'l')")
	}
	s.step = stateEndValue
	return scanContinue
}
// stateError is the state after reaching a syntax error,
// such as after reading `[1}` or `5.1.2`.
// It ignores its input and keeps returning scanError, so once the scanner
// has failed every later step fails too.
func stateError(s *scanner, c byte) int {
	return scanError
}
// error stores a SyntaxError describing the offending byte c and the given
// context, switches the scanner to the error state, and returns scanError.
func (s *scanner) error(c byte, context string) int {
	msg := "invalid character " + quoteChar(c) + " " + context
	s.err = &SyntaxError{msg, s.bytes}
	s.step = stateError
	return scanError
}
// quoteChar renders c as a single-quoted character literal for use in error
// messages.
func quoteChar(c byte) string {
	// The two quote characters are spelled differently than strconv.Quote
	// would spell them inside a double-quoted string.
	switch c {
	case '\'':
		return `'\''`
	case '"':
		return `'"'`
	}

	// Everything else: take the double-quoted form and swap the delimiters.
	quoted := strconv.Quote(string(c))
	return "'" + quoted[1:len(quoted)-1] + "'"
}

View file

@ -3,31 +3,43 @@ package stdlib_test
import "testing" import "testing"
func TestJSON(t *testing.T) { func TestJSON(t *testing.T) {
module(t, "json").call("stringify", 5).expect("5") module(t, "json").call("encode", 5).expect([]byte("5"))
module(t, "json").call("stringify", "foobar").expect(`"foobar"`) module(t, "json").call("encode", "foobar").expect([]byte(`"foobar"`))
module(t, "json").call("stringify", MAP{"foo": 5}).expect("{\"foo\":5}") module(t, "json").call("encode", MAP{"foo": 5}).expect([]byte("{\"foo\":5}"))
module(t, "json").call("stringify", IMAP{"foo": 5}).expect("{\"foo\":5}") module(t, "json").call("encode", IMAP{"foo": 5}).expect([]byte("{\"foo\":5}"))
module(t, "json").call("stringify", ARR{1, 2, 3}).expect("[1,2,3]") module(t, "json").call("encode", ARR{1, 2, 3}).expect([]byte("[1,2,3]"))
module(t, "json").call("stringify", IARR{1, 2, 3}).expect("[1,2,3]") module(t, "json").call("encode", IARR{1, 2, 3}).expect([]byte("[1,2,3]"))
module(t, "json").call("stringify", MAP{"foo": "bar"}).expect("{\"foo\":\"bar\"}") module(t, "json").call("encode", MAP{"foo": "bar"}).expect([]byte("{\"foo\":\"bar\"}"))
module(t, "json").call("stringify", MAP{"foo": 1.8}).expect("{\"foo\":1.8}") module(t, "json").call("encode", MAP{"foo": 1.8}).expect([]byte("{\"foo\":1.8}"))
module(t, "json").call("stringify", MAP{"foo": true}).expect("{\"foo\":true}") module(t, "json").call("encode", MAP{"foo": true}).expect([]byte("{\"foo\":true}"))
module(t, "json").call("stringify", MAP{"foo": '8'}).expect("{\"foo\":56}") module(t, "json").call("encode", MAP{"foo": '8'}).expect([]byte("{\"foo\":56}"))
module(t, "json").call("stringify", MAP{"foo": []byte("foo")}).expect("{\"foo\":\"Zm9v\"}") // json encoding returns []byte as base64 encoded string module(t, "json").call("encode", MAP{"foo": []byte("foo")}).expect([]byte("{\"foo\":\"Zm9v\"}")) // json encoding returns []byte as base64 encoded string
module(t, "json").call("stringify", MAP{"foo": ARR{"bar", 1, 1.8, '8', true}}).expect("{\"foo\":[\"bar\",1,1.8,56,true]}") module(t, "json").call("encode", MAP{"foo": ARR{"bar", 1, 1.8, '8', true}}).expect([]byte("{\"foo\":[\"bar\",1,1.8,56,true]}"))
module(t, "json").call("stringify", MAP{"foo": IARR{"bar", 1, 1.8, '8', true}}).expect("{\"foo\":[\"bar\",1,1.8,56,true]}") module(t, "json").call("encode", MAP{"foo": IARR{"bar", 1, 1.8, '8', true}}).expect([]byte("{\"foo\":[\"bar\",1,1.8,56,true]}"))
module(t, "json").call("stringify", MAP{"foo": ARR{ARR{"bar", 1}, ARR{"bar", 1}}}).expect("{\"foo\":[[\"bar\",1],[\"bar\",1]]}") module(t, "json").call("encode", MAP{"foo": ARR{ARR{"bar", 1}, ARR{"bar", 1}}}).expect([]byte("{\"foo\":[[\"bar\",1],[\"bar\",1]]}"))
module(t, "json").call("stringify", MAP{"foo": MAP{"string": "bar", "int": 1, "float": 1.8, "char": '8', "bool": true}}).expect("{\"foo\":{\"bool\":true,\"char\":56,\"float\":1.8,\"int\":1,\"string\":\"bar\"}}") module(t, "json").call("encode", MAP{"foo": MAP{"string": "bar"}}).expect([]byte("{\"foo\":{\"string\":\"bar\"}}"))
module(t, "json").call("stringify", MAP{"foo": IMAP{"string": "bar", "int": 1, "float": 1.8, "char": '8', "bool": true}}).expect("{\"foo\":{\"bool\":true,\"char\":56,\"float\":1.8,\"int\":1,\"string\":\"bar\"}}") module(t, "json").call("encode", MAP{"foo": IMAP{"string": "bar"}}).expect([]byte("{\"foo\":{\"string\":\"bar\"}}"))
module(t, "json").call("stringify", MAP{"foo": MAP{"map1": MAP{"string": "bar"}, "map2": MAP{"int": "1"}}}).expect("{\"foo\":{\"map1\":{\"string\":\"bar\"},\"map2\":{\"int\":\"1\"}}}") module(t, "json").call("encode", MAP{"foo": MAP{"map1": MAP{"string": "bar"}}}).expect([]byte("{\"foo\":{\"map1\":{\"string\":\"bar\"}}}"))
module(t, "json").call("stringify", ARR{ARR{"bar", 1}, ARR{"bar", 1}}).expect("[[\"bar\",1],[\"bar\",1]]") module(t, "json").call("encode", ARR{ARR{"bar", 1}, ARR{"bar", 1}}).expect([]byte("[[\"bar\",1],[\"bar\",1]]"))
module(t, "json").call("parse", `5`).expect(5.0) module(t, "json").call("decode", `5`).expect(5.0)
module(t, "json").call("parse", `"foo"`).expect("foo") module(t, "json").call("decode", `"foo"`).expect("foo")
module(t, "json").call("parse", `[1,2,3,"bar"]`).expect(ARR{1.0, 2.0, 3.0, "bar"}) module(t, "json").call("decode", `[1,2,3,"bar"]`).expect(ARR{1.0, 2.0, 3.0, "bar"})
module(t, "json").call("parse", `{"foo":5}`).expect(MAP{"foo": 5.0}) module(t, "json").call("decode", `{"foo":5}`).expect(MAP{"foo": 5.0})
module(t, "json").call("parse", `{"foo":2.5}`).expect(MAP{"foo": 2.5}) module(t, "json").call("decode", `{"foo":2.5}`).expect(MAP{"foo": 2.5})
module(t, "json").call("parse", `{"foo":true}`).expect(MAP{"foo": true}) module(t, "json").call("decode", `{"foo":true}`).expect(MAP{"foo": true})
module(t, "json").call("parse", `{"foo":"bar"}`).expect(MAP{"foo": "bar"}) module(t, "json").call("decode", `{"foo":"bar"}`).expect(MAP{"foo": "bar"})
module(t, "json").call("parse", `{"foo":[1,2,3,"bar"]}`).expect(MAP{"foo": ARR{1.0, 2.0, 3.0, "bar"}}) module(t, "json").call("decode", `{"foo":[1,2,3,"bar"]}`).expect(MAP{"foo": ARR{1.0, 2.0, 3.0, "bar"}})
module(t, "json").call("indent", []byte("{\"foo\":[\"bar\",1,1.8,56,true]}"), "", " ").expect([]byte(`{
"foo": [
"bar",
1,
1.8,
56,
true
]
}`))
module(t, "json").call("html_escape", []byte(`{"M":"<html>foo &`+"\xe2\x80\xa8 \xe2\x80\xa9"+`</html>"}`)).expect([]byte(`{"M":"\u003chtml\u003efoo \u0026\u2028 \u2029\u003c/html\u003e"}`))
} }