json module faster implementation (#173)

* json module faster implementation * add some decoding error test
2019-04-06 05:25:23 -07:00 · 2019-04-06 05:25:23 -07:00 · 2cde0eaeea
commit 2cde0eaeea
parent 17a50b7c50
7 changed files with 1327 additions and 54 deletions
--- a/docs/stdlib-json.md
+++ b/docs/stdlib-json.md
@ -6,5 +6,20 @@ json := import("json")

 ## Functions

- `parse(v)`: Parses the JSON string and returns an object.
- `stringify(v)`: Returns the JSON string representation of the object.
+- `decode(b string/bytes) => object`: Parses the JSON string and returns an object.
+- `encode(o object) => bytes`: Returns the JSON string (bytes) of the object. Unlike Go's JSON package, this function does not HTML-escape text; use the `html_escape` function if that is needed.
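+- `indent(b string/bytes, prefix string, indent string) => bytes`: Returns an indented form of the input JSON bytes string. Each line begins with `prefix`, followed by one copy of `indent` per nesting level.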
+- `html_escape(b string/bytes) => bytes`: Returns an HTML-safe form of the input JSON bytes string.
+
+
+## Examples
+
+```golang
+json := import("json")
+
+encoded := json.encode({a: 1, b: [2, 3, 4]})  // JSON-encoded bytes string
+indented := json.indent(encoded, "", "  ")    // indented form
+html_safe := json.html_escape(encoded)        // HTML escaped form
+
+decoded := json.decode(encoded)               // {a: 1, b: [2, 3, 4]} 
+``` 
--- a/stdlib/json.go
+++ b/stdlib/json.go
@ -1,35 +1,38 @@
 package stdlib

 import (
-	"encoding/json"
+	"bytes"
+	gojson "encoding/json"

-	"github.com/d5/tengo"
 	"github.com/d5/tengo/objects"
+	"github.com/d5/tengo/stdlib/json"
 )

 var jsonModule = map[string]objects.Object{
-	"parse":     &objects.UserFunction{Name: "parse", Value: jsonParse},
-	"stringify": &objects.UserFunction{Name: "stringify", Value: jsonStringify},
+	"decode":      &objects.UserFunction{Name: "decode", Value: jsonDecode},
+	"encode":      &objects.UserFunction{Name: "encode", Value: jsonEncode},
+	"indent":      &objects.UserFunction{Name: "encode", Value: jsonIndent},
+	"html_escape": &objects.UserFunction{Name: "html_escape", Value: jsonHTMLEscape},
 }

-func jsonParse(args ...objects.Object) (ret objects.Object, err error) {
+// jsonDecode implements the module's "decode" function. It takes exactly one
+// argument, a bytes or string value, and parses it with json.Decode.
+//
+// A parse failure is returned as an *objects.Error value together with a nil
+// Go error. A wrong argument count or an unsupported argument type is
+// returned as a Go error.
+func jsonDecode(args ...objects.Object) (ret objects.Object, err error) {
 	if len(args) != 1 {
 		return nil, objects.ErrWrongNumArguments
 	}

-	var target interface{}
-
 	switch o := args[0].(type) {
 	case *objects.Bytes:
-		err := json.Unmarshal(o.Value, &target)
+		v, err := json.Decode(o.Value)
 		if err != nil {
 			return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil
 		}
+		return v, nil
 	case *objects.String:
-		err := json.Unmarshal([]byte(o.Value), &target)
+		v, err := json.Decode([]byte(o.Value))
 		if err != nil {
 			return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil
 		}
+		return v, nil
 	default:
 		return nil, objects.ErrInvalidArgumentType{
 			Name:     "first",
@ -37,33 +40,87 @@ func jsonParse(args ...objects.Object) (ret objects.Object, err error) {
 			Found:    args[0].TypeName(),
 		}
 	}
-
-	res, err := objects.FromInterface(target)
-	if err != nil {
-		return nil, err
-	}
-
-	return res, nil
 }

-func jsonStringify(args ...objects.Object) (ret objects.Object, err error) {
+// jsonEncode implements the module's "encode" function. It takes exactly one
+// argument and returns its JSON encoding, produced by json.Encode, as a bytes
+// value.
+//
+// An encoding failure is returned as an *objects.Error value together with a
+// nil Go error. A wrong argument count is returned as a Go error.
+func jsonEncode(args ...objects.Object) (ret objects.Object, err error) {
 	if len(args) != 1 {
 		return nil, objects.ErrWrongNumArguments
 	}

-	v := objects.ToInterface(args[0])
-	if vErr, isErr := v.(error); isErr {
-		v = vErr.Error()
-	}
-
-	res, err := json.Marshal(v)
+	b, err := json.Encode(args[0])
 	if err != nil {
 		return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil
 	}

-	if len(res) > tengo.MaxBytesLen {
-		return nil, objects.ErrBytesLimit
+	return &objects.Bytes{Value: b}, nil
+}
+
+// jsonIndent implements the module's "indent" function. It expects three
+// arguments: the JSON input (bytes or string), a line prefix and an indent
+// unit (both string-compatible), and returns the re-indented JSON as bytes.
+//
+// Malformed JSON is reported as an *objects.Error value with a nil Go error.
+// A wrong argument count or argument type is reported as a Go error.
+func jsonIndent(args ...objects.Object) (ret objects.Object, err error) {
+	if len(args) != 3 {
+		return nil, objects.ErrWrongNumArguments
 	}

-	return &objects.String{Value: string(res)}, nil
+	linePrefix, isStr := objects.ToString(args[1])
+	if !isStr {
+		return nil, objects.ErrInvalidArgumentType{
+			Name:     "prefix",
+			Expected: "string(compatible)",
+			Found:    args[1].TypeName(),
+		}
+	}
+
+	indentUnit, isStr := objects.ToString(args[2])
+	if !isStr {
+		return nil, objects.ErrInvalidArgumentType{
+			Name:     "indent",
+			Expected: "string(compatible)",
+			Found:    args[2].TypeName(),
+		}
+	}
+
+	// Normalize the input to a byte slice so both accepted types share the
+	// same indentation path below.
+	var src []byte
+	switch in := args[0].(type) {
+	case *objects.Bytes:
+		src = in.Value
+	case *objects.String:
+		src = []byte(in.Value)
+	default:
+		return nil, objects.ErrInvalidArgumentType{
+			Name:     "first",
+			Expected: "bytes/string",
+			Found:    args[0].TypeName(),
+		}
+	}
+
+	var out bytes.Buffer
+	if indentErr := gojson.Indent(&out, src, linePrefix, indentUnit); indentErr != nil {
+		return &objects.Error{Value: &objects.String{Value: indentErr.Error()}}, nil
+	}
+	return &objects.Bytes{Value: out.Bytes()}, nil
+}
+
+// jsonHTMLEscape implements the module's "html_escape" function. It takes one
+// bytes or string argument holding JSON text and returns, as bytes, the same
+// text passed through encoding/json's HTMLEscape.
+//
+// A wrong argument count or argument type is reported as a Go error.
+func jsonHTMLEscape(args ...objects.Object) (ret objects.Object, err error) {
+	if len(args) != 1 {
+		return nil, objects.ErrWrongNumArguments
+	}
+
+	// Normalize the input to a byte slice so both accepted types share the
+	// same escaping path below.
+	var src []byte
+	switch in := args[0].(type) {
+	case *objects.Bytes:
+		src = in.Value
+	case *objects.String:
+		src = []byte(in.Value)
+	default:
+		return nil, objects.ErrInvalidArgumentType{
+			Name:     "first",
+			Expected: "bytes/string",
+			Found:    args[0].TypeName(),
+		}
+	}
+
+	var out bytes.Buffer
+	gojson.HTMLEscape(&out, src)
+	return &objects.Bytes{Value: out.Bytes()}, nil
 }
--- a/stdlib/json/decode.go
+++ b/stdlib/json/decode.go
@ -0,0 +1,374 @@
+// A modified version of Go's JSON implementation.
+
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+	"strconv"
+	"unicode"
+	"unicode/utf16"
+	"unicode/utf8"
+
+	"github.com/d5/tengo/objects"
+)
+
+// Decode validates data as a single JSON document and converts it into the
+// corresponding object. A syntax error found during validation is returned
+// unchanged and no object is produced.
+func Decode(data []byte) (objects.Object, error) {
+	var state decodeState
+	if err := checkValid(data, &state.scan); err != nil {
+		return nil, err
+	}
+
+	// Rewind the scanner and consume leading whitespace so that the opcode of
+	// the first significant byte is in place before value() dispatches on it.
+	state.init(data)
+	state.scan.reset()
+	state.scanWhile(scanSkipSpace)
+
+	return state.value()
+}
+
+// decodeState represents the state while decoding a JSON value.
+// It pairs the input buffer with a scanner and remembers the opcode the
+// scanner returned for the most recently consumed byte.
+type decodeState struct {
+	data   []byte
+	off    int // next read offset in data
+	opcode int // last read result
+	scan   scanner
+}
+
+// readIndex returns the position of the last byte read.
+// It is d.off - 1 because d.off always points at the next byte to consume.
+func (d *decodeState) readIndex() int {
+	return d.off - 1
+}
+
+// phasePanicMsg is the panic message used when the decoder meets an opcode or
+// literal that the scanner should not have produced at that point.
+const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?"
+
+// init points the decoder at data, rewinds the read offset to the start and
+// returns d. It does not touch the scanner.
+func (d *decodeState) init(data []byte) *decodeState {
+	d.data = data
+	d.off = 0
+	return d
+}
+
+// scanNext feeds the byte at d.data[d.off] to the scanner and stores the
+// resulting opcode. Once the input is exhausted it reports end of input to
+// the scanner instead.
+func (d *decodeState) scanNext() {
+	if d.off >= len(d.data) {
+		d.opcode = d.scan.eof()
+		d.off = len(d.data) + 1 // mark processed EOF with len+1
+		return
+	}
+
+	d.opcode = d.scan.step(&d.scan, d.data[d.off])
+	d.off++
+}
+
+// scanWhile consumes bytes from d.data[d.off:] for as long as the scanner
+// keeps answering op. It stops just after the first byte that yields a
+// different opcode and records that opcode. If the input runs out first, it
+// reports end of input to the scanner.
+func (d *decodeState) scanWhile(op int) {
+	sc := &d.scan
+	for pos := d.off; pos < len(d.data); pos++ {
+		if got := sc.step(sc, d.data[pos]); got != op {
+			d.opcode = got
+			d.off = pos + 1
+			return
+		}
+	}
+
+	d.off = len(d.data) + 1 // mark processed EOF with len+1
+	d.opcode = sc.eof()
+}
+
+// value decodes the JSON value whose first byte produced d.opcode and returns
+// the resulting object. After an array or object it advances the scanner by
+// one more byte; a literal's own scan loop has already read past it.
+// It panics with phasePanicMsg if d.opcode does not begin a value.
+func (d *decodeState) value() (objects.Object, error) {
+	var (
+		result objects.Object
+		err    error
+	)
+
+	switch d.opcode {
+	case scanBeginLiteral:
+		return d.literal()
+	case scanBeginArray:
+		result, err = d.array()
+	case scanBeginObject:
+		result, err = d.object()
+	default:
+		panic(phasePanicMsg)
+	}
+
+	if err != nil {
+		return nil, err
+	}
+	d.scanNext()
+	return result, nil
+}
+
+// array decodes the elements of a JSON array whose opening bracket has just
+// been read and returns them as an *objects.Array. It returns once the
+// closing bracket has been consumed, and panics with phasePanicMsg if an
+// element is followed by anything other than a comma or the closing bracket.
+func (d *decodeState) array() (objects.Object, error) {
+	var elems []objects.Object
+	for {
+		// A closing bracket here can only be seen on the first iteration,
+		// i.e. for an empty array.
+		d.scanWhile(scanSkipSpace)
+		if d.opcode == scanEndArray {
+			return &objects.Array{Value: elems}, nil
+		}
+
+		elem, err := d.value()
+		if err != nil {
+			return nil, err
+		}
+		elems = append(elems, elem)
+
+		// The element must be followed by a comma or the closing bracket.
+		if d.opcode == scanSkipSpace {
+			d.scanWhile(scanSkipSpace)
+		}
+		switch d.opcode {
+		case scanEndArray:
+			return &objects.Array{Value: elems}, nil
+		case scanArrayValue:
+			// Comma: go round again for the next element.
+		default:
+			panic(phasePanicMsg)
+		}
+	}
+}
+
+// object decodes the members of a JSON object whose opening brace has just
+// been read and returns them as an *objects.Map keyed by the unquoted member
+// names. It returns once the closing brace has been consumed, and panics with
+// phasePanicMsg when the scanner opcodes do not follow the key, colon, value,
+// separator sequence.
+func (d *decodeState) object() (objects.Object, error) {
+	fields := make(map[string]objects.Object)
+	for {
+		// Expect the opening quote of a key. A closing brace here can only
+		// be seen on the first iteration, i.e. for an empty object.
+		d.scanWhile(scanSkipSpace)
+		switch d.opcode {
+		case scanEndObject:
+			return &objects.Map{Value: fields}, nil
+		case scanBeginLiteral:
+			// Key follows.
+		default:
+			panic(phasePanicMsg)
+		}
+
+		// Scan to the end of the key literal and unquote it.
+		keyStart := d.readIndex()
+		d.scanWhile(scanContinue)
+		name, ok := unquote(d.data[keyStart:d.readIndex()])
+		if !ok {
+			panic(phasePanicMsg)
+		}
+
+		// The key must be followed by a colon.
+		if d.opcode == scanSkipSpace {
+			d.scanWhile(scanSkipSpace)
+		}
+		if d.opcode != scanObjectKey {
+			panic(phasePanicMsg)
+		}
+		d.scanWhile(scanSkipSpace)
+
+		member, err := d.value()
+		if err != nil {
+			return nil, err
+		}
+		fields[name] = member
+
+		// The value must be followed by a comma or the closing brace.
+		if d.opcode == scanSkipSpace {
+			d.scanWhile(scanSkipSpace)
+		}
+		switch d.opcode {
+		case scanEndObject:
+			return &objects.Map{Value: fields}, nil
+		case scanObjectValue:
+			// Comma: go round again for the next member.
+		default:
+			panic(phasePanicMsg)
+		}
+	}
+}
+
+// literal decodes the literal whose first byte was the last byte read and
+// leaves in d.opcode the opcode of the first byte after it.
+//
+// The literal is classified by its first byte: null becomes
+// objects.UndefinedValue, true/false become objects.TrueValue/FalseValue, a
+// quoted string becomes an *objects.String and a number becomes an
+// *objects.Float. A number that cannot be represented as a float64 (for
+// example 1e999) is reported as an error instead of silently decoding to an
+// infinity. It panics with phasePanicMsg if the literal is not one of these
+// forms.
+func (d *decodeState) literal() (objects.Object, error) {
+	// All bytes inside literal return scanContinue op code.
+	start := d.readIndex()
+	d.scanWhile(scanContinue)
+
+	item := d.data[start:d.readIndex()]
+
+	switch c := item[0]; c {
+	case 'n': // null
+		return objects.UndefinedValue, nil
+
+	case 't': // true
+		return objects.TrueValue, nil
+
+	case 'f': // false
+		return objects.FalseValue, nil
+
+	case '"': // string
+		s, ok := unquote(item)
+		if !ok {
+			panic(phasePanicMsg)
+		}
+		return &objects.String{Value: s}, nil
+
+	default: // number
+		if c != '-' && (c < '0' || c > '9') {
+			panic(phasePanicMsg)
+		}
+
+		// bitSize must be 32 or 64; the target is a float64.
+		n, err := strconv.ParseFloat(string(item), 64)
+		if err != nil {
+			return nil, err
+		}
+		return &objects.Float{Value: n}, nil
+	}
+}
+
+// getu4 reads a \uXXXX escape from the start of s and returns the value of
+// its four hexadecimal digits. It returns -1 when s does not start with such
+// an escape.
+func getu4(s []byte) rune {
+	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
+		return -1
+	}
+
+	var code rune
+	for _, digit := range s[2:6] {
+		var nibble byte
+		switch {
+		case digit >= '0' && digit <= '9':
+			nibble = digit - '0'
+		case digit >= 'a' && digit <= 'f':
+			nibble = digit - 'a' + 10
+		case digit >= 'A' && digit <= 'F':
+			nibble = digit - 'A' + 10
+		default:
+			return -1
+		}
+		code = code<<4 | rune(nibble)
+	}
+	return code
+}
+
+// unquote turns the quoted JSON string literal s into the string it denotes.
+// JSON's escaping rules differ from Go's, so strconv.Unquote cannot be used.
+// ok is false when s is not a well-formed literal.
+func unquote(s []byte) (t string, ok bool) {
+	raw, ok := unquoteBytes(s)
+	return string(raw), ok
+}
+
+// unquoteBytes strips the surrounding double quotes from the JSON string
+// literal s and resolves its escape sequences. ok is false when s is not
+// wrapped in double quotes, or contains a raw quote, a raw control character
+// or an unknown or truncated escape.
+//
+// When the content needs no rewriting (no escapes and only valid UTF-8) the
+// returned slice aliases s; otherwise a new buffer is returned.
+func unquoteBytes(s []byte) (t []byte, ok bool) {
+	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
+		return
+	}
+	s = s[1 : len(s)-1]
+
+	// Check for unusual characters. If there are none,
+	// then no unquoting is needed, so return a slice of the
+	// original bytes.
+	r := 0
+	for r < len(s) {
+		c := s[r]
+		if c == '\\' || c == '"' || c < ' ' {
+			break
+		}
+		if c < utf8.RuneSelf {
+			r++
+			continue
+		}
+		rr, size := utf8.DecodeRune(s[r:])
+		if rr == utf8.RuneError && size == 1 {
+			break
+		}
+		r += size
+	}
+	if r == len(s) {
+		return s, true
+	}
+
+	// Slow path: copy the clean prefix, then rewrite the rest. r is the read
+	// index into s and w is the write index into b.
+	b := make([]byte, len(s)+2*utf8.UTFMax)
+	w := copy(b, s[0:r])
+	for r < len(s) {
+		// Out of room? Can only happen if s is full of
+		// malformed UTF-8 and we're replacing each
+		// byte with RuneError.
+		if w >= len(b)-2*utf8.UTFMax {
+			nb := make([]byte, (len(b)+utf8.UTFMax)*2)
+			copy(nb, b[0:w])
+			b = nb
+		}
+		switch c := s[r]; {
+		case c == '\\':
+			r++
+			if r >= len(s) {
+				return
+			}
+			switch s[r] {
+			default:
+				return
+			case '"', '\\', '/', '\'':
+				b[w] = s[r]
+				r++
+				w++
+			case 'b':
+				b[w] = '\b'
+				r++
+				w++
+			case 'f':
+				b[w] = '\f'
+				r++
+				w++
+			case 'n':
+				b[w] = '\n'
+				r++
+				w++
+			case 'r':
+				b[w] = '\r'
+				r++
+				w++
+			case 't':
+				b[w] = '\t'
+				r++
+				w++
+			case 'u':
+				// Step back onto the backslash: getu4 expects the full
+				// six-byte \uXXXX sequence.
+				r--
+				rr := getu4(s[r:])
+				if rr < 0 {
+					return
+				}
+				r += 6
+				if utf16.IsSurrogate(rr) {
+					rr1 := getu4(s[r:])
+					if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
+						// A valid pair; consume.
+						r += 6
+						w += utf8.EncodeRune(b[w:], dec)
+						break
+					}
+					// Invalid surrogate; fall back to replacement rune.
+					rr = unicode.ReplacementChar
+				}
+				w += utf8.EncodeRune(b[w:], rr)
+			}
+
+		// Quote, control characters are invalid.
+		case c == '"', c < ' ':
+			return
+
+		// ASCII
+		case c < utf8.RuneSelf:
+			b[w] = c
+			r++
+			w++
+
+		// Coerce to well-formed UTF-8.
+		default:
+			rr, size := utf8.DecodeRune(s[r:])
+			r += size
+			w += utf8.EncodeRune(b[w:], rr)
+		}
+	}
+	return b[0:w], true
+}
--- a/stdlib/json/encode.go
+++ b/stdlib/json/encode.go
@ -0,0 +1,147 @@
+// A modified version of Go's JSON implementation.
+
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+	"encoding/base64"
+	"errors"
+	"math"
+	"strconv"
+
+	"github.com/d5/tengo/objects"
+)
+
+// Encode returns the JSON encoding of the object.
+//
+// Arrays and maps (mutable or immutable) are encoded recursively. Bytes are
+// written as a base64 string, chars as their integer value, undefined as
+// null and times through their MarshalJSON method. Strings and map keys are
+// written as JSON string literals. Map members are written in Go map
+// iteration order, which is not stable between calls.
+//
+// A NaN or infinite float, or any failure encoding a nested value, is
+// returned as an error. An object of any other type contributes no output.
+func Encode(o objects.Object) ([]byte, error) {
+	var b []byte
+
+	switch o := o.(type) {
+	case *objects.Array:
+		return encodeArray(b, o.Value)
+	case *objects.ImmutableArray:
+		return encodeArray(b, o.Value)
+	case *objects.Map:
+		return encodeMap(b, o.Value)
+	case *objects.ImmutableMap:
+		return encodeMap(b, o.Value)
+	case *objects.Bool:
+		b = strconv.AppendBool(b, !o.IsFalsy())
+	case *objects.Bytes:
+		b = append(b, '"')
+		encodedLen := base64.StdEncoding.EncodedLen(len(o.Value))
+		dst := make([]byte, encodedLen)
+		base64.StdEncoding.Encode(dst, o.Value)
+		b = append(b, dst...)
+		b = append(b, '"')
+	case *objects.Char:
+		b = strconv.AppendInt(b, int64(o.Value), 10)
+	case *objects.Float:
+		f := o.Value
+		if math.IsInf(f, 0) || math.IsNaN(f) {
+			return nil, errors.New("unsupported float value")
+		}
+
+		// Convert as if by ES6 number to string conversion.
+		// This matches most other JSON generators.
+		abs := math.Abs(f)
+		format := byte('f')
+		if abs != 0 && (abs < 1e-6 || abs >= 1e21) {
+			format = 'e'
+		}
+		y := strconv.AppendFloat(nil, f, format, -1, 64)
+		if format == 'e' {
+			// clean up e-09 to e-9
+			n := len(y)
+			if n >= 4 && y[n-4] == 'e' && y[n-3] == '-' && y[n-2] == '0' {
+				y[n-2] = y[n-1]
+				y = y[:n-1]
+			}
+		}
+
+		b = append(b, y...)
+	case *objects.Int:
+		b = strconv.AppendInt(b, o.Value, 10)
+	case *objects.String:
+		b = encodeString(b, o.Value)
+	case *objects.Time:
+		y, err := o.Value.MarshalJSON()
+		if err != nil {
+			return nil, err
+		}
+		b = append(b, y...)
+	case *objects.Undefined:
+		b = append(b, "null"...)
+	default:
+		// unknown type: ignore
+	}
+
+	return b, nil
+}
+
+// encodeArray appends the JSON array made of elems to b.
+func encodeArray(b []byte, elems []objects.Object) ([]byte, error) {
+	b = append(b, '[')
+	for idx, elem := range elems {
+		if idx > 0 {
+			b = append(b, ',')
+		}
+		eb, err := Encode(elem)
+		if err != nil {
+			return nil, err
+		}
+		b = append(b, eb...)
+	}
+	return append(b, ']'), nil
+}
+
+// encodeMap appends the JSON object made of the members of m to b.
+func encodeMap(b []byte, m map[string]objects.Object) ([]byte, error) {
+	b = append(b, '{')
+	first := true
+	for key, value := range m {
+		if !first {
+			b = append(b, ',')
+		}
+		first = false
+
+		b = encodeString(b, key)
+		b = append(b, ':')
+		eb, err := Encode(value)
+		if err != nil {
+			return nil, err
+		}
+		b = append(b, eb...)
+	}
+	return append(b, '}'), nil
+}
+
+// encodeString appends s to b as a JSON string literal. Unlike
+// strconv.AppendQuote it only emits escapes that JSON defines: control
+// characters become \n, \r, \t or \u00XX, invalid UTF-8 becomes \ufffd, and
+// U+2028/U+2029 are escaped as encoding/json does. Text is not HTML-escaped.
+func encodeString(b []byte, s string) []byte {
+	const hexDigits = "0123456789abcdef"
+
+	b = append(b, '"')
+	start := 0       // start of the pending run of bytes copied verbatim
+	escaped := false // previous rune was escaped: the run restarts at the next rune
+	for i, r := range s {
+		if escaped {
+			start, escaped = i, false
+		}
+
+		var esc string
+		switch r {
+		case '"':
+			esc = `\"`
+		case '\\':
+			esc = `\\`
+		case '\n':
+			esc = `\n`
+		case '\r':
+			esc = `\r`
+		case '\t':
+			esc = `\t`
+		case '\u2028':
+			esc = `\u2028`
+		case '\u2029':
+			esc = `\u2029`
+		case '\ufffd':
+			// Ranging over a string yields U+FFFD for every invalid byte;
+			// escaping it keeps the output valid UTF-8 either way.
+			esc = `\ufffd`
+		default:
+			if r >= ' ' {
+				continue
+			}
+		}
+
+		b = append(b, s[start:i]...)
+		if esc != "" {
+			b = append(b, esc...)
+		} else {
+			// Remaining control characters (below 0x20) have no short form.
+			b = append(b, '\\', 'u', '0', '0', hexDigits[r>>4], hexDigits[r&0xf])
+		}
+		escaped = true
+	}
+	if escaped {
+		start = len(s)
+	}
+	b = append(b, s[start:]...)
+	return append(b, '"')
+}
--- a/stdlib/json/json_test.go
+++ b/stdlib/json/json_test.go
@ -0,0 +1,109 @@
+package json_test
+
+import (
+	gojson "encoding/json"
+	"testing"
+
+	"github.com/d5/tengo/assert"
+	"github.com/d5/tengo/objects"
+	"github.com/d5/tengo/stdlib/json"
+)
+
+// ARR and MAP are shorthand aliases for the generic slice and map types used
+// to build the test fixtures below.
+type ARR = []interface{}
+type MAP = map[string]interface{}
+
+// TestJSON checks that a range of scalar, array and map values survives an
+// Encode followed by a Decode.
+func TestJSON(t *testing.T) {
+	fixtures := []interface{}{
+		nil,
+
+		// integers
+		0, 1, -1, 1984, -1984,
+
+		// floats
+		0.0, 1.0, -1.0, 19.84, -19.84,
+
+		// strings
+		"", "foo", "foo bar", "foo \"bar\"",
+
+		// booleans
+		true, false,
+
+		// arrays
+		ARR{},
+		ARR{0},
+		ARR{false},
+		ARR{1, 2, 3, "four", false},
+		ARR{1, 2, 3, "four", false, MAP{"a": 0, "b": "bee", "bool": true}},
+
+		// maps
+		MAP{},
+		MAP{"a": 0},
+		MAP{"a": 0, "b": "bee"},
+		MAP{"a": 0, "b": "bee", "bool": true},
+
+		// nested
+		MAP{"a": 0, "b": "bee", "arr": ARR{1, 2, 3, "four"}},
+		MAP{"a": 0, "b": "bee", "arr": ARR{1, 2, 3, MAP{"a": false, "b": 109.4}}},
+	}
+
+	for _, fixture := range fixtures {
+		testJSONEncodeDecode(t, fixture)
+	}
+}
+
+// TestDecode checks that Decode reports an error for a set of malformed
+// JSON documents.
+func TestDecode(t *testing.T) {
+	malformed := []string{
+		`{`,
+		`}`,
+		`{}a`,
+		`{{}`,
+		`{}}`,
+		`[`,
+		`]`,
+		`[]a`,
+		`[[]`,
+		`[]]`,
+		`"`,
+		`"abc`,
+		`abc"`,
+		`.123`,
+		`123.`,
+		`1.2.3`,
+		`'a'`,
+		`true, false`,
+		`{"a:"b"}`,
+		`{a":"b"}`,
+		`{"a":"b":"c"}`,
+	}
+
+	for _, input := range malformed {
+		testDecodeError(t, input)
+	}
+}
+
+// testDecodeError asserts that decoding input yields an error.
+func testDecodeError(t *testing.T, input string) {
+	_, err := json.Decode([]byte(input))
+	assert.Error(t, err)
+}
+
+// testJSONEncodeDecode converts v to an object, encodes it, decodes the
+// result and compares the two ends of the round trip. Both v and the decoded
+// object are marshalled with encoding/json, and the two outputs must be
+// equal. It reports whether every step succeeded.
+func testJSONEncodeDecode(t *testing.T, v interface{}) bool {
+	obj, err := objects.FromInterface(v)
+	if !assert.NoError(t, err) {
+		return false
+	}
+
+	encoded, err := json.Encode(obj)
+	if !assert.NoError(t, err) {
+		return false
+	}
+
+	decoded, err := json.Decode(encoded)
+	if !assert.NoError(t, err, string(encoded)) {
+		return false
+	}
+
+	want, err := gojson.Marshal(v)
+	if !assert.NoError(t, err) {
+		return false
+	}
+
+	got, err := gojson.Marshal(objects.ToInterface(decoded))
+	if !assert.NoError(t, err) {
+		return false
+	}
+
+	return assert.Equal(t, want, got)
+}
--- a/stdlib/json/scanner.go
+++ b/stdlib/json/scanner.go
@ -0,0 +1,559 @@
+// A modified version of Go's JSON implementation.
+
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import "strconv"
+
+// checkValid runs data through scan from a fresh state and returns the
+// scanner's error if the bytes do not form exactly one JSON value. It counts
+// every byte it feeds in scan.bytes.
+func checkValid(data []byte, scan *scanner) error {
+	scan.reset()
+	for i := range data {
+		scan.bytes++
+		if op := scan.step(scan, data[i]); op == scanError {
+			return scan.err
+		}
+	}
+	if scan.eof() != scanError {
+		return nil
+	}
+	return scan.err
+}
+
+// A SyntaxError is a description of a JSON syntax error.
+type SyntaxError struct {
+	msg    string // description of error
+	Offset int64  // error occurred after reading Offset bytes
+}
+
+// Error returns the description of the syntax error.
+func (e *SyntaxError) Error() string { return e.msg }
+
+// A scanner is a JSON scanning state machine.
+// Callers call scan.reset() and then pass bytes in one at a time
+// by calling scan.step(&scan, c) for each byte.
+// The return value, referred to as an opcode, tells the
+// caller about significant parsing events like beginning
+// and ending literals, objects, and arrays, so that the
+// caller can follow along if it wishes.
+// The return value scanEnd indicates that a single top-level
+// JSON value has been completed, *before* the byte that
+// just got passed in.  (The indication must be delayed in order
+// to recognize the end of numbers: is 123 a whole value or
+// the beginning of 12345e+6?).
+type scanner struct {
+	// The step is a func to be called to execute the next transition.
+	// Also tried using an integer constant and a single func
+	// with a switch, but using the func directly was 10% faster
+	// on a 64-bit Mac Mini, and it's nicer to read.
+	step func(*scanner, byte) int
+
+	// Reached end of top-level value.
+	endTop bool
+
+	// Stack of what we're in the middle of - array values, object keys, object values.
+	parseState []int
+
+	// Error that happened, if any.
+	err error
+
+	// Total bytes consumed; incremented by checkValid for every byte it feeds
+	// in, and recorded as the Offset of any SyntaxError.
+	bytes int64
+}
+
+// These values are returned by the state transition functions
+// assigned to scanner.step and the method scanner.eof.
+// They give details about the current state of the scan that
+// callers might be interested to know about.
+// It is okay to ignore the return value of any particular
+// call to scanner.step: if one call returns scanError,
+// every subsequent call will return scanError too.
+const (
+	// Continue.
+	scanContinue     = iota // uninteresting byte
+	scanBeginLiteral        // end implied by next result != scanContinue
+	scanBeginObject         // begin object
+	scanObjectKey           // just finished object key (string)
+	scanObjectValue         // just finished non-last object value
+	scanEndObject           // end object (implies scanObjectValue if possible)
+	scanBeginArray          // begin array
+	scanArrayValue          // just finished array value
+	scanEndArray            // end array (implies scanArrayValue if possible)
+	scanSkipSpace           // space byte; can skip; known to be last "continue" result
+
+	// Stop.
+	scanEnd   // top-level value ended *before* this byte; known to be first "stop" result
+	scanError // hit an error, scanner.err.
+)
+
+// These values are stored in the parseState stack.
+// They give the current state of a composite value
+// being scanned. If the parser is inside a nested value
+// the parseState describes the nested state, outermost at entry 0.
+// A new entry is pushed when `{` or `[` is read and popped at the matching
+// closing bracket.
+const (
+	parseObjectKey   = iota // parsing object key (before colon)
+	parseObjectValue        // parsing object value (after colon)
+	parseArrayValue         // parsing array value
+)
+
+// reset returns the scanner to its initial state: expecting the beginning of
+// a value, with an empty parse stack and no recorded error.
+// It must be called before calling s.step.
+func (s *scanner) reset() {
+	s.err = nil
+	s.endTop = false
+	s.parseState = s.parseState[:0]
+	s.step = stateBeginValue
+}
+
+// eof tells the scanner that the end of input has been reached.
+// It returns a scan status just as s.step does: scanEnd when a complete
+// top-level value has been seen, scanError otherwise.
+func (s *scanner) eof() int {
+	switch {
+	case s.err != nil:
+		return scanError
+	case s.endTop:
+		return scanEnd
+	}
+
+	// Feed a trailing space: a value such as a bare number is only
+	// recognized as finished once the byte after it has been seen.
+	s.step(s, ' ')
+	if s.endTop {
+		return scanEnd
+	}
+
+	if s.err == nil {
+		s.err = &SyntaxError{"unexpected end of JSON input", s.bytes}
+	}
+	return scanError
+}
+
+// pushParseState pushes a new parse state p onto the parse stack.
+// p is one of the parse* constants.
+func (s *scanner) pushParseState(p int) {
+	s.parseState = append(s.parseState, p)
+}
+
+// popParseState drops the innermost parse state from the stack and selects
+// the next step: stateEndTop (marking the top-level value complete) when the
+// stack becomes empty, stateEndValue otherwise.
+func (s *scanner) popParseState() {
+	remaining := len(s.parseState) - 1
+	s.parseState = s.parseState[:remaining]
+	if remaining > 0 {
+		s.step = stateEndValue
+		return
+	}
+	s.step = stateEndTop
+	s.endTop = true
+}
+
+// isSpace reports whether c is one of the four whitespace bytes the scanner
+// skips: space, tab, carriage return or line feed.
+func isSpace(c byte) bool {
+	switch c {
+	case ' ', '\t', '\r', '\n':
+		return true
+	}
+	return false
+}
+
+// stateBeginValueOrEmpty is the state after reading `[`: either the array
+// closes immediately or its first value begins.
+func stateBeginValueOrEmpty(s *scanner, c byte) int {
+	switch {
+	case c <= ' ' && isSpace(c):
+		return scanSkipSpace
+	case c == ']':
+		return stateEndValue(s, c)
+	default:
+		return stateBeginValue(s, c)
+	}
+}
+
+// stateBeginValue is the state at the beginning of the input and wherever
+// else a value is expected. It dispatches on the first byte of the value.
+func stateBeginValue(s *scanner, c byte) int {
+	if c <= ' ' && isSpace(c) {
+		return scanSkipSpace
+	}
+
+	// Composite values push a parse state; every other valid start byte
+	// begins a literal and only selects the state that scans the rest of it.
+	var literalStep func(*scanner, byte) int
+	switch c {
+	case '{':
+		s.step = stateBeginStringOrEmpty
+		s.pushParseState(parseObjectKey)
+		return scanBeginObject
+	case '[':
+		s.step = stateBeginValueOrEmpty
+		s.pushParseState(parseArrayValue)
+		return scanBeginArray
+	case '"':
+		literalStep = stateInString
+	case '-':
+		literalStep = stateNeg
+	case '0': // beginning of 0.123
+		literalStep = state0
+	case '1', '2', '3', '4', '5', '6', '7', '8', '9': // beginning of 1234.5
+		literalStep = state1
+	case 't': // beginning of true
+		literalStep = stateT
+	case 'f': // beginning of false
+		literalStep = stateF
+	case 'n': // beginning of null
+		literalStep = stateN
+	default:
+		return s.error(c, "looking for beginning of value")
+	}
+
+	s.step = literalStep
+	return scanBeginLiteral
+}
+
+// stateBeginStringOrEmpty is the state after reading `{`: either the object
+// closes immediately or its first key begins.
+func stateBeginStringOrEmpty(s *scanner, c byte) int {
+	switch {
+	case c <= ' ' && isSpace(c):
+		return scanSkipSpace
+	case c == '}':
+		// Mark the object as being in value position so that
+		// stateEndValue accepts the closing brace.
+		s.parseState[len(s.parseState)-1] = parseObjectValue
+		return stateEndValue(s, c)
+	default:
+		return stateBeginString(s, c)
+	}
+}
+
+// stateBeginString is the state after reading `{"key": value,`: the opening
+// quote of the next object key is expected.
+func stateBeginString(s *scanner, c byte) int {
+	switch {
+	case c <= ' ' && isSpace(c):
+		return scanSkipSpace
+	case c == '"':
+		s.step = stateInString
+		return scanBeginLiteral
+	default:
+		return s.error(c, "looking for beginning of object key string")
+	}
+}
+
+// stateEndValue is the state after completing a value,
+// such as after reading `{}` or `true` or `["x"`.
+// What may follow depends on the innermost entry of the parse stack.
+func stateEndValue(s *scanner, c byte) int {
+	depth := len(s.parseState)
+	if depth == 0 {
+		// Completed top-level before the current byte.
+		s.step = stateEndTop
+		s.endTop = true
+		return stateEndTop(s, c)
+	}
+	if c <= ' ' && isSpace(c) {
+		s.step = stateEndValue
+		return scanSkipSpace
+	}
+
+	top := depth - 1
+	switch s.parseState[top] {
+	case parseObjectKey:
+		if c != ':' {
+			return s.error(c, "after object key")
+		}
+		s.parseState[top] = parseObjectValue
+		s.step = stateBeginValue
+		return scanObjectKey
+
+	case parseObjectValue:
+		switch c {
+		case ',':
+			s.parseState[top] = parseObjectKey
+			s.step = stateBeginString
+			return scanObjectValue
+		case '}':
+			s.popParseState()
+			return scanEndObject
+		}
+		return s.error(c, "after object key:value pair")
+
+	case parseArrayValue:
+		switch c {
+		case ',':
+			s.step = stateBeginValue
+			return scanArrayValue
+		case ']':
+			s.popParseState()
+			return scanEndArray
+		}
+		return s.error(c, "after array element")
+	}
+
+	return s.error(c, "")
+}
+
+// stateEndTop is the state after finishing the top-level value,
+// such as after reading `{}` or `[1,2,3]`.
+// Only space characters should be seen now. It always returns scanEnd; a
+// non-space byte is recorded as an error that surfaces on the next call.
+func stateEndTop(s *scanner, c byte) int {
+	if isSpace(c) {
+		return scanEnd
+	}
+	// Complain about non-space byte on next call.
+	s.error(c, "after top-level value")
+	return scanEnd
+}
+
+// stateInString is the state after reading `"`, i.e. inside a string
+// literal. A closing quote ends the value, a backslash starts an escape and
+// raw control characters are rejected.
+func stateInString(s *scanner, c byte) int {
+	switch {
+	case c == '"':
+		s.step = stateEndValue
+	case c == '\\':
+		s.step = stateInStringEsc
+	case c < 0x20:
+		return s.error(c, "in string literal")
+	}
+	return scanContinue
+}
+
+// stateInStringEsc is the state after reading `"\` during a quoted string.
+// It accepts the single-character escapes and the start of a \u escape.
+func stateInStringEsc(s *scanner, c byte) int {
+	switch c {
+	case 'u':
+		s.step = stateInStringEscU
+	case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
+		s.step = stateInString
+	default:
+		return s.error(c, "in string escape code")
+	}
+	return scanContinue
+}
+
+// stateInStringEscU is the state after reading `"\u` during a quoted string.
+// The first of four hexadecimal digits is expected.
+func stateInStringEscU(s *scanner, c byte) int {
+	isHex := '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
+	if !isHex {
+		return s.error(c, "in \\u hexadecimal character escape")
+	}
+	s.step = stateInStringEscU1
+	return scanContinue
+}
+
+// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
+// The second of four hexadecimal digits is expected.
+func stateInStringEscU1(s *scanner, c byte) int {
+	isHex := '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
+	if !isHex {
+		return s.error(c, "in \\u hexadecimal character escape")
+	}
+	s.step = stateInStringEscU12
+	return scanContinue
+}
+
+// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
+// The third of four hexadecimal digits is expected.
+func stateInStringEscU12(s *scanner, c byte) int {
+	isHex := '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
+	if !isHex {
+		return s.error(c, "in \\u hexadecimal character escape")
+	}
+	s.step = stateInStringEscU123
+	return scanContinue
+}
+
+// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
+// The last of four hexadecimal digits is expected; after it the scanner is
+// back inside the string.
+func stateInStringEscU123(s *scanner, c byte) int {
+	isHex := '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
+	if !isHex {
+		return s.error(c, "in \\u hexadecimal character escape")
+	}
+	s.step = stateInString
+	return scanContinue
+}
+
+// stateNeg is the state after reading `-` during a number.
+// A digit must follow.
+func stateNeg(s *scanner, c byte) int {
+	switch {
+	case c == '0':
+		s.step = state0
+	case '1' <= c && c <= '9':
+		s.step = state1
+	default:
+		return s.error(c, "in numeric literal")
+	}
+	return scanContinue
+}
+
+// state1 is the state after reading a non-zero integer during a number,
+// such as after reading `1` or `100` but not `0`.
+// Further digits extend the integer; anything else is handled as after `0`.
+func state1(s *scanner, c byte) int {
+	if c < '0' || c > '9' {
+		return state0(s, c)
+	}
+	s.step = state1
+	return scanContinue
+}
+
+// state0 is the state after reading `0` during a number.
+// A fraction or an exponent may follow; any other byte ends the number.
+func state0(s *scanner, c byte) int {
+	switch c {
+	case '.':
+		s.step = stateDot
+	case 'e', 'E':
+		s.step = stateE
+	default:
+		return stateEndValue(s, c)
+	}
+	return scanContinue
+}
+
+// stateDot is the state after reading the integer and decimal point in a number,
+// such as after reading `1.`. At least one fraction digit must follow.
+func stateDot(s *scanner, c byte) int {
+	if c < '0' || c > '9' {
+		return s.error(c, "after decimal point in numeric literal")
+	}
+	s.step = stateDot0
+	return scanContinue
+}
+
+// stateDot0 is the state after reading the integer, decimal point, and subsequent
+// digits of a number, such as after reading `3.14`.
+// More digits or an exponent may follow; any other byte ends the number.
+func stateDot0(s *scanner, c byte) int {
+	switch {
+	case '0' <= c && c <= '9':
+		// Stay in this state.
+	case c == 'e' || c == 'E':
+		s.step = stateE
+	default:
+		return stateEndValue(s, c)
+	}
+	return scanContinue
+}
+
+// stateE is the state after reading the mantissa and e in a number,
+// such as after reading `314e` or `0.314e`.
+// An optional sign is consumed here; otherwise the byte is treated as the
+// first exponent digit.
+func stateE(s *scanner, c byte) int {
+	if c != '+' && c != '-' {
+		return stateESign(s, c)
+	}
+	s.step = stateESign
+	return scanContinue
+}
+
+// stateESign is the state after reading the mantissa, e, and sign in a number,
+// such as after reading `314e-` or `0.314e+`.
+// At least one exponent digit must follow.
+func stateESign(s *scanner, c byte) int {
+	if c < '0' || c > '9' {
+		return s.error(c, "in exponent of numeric literal")
+	}
+	s.step = stateE0
+	return scanContinue
+}
+
+// stateE0 is the state after reading the mantissa, e, optional sign,
+// and at least one digit of the exponent in a number,
+// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
+// Further digits extend the exponent; any other byte ends the number.
+func stateE0(s *scanner, c byte) int {
+	if c < '0' || c > '9' {
+		return stateEndValue(s, c)
+	}
+	return scanContinue
+}
+
+// stateT is the state after reading `t`; `r` must follow.
+func stateT(s *scanner, c byte) int {
+	if c != 'r' {
+		return s.error(c, "in literal true (expecting 'r')")
+	}
+	s.step = stateTr
+	return scanContinue
+}
+
+// stateTr is the state after reading `tr`; `u` must follow.
+func stateTr(s *scanner, c byte) int {
+	if c != 'u' {
+		return s.error(c, "in literal true (expecting 'u')")
+	}
+	s.step = stateTru
+	return scanContinue
+}
+
+// stateTru is the state after reading `tru`; `e` must follow and completes
+// the literal.
+func stateTru(s *scanner, c byte) int {
+	if c != 'e' {
+		return s.error(c, "in literal true (expecting 'e')")
+	}
+	s.step = stateEndValue
+	return scanContinue
+}
+
+// stateF is the state after reading `f`; `a` must follow.
+func stateF(s *scanner, c byte) int {
+	if c != 'a' {
+		return s.error(c, "in literal false (expecting 'a')")
+	}
+	s.step = stateFa
+	return scanContinue
+}
+
+// stateFa is the state after reading `fa`; `l` must follow.
+func stateFa(s *scanner, c byte) int {
+	if c != 'l' {
+		return s.error(c, "in literal false (expecting 'l')")
+	}
+	s.step = stateFal
+	return scanContinue
+}
+
+// stateFal is the state after reading `fal`; `s` must follow.
+func stateFal(s *scanner, c byte) int {
+	if c != 's' {
+		return s.error(c, "in literal false (expecting 's')")
+	}
+	s.step = stateFals
+	return scanContinue
+}
+
+// stateFals is the state after reading `fals`; `e` must follow and completes
+// the literal.
+func stateFals(s *scanner, c byte) int {
+	if c != 'e' {
+		return s.error(c, "in literal false (expecting 'e')")
+	}
+	s.step = stateEndValue
+	return scanContinue
+}
+
+// stateN is the state after reading `n`; `u` must follow.
+func stateN(s *scanner, c byte) int {
+	if c != 'u' {
+		return s.error(c, "in literal null (expecting 'u')")
+	}
+	s.step = stateNu
+	return scanContinue
+}
+
+// stateNu is the state after reading `nu`; `l` must follow.
+func stateNu(s *scanner, c byte) int {
+	if c != 'l' {
+		return s.error(c, "in literal null (expecting 'l')")
+	}
+	s.step = stateNul
+	return scanContinue
+}
+
+// stateNul is the state after reading `nul`; `l` must follow and completes
+// the literal.
+func stateNul(s *scanner, c byte) int {
+	if c != 'l' {
+		return s.error(c, "in literal null (expecting 'l')")
+	}
+	s.step = stateEndValue
+	return scanContinue
+}
+
+// stateError is the state after reaching a syntax error,
+// such as after reading `[1}` or `5.1.2`.
+// It ignores its input and keeps returning scanError.
+func stateError(s *scanner, c byte) int {
+	return scanError
+}
+
+// error records a syntax error for the offending byte c, described by
+// context, and switches the scanner to the error state. It always returns
+// scanError.
+func (s *scanner) error(c byte, context string) int {
+	msg := "invalid character " + quoteChar(c) + " " + context
+	s.err = &SyntaxError{msg, s.bytes}
+	s.step = stateError
+	return scanError
+}
+
+// quoteChar formats c as a single-quoted character literal for use in error
+// messages.
+func quoteChar(c byte) string {
+	switch c {
+	// special cases - different from quoted strings
+	case '\'':
+		return `'\''`
+	case '"':
+		return `'"'`
+	}
+
+	// Quote as a string, then swap the surrounding double quotes for single
+	// quotes.
+	quoted := strconv.Quote(string(c))
+	inner := quoted[1 : len(quoted)-1]
+	return "'" + inner + "'"
+}
--- a/stdlib/json_test.go
+++ b/stdlib/json_test.go
@ -3,31 +3,43 @@ package stdlib_test
 import "testing"

+// TestJSON calls the "json" module's encode, decode, indent and html_escape
+// functions and compares each result with a fixed expected value.
 func TestJSON(t *testing.T) {
-	module(t, "json").call("stringify", 5).expect("5")
-	module(t, "json").call("stringify", "foobar").expect(`"foobar"`)
-	module(t, "json").call("stringify", MAP{"foo": 5}).expect("{\"foo\":5}")
-	module(t, "json").call("stringify", IMAP{"foo": 5}).expect("{\"foo\":5}")
-	module(t, "json").call("stringify", ARR{1, 2, 3}).expect("[1,2,3]")
-	module(t, "json").call("stringify", IARR{1, 2, 3}).expect("[1,2,3]")
-	module(t, "json").call("stringify", MAP{"foo": "bar"}).expect("{\"foo\":\"bar\"}")
-	module(t, "json").call("stringify", MAP{"foo": 1.8}).expect("{\"foo\":1.8}")
-	module(t, "json").call("stringify", MAP{"foo": true}).expect("{\"foo\":true}")
-	module(t, "json").call("stringify", MAP{"foo": '8'}).expect("{\"foo\":56}")
-	module(t, "json").call("stringify", MAP{"foo": []byte("foo")}).expect("{\"foo\":\"Zm9v\"}") // json encoding returns []byte as base64 encoded string
-	module(t, "json").call("stringify", MAP{"foo": ARR{"bar", 1, 1.8, '8', true}}).expect("{\"foo\":[\"bar\",1,1.8,56,true]}")
-	module(t, "json").call("stringify", MAP{"foo": IARR{"bar", 1, 1.8, '8', true}}).expect("{\"foo\":[\"bar\",1,1.8,56,true]}")
-	module(t, "json").call("stringify", MAP{"foo": ARR{ARR{"bar", 1}, ARR{"bar", 1}}}).expect("{\"foo\":[[\"bar\",1],[\"bar\",1]]}")
-	module(t, "json").call("stringify", MAP{"foo": MAP{"string": "bar", "int": 1, "float": 1.8, "char": '8', "bool": true}}).expect("{\"foo\":{\"bool\":true,\"char\":56,\"float\":1.8,\"int\":1,\"string\":\"bar\"}}")
-	module(t, "json").call("stringify", MAP{"foo": IMAP{"string": "bar", "int": 1, "float": 1.8, "char": '8', "bool": true}}).expect("{\"foo\":{\"bool\":true,\"char\":56,\"float\":1.8,\"int\":1,\"string\":\"bar\"}}")
-	module(t, "json").call("stringify", MAP{"foo": MAP{"map1": MAP{"string": "bar"}, "map2": MAP{"int": "1"}}}).expect("{\"foo\":{\"map1\":{\"string\":\"bar\"},\"map2\":{\"int\":\"1\"}}}")
-	module(t, "json").call("stringify", ARR{ARR{"bar", 1}, ARR{"bar", 1}}).expect("[[\"bar\",1],[\"bar\",1]]")
+	// encode: every expectation is a []byte, not a string. Nested maps in
+	// these cases hold a single key each.
+	module(t, "json").call("encode", 5).expect([]byte("5"))
+	module(t, "json").call("encode", "foobar").expect([]byte(`"foobar"`))
+	module(t, "json").call("encode", MAP{"foo": 5}).expect([]byte("{\"foo\":5}"))
+	module(t, "json").call("encode", IMAP{"foo": 5}).expect([]byte("{\"foo\":5}"))
+	module(t, "json").call("encode", ARR{1, 2, 3}).expect([]byte("[1,2,3]"))
+	module(t, "json").call("encode", IARR{1, 2, 3}).expect([]byte("[1,2,3]"))
+	module(t, "json").call("encode", MAP{"foo": "bar"}).expect([]byte("{\"foo\":\"bar\"}"))
+	module(t, "json").call("encode", MAP{"foo": 1.8}).expect([]byte("{\"foo\":1.8}"))
+	module(t, "json").call("encode", MAP{"foo": true}).expect([]byte("{\"foo\":true}"))
+	module(t, "json").call("encode", MAP{"foo": '8'}).expect([]byte("{\"foo\":56}"))
+	module(t, "json").call("encode", MAP{"foo": []byte("foo")}).expect([]byte("{\"foo\":\"Zm9v\"}")) // json encoding returns []byte as base64 encoded string
+	module(t, "json").call("encode", MAP{"foo": ARR{"bar", 1, 1.8, '8', true}}).expect([]byte("{\"foo\":[\"bar\",1,1.8,56,true]}"))
+	module(t, "json").call("encode", MAP{"foo": IARR{"bar", 1, 1.8, '8', true}}).expect([]byte("{\"foo\":[\"bar\",1,1.8,56,true]}"))
+	module(t, "json").call("encode", MAP{"foo": ARR{ARR{"bar", 1}, ARR{"bar", 1}}}).expect([]byte("{\"foo\":[[\"bar\",1],[\"bar\",1]]}"))
+	module(t, "json").call("encode", MAP{"foo": MAP{"string": "bar"}}).expect([]byte("{\"foo\":{\"string\":\"bar\"}}"))
+	module(t, "json").call("encode", MAP{"foo": IMAP{"string": "bar"}}).expect([]byte("{\"foo\":{\"string\":\"bar\"}}"))
+	module(t, "json").call("encode", MAP{"foo": MAP{"map1": MAP{"string": "bar"}}}).expect([]byte("{\"foo\":{\"map1\":{\"string\":\"bar\"}}}"))
+	module(t, "json").call("encode", ARR{ARR{"bar", 1}, ARR{"bar", 1}}).expect([]byte("[[\"bar\",1],[\"bar\",1]]"))

-	module(t, "json").call("parse", `5`).expect(5.0)
-	module(t, "json").call("parse", `"foo"`).expect("foo")
-	module(t, "json").call("parse", `[1,2,3,"bar"]`).expect(ARR{1.0, 2.0, 3.0, "bar"})
-	module(t, "json").call("parse", `{"foo":5}`).expect(MAP{"foo": 5.0})
-	module(t, "json").call("parse", `{"foo":2.5}`).expect(MAP{"foo": 2.5})
-	module(t, "json").call("parse", `{"foo":true}`).expect(MAP{"foo": true})
-	module(t, "json").call("parse", `{"foo":"bar"}`).expect(MAP{"foo": "bar"})
-	module(t, "json").call("parse", `{"foo":[1,2,3,"bar"]}`).expect(MAP{"foo": ARR{1.0, 2.0, 3.0, "bar"}})
+	// decode: input is passed as a string; JSON numbers are expected back
+	// as floats (5.0 rather than 5).
+	module(t, "json").call("decode", `5`).expect(5.0)
+	module(t, "json").call("decode", `"foo"`).expect("foo")
+	module(t, "json").call("decode", `[1,2,3,"bar"]`).expect(ARR{1.0, 2.0, 3.0, "bar"})
+	module(t, "json").call("decode", `{"foo":5}`).expect(MAP{"foo": 5.0})
+	module(t, "json").call("decode", `{"foo":2.5}`).expect(MAP{"foo": 2.5})
+	module(t, "json").call("decode", `{"foo":true}`).expect(MAP{"foo": true})
+	module(t, "json").call("decode", `{"foo":"bar"}`).expect(MAP{"foo": "bar"})
+	module(t, "json").call("decode", `{"foo":[1,2,3,"bar"]}`).expect(MAP{"foo": ARR{1.0, 2.0, 3.0, "bar"}})
+
+	// indent: called with the compact JSON bytes followed by "" and a
+	// two-space string (presumably prefix and indent, as in Go's
+	// json.Indent; confirm against jsonIndent). The expected output has no
+	// trailing newline.
+	module(t, "json").call("indent", []byte("{\"foo\":[\"bar\",1,1.8,56,true]}"), "", "  ").expect([]byte(`{
+  "foo": [
+    "bar",
+    1,
+    1.8,
+    56,
+    true
+  ]
+}`))
+
+	// html_escape: '<', '>', '&' and the UTF-8 encodings of U+2028/U+2029
+	// are expected to come back as \uXXXX escapes.
+	module(t, "json").call("html_escape", []byte(`{"M":"<html>foo &`+"\xe2\x80\xa8 \xe2\x80\xa9"+`</html>"}`)).expect([]byte(`{"M":"\u003chtml\u003efoo \u0026\u2028 \u2029\u003c/html\u003e"}`))
 }