decode.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. // A modified version of Go's JSON implementation.
  2. // Copyright 2010 The Go Authors. All rights reserved.
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file.
  5. package json
  6. import (
  7. "strconv"
  8. "unicode"
  9. "unicode/utf16"
  10. "unicode/utf8"
  11. "github.com/d5/tengo/v2"
  12. )
  13. // Decode parses the JSON-encoded data and returns the result object.
  14. func Decode(data []byte) (tengo.Object, error) {
  15. var d decodeState
  16. err := checkValid(data, &d.scan)
  17. if err != nil {
  18. return nil, err
  19. }
  20. d.init(data)
  21. d.scan.reset()
  22. d.scanWhile(scanSkipSpace)
  23. return d.value()
  24. }
// decodeState represents the state while decoding a JSON value.
// It pairs the input bytes with a scanner that is fed one byte at a time.
type decodeState struct {
	data   []byte  // input being decoded
	off    int     // next read offset in data; len(data)+1 once EOF has been processed
	opcode int     // last read result (value returned by the most recent scanner step or eof call)
	scan   scanner // scanner driven over data by scanNext and scanWhile
}
  32. // readIndex returns the position of the last byte read.
  33. func (d *decodeState) readIndex() int {
  34. return d.off - 1
  35. }
// phasePanicMsg is the panic value used when the decoder meets a scanner
// opcode or literal that it does not expect at the current position.
const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?"
  37. func (d *decodeState) init(data []byte) *decodeState {
  38. d.data = data
  39. d.off = 0
  40. return d
  41. }
  42. // scanNext processes the byte at d.data[d.off].
  43. func (d *decodeState) scanNext() {
  44. if d.off < len(d.data) {
  45. d.opcode = d.scan.step(&d.scan, d.data[d.off])
  46. d.off++
  47. } else {
  48. d.opcode = d.scan.eof()
  49. d.off = len(d.data) + 1 // mark processed EOF with len+1
  50. }
  51. }
  52. // scanWhile processes bytes in d.data[d.off:] until it
  53. // receives a scan code not equal to op.
  54. func (d *decodeState) scanWhile(op int) {
  55. s, data, i := &d.scan, d.data, d.off
  56. for i < len(data) {
  57. newOp := s.step(s, data[i])
  58. i++
  59. if newOp != op {
  60. d.opcode = newOp
  61. d.off = i
  62. return
  63. }
  64. }
  65. d.off = len(data) + 1 // mark processed EOF with len+1
  66. d.opcode = d.scan.eof()
  67. }
  68. func (d *decodeState) value() (tengo.Object, error) {
  69. switch d.opcode {
  70. default:
  71. panic(phasePanicMsg)
  72. case scanBeginArray:
  73. o, err := d.array()
  74. if err != nil {
  75. return nil, err
  76. }
  77. d.scanNext()
  78. return o, nil
  79. case scanBeginObject:
  80. o, err := d.object()
  81. if err != nil {
  82. return nil, err
  83. }
  84. d.scanNext()
  85. return o, nil
  86. case scanBeginLiteral:
  87. return d.literal()
  88. }
  89. }
  90. func (d *decodeState) array() (tengo.Object, error) {
  91. var arr []tengo.Object
  92. for {
  93. // Look ahead for ] - can only happen on first iteration.
  94. d.scanWhile(scanSkipSpace)
  95. if d.opcode == scanEndArray {
  96. break
  97. }
  98. o, err := d.value()
  99. if err != nil {
  100. return nil, err
  101. }
  102. arr = append(arr, o)
  103. // Next token must be , or ].
  104. if d.opcode == scanSkipSpace {
  105. d.scanWhile(scanSkipSpace)
  106. }
  107. if d.opcode == scanEndArray {
  108. break
  109. }
  110. if d.opcode != scanArrayValue {
  111. panic(phasePanicMsg)
  112. }
  113. }
  114. return &tengo.Array{Value: arr}, nil
  115. }
  116. func (d *decodeState) object() (tengo.Object, error) {
  117. m := make(map[string]tengo.Object)
  118. for {
  119. // Read opening " of string key or closing }.
  120. d.scanWhile(scanSkipSpace)
  121. if d.opcode == scanEndObject {
  122. // closing } - can only happen on first iteration.
  123. break
  124. }
  125. if d.opcode != scanBeginLiteral {
  126. panic(phasePanicMsg)
  127. }
  128. // Read string key.
  129. start := d.readIndex()
  130. d.scanWhile(scanContinue)
  131. item := d.data[start:d.readIndex()]
  132. key, ok := unquote(item)
  133. if !ok {
  134. panic(phasePanicMsg)
  135. }
  136. // Read : before value.
  137. if d.opcode == scanSkipSpace {
  138. d.scanWhile(scanSkipSpace)
  139. }
  140. if d.opcode != scanObjectKey {
  141. panic(phasePanicMsg)
  142. }
  143. d.scanWhile(scanSkipSpace)
  144. // Read value.
  145. o, err := d.value()
  146. if err != nil {
  147. return nil, err
  148. }
  149. m[key] = o
  150. // Next token must be , or }.
  151. if d.opcode == scanSkipSpace {
  152. d.scanWhile(scanSkipSpace)
  153. }
  154. if d.opcode == scanEndObject {
  155. break
  156. }
  157. if d.opcode != scanObjectValue {
  158. panic(phasePanicMsg)
  159. }
  160. }
  161. return &tengo.Map{Value: m}, nil
  162. }
  163. func (d *decodeState) literal() (tengo.Object, error) {
  164. // All bytes inside literal return scanContinue op code.
  165. start := d.readIndex()
  166. d.scanWhile(scanContinue)
  167. item := d.data[start:d.readIndex()]
  168. switch c := item[0]; c {
  169. case 'n': // null
  170. return tengo.UndefinedValue, nil
  171. case 't', 'f': // true, false
  172. if c == 't' {
  173. return tengo.TrueValue, nil
  174. }
  175. return tengo.FalseValue, nil
  176. case '"': // string
  177. s, ok := unquote(item)
  178. if !ok {
  179. panic(phasePanicMsg)
  180. }
  181. return &tengo.String{Value: s}, nil
  182. default: // number
  183. if c != '-' && (c < '0' || c > '9') {
  184. panic(phasePanicMsg)
  185. }
  186. n, _ := strconv.ParseFloat(string(item), 10)
  187. return tengo.Float{Value: n}, nil
  188. }
  189. }
  190. // getu4 decodes \uXXXX from the beginning of s, returning the hex value,
  191. // or it returns -1.
  192. func getu4(s []byte) rune {
  193. if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
  194. return -1
  195. }
  196. var r rune
  197. for _, c := range s[2:6] {
  198. switch {
  199. case '0' <= c && c <= '9':
  200. c = c - '0'
  201. case 'a' <= c && c <= 'f':
  202. c = c - 'a' + 10
  203. case 'A' <= c && c <= 'F':
  204. c = c - 'A' + 10
  205. default:
  206. return -1
  207. }
  208. r = r*16 + rune(c)
  209. }
  210. return r
  211. }
  212. // unquote converts a quoted JSON string literal s into an actual string t.
  213. // The rules are different than for Go, so cannot use strconv.Unquote.
  214. func unquote(s []byte) (t string, ok bool) {
  215. s, ok = unquoteBytes(s)
  216. t = string(s)
  217. return
  218. }
  219. func unquoteBytes(s []byte) (t []byte, ok bool) {
  220. if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
  221. return
  222. }
  223. s = s[1 : len(s)-1]
  224. // Check for unusual characters. If there are none, then no unquoting is
  225. // needed, so return a slice of the original bytes.
  226. r := 0
  227. for r < len(s) {
  228. c := s[r]
  229. if c == '\\' || c == '"' || c < ' ' {
  230. break
  231. }
  232. if c < utf8.RuneSelf {
  233. r++
  234. continue
  235. }
  236. rr, size := utf8.DecodeRune(s[r:])
  237. if rr == utf8.RuneError && size == 1 {
  238. break
  239. }
  240. r += size
  241. }
  242. if r == len(s) {
  243. return s, true
  244. }
  245. b := make([]byte, len(s)+2*utf8.UTFMax)
  246. w := copy(b, s[0:r])
  247. for r < len(s) {
  248. // Out of room? Can only happen if s is full of
  249. // malformed UTF-8 and we're replacing each
  250. // byte with RuneError.
  251. if w >= len(b)-2*utf8.UTFMax {
  252. nb := make([]byte, (len(b)+utf8.UTFMax)*2)
  253. copy(nb, b[0:w])
  254. b = nb
  255. }
  256. switch c := s[r]; {
  257. case c == '\\':
  258. r++
  259. if r >= len(s) {
  260. return
  261. }
  262. switch s[r] {
  263. default:
  264. return
  265. case '"', '\\', '/', '\'':
  266. b[w] = s[r]
  267. r++
  268. w++
  269. case 'b':
  270. b[w] = '\b'
  271. r++
  272. w++
  273. case 'f':
  274. b[w] = '\f'
  275. r++
  276. w++
  277. case 'n':
  278. b[w] = '\n'
  279. r++
  280. w++
  281. case 'r':
  282. b[w] = '\r'
  283. r++
  284. w++
  285. case 't':
  286. b[w] = '\t'
  287. r++
  288. w++
  289. case 'u':
  290. r--
  291. rr := getu4(s[r:])
  292. if rr < 0 {
  293. return
  294. }
  295. r += 6
  296. if utf16.IsSurrogate(rr) {
  297. rr1 := getu4(s[r:])
  298. dec := utf16.DecodeRune(rr, rr1)
  299. if dec != unicode.ReplacementChar {
  300. // A valid pair; consume.
  301. r += 6
  302. w += utf8.EncodeRune(b[w:], dec)
  303. break
  304. }
  305. // Invalid surrogate; fall back to replacement rune.
  306. rr = unicode.ReplacementChar
  307. }
  308. w += utf8.EncodeRune(b[w:], rr)
  309. }
  310. // Quote, control characters are invalid.
  311. case c == '"', c < ' ':
  312. return
  313. // ASCII
  314. case c < utf8.RuneSelf:
  315. b[w] = c
  316. r++
  317. w++
  318. // Coerce to well-formed UTF-8.
  319. default:
  320. rr, size := utf8.DecodeRune(s[r:])
  321. r += size
  322. w += utf8.EncodeRune(b[w:], rr)
  323. }
  324. }
  325. return b[0:w], true
  326. }