decode.go 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. // A modified version of Go's JSON implementation.
  2. // Copyright 2010 The Go Authors. All rights reserved.
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file.
  5. package json
  6. import (
  7. "strconv"
  8. "unicode"
  9. "unicode/utf16"
  10. "unicode/utf8"
  11. "github.com/d5/tengo/v2"
  12. )
  13. // Decode parses the JSON-encoded data and returns the result object.
  14. func Decode(data []byte) (tengo.Object, error) {
  15. var d decodeState
  16. err := checkValid(data, &d.scan)
  17. if err != nil {
  18. return nil, err
  19. }
  20. d.init(data)
  21. d.scan.reset()
  22. d.scanWhile(scanSkipSpace)
  23. return d.value()
  24. }
// decodeState represents the state while decoding a JSON value.
type decodeState struct {
	data   []byte  // input being decoded
	off    int     // next read offset in data; set to len(data)+1 once EOF has been processed
	opcode int     // last read result: the scan code from the most recent scanner step or eof call
	scan   scanner // scanner that is stepped over data one byte at a time
}
  32. // readIndex returns the position of the last byte read.
  33. func (d *decodeState) readIndex() int {
  34. return d.off - 1
  35. }
// phasePanicMsg is the message the decoder panics with when it sees a scan
// code it does not expect at its current position, or when a literal that the
// scanner accepted cannot be unquoted.
const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?"
  37. func (d *decodeState) init(data []byte) *decodeState {
  38. d.data = data
  39. d.off = 0
  40. return d
  41. }
  42. // scanNext processes the byte at d.data[d.off].
  43. func (d *decodeState) scanNext() {
  44. if d.off < len(d.data) {
  45. d.opcode = d.scan.step(&d.scan, d.data[d.off])
  46. d.off++
  47. } else {
  48. d.opcode = d.scan.eof()
  49. d.off = len(d.data) + 1 // mark processed EOF with len+1
  50. }
  51. }
  52. // scanWhile processes bytes in d.data[d.off:] until it
  53. // receives a scan code not equal to op.
  54. func (d *decodeState) scanWhile(op int) (isFloat bool) {
  55. s, data, i := &d.scan, d.data, d.off
  56. for i < len(data) {
  57. if data[i] == '.' || data[i] == 'e' || data[i] == 'E' {
  58. isFloat = true
  59. }
  60. newOp := s.step(s, data[i])
  61. i++
  62. if newOp != op {
  63. d.opcode = newOp
  64. d.off = i
  65. return
  66. }
  67. }
  68. d.off = len(data) + 1 // mark processed EOF with len+1
  69. d.opcode = d.scan.eof()
  70. return
  71. }
  72. func (d *decodeState) value() (tengo.Object, error) {
  73. switch d.opcode {
  74. default:
  75. panic(phasePanicMsg)
  76. case scanBeginArray:
  77. o, err := d.array()
  78. if err != nil {
  79. return nil, err
  80. }
  81. d.scanNext()
  82. return o, nil
  83. case scanBeginObject:
  84. o, err := d.object()
  85. if err != nil {
  86. return nil, err
  87. }
  88. d.scanNext()
  89. return o, nil
  90. case scanBeginLiteral:
  91. return d.literal()
  92. }
  93. }
  94. func (d *decodeState) array() (tengo.Object, error) {
  95. var arr []tengo.Object
  96. for {
  97. // Look ahead for ] - can only happen on first iteration.
  98. d.scanWhile(scanSkipSpace)
  99. if d.opcode == scanEndArray {
  100. break
  101. }
  102. o, err := d.value()
  103. if err != nil {
  104. return nil, err
  105. }
  106. arr = append(arr, o)
  107. // Next token must be , or ].
  108. if d.opcode == scanSkipSpace {
  109. d.scanWhile(scanSkipSpace)
  110. }
  111. if d.opcode == scanEndArray {
  112. break
  113. }
  114. if d.opcode != scanArrayValue {
  115. panic(phasePanicMsg)
  116. }
  117. }
  118. return &tengo.Array{Value: arr}, nil
  119. }
  120. func (d *decodeState) object() (tengo.Object, error) {
  121. m := make(map[string]tengo.Object)
  122. for {
  123. // Read opening " of string key or closing }.
  124. d.scanWhile(scanSkipSpace)
  125. if d.opcode == scanEndObject {
  126. // closing } - can only happen on first iteration.
  127. break
  128. }
  129. if d.opcode != scanBeginLiteral {
  130. panic(phasePanicMsg)
  131. }
  132. // Read string key.
  133. start := d.readIndex()
  134. d.scanWhile(scanContinue)
  135. item := d.data[start:d.readIndex()]
  136. key, ok := unquote(item)
  137. if !ok {
  138. panic(phasePanicMsg)
  139. }
  140. // Read : before value.
  141. if d.opcode == scanSkipSpace {
  142. d.scanWhile(scanSkipSpace)
  143. }
  144. if d.opcode != scanObjectKey {
  145. panic(phasePanicMsg)
  146. }
  147. d.scanWhile(scanSkipSpace)
  148. // Read value.
  149. o, err := d.value()
  150. if err != nil {
  151. return nil, err
  152. }
  153. m[key] = o
  154. // Next token must be , or }.
  155. if d.opcode == scanSkipSpace {
  156. d.scanWhile(scanSkipSpace)
  157. }
  158. if d.opcode == scanEndObject {
  159. break
  160. }
  161. if d.opcode != scanObjectValue {
  162. panic(phasePanicMsg)
  163. }
  164. }
  165. return &tengo.Map{Value: m}, nil
  166. }
  167. func (d *decodeState) literal() (tengo.Object, error) {
  168. // All bytes inside literal return scanContinue op code.
  169. start := d.readIndex()
  170. isFloat := d.scanWhile(scanContinue)
  171. item := d.data[start:d.readIndex()]
  172. switch c := item[0]; c {
  173. case 'n': // null
  174. return tengo.UndefinedValue, nil
  175. case 't', 'f': // true, false
  176. if c == 't' {
  177. return tengo.TrueValue, nil
  178. }
  179. return tengo.FalseValue, nil
  180. case '"': // string
  181. s, ok := unquote(item)
  182. if !ok {
  183. panic(phasePanicMsg)
  184. }
  185. return &tengo.String{Value: s}, nil
  186. default: // number
  187. if c != '-' && (c < '0' || c > '9') {
  188. panic(phasePanicMsg)
  189. }
  190. if isFloat {
  191. n, _ := strconv.ParseFloat(string(item), 10)
  192. return &tengo.Float{Value: n}, nil
  193. }
  194. n, _ := strconv.ParseInt(string(item), 10, 64)
  195. return &tengo.Int{Value: n}, nil
  196. }
  197. }
  198. // getu4 decodes \uXXXX from the beginning of s, returning the hex value,
  199. // or it returns -1.
  200. func getu4(s []byte) rune {
  201. if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
  202. return -1
  203. }
  204. var r rune
  205. for _, c := range s[2:6] {
  206. switch {
  207. case '0' <= c && c <= '9':
  208. c = c - '0'
  209. case 'a' <= c && c <= 'f':
  210. c = c - 'a' + 10
  211. case 'A' <= c && c <= 'F':
  212. c = c - 'A' + 10
  213. default:
  214. return -1
  215. }
  216. r = r*16 + rune(c)
  217. }
  218. return r
  219. }
  220. // unquote converts a quoted JSON string literal s into an actual string t.
  221. // The rules are different than for Go, so cannot use strconv.Unquote.
  222. func unquote(s []byte) (t string, ok bool) {
  223. s, ok = unquoteBytes(s)
  224. t = string(s)
  225. return
  226. }
  227. func unquoteBytes(s []byte) (t []byte, ok bool) {
  228. if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
  229. return
  230. }
  231. s = s[1 : len(s)-1]
  232. // Check for unusual characters. If there are none, then no unquoting is
  233. // needed, so return a slice of the original bytes.
  234. r := 0
  235. for r < len(s) {
  236. c := s[r]
  237. if c == '\\' || c == '"' || c < ' ' {
  238. break
  239. }
  240. if c < utf8.RuneSelf {
  241. r++
  242. continue
  243. }
  244. rr, size := utf8.DecodeRune(s[r:])
  245. if rr == utf8.RuneError && size == 1 {
  246. break
  247. }
  248. r += size
  249. }
  250. if r == len(s) {
  251. return s, true
  252. }
  253. b := make([]byte, len(s)+2*utf8.UTFMax)
  254. w := copy(b, s[0:r])
  255. for r < len(s) {
  256. // Out of room? Can only happen if s is full of
  257. // malformed UTF-8 and we're replacing each
  258. // byte with RuneError.
  259. if w >= len(b)-2*utf8.UTFMax {
  260. nb := make([]byte, (len(b)+utf8.UTFMax)*2)
  261. copy(nb, b[0:w])
  262. b = nb
  263. }
  264. switch c := s[r]; {
  265. case c == '\\':
  266. r++
  267. if r >= len(s) {
  268. return
  269. }
  270. switch s[r] {
  271. default:
  272. return
  273. case '"', '\\', '/', '\'':
  274. b[w] = s[r]
  275. r++
  276. w++
  277. case 'b':
  278. b[w] = '\b'
  279. r++
  280. w++
  281. case 'f':
  282. b[w] = '\f'
  283. r++
  284. w++
  285. case 'n':
  286. b[w] = '\n'
  287. r++
  288. w++
  289. case 'r':
  290. b[w] = '\r'
  291. r++
  292. w++
  293. case 't':
  294. b[w] = '\t'
  295. r++
  296. w++
  297. case 'u':
  298. r--
  299. rr := getu4(s[r:])
  300. if rr < 0 {
  301. return
  302. }
  303. r += 6
  304. if utf16.IsSurrogate(rr) {
  305. rr1 := getu4(s[r:])
  306. dec := utf16.DecodeRune(rr, rr1)
  307. if dec != unicode.ReplacementChar {
  308. // A valid pair; consume.
  309. r += 6
  310. w += utf8.EncodeRune(b[w:], dec)
  311. break
  312. }
  313. // Invalid surrogate; fall back to replacement rune.
  314. rr = unicode.ReplacementChar
  315. }
  316. w += utf8.EncodeRune(b[w:], rr)
  317. }
  318. // Quote, control characters are invalid.
  319. case c == '"', c < ' ':
  320. return
  321. // ASCII
  322. case c < utf8.RuneSelf:
  323. b[w] = c
  324. r++
  325. w++
  326. // Coerce to well-formed UTF-8.
  327. default:
  328. rr, size := utf8.DecodeRune(s[r:])
  329. r += size
  330. w += utf8.EncodeRune(b[w:], rr)
  331. }
  332. }
  333. return b[0:w], true
  334. }