scanner.go 15 KB


  1. // A modified version of Go's JSON implementation.
  2. // Copyright 2010 The Go Authors. All rights reserved.
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file.
  5. package json
  6. import "strconv"
  7. func checkValid(data []byte, scan *scanner) error {
  8. scan.reset()
  9. for _, c := range data {
  10. scan.bytes++
  11. if scan.step(scan, c) == scanError {
  12. return scan.err
  13. }
  14. }
  15. if scan.eof() == scanError {
  16. return scan.err
  17. }
  18. return nil
  19. }
  // SyntaxError describes a JSON syntax error.
  type SyntaxError struct {
  	// msg is the description of the error; Error returns it verbatim.
  	msg string
  	// Offset records that the error occurred after reading Offset bytes.
  	Offset int64
  }

  // Error returns the description of the syntax error.
  func (e *SyntaxError) Error() string {
  	return e.msg
  }
  // scanner is a byte-at-a-time JSON scanning state machine.
  //
  // Usage: call reset, then hand over the input one byte at a time with
  // scan.step(&scan, c). Each call returns an opcode describing significant
  // parsing events (the start and end of literals, objects and arrays) so
  // that the caller can follow along if it wishes.
  //
  // The opcode scanEnd reports that a single top-level JSON value was
  // completed *before* the byte that was just passed in. The report has to
  // be delayed so that the end of a number can be recognized: 123 may be a
  // whole value or merely the beginning of 12345e+6.
  type scanner struct {
  	// step is the function that performs the next state transition.
  	// An integer state constant with one big switch was also tried, but
  	// calling the func directly was 10% faster on a 64-bit Mac Mini, and
  	// it is nicer to read.
  	step func(*scanner, byte) int

  	// endTop is set once the end of the top-level value has been reached.
  	endTop bool

  	// parseState is the stack of constructs currently being scanned:
  	// array values, object keys, object values.
  	parseState []int

  	// err is the error that happened, if any.
  	err error

  	// bytes is the total number of bytes consumed; it is updated by
  	// decoder.Decode.
  	bytes int64
  }
  // Opcodes returned by the state transition functions stored in
  // scanner.step and by the method scanner.eof. They describe the current
  // state of the scan for callers that want to follow along.
  //
  // Ignoring the result of any individual step call is fine: once one call
  // returns scanError, every subsequent call returns scanError as well.
  const (
  	// "Continue" results.

  	// scanContinue: uninteresting byte.
  	scanContinue = iota
  	// scanBeginLiteral: a literal began; its end is implied by the next
  	// result that is not scanContinue.
  	scanBeginLiteral
  	// scanBeginObject: begin object.
  	scanBeginObject
  	// scanObjectKey: just finished an object key (string).
  	scanObjectKey
  	// scanObjectValue: just finished a non-last object value.
  	scanObjectValue
  	// scanEndObject: end object (implies scanObjectValue if possible).
  	scanEndObject
  	// scanBeginArray: begin array.
  	scanBeginArray
  	// scanArrayValue: just finished an array value.
  	scanArrayValue
  	// scanEndArray: end array (implies scanArrayValue if possible).
  	scanEndArray
  	// scanSkipSpace: space byte that can be skipped; known to be the
  	// last "continue" result.
  	scanSkipSpace

  	// "Stop" results.

  	// scanEnd: the top-level value ended *before* this byte; known to be
  	// the first "stop" result.
  	scanEnd
  	// scanError: hit an error, recorded in scanner.err.
  	scanError
  )
  // Entries of the scanner's parseState stack. Each one gives the current
  // state of a composite value being scanned. When the scanner is inside a
  // nested value, the stack describes the nesting with the outermost
  // composite at index 0.
  const (
  	// parseObjectKey: parsing an object key (before the colon).
  	parseObjectKey = iota
  	// parseObjectValue: parsing an object value (after the colon).
  	parseObjectValue
  	// parseArrayValue: parsing an array value.
  	parseArrayValue
  )
  85. // reset prepares the scanner for use.
  86. // It must be called before calling s.step.
  87. func (s *scanner) reset() {
  88. s.step = stateBeginValue
  89. s.parseState = s.parseState[0:0]
  90. s.err = nil
  91. s.endTop = false
  92. }
  93. // eof tells the scanner that the end of input has been reached.
  94. // It returns a scan status just as s.step does.
  95. func (s *scanner) eof() int {
  96. if s.err != nil {
  97. return scanError
  98. }
  99. if s.endTop {
  100. return scanEnd
  101. }
  102. s.step(s, ' ')
  103. if s.endTop {
  104. return scanEnd
  105. }
  106. if s.err == nil {
  107. s.err = &SyntaxError{"unexpected end of JSON input", s.bytes}
  108. }
  109. return scanError
  110. }
  111. // pushParseState pushes a new parse state p onto the parse stack.
  112. func (s *scanner) pushParseState(p int) {
  113. s.parseState = append(s.parseState, p)
  114. }
  115. // popParseState pops a parse state (already obtained) off the stack
  116. // and updates s.step accordingly.
  117. func (s *scanner) popParseState() {
  118. n := len(s.parseState) - 1
  119. s.parseState = s.parseState[0:n]
  120. if n == 0 {
  121. s.step = stateEndTop
  122. s.endTop = true
  123. } else {
  124. s.step = stateEndValue
  125. }
  126. }
  // isSpace reports whether c is a space, tab, carriage return or newline,
  // the only bytes the scanner treats as skippable whitespace.
  func isSpace(c byte) bool {
  	switch c {
  	case ' ', '\t', '\r', '\n':
  		return true
  	}
  	return false
  }
  130. // stateBeginValueOrEmpty is the state after reading `[`.
  131. func stateBeginValueOrEmpty(s *scanner, c byte) int {
  132. if c <= ' ' && isSpace(c) {
  133. return scanSkipSpace
  134. }
  135. if c == ']' {
  136. return stateEndValue(s, c)
  137. }
  138. return stateBeginValue(s, c)
  139. }
  140. // stateBeginValue is the state at the beginning of the input.
  141. func stateBeginValue(s *scanner, c byte) int {
  142. if c <= ' ' && isSpace(c) {
  143. return scanSkipSpace
  144. }
  145. switch c {
  146. case '{':
  147. s.step = stateBeginStringOrEmpty
  148. s.pushParseState(parseObjectKey)
  149. return scanBeginObject
  150. case '[':
  151. s.step = stateBeginValueOrEmpty
  152. s.pushParseState(parseArrayValue)
  153. return scanBeginArray
  154. case '"':
  155. s.step = stateInString
  156. return scanBeginLiteral
  157. case '-':
  158. s.step = stateNeg
  159. return scanBeginLiteral
  160. case '0': // beginning of 0.123
  161. s.step = state0
  162. return scanBeginLiteral
  163. case 't': // beginning of true
  164. s.step = stateT
  165. return scanBeginLiteral
  166. case 'f': // beginning of false
  167. s.step = stateF
  168. return scanBeginLiteral
  169. case 'n': // beginning of null
  170. s.step = stateN
  171. return scanBeginLiteral
  172. }
  173. if '1' <= c && c <= '9' { // beginning of 1234.5
  174. s.step = state1
  175. return scanBeginLiteral
  176. }
  177. return s.error(c, "looking for beginning of value")
  178. }
  179. // stateBeginStringOrEmpty is the state after reading `{`.
  180. func stateBeginStringOrEmpty(s *scanner, c byte) int {
  181. if c <= ' ' && isSpace(c) {
  182. return scanSkipSpace
  183. }
  184. if c == '}' {
  185. n := len(s.parseState)
  186. s.parseState[n-1] = parseObjectValue
  187. return stateEndValue(s, c)
  188. }
  189. return stateBeginString(s, c)
  190. }
  191. // stateBeginString is the state after reading `{"key": value,`.
  192. func stateBeginString(s *scanner, c byte) int {
  193. if c <= ' ' && isSpace(c) {
  194. return scanSkipSpace
  195. }
  196. if c == '"' {
  197. s.step = stateInString
  198. return scanBeginLiteral
  199. }
  200. return s.error(c, "looking for beginning of object key string")
  201. }
  202. // stateEndValue is the state after completing a value,
  203. // such as after reading `{}` or `true` or `["x"`.
  204. func stateEndValue(s *scanner, c byte) int {
  205. n := len(s.parseState)
  206. if n == 0 {
  207. // Completed top-level before the current byte.
  208. s.step = stateEndTop
  209. s.endTop = true
  210. return stateEndTop(s, c)
  211. }
  212. if c <= ' ' && isSpace(c) {
  213. s.step = stateEndValue
  214. return scanSkipSpace
  215. }
  216. ps := s.parseState[n-1]
  217. switch ps {
  218. case parseObjectKey:
  219. if c == ':' {
  220. s.parseState[n-1] = parseObjectValue
  221. s.step = stateBeginValue
  222. return scanObjectKey
  223. }
  224. return s.error(c, "after object key")
  225. case parseObjectValue:
  226. if c == ',' {
  227. s.parseState[n-1] = parseObjectKey
  228. s.step = stateBeginString
  229. return scanObjectValue
  230. }
  231. if c == '}' {
  232. s.popParseState()
  233. return scanEndObject
  234. }
  235. return s.error(c, "after object key:value pair")
  236. case parseArrayValue:
  237. if c == ',' {
  238. s.step = stateBeginValue
  239. return scanArrayValue
  240. }
  241. if c == ']' {
  242. s.popParseState()
  243. return scanEndArray
  244. }
  245. return s.error(c, "after array element")
  246. }
  247. return s.error(c, "")
  248. }
  249. // stateEndTop is the state after finishing the top-level value,
  250. // such as after reading `{}` or `[1,2,3]`.
  251. // Only space characters should be seen now.
  252. func stateEndTop(s *scanner, c byte) int {
  253. if !isSpace(c) {
  254. // Complain about non-space byte on next call.
  255. s.error(c, "after top-level value")
  256. }
  257. return scanEnd
  258. }
  259. // stateInString is the state after reading `"`.
  260. func stateInString(s *scanner, c byte) int {
  261. if c == '"' {
  262. s.step = stateEndValue
  263. return scanContinue
  264. }
  265. if c == '\\' {
  266. s.step = stateInStringEsc
  267. return scanContinue
  268. }
  269. if c < 0x20 {
  270. return s.error(c, "in string literal")
  271. }
  272. return scanContinue
  273. }
  274. // stateInStringEsc is the state after reading `"\` during a quoted string.
  275. func stateInStringEsc(s *scanner, c byte) int {
  276. switch c {
  277. case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
  278. s.step = stateInString
  279. return scanContinue
  280. case 'u':
  281. s.step = stateInStringEscU
  282. return scanContinue
  283. }
  284. return s.error(c, "in string escape code")
  285. }
  286. // stateInStringEscU is the state after reading `"\u` during a quoted string.
  287. func stateInStringEscU(s *scanner, c byte) int {
  288. if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
  289. s.step = stateInStringEscU1
  290. return scanContinue
  291. }
  292. // numbers
  293. return s.error(c, "in \\u hexadecimal character escape")
  294. }
  295. // stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
  296. func stateInStringEscU1(s *scanner, c byte) int {
  297. if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
  298. s.step = stateInStringEscU12
  299. return scanContinue
  300. }
  301. // numbers
  302. return s.error(c, "in \\u hexadecimal character escape")
  303. }
  304. // stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
  305. func stateInStringEscU12(s *scanner, c byte) int {
  306. if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
  307. s.step = stateInStringEscU123
  308. return scanContinue
  309. }
  310. // numbers
  311. return s.error(c, "in \\u hexadecimal character escape")
  312. }
  313. // stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
  314. func stateInStringEscU123(s *scanner, c byte) int {
  315. if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
  316. s.step = stateInString
  317. return scanContinue
  318. }
  319. // numbers
  320. return s.error(c, "in \\u hexadecimal character escape")
  321. }
  322. // stateNeg is the state after reading `-` during a number.
  323. func stateNeg(s *scanner, c byte) int {
  324. if c == '0' {
  325. s.step = state0
  326. return scanContinue
  327. }
  328. if '1' <= c && c <= '9' {
  329. s.step = state1
  330. return scanContinue
  331. }
  332. return s.error(c, "in numeric literal")
  333. }
  334. // state1 is the state after reading a non-zero integer during a number,
  335. // such as after reading `1` or `100` but not `0`.
  336. func state1(s *scanner, c byte) int {
  337. if '0' <= c && c <= '9' {
  338. s.step = state1
  339. return scanContinue
  340. }
  341. return state0(s, c)
  342. }
  343. // state0 is the state after reading `0` during a number.
  344. func state0(s *scanner, c byte) int {
  345. if c == '.' {
  346. s.step = stateDot
  347. return scanContinue
  348. }
  349. if c == 'e' || c == 'E' {
  350. s.step = stateE
  351. return scanContinue
  352. }
  353. return stateEndValue(s, c)
  354. }
  355. // stateDot is the state after reading the integer and decimal point in a number,
  356. // such as after reading `1.`.
  357. func stateDot(s *scanner, c byte) int {
  358. if '0' <= c && c <= '9' {
  359. s.step = stateDot0
  360. return scanContinue
  361. }
  362. return s.error(c, "after decimal point in numeric literal")
  363. }
  364. // stateDot0 is the state after reading the integer, decimal point, and subsequent
  365. // digits of a number, such as after reading `3.14`.
  366. func stateDot0(s *scanner, c byte) int {
  367. if '0' <= c && c <= '9' {
  368. return scanContinue
  369. }
  370. if c == 'e' || c == 'E' {
  371. s.step = stateE
  372. return scanContinue
  373. }
  374. return stateEndValue(s, c)
  375. }
  376. // stateE is the state after reading the mantissa and e in a number,
  377. // such as after reading `314e` or `0.314e`.
  378. func stateE(s *scanner, c byte) int {
  379. if c == '+' || c == '-' {
  380. s.step = stateESign
  381. return scanContinue
  382. }
  383. return stateESign(s, c)
  384. }
  385. // stateESign is the state after reading the mantissa, e, and sign in a number,
  386. // such as after reading `314e-` or `0.314e+`.
  387. func stateESign(s *scanner, c byte) int {
  388. if '0' <= c && c <= '9' {
  389. s.step = stateE0
  390. return scanContinue
  391. }
  392. return s.error(c, "in exponent of numeric literal")
  393. }
  394. // stateE0 is the state after reading the mantissa, e, optional sign,
  395. // and at least one digit of the exponent in a number,
  396. // such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
  397. func stateE0(s *scanner, c byte) int {
  398. if '0' <= c && c <= '9' {
  399. return scanContinue
  400. }
  401. return stateEndValue(s, c)
  402. }
  403. // stateT is the state after reading `t`.
  404. func stateT(s *scanner, c byte) int {
  405. if c == 'r' {
  406. s.step = stateTr
  407. return scanContinue
  408. }
  409. return s.error(c, "in literal true (expecting 'r')")
  410. }
  411. // stateTr is the state after reading `tr`.
  412. func stateTr(s *scanner, c byte) int {
  413. if c == 'u' {
  414. s.step = stateTru
  415. return scanContinue
  416. }
  417. return s.error(c, "in literal true (expecting 'u')")
  418. }
  419. // stateTru is the state after reading `tru`.
  420. func stateTru(s *scanner, c byte) int {
  421. if c == 'e' {
  422. s.step = stateEndValue
  423. return scanContinue
  424. }
  425. return s.error(c, "in literal true (expecting 'e')")
  426. }
  427. // stateF is the state after reading `f`.
  428. func stateF(s *scanner, c byte) int {
  429. if c == 'a' {
  430. s.step = stateFa
  431. return scanContinue
  432. }
  433. return s.error(c, "in literal false (expecting 'a')")
  434. }
  435. // stateFa is the state after reading `fa`.
  436. func stateFa(s *scanner, c byte) int {
  437. if c == 'l' {
  438. s.step = stateFal
  439. return scanContinue
  440. }
  441. return s.error(c, "in literal false (expecting 'l')")
  442. }
  443. // stateFal is the state after reading `fal`.
  444. func stateFal(s *scanner, c byte) int {
  445. if c == 's' {
  446. s.step = stateFals
  447. return scanContinue
  448. }
  449. return s.error(c, "in literal false (expecting 's')")
  450. }
  451. // stateFals is the state after reading `fals`.
  452. func stateFals(s *scanner, c byte) int {
  453. if c == 'e' {
  454. s.step = stateEndValue
  455. return scanContinue
  456. }
  457. return s.error(c, "in literal false (expecting 'e')")
  458. }
  459. // stateN is the state after reading `n`.
  460. func stateN(s *scanner, c byte) int {
  461. if c == 'u' {
  462. s.step = stateNu
  463. return scanContinue
  464. }
  465. return s.error(c, "in literal null (expecting 'u')")
  466. }
  467. // stateNu is the state after reading `nu`.
  468. func stateNu(s *scanner, c byte) int {
  469. if c == 'l' {
  470. s.step = stateNul
  471. return scanContinue
  472. }
  473. return s.error(c, "in literal null (expecting 'l')")
  474. }
  475. // stateNul is the state after reading `nul`.
  476. func stateNul(s *scanner, c byte) int {
  477. if c == 'l' {
  478. s.step = stateEndValue
  479. return scanContinue
  480. }
  481. return s.error(c, "in literal null (expecting 'l')")
  482. }
  483. // stateError is the state after reaching a syntax error,
  484. // such as after reading `[1}` or `5.1.2`.
  485. func stateError(_ *scanner, _ byte) int {
  486. return scanError
  487. }
  488. // error records an error and switches to the error state.
  489. func (s *scanner) error(c byte, context string) int {
  490. s.step = stateError
  491. s.err = &SyntaxError{
  492. msg: "invalid character " + quoteChar(c) + " " + context,
  493. Offset: s.bytes,
  494. }
  495. return scanError
  496. }
  // quoteChar formats c as a quoted character literal, i.e. wrapped in
  // single quotes and escaped the way strconv.Quote escapes string content.
  func quoteChar(c byte) string {
  	// The two quote characters need the opposite treatment from the one
  	// they get inside a double-quoted string.
  	switch c {
  	case '\'':
  		return `'\''`
  	case '"':
  		return `'"'`
  	}

  	// Otherwise quote as a string and swap the surrounding double quotes
  	// for single quotes.
  	quoted := strconv.Quote(string(rune(c)))
  	inner := quoted[1 : len(quoted)-1]
  	return "'" + inner + "'"
  }