libgo: update to go1.9
[official-gcc.git] / libgo / go / encoding / json / scanner.go
blobae34418d1da633f445afcee71e39410da6e3999e
1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package json
7 // JSON value parser state machine.
8 // Just about at the limit of what is reasonable to write by hand.
9 // Some parts are a bit tedious, but overall it nicely factors out the
10 // otherwise common code from the multiple scanning functions
11 // in this package (Compact, Indent, checkValid, nextValue, etc).
13 // This file starts with two simple examples using the scanner
14 // before diving into the scanner itself.
16 import "strconv"
18 // Valid reports whether data is a valid JSON encoding.
19 func Valid(data []byte) bool {
20 return checkValid(data, &scanner{}) == nil
23 // checkValid verifies that data is valid JSON-encoded data.
24 // scan is passed in for use by checkValid to avoid an allocation.
25 func checkValid(data []byte, scan *scanner) error {
26 scan.reset()
27 for _, c := range data {
28 scan.bytes++
29 if scan.step(scan, c) == scanError {
30 return scan.err
33 if scan.eof() == scanError {
34 return scan.err
36 return nil
39 // nextValue splits data after the next whole JSON value,
40 // returning that value and the bytes that follow it as separate slices.
41 // scan is passed in for use by nextValue to avoid an allocation.
42 func nextValue(data []byte, scan *scanner) (value, rest []byte, err error) {
43 scan.reset()
44 for i, c := range data {
45 v := scan.step(scan, c)
46 if v >= scanEndObject {
47 switch v {
48 // probe the scanner with a space to determine whether we will
49 // get scanEnd on the next character. Otherwise, if the next character
50 // is not a space, scanEndTop allocates a needless error.
51 case scanEndObject, scanEndArray:
52 if scan.step(scan, ' ') == scanEnd {
53 return data[:i+1], data[i+1:], nil
55 case scanError:
56 return nil, nil, scan.err
57 case scanEnd:
58 return data[:i], data[i:], nil
62 if scan.eof() == scanError {
63 return nil, nil, scan.err
65 return data, nil, nil
// SyntaxError describes a syntax error found in JSON input.
type SyntaxError struct {
	msg    string // human-readable description of the error
	Offset int64  // the error occurred after reading Offset bytes
}

// Error returns the description of the syntax error, which makes
// *SyntaxError satisfy the error interface.
func (e *SyntaxError) Error() string {
	return e.msg
}
76 // A scanner is a JSON scanning state machine.
77 // Callers call scan.reset() and then pass bytes in one at a time
78 // by calling scan.step(&scan, c) for each byte.
79 // The return value, referred to as an opcode, tells the
80 // caller about significant parsing events like beginning
81 // and ending literals, objects, and arrays, so that the
82 // caller can follow along if it wishes.
83 // The return value scanEnd indicates that a single top-level
84 // JSON value has been completed, *before* the byte that
85 // just got passed in. (The indication must be delayed in order
86 // to recognize the end of numbers: is 123 a whole value or
87 // the beginning of 12345e+6?).
88 type scanner struct {
89 // step is the func to be called to execute the next state transition.
90 // Also tried using an integer constant and a single func
91 // with a switch, but using the func directly was 10% faster
92 // on a 64-bit Mac Mini, and it's nicer to read.
93 step func(*scanner, byte) int
95 // endTop reports whether the end of the top-level value has been reached.
96 endTop bool
98 // parseState is the stack of what we're in the middle of - array values, object keys, object values.
99 parseState []int
101 // err is the error that happened, if any; it is set by the error and eof methods.
102 err error
104 // 1-byte redo state, managed by the undo method and stateRedo.
105 redo bool
106 redoCode int
107 redoState func(*scanner, byte) int
109 // bytes is the total number of bytes consumed, updated by decoder.Decode.
// It is also incremented once per byte by checkValid, and reset does not clear it.
110 bytes int64
113 // These values are returned by the state transition functions
114 // assigned to scanner.step and the method scanner.eof.
115 // They give details about the current state of the scan that
116 // callers might be interested to know about.
117 // It is okay to ignore the return value of any particular
118 // call to scanner.step: if one call returns scanError,
119 // every subsequent call will return scanError too.
120 const (
121 // Continue.
122 scanContinue = iota // uninteresting byte
123 scanBeginLiteral // end implied by next result != scanContinue
124 scanBeginObject // begin object
125 scanObjectKey // just finished object key (string)
126 scanObjectValue // just finished non-last object value
127 scanEndObject // end object (implies scanObjectValue if possible)
128 scanBeginArray // begin array
129 scanArrayValue // just finished array value
130 scanEndArray // end array (implies scanArrayValue if possible)
131 scanSkipSpace // space byte; can skip; known to be last "continue" result
133 // Stop.
134 scanEnd // top-level value ended *before* this byte; known to be first "stop" result
135 scanError // hit an error; the error itself is stored in scanner.err
138 // These values are stored in the parseState stack.
139 // They give the current state of a composite value
140 // being scanned. If the parser is inside a nested value,
141 // the last entry describes the innermost value and entry 0 the outermost.
142 const (
143 parseObjectKey = iota // parsing object key (before colon)
144 parseObjectValue // parsing object value (after colon)
145 parseArrayValue // parsing array value
148 // reset prepares the scanner for use.
149 // It must be called before calling s.step.
150 func (s *scanner) reset() {
151 s.step = stateBeginValue
152 s.parseState = s.parseState[0:0]
153 s.err = nil
154 s.redo = false
155 s.endTop = false
158 // eof tells the scanner that the end of input has been reached.
159 // It returns a scan status just as s.step does.
160 func (s *scanner) eof() int {
161 if s.err != nil {
162 return scanError
164 if s.endTop {
165 return scanEnd
167 s.step(s, ' ')
168 if s.endTop {
169 return scanEnd
171 if s.err == nil {
172 s.err = &SyntaxError{"unexpected end of JSON input", s.bytes}
174 return scanError
177 // pushParseState pushes a new parse state p onto the parse stack.
178 func (s *scanner) pushParseState(p int) {
179 s.parseState = append(s.parseState, p)
182 // popParseState pops a parse state (already obtained) off the stack
183 // and updates s.step accordingly.
184 func (s *scanner) popParseState() {
185 n := len(s.parseState) - 1
186 s.parseState = s.parseState[0:n]
187 s.redo = false
188 if n == 0 {
189 s.step = stateEndTop
190 s.endTop = true
191 } else {
192 s.step = stateEndValue
// isSpace reports whether c is one of the four whitespace bytes
// the scanner skips: space, tab, carriage return or line feed.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
200 // stateBeginValueOrEmpty is the state after reading `[`.
201 func stateBeginValueOrEmpty(s *scanner, c byte) int {
202 if c <= ' ' && isSpace(c) {
203 return scanSkipSpace
205 if c == ']' {
206 return stateEndValue(s, c)
208 return stateBeginValue(s, c)
211 // stateBeginValue is the state at the beginning of the input.
212 func stateBeginValue(s *scanner, c byte) int {
213 if c <= ' ' && isSpace(c) {
214 return scanSkipSpace
216 switch c {
217 case '{':
218 s.step = stateBeginStringOrEmpty
219 s.pushParseState(parseObjectKey)
220 return scanBeginObject
221 case '[':
222 s.step = stateBeginValueOrEmpty
223 s.pushParseState(parseArrayValue)
224 return scanBeginArray
225 case '"':
226 s.step = stateInString
227 return scanBeginLiteral
228 case '-':
229 s.step = stateNeg
230 return scanBeginLiteral
231 case '0': // beginning of 0.123
232 s.step = state0
233 return scanBeginLiteral
234 case 't': // beginning of true
235 s.step = stateT
236 return scanBeginLiteral
237 case 'f': // beginning of false
238 s.step = stateF
239 return scanBeginLiteral
240 case 'n': // beginning of null
241 s.step = stateN
242 return scanBeginLiteral
244 if '1' <= c && c <= '9' { // beginning of 1234.5
245 s.step = state1
246 return scanBeginLiteral
248 return s.error(c, "looking for beginning of value")
251 // stateBeginStringOrEmpty is the state after reading `{`.
252 func stateBeginStringOrEmpty(s *scanner, c byte) int {
253 if c <= ' ' && isSpace(c) {
254 return scanSkipSpace
256 if c == '}' {
257 n := len(s.parseState)
258 s.parseState[n-1] = parseObjectValue
259 return stateEndValue(s, c)
261 return stateBeginString(s, c)
264 // stateBeginString is the state after reading `{"key": value,`.
265 func stateBeginString(s *scanner, c byte) int {
266 if c <= ' ' && isSpace(c) {
267 return scanSkipSpace
269 if c == '"' {
270 s.step = stateInString
271 return scanBeginLiteral
273 return s.error(c, "looking for beginning of object key string")
276 // stateEndValue is the state after completing a value,
277 // such as after reading `{}` or `true` or `["x"`.
278 func stateEndValue(s *scanner, c byte) int {
279 n := len(s.parseState)
280 if n == 0 {
281 // Completed top-level before the current byte.
282 s.step = stateEndTop
283 s.endTop = true
284 return stateEndTop(s, c)
286 if c <= ' ' && isSpace(c) {
287 s.step = stateEndValue
288 return scanSkipSpace
290 ps := s.parseState[n-1]
291 switch ps {
292 case parseObjectKey:
293 if c == ':' {
294 s.parseState[n-1] = parseObjectValue
295 s.step = stateBeginValue
296 return scanObjectKey
298 return s.error(c, "after object key")
299 case parseObjectValue:
300 if c == ',' {
301 s.parseState[n-1] = parseObjectKey
302 s.step = stateBeginString
303 return scanObjectValue
305 if c == '}' {
306 s.popParseState()
307 return scanEndObject
309 return s.error(c, "after object key:value pair")
310 case parseArrayValue:
311 if c == ',' {
312 s.step = stateBeginValue
313 return scanArrayValue
315 if c == ']' {
316 s.popParseState()
317 return scanEndArray
319 return s.error(c, "after array element")
321 return s.error(c, "")
324 // stateEndTop is the state after finishing the top-level value,
325 // such as after reading `{}` or `[1,2,3]`.
326 // Only space characters should be seen now.
327 func stateEndTop(s *scanner, c byte) int {
328 if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
329 // Complain about non-space byte on next call.
330 s.error(c, "after top-level value")
332 return scanEnd
335 // stateInString is the state after reading `"`.
336 func stateInString(s *scanner, c byte) int {
337 if c == '"' {
338 s.step = stateEndValue
339 return scanContinue
341 if c == '\\' {
342 s.step = stateInStringEsc
343 return scanContinue
345 if c < 0x20 {
346 return s.error(c, "in string literal")
348 return scanContinue
351 // stateInStringEsc is the state after reading `"\` during a quoted string.
352 func stateInStringEsc(s *scanner, c byte) int {
353 switch c {
354 case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
355 s.step = stateInString
356 return scanContinue
357 case 'u':
358 s.step = stateInStringEscU
359 return scanContinue
361 return s.error(c, "in string escape code")
364 // stateInStringEscU is the state after reading `"\u` during a quoted string.
365 func stateInStringEscU(s *scanner, c byte) int {
366 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
367 s.step = stateInStringEscU1
368 return scanContinue
370 // numbers
371 return s.error(c, "in \\u hexadecimal character escape")
374 // stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
375 func stateInStringEscU1(s *scanner, c byte) int {
376 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
377 s.step = stateInStringEscU12
378 return scanContinue
380 // numbers
381 return s.error(c, "in \\u hexadecimal character escape")
384 // stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
385 func stateInStringEscU12(s *scanner, c byte) int {
386 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
387 s.step = stateInStringEscU123
388 return scanContinue
390 // numbers
391 return s.error(c, "in \\u hexadecimal character escape")
394 // stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
395 func stateInStringEscU123(s *scanner, c byte) int {
396 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
397 s.step = stateInString
398 return scanContinue
400 // numbers
401 return s.error(c, "in \\u hexadecimal character escape")
404 // stateNeg is the state after reading `-` during a number.
405 func stateNeg(s *scanner, c byte) int {
406 if c == '0' {
407 s.step = state0
408 return scanContinue
410 if '1' <= c && c <= '9' {
411 s.step = state1
412 return scanContinue
414 return s.error(c, "in numeric literal")
417 // state1 is the state after reading a non-zero integer during a number,
418 // such as after reading `1` or `100` but not `0`.
419 func state1(s *scanner, c byte) int {
420 if '0' <= c && c <= '9' {
421 s.step = state1
422 return scanContinue
424 return state0(s, c)
427 // state0 is the state after reading `0` during a number.
428 func state0(s *scanner, c byte) int {
429 if c == '.' {
430 s.step = stateDot
431 return scanContinue
433 if c == 'e' || c == 'E' {
434 s.step = stateE
435 return scanContinue
437 return stateEndValue(s, c)
440 // stateDot is the state after reading the integer and decimal point in a number,
441 // such as after reading `1.`.
442 func stateDot(s *scanner, c byte) int {
443 if '0' <= c && c <= '9' {
444 s.step = stateDot0
445 return scanContinue
447 return s.error(c, "after decimal point in numeric literal")
450 // stateDot0 is the state after reading the integer, decimal point, and subsequent
451 // digits of a number, such as after reading `3.14`.
452 func stateDot0(s *scanner, c byte) int {
453 if '0' <= c && c <= '9' {
454 return scanContinue
456 if c == 'e' || c == 'E' {
457 s.step = stateE
458 return scanContinue
460 return stateEndValue(s, c)
463 // stateE is the state after reading the mantissa and e in a number,
464 // such as after reading `314e` or `0.314e`.
465 func stateE(s *scanner, c byte) int {
466 if c == '+' || c == '-' {
467 s.step = stateESign
468 return scanContinue
470 return stateESign(s, c)
473 // stateESign is the state after reading the mantissa, e, and sign in a number,
474 // such as after reading `314e-` or `0.314e+`.
475 func stateESign(s *scanner, c byte) int {
476 if '0' <= c && c <= '9' {
477 s.step = stateE0
478 return scanContinue
480 return s.error(c, "in exponent of numeric literal")
483 // stateE0 is the state after reading the mantissa, e, optional sign,
484 // and at least one digit of the exponent in a number,
485 // such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
486 func stateE0(s *scanner, c byte) int {
487 if '0' <= c && c <= '9' {
488 return scanContinue
490 return stateEndValue(s, c)
493 // stateT is the state after reading `t`.
494 func stateT(s *scanner, c byte) int {
495 if c == 'r' {
496 s.step = stateTr
497 return scanContinue
499 return s.error(c, "in literal true (expecting 'r')")
502 // stateTr is the state after reading `tr`.
503 func stateTr(s *scanner, c byte) int {
504 if c == 'u' {
505 s.step = stateTru
506 return scanContinue
508 return s.error(c, "in literal true (expecting 'u')")
511 // stateTru is the state after reading `tru`.
512 func stateTru(s *scanner, c byte) int {
513 if c == 'e' {
514 s.step = stateEndValue
515 return scanContinue
517 return s.error(c, "in literal true (expecting 'e')")
520 // stateF is the state after reading `f`.
521 func stateF(s *scanner, c byte) int {
522 if c == 'a' {
523 s.step = stateFa
524 return scanContinue
526 return s.error(c, "in literal false (expecting 'a')")
529 // stateFa is the state after reading `fa`.
530 func stateFa(s *scanner, c byte) int {
531 if c == 'l' {
532 s.step = stateFal
533 return scanContinue
535 return s.error(c, "in literal false (expecting 'l')")
538 // stateFal is the state after reading `fal`.
539 func stateFal(s *scanner, c byte) int {
540 if c == 's' {
541 s.step = stateFals
542 return scanContinue
544 return s.error(c, "in literal false (expecting 's')")
547 // stateFals is the state after reading `fals`.
548 func stateFals(s *scanner, c byte) int {
549 if c == 'e' {
550 s.step = stateEndValue
551 return scanContinue
553 return s.error(c, "in literal false (expecting 'e')")
556 // stateN is the state after reading `n`.
557 func stateN(s *scanner, c byte) int {
558 if c == 'u' {
559 s.step = stateNu
560 return scanContinue
562 return s.error(c, "in literal null (expecting 'u')")
565 // stateNu is the state after reading `nu`.
566 func stateNu(s *scanner, c byte) int {
567 if c == 'l' {
568 s.step = stateNul
569 return scanContinue
571 return s.error(c, "in literal null (expecting 'l')")
574 // stateNul is the state after reading `nul`.
575 func stateNul(s *scanner, c byte) int {
576 if c == 'l' {
577 s.step = stateEndValue
578 return scanContinue
580 return s.error(c, "in literal null (expecting 'l')")
583 // stateError is the state after reaching a syntax error,
584 // such as after reading `[1}` or `5.1.2`.
585 func stateError(s *scanner, c byte) int {
586 return scanError
589 // error records an error and switches to the error state.
590 func (s *scanner) error(c byte, context string) int {
591 s.step = stateError
592 s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes}
593 return scanError
// quoteChar renders c as a single-quoted character literal
// for use in error messages.
func quoteChar(c byte) string {
	// The two quote characters are special-cased because their
	// escaping differs from that of a double-quoted string.
	switch c {
	case '\'':
		return `'\''`
	case '"':
		return `'"'`
	}

	// For everything else, take the double-quoted form and
	// swap its surrounding quotation marks for single quotes.
	quoted := strconv.Quote(string(c))
	inner := quoted[1 : len(quoted)-1]
	return "'" + inner + "'"
}
611 // undo causes the scanner to return scanCode from the next state transition.
612 // This gives callers a simple 1-byte undo mechanism.
613 func (s *scanner) undo(scanCode int) {
614 if s.redo {
615 panic("json: invalid use of scanner")
617 s.redoCode = scanCode
618 s.redoState = s.step
619 s.step = stateRedo
620 s.redo = true
623 // stateRedo helps implement the scanner's 1-byte undo.
624 func stateRedo(s *scanner, c byte) int {
625 s.redo = false
626 s.step = s.redoState
627 return s.redoCode