1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
18 // readRuner is the interface to something that can read runes. If
19 // the object provided to Scan does not satisfy this interface, the
20 // object will be wrapped by a readRune object.
21 type readRuner
interface {
22 ReadRune() (rune
int, size
int, err os
.Error
)
25 // unreadRuner is the interface to something that can unread runes.
26 // If the object provided to Scan does not satisfy this interface,
27 // a local buffer will be used to back up the input, but its contents
28 // will be lost when Scan returns.
29 type unreadRuner
interface {
33 // ScanState represents the scanner state passed to custom scanners.
34 // Scanners may do rune-at-a-time scanning or ask the ScanState
35 // to discover the next space-delimited token.
36 type ScanState
interface {
37 // GetRune reads the next rune (Unicode code point) from the input.
38 GetRune() (rune
int, err os
.Error
)
39 // UngetRune causes the next call to GetRune to return the rune.
41 // Width returns the value of the width option and whether it has been set.
42 // The unit is Unicode code points.
43 Width() (wid
int, ok
bool)
44 // Token returns the next space-delimited token from the input. If
45 // a width has been specified, the returned token will be no longer
47 Token() (token
string, err os
.Error
)
50 // Scanner is implemented by any value that has a Scan method, which scans
51 // the input for the representation of a value and stores the result in the
52 // receiver, which must be a pointer to be useful. The Scan method is called
53 // for any argument to Scan or Scanln that implements it.
54 type Scanner
interface {
55 Scan(state ScanState
, verb
int) os
.Error
58 // Scan scans text read from standard input, storing successive
59 // space-separated values into successive arguments. Newlines count
60 // as space. It returns the number of items successfully scanned.
61 // If that is less than the number of arguments, err will report why.
62 func Scan(a
...interface{}) (n
int, err os
.Error
) {
63 return Fscan(os
.Stdin
, a
...)
66 // Scanln is similar to Scan, but stops scanning at a newline and
67 // after the final item there must be a newline or EOF.
68 func Scanln(a
...interface{}) (n
int, err os
.Error
) {
69 return Fscanln(os
.Stdin
, a
...)
72 // Scanf scans text read from standard input, storing successive
73 // space-separated values into successive arguments as determined by
74 // the format. It returns the number of items successfully scanned.
75 func Scanf(format
string, a
...interface{}) (n
int, err os
.Error
) {
76 return Fscanf(os
.Stdin
, format
, a
...)
79 // Sscan scans the argument string, storing successive space-separated
80 // values into successive arguments. Newlines count as space. It
81 // returns the number of items successfully scanned. If that is less
82 // than the number of arguments, err will report why.
83 func Sscan(str
string, a
...interface{}) (n
int, err os
.Error
) {
84 return Fscan(strings
.NewReader(str
), a
...)
87 // Sscanln is similar to Sscan, but stops scanning at a newline and
88 // after the final item there must be a newline or EOF.
89 func Sscanln(str
string, a
...interface{}) (n
int, err os
.Error
) {
90 return Fscanln(strings
.NewReader(str
), a
...)
93 // Sscanf scans the argument string, storing successive space-separated
94 // values into successive arguments as determined by the format. It
95 // returns the number of items successfully parsed.
96 func Sscanf(str
string, format
string, a
...interface{}) (n
int, err os
.Error
) {
97 return Fscanf(strings
.NewReader(str
), format
, a
...)
100 // Fscan scans text read from r, storing successive space-separated
101 // values into successive arguments. Newlines count as space. It
102 // returns the number of items successfully scanned. If that is less
103 // than the number of arguments, err will report why.
104 func Fscan(r io
.Reader
, a
...interface{}) (n
int, err os
.Error
) {
105 s
:= newScanState(r
, true)
111 // Fscanln is similar to Fscan, but stops scanning at a newline and
112 // after the final item there must be a newline or EOF.
113 func Fscanln(r io
.Reader
, a
...interface{}) (n
int, err os
.Error
) {
114 s
:= newScanState(r
, false)
120 // Fscanf scans text read from r, storing successive space-separated
121 // values into successive arguments as determined by the format. It
122 // returns the number of items successfully parsed.
123 func Fscanf(r io
.Reader
, format
string, a
...interface{}) (n
int, err os
.Error
) {
124 s
:= newScanState(r
, false)
125 n
, err
= s
.doScanf(format
, a
)
130 // scanError represents an error generated by the scanning software.
131 // It's used as a unique signature to identify such errors when recovering.
132 type scanError
struct {
138 // ss is the internal implementation of ScanState.
140 rr readRuner
// where to read input
141 buf bytes
.Buffer
// token accumulator
142 nlIsSpace
bool // whether newline counts as white space
143 peekRune
int // one-rune lookahead
144 prevRune
int // last rune returned by GetRune
145 atEOF
bool // already read EOF
146 maxWid
int // max width of field, in runes
147 widPresent
bool // width was specified
148 wid
int // width consumed so far; used in accept()
151 func (s
*ss
) GetRune() (rune
int, err os
.Error
) {
158 rune
, _
, err
= s
.rr
.ReadRune()
165 func (s
*ss
) Width() (wid
int, ok
bool) {
166 return s
.maxWid
, s
.widPresent
169 // The public method returns an error; this private one panics.
170 // If getRune reaches EOF, the return value is EOF (-1).
171 func (s
*ss
) getRune() (rune
int) {
181 rune
, _
, err
:= s
.rr
.ReadRune()
184 } else if err
!= nil {
194 // mustGetRune turns os.EOF into a panic(io.ErrUnexpectedEOF).
195 // It is called in cases such as string scanning where an EOF is a
197 func (s
*ss
) mustGetRune() (rune
int) {
199 s
.error(io
.ErrUnexpectedEOF
)
206 rune
, _
, err
:= s
.rr
.ReadRune()
209 err
= io
.ErrUnexpectedEOF
217 func (s
*ss
) UngetRune() {
218 if u
, ok
:= s
.rr
.(unreadRuner
); ok
{
221 s
.peekRune
= s
.prevRune
225 func (s
*ss
) error(err os
.Error
) {
226 panic(scanError
{err
})
229 func (s
*ss
) errorString(err
string) {
230 panic(scanError
{os
.ErrorString(err
)})
233 func (s
*ss
) Token() (tok
string, err os
.Error
) {
235 if e
:= recover(); e
!= nil {
236 if se
, ok
:= e
.(scanError
); ok
{
// readRune is a structure to enable reading UTF-8 encoded code points
// from an io.Reader. It is used if the Reader given to the scanner does
// not already implement readRuner.
250 type readRune
struct {
252 buf
[utf8
.UTFMax
]byte // used only inside ReadRune
253 pending
int // number of bytes in pendBuf; only >0 for bad UTF-8
254 pendBuf
[utf8
.UTFMax
]byte // bytes left over
257 // readByte returns the next byte from the input, which may be
258 // left over from a previous read if the UTF-8 was ill-formed.
259 func (r
*readRune
) readByte() (b
byte, err os
.Error
) {
262 copy(r
.pendBuf
[0:], r
.pendBuf
[1:])
266 _
, err
= r
.reader
.Read(r
.pendBuf
[0:1])
267 return r
.pendBuf
[0], err
270 // unread saves the bytes for the next read.
271 func (r
*readRune
) unread(buf
[]byte) {
272 copy(r
.pendBuf
[r
.pending
:], buf
)
273 r
.pending
+= len(buf
)
276 // ReadRune returns the next UTF-8 encoded code point from the
277 // io.Reader inside r.
278 func (r
*readRune
) ReadRune() (rune
int, size
int, err os
.Error
) {
279 r
.buf
[0], err
= r
.readByte()
283 if r
.buf
[0] < utf8
.RuneSelf
{ // fast check for common ASCII case
288 for n
= 1; !utf8
.FullRune(r
.buf
[0:n
]); n
++ {
289 r
.buf
[n
], err
= r
.readByte()
298 rune
, size
= utf8
.DecodeRune(r
.buf
[0:n
])
299 if size
< n
{ // an error
300 r
.unread(r
.buf
[size
:n
])
306 // A leaky bucket of reusable ss structures.
307 var ssFree
= make(chan *ss
, 100)
309 // Allocate a new ss struct. Probably can grab the previous one from ssFree.
310 func newScanState(r io
.Reader
, nlIsSpace
bool) *ss
{
315 if rr
, ok
:= r
.(readRuner
); ok
{
318 s
.rr
= &readRune
{reader
: r
}
320 s
.nlIsSpace
= nlIsSpace
328 // Save used ss structs in ssFree; avoid an allocation per invocation.
329 func (s
*ss
) free() {
330 // Don't hold on to ss structs with large buffers.
331 if cap(s
.buf
.Bytes()) > 1024 {
339 // skipSpace skips spaces and maybe newlines.
340 func (s
*ss
) skipSpace(stopAtNewline
bool) {
353 s
.errorString("unexpected newline")
356 if !unicode
.IsSpace(rune
) {
363 // token returns the next space-delimited string from the input. It
364 // skips white space. For Scanln, it stops at newlines. For Scan,
365 // newlines are treated as spaces.
366 func (s
*ss
) token() string {
368 // read until white space or newline
369 for nrunes
:= 0; !s
.widPresent || nrunes
< s
.maxWid
; nrunes
++ {
374 if unicode
.IsSpace(rune
) {
378 s
.buf
.WriteRune(rune
)
380 return s
.buf
.String()
383 // typeError indicates that the type of the operand did not match the format
384 func (s
*ss
) typeError(field
interface{}, expected
string) {
385 s
.errorString("expected field of type pointer to " + expected
+ "; found " + reflect
.Typeof(field
).String())
388 var complexError
= os
.ErrorString("syntax error scanning complex number")
389 var boolError
= os
.ErrorString("syntax error scanning boolean")
// accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
// buffer and returns true. Otherwise it returns false.
393 func (s
*ss
) accept(ok
string) bool {
394 if s
.wid
>= s
.maxWid
{
401 for i
:= 0; i
< len(ok
); i
++ {
402 if int(ok
[i
]) == rune
{
403 s
.buf
.WriteRune(rune
)
// okVerb verifies that the verb is present in the list. If it is not, it reports a
// "bad verb" error through errorString, which panics with a scanError.
415 func (s
*ss
) okVerb(verb
int, okVerbs
, typ
string) bool {
416 for _
, v
:= range okVerbs
{
421 s
.errorString("bad verb %" + string(verb
) + " for " + typ
)
425 // scanBool returns the value of the boolean represented by the next token.
426 func (s
*ss
) scanBool(verb
int) bool {
427 if !s
.okVerb(verb
, "tv", "boolean") {
430 // Syntax-checking a boolean is annoying. We're not fastidious about case.
431 switch s
.mustGetRune() {
437 if s
.accept("rR") && (!s
.accept("uU") ||
!s
.accept("eE")) {
442 if s
.accept("aL") && (!s
.accept("lL") ||
!s
.accept("sS") ||
!s
.accept("eE")) {
450 // Numerical elements
453 octalDigits
= "01234567"
454 decimalDigits
= "0123456789"
455 hexadecimalDigits
= "0123456789aAbBcCdDeEfF"
461 // getBase returns the numeric base represented by the verb and its digit string.
462 func (s
*ss
) getBase(verb
int) (base
int, digits
string) {
463 s
.okVerb(verb
, "bdoxXv", "integer") // sets s.err
465 digits
= decimalDigits
469 digits
= binaryDigits
475 digits
= hexadecimalDigits
480 // scanNumber returns the numerical string with specified digits starting here.
481 func (s
*ss
) scanNumber(digits
string) string {
482 if !s
.accept(digits
) {
483 s
.errorString("expected integer")
485 for s
.accept(digits
) {
487 return s
.buf
.String()
490 // scanRune returns the next rune value in the input.
491 func (s
*ss
) scanRune(bitSize
int) int64 {
492 rune
:= int64(s
.mustGetRune())
494 x
:= (rune
<< (64 - n
)) >> (64 - n
)
496 s
.errorString("overflow on character value " + string(rune
))
// scanInt returns the value of the integer represented by the next
// token, checking for overflow. Overflow is reported through errorString,
// which panics with a scanError.
503 func (s
*ss
) scanInt(verb
int, bitSize
int) int64 {
505 return s
.scanRune(bitSize
)
507 base
, digits
:= s
.getBase(verb
)
509 s
.accept(sign
) // If there's a sign, it will be left in the token buffer.
510 tok
:= s
.scanNumber(digits
)
511 i
, err
:= strconv
.Btoi64(tok
, base
)
516 x
:= (i
<< (64 - n
)) >> (64 - n
)
518 s
.errorString("integer overflow on token " + tok
)
// scanUint returns the value of the unsigned integer represented
// by the next token, checking for overflow. Overflow is reported through
// errorString, which panics with a scanError.
525 func (s
*ss
) scanUint(verb
int, bitSize
int) uint64 {
527 return uint64(s
.scanRune(bitSize
))
529 base
, digits
:= s
.getBase(verb
)
531 tok
:= s
.scanNumber(digits
)
532 i
, err
:= strconv
.Btoui64(tok
, base
)
537 x
:= (i
<< (64 - n
)) >> (64 - n
)
539 s
.errorString("unsigned integer overflow on token " + tok
)
// floatToken returns the floating-point number starting here, no longer than the maximum
// field width (s.maxWid) if the width is specified. It's not rigorous about syntax because
// it doesn't check that we have at least some digits, but Atof will do that.
547 func (s
*ss
) floatToken() string {
552 for s
.accept(decimalDigits
) {
555 if s
.accept(period
) {
557 for s
.accept(decimalDigits
) {
561 if s
.accept(exponent
) {
565 for s
.accept(decimalDigits
) {
568 return s
.buf
.String()
571 // complexTokens returns the real and imaginary parts of the complex number starting here.
572 // The number might be parenthesized and has the format (N+Ni) where N is a floating-point
573 // number and there are no spaces within.
574 func (s
*ss
) complexTokens() (real
, imag
string) {
575 // TODO: accept N and Ni independently?
576 parens
:= s
.accept("(")
577 real
= s
.floatToken()
579 // Must now have a sign.
581 s
.error(complexError
)
583 // Sign is now in buffer
584 imagSign
:= s
.buf
.String()
585 imag
= s
.floatToken()
587 s
.error(complexError
)
589 if parens
&& !s
.accept(")") {
590 s
.error(complexError
)
592 return real
, imagSign
+ imag
// convertFloat converts the string to a float64 value.
596 func (s
*ss
) convertFloat(str
string, n
int) float64 {
597 f
, err
:= strconv
.AtofN(str
, n
)
// scanComplex converts the next token to a complex128 value.
// The n argument is the bit size of the target complex type; the real and
// imaginary parts are each converted as floats of n/2 bits. If we're reading
// complex64, the parts are parsed as float32s and converted to float64's to
// avoid reproducing this code for each complex type.
608 func (s
*ss
) scanComplex(verb
int, n
int) complex128
{
609 if !s
.okVerb(verb
, floatVerbs
, "complex") {
613 sreal
, simag
:= s
.complexTokens()
614 real
:= s
.convertFloat(sreal
, n
/2)
615 imag
:= s
.convertFloat(simag
, n
/2)
616 return cmplx(real
, imag
)
619 // convertString returns the string represented by the next input characters.
620 // The format of the input is determined by the verb.
621 func (s
*ss
) convertString(verb
int) (str
string) {
622 if !s
.okVerb(verb
, "svqx", "string") {
628 str
= s
.quotedString()
632 str
= s
.token() // %s and %v just return the next word
634 // Empty strings other than with %q are not OK.
635 if len(str
) == 0 && verb
!= 'q' && s
.maxWid
> 0 {
636 s
.errorString("Scan: no data for string")
641 // quotedString returns the double- or back-quoted string represented by the next input characters.
642 func (s
*ss
) quotedString() string {
643 quote
:= s
.mustGetRune()
646 // Back-quoted: Anything goes until EOF or back quote.
648 rune
:= s
.mustGetRune()
652 s
.buf
.WriteRune(rune
)
654 return s
.buf
.String()
656 // Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
657 s
.buf
.WriteRune(quote
)
659 rune
:= s
.mustGetRune()
660 s
.buf
.WriteRune(rune
)
662 // In a legal backslash escape, no matter how long, only the character
663 // immediately after the escape can itself be a backslash or quote.
664 // Thus we only need to protect the first character after the backslash.
665 rune
:= s
.mustGetRune()
666 s
.buf
.WriteRune(rune
)
667 } else if rune
== '"' {
671 result
, err
:= strconv
.Unquote(s
.buf
.String())
677 s
.errorString("expected quoted string")
682 // hexDigit returns the value of the hexadecimal digit
683 func (s
*ss
) hexDigit(digit
int) int {
685 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
687 case 'a', 'b', 'c', 'd', 'e', 'f':
688 return 10 + digit
- 'a'
689 case 'A', 'B', 'C', 'D', 'E', 'F':
690 return 10 + digit
- 'A'
692 s
.errorString("Scan: illegal hex digit")
696 // hexByte returns the next hex-encoded (two-character) byte from the input.
697 // There must be either two hexadecimal digits or a space character in the input.
698 func (s
*ss
) hexByte() (b
byte, ok
bool) {
703 if unicode
.IsSpace(rune1
) {
707 rune2
:= s
.mustGetRune()
708 return byte(s
.hexDigit(rune1
)<<4 | s
.hexDigit(rune2
)), true
711 // hexString returns the space-delimited hexpair-encoded string.
712 func (s
*ss
) hexString() string {
720 if s
.buf
.Len() == 0 {
721 s
.errorString("Scan: no hex data for %x string")
724 return s
.buf
.String()
// floatVerbs is the verb list handed to okVerb when scanning floating-point
// and complex values.
const floatVerbs = "eEfFgGv"
729 // scanOne scans a single value, deriving the scanner from the type of the argument.
730 func (s
*ss
) scanOne(verb
int, field
interface{}) {
733 // If the parameter has its own Scan method, use that.
734 if v
, ok
:= field
.(Scanner
); ok
{
735 err
= v
.Scan(s
, verb
)
742 s
.maxWid
= 1 << 30 // Huge
745 switch v
:= field
.(type) {
747 *v
= s
.scanBool(verb
)
749 *v
= complex(s
.scanComplex(verb
, int(complexBits
)))
751 *v
= complex64(s
.scanComplex(verb
, 64))
753 *v
= s
.scanComplex(verb
, 128)
755 *v
= int(s
.scanInt(verb
, intBits
))
757 *v
= int8(s
.scanInt(verb
, 8))
759 *v
= int16(s
.scanInt(verb
, 16))
761 *v
= int32(s
.scanInt(verb
, 32))
763 *v
= s
.scanInt(verb
, 64)
765 *v
= uint(s
.scanUint(verb
, intBits
))
767 *v
= uint8(s
.scanUint(verb
, 8))
769 *v
= uint16(s
.scanUint(verb
, 16))
771 *v
= uint32(s
.scanUint(verb
, 32))
773 *v
= s
.scanUint(verb
, 64)
775 *v
= uintptr(s
.scanUint(verb
, uintptrBits
))
776 // Floats are tricky because you want to scan in the precision of the result, not
777 // scan in high precision and convert, in order to preserve the correct error condition.
779 if s
.okVerb(verb
, floatVerbs
, "float") {
781 *v
= float(s
.convertFloat(s
.floatToken(), int(floatBits
)))
784 if s
.okVerb(verb
, floatVerbs
, "float32") {
786 *v
= float32(s
.convertFloat(s
.floatToken(), 32))
789 if s
.okVerb(verb
, floatVerbs
, "float64") {
791 *v
= s
.convertFloat(s
.floatToken(), 64)
794 *v
= s
.convertString(verb
)
796 // We scan to string and convert so we get a copy of the data.
797 // If we scanned to bytes, the slice would point at the buffer.
798 *v
= []byte(s
.convertString(verb
))
800 val
:= reflect
.NewValue(v
)
801 ptr
, ok
:= val
.(*reflect
.PtrValue
)
803 s
.errorString("Scan: type not a pointer: " + val
.Type().String())
806 switch v
:= ptr
.Elem().(type) {
807 case *reflect
.BoolValue
:
808 v
.Set(s
.scanBool(verb
))
809 case *reflect
.IntValue
:
810 v
.Set(s
.scanInt(verb
, v
.Type().Bits()))
811 case *reflect
.UintValue
:
812 v
.Set(s
.scanUint(verb
, v
.Type().Bits()))
813 case *reflect
.StringValue
:
814 v
.Set(s
.convertString(verb
))
815 case *reflect
.SliceValue
:
816 // For now, can only handle (renamed) []byte.
817 typ
:= v
.Type().(*reflect
.SliceType
)
818 if typ
.Elem().Kind() != reflect
.Uint8
{
821 str
:= s
.convertString(verb
)
822 v
.Set(reflect
.MakeSlice(typ
, len(str
), len(str
)))
823 for i
:= 0; i
< len(str
); i
++ {
824 v
.Elem(i
).(*reflect
.UintValue
).Set(uint64(str
[i
]))
826 case *reflect
.FloatValue
:
828 v
.Set(s
.convertFloat(s
.floatToken(), v
.Type().Bits()))
829 case *reflect
.ComplexValue
:
830 v
.Set(s
.scanComplex(verb
, v
.Type().Bits()))
833 s
.errorString("Scan: can't handle type: " + val
.Type().String())
838 // errorHandler turns local panics into error returns. EOFs are benign.
839 func errorHandler(errp
*os
.Error
) {
840 if e
:= recover(); e
!= nil {
841 if se
, ok
:= e
.(scanError
); ok
{ // catch local error
842 if se
.err
!= os
.EOF
{
851 // doScan does the real work for scanning without a format string.
852 // At the moment, it handles only pointers to basic types.
853 func (s
*ss
) doScan(a
[]interface{}) (numProcessed
int, err os
.Error
) {
854 defer errorHandler(&err
)
855 for _
, field
:= range a
{
856 s
.scanOne('v', field
)
859 // Check for newline if required.
863 if rune
== '\n' || rune
== EOF
{
866 if !unicode
.IsSpace(rune
) {
867 s
.errorString("Scan: expected newline")
875 // advance determines whether the next characters in the input match
876 // those of the format. It returns the number of bytes (sic) consumed
877 // in the format. Newlines included, all runs of space characters in
878 // either input or format behave as a single space. This routine also
879 // handles the %% case. If the return value is zero, either format
880 // starts with a % (with no following %) or the input is empty.
881 // If it is negative, the input did not match the string.
882 func (s
*ss
) advance(format
string) (i
int) {
883 for i
< len(format
) {
884 fmtc
, w
:= utf8
.DecodeRuneInString(format
[i
:])
886 // %% acts like a real percent
887 nextc
, _
:= utf8
.DecodeRuneInString(format
[i
+w
:]) // will not match % if string is empty
891 i
+= w
// skip the first %
894 for unicode
.IsSpace(fmtc
) && i
< len(format
) {
897 fmtc
, w
= utf8
.DecodeRuneInString(format
[i
:])
900 // There was space in the format, so there should be space (EOF)
902 inputc
:= s
.getRune()
906 if !unicode
.IsSpace(inputc
) {
907 // Space in format but not in input: error
908 s
.errorString("expected space in input to match format")
913 inputc
:= s
.mustGetRune()
923 // doScanf does the real work when scanning with a format string.
924 // At the moment, it handles only pointers to basic types.
925 func (s
*ss
) doScanf(format
string, a
[]interface{}) (numProcessed
int, err os
.Error
) {
926 defer errorHandler(&err
)
927 end
:= len(format
) - 1
928 // We process one item per non-trivial format
929 for i
:= 0; i
<= end
; {
930 w
:= s
.advance(format
[i
:])
935 // Either we failed to advance, we have a percent character, or we ran out of input.
936 if format
[i
] != '%' {
937 // Can't advance format. Why not?
939 s
.errorString("input does not match format")
941 // Otherwise at EOF; "too many operands" error handled below
946 // do we have 20 (width)?
947 s
.maxWid
, s
.widPresent
, i
= parsenum(format
, i
, end
)
949 c
, w
:= utf8
.DecodeRuneInString(format
[i
:])
952 if numProcessed
>= len(a
) { // out of operands
953 s
.errorString("too few operands for format %" + format
[i
-w
:])
956 field
:= a
[numProcessed
]
961 if numProcessed
< len(a
) {
962 s
.errorString("too many operands")