libgo/go/fmt/scan.go

   1 // Copyright 2010 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package fmt
   6
   7 import (
   8         "errors"
   9         "io"
  10         "math"
  11         "os"
  12         "reflect"
  13         "strconv"
  14         "sync"
  15         "unicode/utf8"
  16 )
  17
// runeUnreader is the interface to something that can unread runes.
// If the object provided to Scan does not satisfy this interface,
// a local buffer will be used to back up the input, but its contents
// will be lost when Scan returns.
//
// ss.UnreadRune checks its underlying reader for this interface and
// delegates to it when available; otherwise it records the rune in
// ss.peekRune as a one-rune lookahead.
type runeUnreader interface {
	// UnreadRune pushes the most recently read rune back onto the input.
	UnreadRune() error
}
  25
// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
//
// Within this package the interface is implemented by *ss.
type ScanState interface {
	// ReadRune reads the next rune (Unicode code point) from the input.
	// If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
	// return EOF after returning the first '\n' or when reading beyond
	// the specified width.
	ReadRune() (r rune, size int, err error)
	// UnreadRune causes the next call to ReadRune to return the same rune.
	UnreadRune() error
	// SkipSpace skips space in the input. Newlines are treated as space
	// unless the scan operation is Scanln, Fscanln or Sscanln, in which case
	// a newline is treated as EOF.
	SkipSpace()
	// Token skips space in the input if skipSpace is true, then returns the
	// run of Unicode code points c satisfying f(c). If f is nil,
	// !unicode.IsSpace(c) is used; that is, the token will hold non-space
	// characters. Newlines are treated as space unless the scan operation
	// is Scanln, Fscanln or Sscanln, in which case a newline is treated as
	// EOF. The returned slice points to shared data that may be overwritten
	// by the next call to Token, a call to a Scan function using the ScanState
	// as input, or when the calling Scan method returns.
	Token(skipSpace bool, f func(rune) bool) (token []byte, err error)
	// Width returns the value of the width option and whether it has been set.
	// The unit is Unicode code points.
	Width() (wid int, ok bool)
	// Because ReadRune is implemented by the interface, Read should never be
	// called by the scanning routines and a valid implementation of
	// ScanState may choose always to return an error from Read.
	// (The implementation in this file, ss.Read, does exactly that.)
	Read(buf []byte) (n int, err error)
}
  58
// Scanner is implemented by any value that has a Scan method, which scans
// the input for the representation of a value and stores the result in the
// receiver, which must be a pointer to be useful. The Scan method is called
// for any argument to Scan, Scanf, or Scanln that implements it.
type Scanner interface {
	// Scan reads a value from state, interpreting it according to verb,
	// and reports any error encountered while doing so.
	Scan(state ScanState, verb rune) error
}
  66
  67 // Scan scans text read from standard input, storing successive
  68 // space-separated values into successive arguments.  Newlines count
  69 // as space.  It returns the number of items successfully scanned.
  70 // If that is less than the number of arguments, err will report why.
  71 func Scan(a ...interface{}) (n int, err error) {
  72         return Fscan(os.Stdin, a...)
  73 }
  74
  75 // Scanln is similar to Scan, but stops scanning at a newline and
  76 // after the final item there must be a newline or EOF.
  77 func Scanln(a ...interface{}) (n int, err error) {
  78         return Fscanln(os.Stdin, a...)
  79 }
  80
  81 // Scanf scans text read from standard input, storing successive
  82 // space-separated values into successive arguments as determined by
  83 // the format.  It returns the number of items successfully scanned.
  84 func Scanf(format string, a ...interface{}) (n int, err error) {
  85         return Fscanf(os.Stdin, format, a...)
  86 }
  87
  88 type stringReader string
  89
  90 func (r *stringReader) Read(b []byte) (n int, err error) {
  91         n = copy(b, *r)
  92         *r = (*r)[n:]
  93         if n == 0 {
  94                 err = io.EOF
  95         }
  96         return
  97 }
  98
  99 // Sscan scans the argument string, storing successive space-separated
 100 // values into successive arguments.  Newlines count as space.  It
 101 // returns the number of items successfully scanned.  If that is less
 102 // than the number of arguments, err will report why.
 103 func Sscan(str string, a ...interface{}) (n int, err error) {
 104         return Fscan((*stringReader)(&str), a...)
 105 }
 106
 107 // Sscanln is similar to Sscan, but stops scanning at a newline and
 108 // after the final item there must be a newline or EOF.
 109 func Sscanln(str string, a ...interface{}) (n int, err error) {
 110         return Fscanln((*stringReader)(&str), a...)
 111 }
 112
 113 // Sscanf scans the argument string, storing successive space-separated
 114 // values into successive arguments as determined by the format.  It
 115 // returns the number of items successfully parsed.
 116 func Sscanf(str string, format string, a ...interface{}) (n int, err error) {
 117         return Fscanf((*stringReader)(&str), format, a...)
 118 }
 119
 120 // Fscan scans text read from r, storing successive space-separated
 121 // values into successive arguments.  Newlines count as space.  It
 122 // returns the number of items successfully scanned.  If that is less
 123 // than the number of arguments, err will report why.
 124 func Fscan(r io.Reader, a ...interface{}) (n int, err error) {
 125         s, old := newScanState(r, true, false)
 126         n, err = s.doScan(a)
 127         s.free(old)
 128         return
 129 }
 130
 131 // Fscanln is similar to Fscan, but stops scanning at a newline and
 132 // after the final item there must be a newline or EOF.
 133 func Fscanln(r io.Reader, a ...interface{}) (n int, err error) {
 134         s, old := newScanState(r, false, true)
 135         n, err = s.doScan(a)
 136         s.free(old)
 137         return
 138 }
 139
 140 // Fscanf scans text read from r, storing successive space-separated
 141 // values into successive arguments as determined by the format.  It
 142 // returns the number of items successfully parsed.
 143 func Fscanf(r io.Reader, format string, a ...interface{}) (n int, err error) {
 144         s, old := newScanState(r, false, false)
 145         n, err = s.doScanf(format, a)
 146         s.free(old)
 147         return
 148 }
 149
// scanError represents an error generated by the scanning software.
// It's used as a unique signature to identify such errors when recovering:
// ss.error and ss.errorString panic with a scanError, and recovering code
// such as ss.Token type-asserts the recovered value to tell scanner errors
// apart from unrelated panics.
type scanError struct {
	err error // the underlying error being reported
}
 155
// eof is the sentinel rune value returned by ss.getRune when the input
// is exhausted. It is negative, so it never equals a rune read from input.
const eof = -1
 157
// ss is the internal implementation of ScanState.
// The embedded ssave carries the per-scan settings that are saved and
// restored when a scan is invoked recursively on the same state.
type ss struct {
	rr       io.RuneReader // where to read input
	buf      buffer        // token accumulator
	peekRune rune          // one-rune lookahead; negative when empty
	prevRune rune          // last rune returned by ReadRune; -1 after UnreadRune
	count    int           // runes consumed so far; decremented by UnreadRune
	atEOF    bool          // already read EOF from rr
	ssave
}
 168
// ssave holds the parts of ss that need to be
// saved and restored on recursive scans.
// newScanState returns a copy of it when re-using an existing ss, and
// ss.free puts that copy back.
type ssave struct {
	validSave bool // is or was a part of an actual ss.
	nlIsEnd   bool // whether newline terminates scan
	nlIsSpace bool // whether newline counts as white space
	argLimit  int  // max value of ss.count for this arg; argLimit <= limit
	limit     int  // max value of ss.count.
	maxWid    int  // width of this arg; hugeWid means no width was set (see Width).
}
 179
 180 // The Read method is only in ScanState so that ScanState
 181 // satisfies io.Reader. It will never be called when used as
 182 // intended, so there is no need to make it actually work.
 183 func (s *ss) Read(buf []byte) (n int, err error) {
 184         return 0, errors.New("ScanState's Read should not be called. Use ReadRune")
 185 }
 186
 187 func (s *ss) ReadRune() (r rune, size int, err error) {
 188         if s.peekRune >= 0 {
 189                 s.count++
 190                 r = s.peekRune
 191                 size = utf8.RuneLen(r)
 192                 s.prevRune = r
 193                 s.peekRune = -1
 194                 return
 195         }
 196         if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.count >= s.argLimit {
 197                 err = io.EOF
 198                 return
 199         }
 200
 201         r, size, err = s.rr.ReadRune()
 202         if err == nil {
 203                 s.count++
 204                 s.prevRune = r
 205         } else if err == io.EOF {
 206                 s.atEOF = true
 207         }
 208         return
 209 }
 210
 211 func (s *ss) Width() (wid int, ok bool) {
 212         if s.maxWid == hugeWid {
 213                 return 0, false
 214         }
 215         return s.maxWid, true
 216 }
 217
 218 // The public method returns an error; this private one panics.
 219 // If getRune reaches EOF, the return value is EOF (-1).
 220 func (s *ss) getRune() (r rune) {
 221         r, _, err := s.ReadRune()
 222         if err != nil {
 223                 if err == io.EOF {
 224                         return eof
 225                 }
 226                 s.error(err)
 227         }
 228         return
 229 }
 230
 231 // mustReadRune turns io.EOF into a panic(io.ErrUnexpectedEOF).
 232 // It is called in cases such as string scanning where an EOF is a
 233 // syntax error.
 234 func (s *ss) mustReadRune() (r rune) {
 235         r = s.getRune()
 236         if r == eof {
 237                 s.error(io.ErrUnexpectedEOF)
 238         }
 239         return
 240 }
 241
 242 func (s *ss) UnreadRune() error {
 243         if u, ok := s.rr.(runeUnreader); ok {
 244                 u.UnreadRune()
 245         } else {
 246                 s.peekRune = s.prevRune
 247         }
 248         s.prevRune = -1
 249         s.count--
 250         return nil
 251 }
 252
 253 func (s *ss) error(err error) {
 254         panic(scanError{err})
 255 }
 256
 257 func (s *ss) errorString(err string) {
 258         panic(scanError{errors.New(err)})
 259 }
 260
 261 func (s *ss) Token(skipSpace bool, f func(rune) bool) (tok []byte, err error) {
 262         defer func() {
 263                 if e := recover(); e != nil {
 264                         if se, ok := e.(scanError); ok {
 265                                 err = se.err
 266                         } else {
 267                                 panic(e)
 268                         }
 269                 }
 270         }()
 271         if f == nil {
 272                 f = notSpace
 273         }
 274         s.buf = s.buf[:0]
 275         tok = s.token(skipSpace, f)
 276         return
 277 }
 278
 279 // space is a copy of the unicode.White_Space ranges,
 280 // to avoid depending on package unicode.
 281 var space = [][2]uint16{
 282         {0x0009, 0x000d},
 283         {0x0020, 0x0020},
 284         {0x0085, 0x0085},
 285         {0x00a0, 0x00a0},
 286         {0x1680, 0x1680},
 287         {0x2000, 0x200a},
 288         {0x2028, 0x2029},
 289         {0x202f, 0x202f},
 290         {0x205f, 0x205f},
 291         {0x3000, 0x3000},
 292 }
 293
 294 func isSpace(r rune) bool {
 295         if r >= 1<<16 {
 296                 return false
 297         }
 298         rx := uint16(r)
 299         for _, rng := range space {
 300                 if rx < rng[0] {
 301                         return false
 302                 }
 303                 if rx <= rng[1] {
 304                         return true
 305                 }
 306         }
 307         return false
 308 }
 309
 310 // notSpace is the default scanning function used in Token.
 311 func notSpace(r rune) bool {
 312         return !isSpace(r)
 313 }
 314
 315 // SkipSpace provides Scan methods the ability to skip space and newline
 316 // characters in keeping with the current scanning mode set by format strings
 317 // and Scan/Scanln.
 318 func (s *ss) SkipSpace() {
 319         s.skipSpace(false)
 320 }
 321
 322 // readRune is a structure to enable reading UTF-8 encoded code points
 323 // from an io.Reader.  It is used if the Reader given to the scanner does
 324 // not already implement io.RuneReader.
 325 type readRune struct {
 326         reader  io.Reader
 327         buf     [utf8.UTFMax]byte // used only inside ReadRune
 328         pending int               // number of bytes in pendBuf; only >0 for bad UTF-8
 329         pendBuf [utf8.UTFMax]byte // bytes left over
 330 }
 331
 332 // readByte returns the next byte from the input, which may be
 333 // left over from a previous read if the UTF-8 was ill-formed.
 334 func (r *readRune) readByte() (b byte, err error) {
 335         if r.pending > 0 {
 336                 b = r.pendBuf[0]
 337                 copy(r.pendBuf[0:], r.pendBuf[1:])
 338                 r.pending--
 339                 return
 340         }
 341         n, err := io.ReadFull(r.reader, r.pendBuf[0:1])
 342         if n != 1 {
 343                 return 0, err
 344         }
 345         return r.pendBuf[0], err
 346 }
 347
 348 // unread saves the bytes for the next read.
 349 func (r *readRune) unread(buf []byte) {
 350         copy(r.pendBuf[r.pending:], buf)
 351         r.pending += len(buf)
 352 }
 353
 354 // ReadRune returns the next UTF-8 encoded code point from the
 355 // io.Reader inside r.
 356 func (r *readRune) ReadRune() (rr rune, size int, err error) {
 357         r.buf[0], err = r.readByte()
 358         if err != nil {
 359                 return 0, 0, err
 360         }
 361         if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
 362                 rr = rune(r.buf[0])
 363                 return
 364         }
 365         var n int
 366         for n = 1; !utf8.FullRune(r.buf[0:n]); n++ {
 367                 r.buf[n], err = r.readByte()
 368                 if err != nil {
 369                         if err == io.EOF {
 370                                 err = nil
 371                                 break
 372                         }
 373                         return
 374                 }
 375         }
 376         rr, size = utf8.DecodeRune(r.buf[0:n])
 377         if size < n { // an error
 378                 r.unread(r.buf[size:n])
 379         }
 380         return
 381 }
 382
 383 var ssFree = sync.Pool{
 384         New: func() interface{} { return new(ss) },
 385 }
 386
 387 // newScanState allocates a new ss struct or grab a cached one.
 388 func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
 389         // If the reader is a *ss, then we've got a recursive
 390         // call to Scan, so re-use the scan state.
 391         s, ok := r.(*ss)
 392         if ok {
 393                 old = s.ssave
 394                 s.limit = s.argLimit
 395                 s.nlIsEnd = nlIsEnd || s.nlIsEnd
 396                 s.nlIsSpace = nlIsSpace
 397                 return
 398         }
 399
 400         s = ssFree.Get().(*ss)
 401         if rr, ok := r.(io.RuneReader); ok {
 402                 s.rr = rr
 403         } else {
 404                 s.rr = &readRune{reader: r}
 405         }
 406         s.nlIsSpace = nlIsSpace
 407         s.nlIsEnd = nlIsEnd
 408         s.prevRune = -1
 409         s.peekRune = -1
 410         s.atEOF = false
 411         s.limit = hugeWid
 412         s.argLimit = hugeWid
 413         s.maxWid = hugeWid
 414         s.validSave = true
 415         s.count = 0
 416         return
 417 }
 418
 419 // free saves used ss structs in ssFree; avoid an allocation per invocation.
 420 func (s *ss) free(old ssave) {
 421         // If it was used recursively, just restore the old state.
 422         if old.validSave {
 423                 s.ssave = old
 424                 return
 425         }
 426         // Don't hold on to ss structs with large buffers.
 427         if cap(s.buf) > 1024 {
 428                 return
 429         }
 430         s.buf = s.buf[:0]
 431         s.rr = nil
 432         ssFree.Put(s)
 433 }
 434
 435 // skipSpace skips spaces and maybe newlines.
 436 func (s *ss) skipSpace(stopAtNewline bool) {
 437         for {
 438                 r := s.getRune()
 439                 if r == eof {
 440                         return
 441                 }
 442                 if r == '\r' && s.peek("\n") {
 443                         continue
 444                 }
 445                 if r == '\n' {
 446                         if stopAtNewline {
 447                                 break
 448                         }
 449                         if s.nlIsSpace {
 450                                 continue
 451                         }
 452                         s.errorString("unexpected newline")
 453                         return
 454                 }
 455                 if !isSpace(r) {
 456                         s.UnreadRune()
 457                         break
 458                 }
 459         }
 460 }
 461
 462 // token returns the next space-delimited string from the input.  It
 463 // skips white space.  For Scanln, it stops at newlines.  For Scan,
 464 // newlines are treated as spaces.
 465 func (s *ss) token(skipSpace bool, f func(rune) bool) []byte {
 466         if skipSpace {
 467                 s.skipSpace(false)
 468         }
 469         // read until white space or newline
 470         for {
 471                 r := s.getRune()
 472                 if r == eof {
 473                         break
 474                 }
 475                 if !f(r) {
 476                         s.UnreadRune()
 477                         break
 478                 }
 479                 s.buf.WriteRune(r)
 480         }
 481         return s.buf
 482 }
 483
 484 var complexError = errors.New("syntax error scanning complex number")
 485 var boolError = errors.New("syntax error scanning boolean")
 486
 487 func indexRune(s string, r rune) int {
 488         for i, c := range s {
 489                 if c == r {
 490                         return i
 491                 }
 492         }
 493         return -1
 494 }
 495
 496 // consume reads the next rune in the input and reports whether it is in the ok string.
 497 // If accept is true, it puts the character into the input token.
 498 func (s *ss) consume(ok string, accept bool) bool {
 499         r := s.getRune()
 500         if r == eof {
 501                 return false
 502         }
 503         if indexRune(ok, r) >= 0 {
 504                 if accept {
 505                         s.buf.WriteRune(r)
 506                 }
 507                 return true
 508         }
 509         if r != eof && accept {
 510                 s.UnreadRune()
 511         }
 512         return false
 513 }
 514
 515 // peek reports whether the next character is in the ok string, without consuming it.
 516 func (s *ss) peek(ok string) bool {
 517         r := s.getRune()
 518         if r != eof {
 519                 s.UnreadRune()
 520         }
 521         return indexRune(ok, r) >= 0
 522 }
 523
 524 func (s *ss) notEOF() {
 525         // Guarantee there is data to be read.
 526         if r := s.getRune(); r == eof {
 527                 panic(io.EOF)
 528         }
 529         s.UnreadRune()
 530 }
 531
 532 // accept checks the next rune in the input.  If it's a byte (sic) in the string, it puts it in the
 533 // buffer and returns true. Otherwise it return false.
 534 func (s *ss) accept(ok string) bool {
 535         return s.consume(ok, true)
 536 }
 537
 538 // okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
 539 func (s *ss) okVerb(verb rune, okVerbs, typ string) bool {
 540         for _, v := range okVerbs {
 541                 if v == verb {
 542                         return true
 543                 }
 544         }
 545         s.errorString("bad verb %" + string(verb) + " for " + typ)
 546         return false
 547 }
 548
 549 // scanBool returns the value of the boolean represented by the next token.
 550 func (s *ss) scanBool(verb rune) bool {
 551         s.skipSpace(false)
 552         s.notEOF()
 553         if !s.okVerb(verb, "tv", "boolean") {
 554                 return false
 555         }
 556         // Syntax-checking a boolean is annoying.  We're not fastidious about case.
 557         switch s.getRune() {
 558         case '0':
 559                 return false
 560         case '1':
 561                 return true
 562         case 't', 'T':
 563                 if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
 564                         s.error(boolError)
 565                 }
 566                 return true
 567         case 'f', 'F':
 568                 if s.accept("aA") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
 569                         s.error(boolError)
 570                 }
 571                 return false
 572         }
 573         return false
 574 }
 575
// Numerical elements: sets of runes, written as strings, that the number
// scanners pass to accept/consume/peek to recognise parts of a number.
const (
	binaryDigits      = "01"
	octalDigits       = "01234567"
	decimalDigits     = "0123456789"
	hexadecimalDigits = "0123456789aAbBcCdDeEfF"
	sign              = "+-"
	period            = "."
	exponent          = "eEp" // 'p' introduces a power-of-two exponent (see convertFloat)
)
 586
 587 // getBase returns the numeric base represented by the verb and its digit string.
 588 func (s *ss) getBase(verb rune) (base int, digits string) {
 589         s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
 590         base = 10
 591         digits = decimalDigits
 592         switch verb {
 593         case 'b':
 594                 base = 2
 595                 digits = binaryDigits
 596         case 'o':
 597                 base = 8
 598                 digits = octalDigits
 599         case 'x', 'X', 'U':
 600                 base = 16
 601                 digits = hexadecimalDigits
 602         }
 603         return
 604 }
 605
 606 // scanNumber returns the numerical string with specified digits starting here.
 607 func (s *ss) scanNumber(digits string, haveDigits bool) string {
 608         if !haveDigits {
 609                 s.notEOF()
 610                 if !s.accept(digits) {
 611                         s.errorString("expected integer")
 612                 }
 613         }
 614         for s.accept(digits) {
 615         }
 616         return string(s.buf)
 617 }
 618
 619 // scanRune returns the next rune value in the input.
 620 func (s *ss) scanRune(bitSize int) int64 {
 621         s.notEOF()
 622         r := int64(s.getRune())
 623         n := uint(bitSize)
 624         x := (r << (64 - n)) >> (64 - n)
 625         if x != r {
 626                 s.errorString("overflow on character value " + string(r))
 627         }
 628         return r
 629 }
 630
 631 // scanBasePrefix reports whether the integer begins with a 0 or 0x,
 632 // and returns the base, digit string, and whether a zero was found.
 633 // It is called only if the verb is %v.
 634 func (s *ss) scanBasePrefix() (base int, digits string, found bool) {
 635         if !s.peek("0") {
 636                 return 10, decimalDigits, false
 637         }
 638         s.accept("0")
 639         found = true // We've put a digit into the token buffer.
 640         // Special cases for '0' && '0x'
 641         base, digits = 8, octalDigits
 642         if s.peek("xX") {
 643                 s.consume("xX", false)
 644                 base, digits = 16, hexadecimalDigits
 645         }
 646         return
 647 }
 648
 649 // scanInt returns the value of the integer represented by the next
 650 // token, checking for overflow.  Any error is stored in s.err.
 651 func (s *ss) scanInt(verb rune, bitSize int) int64 {
 652         if verb == 'c' {
 653                 return s.scanRune(bitSize)
 654         }
 655         s.skipSpace(false)
 656         s.notEOF()
 657         base, digits := s.getBase(verb)
 658         haveDigits := false
 659         if verb == 'U' {
 660                 if !s.consume("U", false) || !s.consume("+", false) {
 661                         s.errorString("bad unicode format ")
 662                 }
 663         } else {
 664                 s.accept(sign) // If there's a sign, it will be left in the token buffer.
 665                 if verb == 'v' {
 666                         base, digits, haveDigits = s.scanBasePrefix()
 667                 }
 668         }
 669         tok := s.scanNumber(digits, haveDigits)
 670         i, err := strconv.ParseInt(tok, base, 64)
 671         if err != nil {
 672                 s.error(err)
 673         }
 674         n := uint(bitSize)
 675         x := (i << (64 - n)) >> (64 - n)
 676         if x != i {
 677                 s.errorString("integer overflow on token " + tok)
 678         }
 679         return i
 680 }
 681
 682 // scanUint returns the value of the unsigned integer represented
 683 // by the next token, checking for overflow.  Any error is stored in s.err.
 684 func (s *ss) scanUint(verb rune, bitSize int) uint64 {
 685         if verb == 'c' {
 686                 return uint64(s.scanRune(bitSize))
 687         }
 688         s.skipSpace(false)
 689         s.notEOF()
 690         base, digits := s.getBase(verb)
 691         haveDigits := false
 692         if verb == 'U' {
 693                 if !s.consume("U", false) || !s.consume("+", false) {
 694                         s.errorString("bad unicode format ")
 695                 }
 696         } else if verb == 'v' {
 697                 base, digits, haveDigits = s.scanBasePrefix()
 698         }
 699         tok := s.scanNumber(digits, haveDigits)
 700         i, err := strconv.ParseUint(tok, base, 64)
 701         if err != nil {
 702                 s.error(err)
 703         }
 704         n := uint(bitSize)
 705         x := (i << (64 - n)) >> (64 - n)
 706         if x != i {
 707                 s.errorString("unsigned integer overflow on token " + tok)
 708         }
 709         return i
 710 }
 711
 712 // floatToken returns the floating-point number starting here, no longer than swid
 713 // if the width is specified. It's not rigorous about syntax because it doesn't check that
 714 // we have at least some digits, but Atof will do that.
 715 func (s *ss) floatToken() string {
 716         s.buf = s.buf[:0]
 717         // NaN?
 718         if s.accept("nN") && s.accept("aA") && s.accept("nN") {
 719                 return string(s.buf)
 720         }
 721         // leading sign?
 722         s.accept(sign)
 723         // Inf?
 724         if s.accept("iI") && s.accept("nN") && s.accept("fF") {
 725                 return string(s.buf)
 726         }
 727         // digits?
 728         for s.accept(decimalDigits) {
 729         }
 730         // decimal point?
 731         if s.accept(period) {
 732                 // fraction?
 733                 for s.accept(decimalDigits) {
 734                 }
 735         }
 736         // exponent?
 737         if s.accept(exponent) {
 738                 // leading sign?
 739                 s.accept(sign)
 740                 // digits?
 741                 for s.accept(decimalDigits) {
 742                 }
 743         }
 744         return string(s.buf)
 745 }
 746
 747 // complexTokens returns the real and imaginary parts of the complex number starting here.
 748 // The number might be parenthesized and has the format (N+Ni) where N is a floating-point
 749 // number and there are no spaces within.
 750 func (s *ss) complexTokens() (real, imag string) {
 751         // TODO: accept N and Ni independently?
 752         parens := s.accept("(")
 753         real = s.floatToken()
 754         s.buf = s.buf[:0]
 755         // Must now have a sign.
 756         if !s.accept("+-") {
 757                 s.error(complexError)
 758         }
 759         // Sign is now in buffer
 760         imagSign := string(s.buf)
 761         imag = s.floatToken()
 762         if !s.accept("i") {
 763                 s.error(complexError)
 764         }
 765         if parens && !s.accept(")") {
 766                 s.error(complexError)
 767         }
 768         return real, imagSign + imag
 769 }
 770
 771 // convertFloat converts the string to a float64value.
 772 func (s *ss) convertFloat(str string, n int) float64 {
 773         if p := indexRune(str, 'p'); p >= 0 {
 774                 // Atof doesn't handle power-of-2 exponents,
 775                 // but they're easy to evaluate.
 776                 f, err := strconv.ParseFloat(str[:p], n)
 777                 if err != nil {
 778                         // Put full string into error.
 779                         if e, ok := err.(*strconv.NumError); ok {
 780                                 e.Num = str
 781                         }
 782                         s.error(err)
 783                 }
 784                 m, err := strconv.Atoi(str[p+1:])
 785                 if err != nil {
 786                         // Put full string into error.
 787                         if e, ok := err.(*strconv.NumError); ok {
 788                                 e.Num = str
 789                         }
 790                         s.error(err)
 791                 }
 792                 return math.Ldexp(f, m)
 793         }
 794         f, err := strconv.ParseFloat(str, n)
 795         if err != nil {
 796                 s.error(err)
 797         }
 798         return f
 799 }
 800
 801 // convertComplex converts the next token to a complex128 value.
 802 // The atof argument is a type-specific reader for the underlying type.
 803 // If we're reading complex64, atof will parse float32s and convert them
 804 // to float64's to avoid reproducing this code for each complex type.
 805 func (s *ss) scanComplex(verb rune, n int) complex128 {
 806         if !s.okVerb(verb, floatVerbs, "complex") {
 807                 return 0
 808         }
 809         s.skipSpace(false)
 810         s.notEOF()
 811         sreal, simag := s.complexTokens()
 812         real := s.convertFloat(sreal, n/2)
 813         imag := s.convertFloat(simag, n/2)
 814         return complex(real, imag)
 815 }
 816
 817 // convertString returns the string represented by the next input characters.
 818 // The format of the input is determined by the verb.
 819 func (s *ss) convertString(verb rune) (str string) {
 820         if !s.okVerb(verb, "svqx", "string") {
 821                 return ""
 822         }
 823         s.skipSpace(false)
 824         s.notEOF()
 825         switch verb {
 826         case 'q':
 827                 str = s.quotedString()
 828         case 'x':
 829                 str = s.hexString()
 830         default:
 831                 str = string(s.token(true, notSpace)) // %s and %v just return the next word
 832         }
 833         return
 834 }
 835
 836 // quotedString returns the double- or back-quoted string represented by the next input characters.
 837 func (s *ss) quotedString() string {
 838         s.notEOF()
 839         quote := s.getRune()
 840         switch quote {
 841         case '`':
 842                 // Back-quoted: Anything goes until EOF or back quote.
 843                 for {
 844                         r := s.mustReadRune()
 845                         if r == quote {
 846                                 break
 847                         }
 848                         s.buf.WriteRune(r)
 849                 }
 850                 return string(s.buf)
 851         case '"':
 852                 // Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
 853                 s.buf.WriteRune(quote)
 854                 for {
 855                         r := s.mustReadRune()
 856                         s.buf.WriteRune(r)
 857                         if r == '\\' {
 858                                 // In a legal backslash escape, no matter how long, only the character
 859                                 // immediately after the escape can itself be a backslash or quote.
 860                                 // Thus we only need to protect the first character after the backslash.
 861                                 s.buf.WriteRune(s.mustReadRune())
 862                         } else if r == '"' {
 863                                 break
 864                         }
 865                 }
 866                 result, err := strconv.Unquote(string(s.buf))
 867                 if err != nil {
 868                         s.error(err)
 869                 }
 870                 return result
 871         default:
 872                 s.errorString("expected quoted string")
 873         }
 874         return ""
 875 }
 876
 877 // hexDigit returns the value of the hexadecimal digit
 878 func (s *ss) hexDigit(d rune) int {
 879         digit := int(d)
 880         switch digit {
 881         case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 882                 return digit - '0'
 883         case 'a', 'b', 'c', 'd', 'e', 'f':
 884                 return 10 + digit - 'a'
 885         case 'A', 'B', 'C', 'D', 'E', 'F':
 886                 return 10 + digit - 'A'
 887         }
 888         s.errorString("illegal hex digit")
 889         return 0
 890 }
 891
 892 // hexByte returns the next hex-encoded (two-character) byte from the input.
 893 // There must be either two hexadecimal digits or a space character in the input.
 894 func (s *ss) hexByte() (b byte, ok bool) {
 895         rune1 := s.getRune()
 896         if rune1 == eof {
 897                 return
 898         }
 899         if isSpace(rune1) {
 900                 s.UnreadRune()
 901                 return
 902         }
 903         rune2 := s.mustReadRune()
 904         return byte(s.hexDigit(rune1)<<4 | s.hexDigit(rune2)), true
 905 }
 906
 907 // hexString returns the space-delimited hexpair-encoded string.
 908 func (s *ss) hexString() string {
 909         s.notEOF()
 910         for {
 911                 b, ok := s.hexByte()
 912                 if !ok {
 913                         break
 914                 }
 915                 s.buf.WriteByte(b)
 916         }
 917         if len(s.buf) == 0 {
 918                 s.errorString("no hex data for %x string")
 919                 return ""
 920         }
 921         return string(s.buf)
 922 }
 923
 924 const floatVerbs = "beEfFgGv"
 925
 926 const hugeWid = 1 << 30
 927
 928 // scanOne scans a single value, deriving the scanner from the type of the argument.
 929 func (s *ss) scanOne(verb rune, arg interface{}) {
 930         s.buf = s.buf[:0]
 931         var err error
 932         // If the parameter has its own Scan method, use that.
 933         if v, ok := arg.(Scanner); ok {
 934                 err = v.Scan(s, verb)
 935                 if err != nil {
 936                         if err == io.EOF {
 937                                 err = io.ErrUnexpectedEOF
 938                         }
 939                         s.error(err)
 940                 }
 941                 return
 942         }
 943
 944         switch v := arg.(type) {
 945         case *bool:
 946                 *v = s.scanBool(verb)
 947         case *complex64:
 948                 *v = complex64(s.scanComplex(verb, 64))
 949         case *complex128:
 950                 *v = s.scanComplex(verb, 128)
 951         case *int:
 952                 *v = int(s.scanInt(verb, intBits))
 953         case *int8:
 954                 *v = int8(s.scanInt(verb, 8))
 955         case *int16:
 956                 *v = int16(s.scanInt(verb, 16))
 957         case *int32:
 958                 *v = int32(s.scanInt(verb, 32))
 959         case *int64:
 960                 *v = s.scanInt(verb, 64)
 961         case *uint:
 962                 *v = uint(s.scanUint(verb, intBits))
 963         case *uint8:
 964                 *v = uint8(s.scanUint(verb, 8))
 965         case *uint16:
 966                 *v = uint16(s.scanUint(verb, 16))
 967         case *uint32:
 968                 *v = uint32(s.scanUint(verb, 32))
 969         case *uint64:
 970                 *v = s.scanUint(verb, 64)
 971         case *uintptr:
 972                 *v = uintptr(s.scanUint(verb, uintptrBits))
 973         // Floats are tricky because you want to scan in the precision of the result, not
 974         // scan in high precision and convert, in order to preserve the correct error condition.
 975         case *float32:
 976                 if s.okVerb(verb, floatVerbs, "float32") {
 977                         s.skipSpace(false)
 978                         s.notEOF()
 979                         *v = float32(s.convertFloat(s.floatToken(), 32))
 980                 }
 981         case *float64:
 982                 if s.okVerb(verb, floatVerbs, "float64") {
 983                         s.skipSpace(false)
 984                         s.notEOF()
 985                         *v = s.convertFloat(s.floatToken(), 64)
 986                 }
 987         case *string:
 988                 *v = s.convertString(verb)
 989         case *[]byte:
 990                 // We scan to string and convert so we get a copy of the data.
 991                 // If we scanned to bytes, the slice would point at the buffer.
 992                 *v = []byte(s.convertString(verb))
 993         default:
 994                 val := reflect.ValueOf(v)
 995                 ptr := val
 996                 if ptr.Kind() != reflect.Ptr {
 997                         s.errorString("type not a pointer: " + val.Type().String())
 998                         return
 999                 }
1000                 switch v := ptr.Elem(); v.Kind() {
1001                 case reflect.Bool:
1002                         v.SetBool(s.scanBool(verb))
1003                 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
1004                         v.SetInt(s.scanInt(verb, v.Type().Bits()))
1005                 case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
1006                         v.SetUint(s.scanUint(verb, v.Type().Bits()))
1007                 case reflect.String:
1008                         v.SetString(s.convertString(verb))
1009                 case reflect.Slice:
1010                         // For now, can only handle (renamed) []byte.
1011                         typ := v.Type()
1012                         if typ.Elem().Kind() != reflect.Uint8 {
1013                                 s.errorString("can't scan type: " + val.Type().String())
1014                         }
1015                         str := s.convertString(verb)
1016                         v.Set(reflect.MakeSlice(typ, len(str), len(str)))
1017                         for i := 0; i < len(str); i++ {
1018                                 v.Index(i).SetUint(uint64(str[i]))
1019                         }
1020                 case reflect.Float32, reflect.Float64:
1021                         s.skipSpace(false)
1022                         s.notEOF()
1023                         v.SetFloat(s.convertFloat(s.floatToken(), v.Type().Bits()))
1024                 case reflect.Complex64, reflect.Complex128:
1025                         v.SetComplex(s.scanComplex(verb, v.Type().Bits()))
1026                 default:
1027                         s.errorString("can't scan type: " + val.Type().String())
1028                 }
1029         }
1030 }
1031
1032 // errorHandler turns local panics into error returns.
1033 func errorHandler(errp *error) {
1034         if e := recover(); e != nil {
1035                 if se, ok := e.(scanError); ok { // catch local error
1036                         *errp = se.err
1037                 } else if eof, ok := e.(error); ok && eof == io.EOF { // out of input
1038                         *errp = eof
1039                 } else {
1040                         panic(e)
1041                 }
1042         }
1043 }
1044
1045 // doScan does the real work for scanning without a format string.
1046 func (s *ss) doScan(a []interface{}) (numProcessed int, err error) {
1047         defer errorHandler(&err)
1048         for _, arg := range a {
1049                 s.scanOne('v', arg)
1050                 numProcessed++
1051         }
1052         // Check for newline if required.
1053         if !s.nlIsSpace {
1054                 for {
1055                         r := s.getRune()
1056                         if r == '\n' || r == eof {
1057                                 break
1058                         }
1059                         if !isSpace(r) {
1060                                 s.errorString("expected newline")
1061                                 break
1062                         }
1063                 }
1064         }
1065         return
1066 }
1067
1068 // advance determines whether the next characters in the input match
1069 // those of the format.  It returns the number of bytes (sic) consumed
1070 // in the format. Newlines included, all runs of space characters in
1071 // either input or format behave as a single space. This routine also
1072 // handles the %% case.  If the return value is zero, either format
1073 // starts with a % (with no following %) or the input is empty.
1074 // If it is negative, the input did not match the string.
1075 func (s *ss) advance(format string) (i int) {
1076         for i < len(format) {
1077                 fmtc, w := utf8.DecodeRuneInString(format[i:])
1078                 if fmtc == '%' {
1079                         // %% acts like a real percent
1080                         nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
1081                         if nextc != '%' {
1082                                 return
1083                         }
1084                         i += w // skip the first %
1085                 }
1086                 sawSpace := false
1087                 for isSpace(fmtc) && i < len(format) {
1088                         sawSpace = true
1089                         i += w
1090                         fmtc, w = utf8.DecodeRuneInString(format[i:])
1091                 }
1092                 if sawSpace {
1093                         // There was space in the format, so there should be space (EOF)
1094                         // in the input.
1095                         inputc := s.getRune()
1096                         if inputc == eof || inputc == '\n' {
1097                                 // If we've reached a newline, stop now; don't read ahead.
1098                                 return
1099                         }
1100                         if !isSpace(inputc) {
1101                                 // Space in format but not in input: error
1102                                 s.errorString("expected space in input to match format")
1103                         }
1104                         s.skipSpace(true)
1105                         continue
1106                 }
1107                 inputc := s.mustReadRune()
1108                 if fmtc != inputc {
1109                         s.UnreadRune()
1110                         return -1
1111                 }
1112                 i += w
1113         }
1114         return
1115 }
1116
1117 // doScanf does the real work when scanning with a format string.
1118 //  At the moment, it handles only pointers to basic types.
1119 func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err error) {
1120         defer errorHandler(&err)
1121         end := len(format) - 1
1122         // We process one item per non-trivial format
1123         for i := 0; i <= end; {
1124                 w := s.advance(format[i:])
1125                 if w > 0 {
1126                         i += w
1127                         continue
1128                 }
1129                 // Either we failed to advance, we have a percent character, or we ran out of input.
1130                 if format[i] != '%' {
1131                         // Can't advance format.  Why not?
1132                         if w < 0 {
1133                                 s.errorString("input does not match format")
1134                         }
1135                         // Otherwise at EOF; "too many operands" error handled below
1136                         break
1137                 }
1138                 i++ // % is one byte
1139
1140                 // do we have 20 (width)?
1141                 var widPresent bool
1142                 s.maxWid, widPresent, i = parsenum(format, i, end)
1143                 if !widPresent {
1144                         s.maxWid = hugeWid
1145                 }
1146                 s.argLimit = s.limit
1147                 if f := s.count + s.maxWid; f < s.argLimit {
1148                         s.argLimit = f
1149                 }
1150
1151                 c, w := utf8.DecodeRuneInString(format[i:])
1152                 i += w
1153
1154                 if numProcessed >= len(a) { // out of operands
1155                         s.errorString("too few operands for format %" + format[i-w:])
1156                         break
1157                 }
1158                 arg := a[numProcessed]
1159
1160                 s.scanOne(c, arg)
1161                 numProcessed++
1162                 s.argLimit = s.limit
1163         }
1164         if numProcessed < len(a) {
1165                 s.errorString("too many operands")
1166         }
1167         return
1168 }