libgo/go/fmt/scan.go

   1 // Copyright 2010 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package fmt
   6
   7 import (
   8         "errors"
   9         "io"
  10         "math"
  11         "os"
  12         "reflect"
  13         "strconv"
  14         "sync"
  15         "unicode/utf8"
  16 )
  17
// runeUnreader is the interface to something that can unread runes.
// If the object provided to Scan does not satisfy this interface,
// a local buffer will be used to back up the input, but its contents
// will be lost when Scan returns.
type runeUnreader interface {
	// UnreadRune pushes back the most recently read rune.
	UnreadRune() error
}
  25
// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
type ScanState interface {
	// ReadRune reads the next rune (Unicode code point) from the input.
	// If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
	// return EOF after returning the first '\n' or when reading beyond
	// the specified width.
	ReadRune() (r rune, size int, err error)
	// UnreadRune causes the next call to ReadRune to return the same rune.
	UnreadRune() error
	// SkipSpace skips space in the input. Newlines are treated as space
	// unless the scan operation is Scanln, Fscanln or Sscanln, in which case
	// a newline is treated as EOF.
	SkipSpace()
	// Token skips space in the input if skipSpace is true, then returns the
	// run of Unicode code points c satisfying f(c). If f is nil,
	// !unicode.IsSpace(c) is used; that is, the token will hold non-space
	// characters. Newlines are treated as space unless the scan operation
	// is Scanln, Fscanln or Sscanln, in which case a newline is treated as
	// EOF. The returned slice points to shared data that may be overwritten
	// by the next call to Token, a call to a Scan function using the ScanState
	// as input, or when the calling Scan method returns.
	Token(skipSpace bool, f func(rune) bool) (token []byte, err error)
	// Width returns the value of the width option and whether it has been set.
	// The unit is Unicode code points.
	Width() (wid int, ok bool)
	// Read is present only so that a ScanState satisfies io.Reader.
	// Because ReadRune is implemented by the interface, Read should never be
	// called by the scanning routines and a valid implementation of
	// ScanState may choose always to return an error from Read.
	Read(buf []byte) (n int, err error)
}
  58
// Scanner is implemented by any value that has a Scan method, which scans
// the input for the representation of a value and stores the result in the
// receiver, which must be a pointer to be useful. The Scan method is called
// for any argument to Scan, Scanf, or Scanln that implements it.
type Scanner interface {
	// Scan reads the value from state; verb is the format verb in effect.
	Scan(state ScanState, verb rune) error
}
  66
  67 // Scan scans text read from standard input, storing successive
  68 // space-separated values into successive arguments.  Newlines count
  69 // as space.  It returns the number of items successfully scanned.
  70 // If that is less than the number of arguments, err will report why.
  71 func Scan(a ...interface{}) (n int, err error) {
  72         return Fscan(os.Stdin, a...)
  73 }
  74
  75 // Scanln is similar to Scan, but stops scanning at a newline and
  76 // after the final item there must be a newline or EOF.
  77 func Scanln(a ...interface{}) (n int, err error) {
  78         return Fscanln(os.Stdin, a...)
  79 }
  80
  81 // Scanf scans text read from standard input, storing successive
  82 // space-separated values into successive arguments as determined by
  83 // the format.  It returns the number of items successfully scanned.
  84 func Scanf(format string, a ...interface{}) (n int, err error) {
  85         return Fscanf(os.Stdin, format, a...)
  86 }
  87
  88 type stringReader string
  89
  90 func (r *stringReader) Read(b []byte) (n int, err error) {
  91         n = copy(b, *r)
  92         *r = (*r)[n:]
  93         if n == 0 {
  94                 err = io.EOF
  95         }
  96         return
  97 }
  98
  99 // Sscan scans the argument string, storing successive space-separated
 100 // values into successive arguments.  Newlines count as space.  It
 101 // returns the number of items successfully scanned.  If that is less
 102 // than the number of arguments, err will report why.
 103 func Sscan(str string, a ...interface{}) (n int, err error) {
 104         return Fscan((*stringReader)(&str), a...)
 105 }
 106
 107 // Sscanln is similar to Sscan, but stops scanning at a newline and
 108 // after the final item there must be a newline or EOF.
 109 func Sscanln(str string, a ...interface{}) (n int, err error) {
 110         return Fscanln((*stringReader)(&str), a...)
 111 }
 112
 113 // Sscanf scans the argument string, storing successive space-separated
 114 // values into successive arguments as determined by the format.  It
 115 // returns the number of items successfully parsed.
 116 func Sscanf(str string, format string, a ...interface{}) (n int, err error) {
 117         return Fscanf((*stringReader)(&str), format, a...)
 118 }
 119
 120 // Fscan scans text read from r, storing successive space-separated
 121 // values into successive arguments.  Newlines count as space.  It
 122 // returns the number of items successfully scanned.  If that is less
 123 // than the number of arguments, err will report why.
 124 func Fscan(r io.Reader, a ...interface{}) (n int, err error) {
 125         s, old := newScanState(r, true, false)
 126         n, err = s.doScan(a)
 127         s.free(old)
 128         return
 129 }
 130
 131 // Fscanln is similar to Fscan, but stops scanning at a newline and
 132 // after the final item there must be a newline or EOF.
 133 func Fscanln(r io.Reader, a ...interface{}) (n int, err error) {
 134         s, old := newScanState(r, false, true)
 135         n, err = s.doScan(a)
 136         s.free(old)
 137         return
 138 }
 139
 140 // Fscanf scans text read from r, storing successive space-separated
 141 // values into successive arguments as determined by the format.  It
 142 // returns the number of items successfully parsed.
 143 func Fscanf(r io.Reader, format string, a ...interface{}) (n int, err error) {
 144         s, old := newScanState(r, false, false)
 145         n, err = s.doScanf(format, a)
 146         s.free(old)
 147         return
 148 }
 149
// scanError represents an error generated by the scanning software.
// It's used as a unique signature to identify such errors when recovering:
// the scanner panics with a scanError and the recovering code unwraps err.
type scanError struct {
	err error // the underlying error being reported
}
 155
// eof is the sentinel rune value returned by getRune at end of input.
const eof = -1
 157
// ss is the internal implementation of ScanState.
type ss struct {
	rr       io.RuneReader // where to read input
	buf      buffer        // token accumulator
	peekRune rune          // one-rune lookahead; negative when empty
	prevRune rune          // last rune returned by ReadRune
	count    int           // runes consumed so far.
	atEOF    bool          // already read EOF
	ssave                  // per-scan settings, saved and restored around recursive scans
}
 168
// ssave holds the parts of ss that need to be
// saved and restored on recursive scans.
type ssave struct {
	validSave bool // is or was a part of an actual ss.
	nlIsEnd   bool // whether newline terminates scan
	nlIsSpace bool // whether newline counts as white space
	argLimit  int  // max value of ss.count for this arg; argLimit <= limit
	limit     int  // max value of ss.count.
	maxWid    int  // width of this arg; hugeWid means no width was set.
}
 179
 180 // The Read method is only in ScanState so that ScanState
 181 // satisfies io.Reader. It will never be called when used as
 182 // intended, so there is no need to make it actually work.
 183 func (s *ss) Read(buf []byte) (n int, err error) {
 184         return 0, errors.New("ScanState's Read should not be called. Use ReadRune")
 185 }
 186
 187 func (s *ss) ReadRune() (r rune, size int, err error) {
 188         if s.peekRune >= 0 {
 189                 s.count++
 190                 r = s.peekRune
 191                 size = utf8.RuneLen(r)
 192                 s.prevRune = r
 193                 s.peekRune = -1
 194                 return
 195         }
 196         if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.count >= s.argLimit {
 197                 err = io.EOF
 198                 return
 199         }
 200
 201         r, size, err = s.rr.ReadRune()
 202         if err == nil {
 203                 s.count++
 204                 s.prevRune = r
 205         } else if err == io.EOF {
 206                 s.atEOF = true
 207         }
 208         return
 209 }
 210
 211 func (s *ss) Width() (wid int, ok bool) {
 212         if s.maxWid == hugeWid {
 213                 return 0, false
 214         }
 215         return s.maxWid, true
 216 }
 217
 218 // The public method returns an error; this private one panics.
 219 // If getRune reaches EOF, the return value is EOF (-1).
 220 func (s *ss) getRune() (r rune) {
 221         r, _, err := s.ReadRune()
 222         if err != nil {
 223                 if err == io.EOF {
 224                         return eof
 225                 }
 226                 s.error(err)
 227         }
 228         return
 229 }
 230
 231 // mustReadRune turns io.EOF into a panic(io.ErrUnexpectedEOF).
 232 // It is called in cases such as string scanning where an EOF is a
 233 // syntax error.
 234 func (s *ss) mustReadRune() (r rune) {
 235         r = s.getRune()
 236         if r == eof {
 237                 s.error(io.ErrUnexpectedEOF)
 238         }
 239         return
 240 }
 241
 242 func (s *ss) UnreadRune() error {
 243         if u, ok := s.rr.(runeUnreader); ok {
 244                 u.UnreadRune()
 245         } else {
 246                 s.peekRune = s.prevRune
 247         }
 248         s.prevRune = -1
 249         s.count--
 250         return nil
 251 }
 252
 253 func (s *ss) error(err error) {
 254         panic(scanError{err})
 255 }
 256
 257 func (s *ss) errorString(err string) {
 258         panic(scanError{errors.New(err)})
 259 }
 260
 261 func (s *ss) Token(skipSpace bool, f func(rune) bool) (tok []byte, err error) {
 262         defer func() {
 263                 if e := recover(); e != nil {
 264                         if se, ok := e.(scanError); ok {
 265                                 err = se.err
 266                         } else {
 267                                 panic(e)
 268                         }
 269                 }
 270         }()
 271         if f == nil {
 272                 f = notSpace
 273         }
 274         s.buf = s.buf[:0]
 275         tok = s.token(skipSpace, f)
 276         return
 277 }
 278
 279 // space is a copy of the unicode.White_Space ranges,
 280 // to avoid depending on package unicode.
 281 var space = [][2]uint16{
 282         {0x0009, 0x000d},
 283         {0x0020, 0x0020},
 284         {0x0085, 0x0085},
 285         {0x00a0, 0x00a0},
 286         {0x1680, 0x1680},
 287         {0x2000, 0x200a},
 288         {0x2028, 0x2029},
 289         {0x202f, 0x202f},
 290         {0x205f, 0x205f},
 291         {0x3000, 0x3000},
 292 }
 293
 294 func isSpace(r rune) bool {
 295         if r >= 1<<16 {
 296                 return false
 297         }
 298         rx := uint16(r)
 299         for _, rng := range space {
 300                 if rx < rng[0] {
 301                         return false
 302                 }
 303                 if rx <= rng[1] {
 304                         return true
 305                 }
 306         }
 307         return false
 308 }
 309
 310 // notSpace is the default scanning function used in Token.
 311 func notSpace(r rune) bool {
 312         return !isSpace(r)
 313 }
 314
 315 // SkipSpace provides Scan methods the ability to skip space and newline
 316 // characters in keeping with the current scanning mode set by format strings
 317 // and Scan/Scanln.
 318 func (s *ss) SkipSpace() {
 319         s.skipSpace(false)
 320 }
 321
// readRune is a structure to enable reading UTF-8 encoded code points
// from an io.Reader. It is used if the Reader given to the scanner does
// not already implement io.RuneReader.
type readRune struct {
	reader  io.Reader         // underlying source of bytes
	buf     [utf8.UTFMax]byte // used only inside ReadRune
	pending int               // number of bytes in pendBuf; only >0 for bad UTF-8
	pendBuf [utf8.UTFMax]byte // bytes left over
}
 331
 332 // readByte returns the next byte from the input, which may be
 333 // left over from a previous read if the UTF-8 was ill-formed.
 334 func (r *readRune) readByte() (b byte, err error) {
 335         if r.pending > 0 {
 336                 b = r.pendBuf[0]
 337                 copy(r.pendBuf[0:], r.pendBuf[1:])
 338                 r.pending--
 339                 return
 340         }
 341         n, err := io.ReadFull(r.reader, r.pendBuf[0:1])
 342         if n != 1 {
 343                 return 0, err
 344         }
 345         return r.pendBuf[0], err
 346 }
 347
 348 // unread saves the bytes for the next read.
 349 func (r *readRune) unread(buf []byte) {
 350         copy(r.pendBuf[r.pending:], buf)
 351         r.pending += len(buf)
 352 }
 353
 354 // ReadRune returns the next UTF-8 encoded code point from the
 355 // io.Reader inside r.
 356 func (r *readRune) ReadRune() (rr rune, size int, err error) {
 357         r.buf[0], err = r.readByte()
 358         if err != nil {
 359                 return 0, 0, err
 360         }
 361         if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
 362                 rr = rune(r.buf[0])
 363                 size = 1 // Known to be 1.
 364                 return
 365         }
 366         var n int
 367         for n = 1; !utf8.FullRune(r.buf[0:n]); n++ {
 368                 r.buf[n], err = r.readByte()
 369                 if err != nil {
 370                         if err == io.EOF {
 371                                 err = nil
 372                                 break
 373                         }
 374                         return
 375                 }
 376         }
 377         rr, size = utf8.DecodeRune(r.buf[0:n])
 378         if size < n { // an error
 379                 r.unread(r.buf[size:n])
 380         }
 381         return
 382 }
 383
 384 var ssFree = sync.Pool{
 385         New: func() interface{} { return new(ss) },
 386 }
 387
 388 // newScanState allocates a new ss struct or grab a cached one.
 389 func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
 390         // If the reader is a *ss, then we've got a recursive
 391         // call to Scan, so re-use the scan state.
 392         s, ok := r.(*ss)
 393         if ok {
 394                 old = s.ssave
 395                 s.limit = s.argLimit
 396                 s.nlIsEnd = nlIsEnd || s.nlIsEnd
 397                 s.nlIsSpace = nlIsSpace
 398                 return
 399         }
 400
 401         s = ssFree.Get().(*ss)
 402         if rr, ok := r.(io.RuneReader); ok {
 403                 s.rr = rr
 404         } else {
 405                 s.rr = &readRune{reader: r}
 406         }
 407         s.nlIsSpace = nlIsSpace
 408         s.nlIsEnd = nlIsEnd
 409         s.prevRune = -1
 410         s.peekRune = -1
 411         s.atEOF = false
 412         s.limit = hugeWid
 413         s.argLimit = hugeWid
 414         s.maxWid = hugeWid
 415         s.validSave = true
 416         s.count = 0
 417         return
 418 }
 419
 420 // free saves used ss structs in ssFree; avoid an allocation per invocation.
 421 func (s *ss) free(old ssave) {
 422         // If it was used recursively, just restore the old state.
 423         if old.validSave {
 424                 s.ssave = old
 425                 return
 426         }
 427         // Don't hold on to ss structs with large buffers.
 428         if cap(s.buf) > 1024 {
 429                 return
 430         }
 431         s.buf = s.buf[:0]
 432         s.rr = nil
 433         ssFree.Put(s)
 434 }
 435
 436 // skipSpace skips spaces and maybe newlines.
 437 func (s *ss) skipSpace(stopAtNewline bool) {
 438         for {
 439                 r := s.getRune()
 440                 if r == eof {
 441                         return
 442                 }
 443                 if r == '\r' && s.peek("\n") {
 444                         continue
 445                 }
 446                 if r == '\n' {
 447                         if stopAtNewline {
 448                                 break
 449                         }
 450                         if s.nlIsSpace {
 451                                 continue
 452                         }
 453                         s.errorString("unexpected newline")
 454                         return
 455                 }
 456                 if !isSpace(r) {
 457                         s.UnreadRune()
 458                         break
 459                 }
 460         }
 461 }
 462
 463 // token returns the next space-delimited string from the input.  It
 464 // skips white space.  For Scanln, it stops at newlines.  For Scan,
 465 // newlines are treated as spaces.
 466 func (s *ss) token(skipSpace bool, f func(rune) bool) []byte {
 467         if skipSpace {
 468                 s.skipSpace(false)
 469         }
 470         // read until white space or newline
 471         for {
 472                 r := s.getRune()
 473                 if r == eof {
 474                         break
 475                 }
 476                 if !f(r) {
 477                         s.UnreadRune()
 478                         break
 479                 }
 480                 s.buf.WriteRune(r)
 481         }
 482         return s.buf
 483 }
 484
 485 var complexError = errors.New("syntax error scanning complex number")
 486 var boolError = errors.New("syntax error scanning boolean")
 487
 488 func indexRune(s string, r rune) int {
 489         for i, c := range s {
 490                 if c == r {
 491                         return i
 492                 }
 493         }
 494         return -1
 495 }
 496
 497 // consume reads the next rune in the input and reports whether it is in the ok string.
 498 // If accept is true, it puts the character into the input token.
 499 func (s *ss) consume(ok string, accept bool) bool {
 500         r := s.getRune()
 501         if r == eof {
 502                 return false
 503         }
 504         if indexRune(ok, r) >= 0 {
 505                 if accept {
 506                         s.buf.WriteRune(r)
 507                 }
 508                 return true
 509         }
 510         if r != eof && accept {
 511                 s.UnreadRune()
 512         }
 513         return false
 514 }
 515
 516 // peek reports whether the next character is in the ok string, without consuming it.
 517 func (s *ss) peek(ok string) bool {
 518         r := s.getRune()
 519         if r != eof {
 520                 s.UnreadRune()
 521         }
 522         return indexRune(ok, r) >= 0
 523 }
 524
 525 func (s *ss) notEOF() {
 526         // Guarantee there is data to be read.
 527         if r := s.getRune(); r == eof {
 528                 panic(io.EOF)
 529         }
 530         s.UnreadRune()
 531 }
 532
 533 // accept checks the next rune in the input.  If it's a byte (sic) in the string, it puts it in the
 534 // buffer and returns true. Otherwise it return false.
 535 func (s *ss) accept(ok string) bool {
 536         return s.consume(ok, true)
 537 }
 538
 539 // okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
 540 func (s *ss) okVerb(verb rune, okVerbs, typ string) bool {
 541         for _, v := range okVerbs {
 542                 if v == verb {
 543                         return true
 544                 }
 545         }
 546         s.errorString("bad verb %" + string(verb) + " for " + typ)
 547         return false
 548 }
 549
 550 // scanBool returns the value of the boolean represented by the next token.
 551 func (s *ss) scanBool(verb rune) bool {
 552         s.skipSpace(false)
 553         s.notEOF()
 554         if !s.okVerb(verb, "tv", "boolean") {
 555                 return false
 556         }
 557         // Syntax-checking a boolean is annoying.  We're not fastidious about case.
 558         switch s.getRune() {
 559         case '0':
 560                 return false
 561         case '1':
 562                 return true
 563         case 't', 'T':
 564                 if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
 565                         s.error(boolError)
 566                 }
 567                 return true
 568         case 'f', 'F':
 569                 if s.accept("aA") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
 570                         s.error(boolError)
 571                 }
 572                 return false
 573         }
 574         return false
 575 }
 576
// Numerical elements: the character sets accepted while scanning numbers.
const (
	binaryDigits      = "01"
	octalDigits       = "01234567"
	decimalDigits     = "0123456789"
	hexadecimalDigits = "0123456789aAbBcCdDeEfF"
	sign              = "+-"
	period            = "."
	exponent          = "eEp" // 'p' introduces a power-of-2 exponent (see convertFloat)
)
 587
 588 // getBase returns the numeric base represented by the verb and its digit string.
 589 func (s *ss) getBase(verb rune) (base int, digits string) {
 590         s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
 591         base = 10
 592         digits = decimalDigits
 593         switch verb {
 594         case 'b':
 595                 base = 2
 596                 digits = binaryDigits
 597         case 'o':
 598                 base = 8
 599                 digits = octalDigits
 600         case 'x', 'X', 'U':
 601                 base = 16
 602                 digits = hexadecimalDigits
 603         }
 604         return
 605 }
 606
 607 // scanNumber returns the numerical string with specified digits starting here.
 608 func (s *ss) scanNumber(digits string, haveDigits bool) string {
 609         if !haveDigits {
 610                 s.notEOF()
 611                 if !s.accept(digits) {
 612                         s.errorString("expected integer")
 613                 }
 614         }
 615         for s.accept(digits) {
 616         }
 617         return string(s.buf)
 618 }
 619
 620 // scanRune returns the next rune value in the input.
 621 func (s *ss) scanRune(bitSize int) int64 {
 622         s.notEOF()
 623         r := int64(s.getRune())
 624         n := uint(bitSize)
 625         x := (r << (64 - n)) >> (64 - n)
 626         if x != r {
 627                 s.errorString("overflow on character value " + string(r))
 628         }
 629         return r
 630 }
 631
 632 // scanBasePrefix reports whether the integer begins with a 0 or 0x,
 633 // and returns the base, digit string, and whether a zero was found.
 634 // It is called only if the verb is %v.
 635 func (s *ss) scanBasePrefix() (base int, digits string, found bool) {
 636         if !s.peek("0") {
 637                 return 10, decimalDigits, false
 638         }
 639         s.accept("0")
 640         found = true // We've put a digit into the token buffer.
 641         // Special cases for '0' && '0x'
 642         base, digits = 8, octalDigits
 643         if s.peek("xX") {
 644                 s.consume("xX", false)
 645                 base, digits = 16, hexadecimalDigits
 646         }
 647         return
 648 }
 649
 650 // scanInt returns the value of the integer represented by the next
 651 // token, checking for overflow.  Any error is stored in s.err.
 652 func (s *ss) scanInt(verb rune, bitSize int) int64 {
 653         if verb == 'c' {
 654                 return s.scanRune(bitSize)
 655         }
 656         s.skipSpace(false)
 657         s.notEOF()
 658         base, digits := s.getBase(verb)
 659         haveDigits := false
 660         if verb == 'U' {
 661                 if !s.consume("U", false) || !s.consume("+", false) {
 662                         s.errorString("bad unicode format ")
 663                 }
 664         } else {
 665                 s.accept(sign) // If there's a sign, it will be left in the token buffer.
 666                 if verb == 'v' {
 667                         base, digits, haveDigits = s.scanBasePrefix()
 668                 }
 669         }
 670         tok := s.scanNumber(digits, haveDigits)
 671         i, err := strconv.ParseInt(tok, base, 64)
 672         if err != nil {
 673                 s.error(err)
 674         }
 675         n := uint(bitSize)
 676         x := (i << (64 - n)) >> (64 - n)
 677         if x != i {
 678                 s.errorString("integer overflow on token " + tok)
 679         }
 680         return i
 681 }
 682
 683 // scanUint returns the value of the unsigned integer represented
 684 // by the next token, checking for overflow.  Any error is stored in s.err.
 685 func (s *ss) scanUint(verb rune, bitSize int) uint64 {
 686         if verb == 'c' {
 687                 return uint64(s.scanRune(bitSize))
 688         }
 689         s.skipSpace(false)
 690         s.notEOF()
 691         base, digits := s.getBase(verb)
 692         haveDigits := false
 693         if verb == 'U' {
 694                 if !s.consume("U", false) || !s.consume("+", false) {
 695                         s.errorString("bad unicode format ")
 696                 }
 697         } else if verb == 'v' {
 698                 base, digits, haveDigits = s.scanBasePrefix()
 699         }
 700         tok := s.scanNumber(digits, haveDigits)
 701         i, err := strconv.ParseUint(tok, base, 64)
 702         if err != nil {
 703                 s.error(err)
 704         }
 705         n := uint(bitSize)
 706         x := (i << (64 - n)) >> (64 - n)
 707         if x != i {
 708                 s.errorString("unsigned integer overflow on token " + tok)
 709         }
 710         return i
 711 }
 712
 713 // floatToken returns the floating-point number starting here, no longer than swid
 714 // if the width is specified. It's not rigorous about syntax because it doesn't check that
 715 // we have at least some digits, but Atof will do that.
 716 func (s *ss) floatToken() string {
 717         s.buf = s.buf[:0]
 718         // NaN?
 719         if s.accept("nN") && s.accept("aA") && s.accept("nN") {
 720                 return string(s.buf)
 721         }
 722         // leading sign?
 723         s.accept(sign)
 724         // Inf?
 725         if s.accept("iI") && s.accept("nN") && s.accept("fF") {
 726                 return string(s.buf)
 727         }
 728         // digits?
 729         for s.accept(decimalDigits) {
 730         }
 731         // decimal point?
 732         if s.accept(period) {
 733                 // fraction?
 734                 for s.accept(decimalDigits) {
 735                 }
 736         }
 737         // exponent?
 738         if s.accept(exponent) {
 739                 // leading sign?
 740                 s.accept(sign)
 741                 // digits?
 742                 for s.accept(decimalDigits) {
 743                 }
 744         }
 745         return string(s.buf)
 746 }
 747
 748 // complexTokens returns the real and imaginary parts of the complex number starting here.
 749 // The number might be parenthesized and has the format (N+Ni) where N is a floating-point
 750 // number and there are no spaces within.
 751 func (s *ss) complexTokens() (real, imag string) {
 752         // TODO: accept N and Ni independently?
 753         parens := s.accept("(")
 754         real = s.floatToken()
 755         s.buf = s.buf[:0]
 756         // Must now have a sign.
 757         if !s.accept("+-") {
 758                 s.error(complexError)
 759         }
 760         // Sign is now in buffer
 761         imagSign := string(s.buf)
 762         imag = s.floatToken()
 763         if !s.accept("i") {
 764                 s.error(complexError)
 765         }
 766         if parens && !s.accept(")") {
 767                 s.error(complexError)
 768         }
 769         return real, imagSign + imag
 770 }
 771
 772 // convertFloat converts the string to a float64value.
 773 func (s *ss) convertFloat(str string, n int) float64 {
 774         if p := indexRune(str, 'p'); p >= 0 {
 775                 // Atof doesn't handle power-of-2 exponents,
 776                 // but they're easy to evaluate.
 777                 f, err := strconv.ParseFloat(str[:p], n)
 778                 if err != nil {
 779                         // Put full string into error.
 780                         if e, ok := err.(*strconv.NumError); ok {
 781                                 e.Num = str
 782                         }
 783                         s.error(err)
 784                 }
 785                 m, err := strconv.Atoi(str[p+1:])
 786                 if err != nil {
 787                         // Put full string into error.
 788                         if e, ok := err.(*strconv.NumError); ok {
 789                                 e.Num = str
 790                         }
 791                         s.error(err)
 792                 }
 793                 return math.Ldexp(f, m)
 794         }
 795         f, err := strconv.ParseFloat(str, n)
 796         if err != nil {
 797                 s.error(err)
 798         }
 799         return f
 800 }
 801
 802 // convertComplex converts the next token to a complex128 value.
 803 // The atof argument is a type-specific reader for the underlying type.
 804 // If we're reading complex64, atof will parse float32s and convert them
 805 // to float64's to avoid reproducing this code for each complex type.
 806 func (s *ss) scanComplex(verb rune, n int) complex128 {
 807         if !s.okVerb(verb, floatVerbs, "complex") {
 808                 return 0
 809         }
 810         s.skipSpace(false)
 811         s.notEOF()
 812         sreal, simag := s.complexTokens()
 813         real := s.convertFloat(sreal, n/2)
 814         imag := s.convertFloat(simag, n/2)
 815         return complex(real, imag)
 816 }
 817
 818 // convertString returns the string represented by the next input characters.
 819 // The format of the input is determined by the verb.
 820 func (s *ss) convertString(verb rune) (str string) {
 821         if !s.okVerb(verb, "svqx", "string") {
 822                 return ""
 823         }
 824         s.skipSpace(false)
 825         s.notEOF()
 826         switch verb {
 827         case 'q':
 828                 str = s.quotedString()
 829         case 'x':
 830                 str = s.hexString()
 831         default:
 832                 str = string(s.token(true, notSpace)) // %s and %v just return the next word
 833         }
 834         return
 835 }
 836
 837 // quotedString returns the double- or back-quoted string represented by the next input characters.
 838 func (s *ss) quotedString() string {
 839         s.notEOF()
 840         quote := s.getRune()
 841         switch quote {
 842         case '`':
 843                 // Back-quoted: Anything goes until EOF or back quote.
 844                 for {
 845                         r := s.mustReadRune()
 846                         if r == quote {
 847                                 break
 848                         }
 849                         s.buf.WriteRune(r)
 850                 }
 851                 return string(s.buf)
 852         case '"':
 853                 // Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
 854                 s.buf.WriteRune(quote)
 855                 for {
 856                         r := s.mustReadRune()
 857                         s.buf.WriteRune(r)
 858                         if r == '\\' {
 859                                 // In a legal backslash escape, no matter how long, only the character
 860                                 // immediately after the escape can itself be a backslash or quote.
 861                                 // Thus we only need to protect the first character after the backslash.
 862                                 s.buf.WriteRune(s.mustReadRune())
 863                         } else if r == '"' {
 864                                 break
 865                         }
 866                 }
 867                 result, err := strconv.Unquote(string(s.buf))
 868                 if err != nil {
 869                         s.error(err)
 870                 }
 871                 return result
 872         default:
 873                 s.errorString("expected quoted string")
 874         }
 875         return ""
 876 }
 877
 878 // hexDigit returns the value of the hexadecimal digit
 879 func (s *ss) hexDigit(d rune) int {
 880         digit := int(d)
 881         switch digit {
 882         case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 883                 return digit - '0'
 884         case 'a', 'b', 'c', 'd', 'e', 'f':
 885                 return 10 + digit - 'a'
 886         case 'A', 'B', 'C', 'D', 'E', 'F':
 887                 return 10 + digit - 'A'
 888         }
 889         s.errorString("illegal hex digit")
 890         return 0
 891 }
 892
 893 // hexByte returns the next hex-encoded (two-character) byte from the input.
 894 // There must be either two hexadecimal digits or a space character in the input.
 895 func (s *ss) hexByte() (b byte, ok bool) {
 896         rune1 := s.getRune()
 897         if rune1 == eof {
 898                 return
 899         }
 900         if isSpace(rune1) {
 901                 s.UnreadRune()
 902                 return
 903         }
 904         rune2 := s.mustReadRune()
 905         return byte(s.hexDigit(rune1)<<4 | s.hexDigit(rune2)), true
 906 }
 907
 908 // hexString returns the space-delimited hexpair-encoded string.
 909 func (s *ss) hexString() string {
 910         s.notEOF()
 911         for {
 912                 b, ok := s.hexByte()
 913                 if !ok {
 914                         break
 915                 }
 916                 s.buf.WriteByte(b)
 917         }
 918         if len(s.buf) == 0 {
 919                 s.errorString("no hex data for %x string")
 920                 return ""
 921         }
 922         return string(s.buf)
 923 }
 924
 925 const floatVerbs = "beEfFgGv"
 926
 927 const hugeWid = 1 << 30
 928
 929 // scanOne scans a single value, deriving the scanner from the type of the argument.
 930 func (s *ss) scanOne(verb rune, arg interface{}) {
 931         s.buf = s.buf[:0]
 932         var err error
 933         // If the parameter has its own Scan method, use that.
 934         if v, ok := arg.(Scanner); ok {
 935                 err = v.Scan(s, verb)
 936                 if err != nil {
 937                         if err == io.EOF {
 938                                 err = io.ErrUnexpectedEOF
 939                         }
 940                         s.error(err)
 941                 }
 942                 return
 943         }
 944
 945         switch v := arg.(type) {
 946         case *bool:
 947                 *v = s.scanBool(verb)
 948         case *complex64:
 949                 *v = complex64(s.scanComplex(verb, 64))
 950         case *complex128:
 951                 *v = s.scanComplex(verb, 128)
 952         case *int:
 953                 *v = int(s.scanInt(verb, intBits))
 954         case *int8:
 955                 *v = int8(s.scanInt(verb, 8))
 956         case *int16:
 957                 *v = int16(s.scanInt(verb, 16))
 958         case *int32:
 959                 *v = int32(s.scanInt(verb, 32))
 960         case *int64:
 961                 *v = s.scanInt(verb, 64)
 962         case *uint:
 963                 *v = uint(s.scanUint(verb, intBits))
 964         case *uint8:
 965                 *v = uint8(s.scanUint(verb, 8))
 966         case *uint16:
 967                 *v = uint16(s.scanUint(verb, 16))
 968         case *uint32:
 969                 *v = uint32(s.scanUint(verb, 32))
 970         case *uint64:
 971                 *v = s.scanUint(verb, 64)
 972         case *uintptr:
 973                 *v = uintptr(s.scanUint(verb, uintptrBits))
 974         // Floats are tricky because you want to scan in the precision of the result, not
 975         // scan in high precision and convert, in order to preserve the correct error condition.
 976         case *float32:
 977                 if s.okVerb(verb, floatVerbs, "float32") {
 978                         s.skipSpace(false)
 979                         s.notEOF()
 980                         *v = float32(s.convertFloat(s.floatToken(), 32))
 981                 }
 982         case *float64:
 983                 if s.okVerb(verb, floatVerbs, "float64") {
 984                         s.skipSpace(false)
 985                         s.notEOF()
 986                         *v = s.convertFloat(s.floatToken(), 64)
 987                 }
 988         case *string:
 989                 *v = s.convertString(verb)
 990         case *[]byte:
 991                 // We scan to string and convert so we get a copy of the data.
 992                 // If we scanned to bytes, the slice would point at the buffer.
 993                 *v = []byte(s.convertString(verb))
 994         default:
 995                 val := reflect.ValueOf(v)
 996                 ptr := val
 997                 if ptr.Kind() != reflect.Ptr {
 998                         s.errorString("type not a pointer: " + val.Type().String())
 999                         return
1000                 }
1001                 switch v := ptr.Elem(); v.Kind() {
1002                 case reflect.Bool:
1003                         v.SetBool(s.scanBool(verb))
1004                 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
1005                         v.SetInt(s.scanInt(verb, v.Type().Bits()))
1006                 case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
1007                         v.SetUint(s.scanUint(verb, v.Type().Bits()))
1008                 case reflect.String:
1009                         v.SetString(s.convertString(verb))
1010                 case reflect.Slice:
1011                         // For now, can only handle (renamed) []byte.
1012                         typ := v.Type()
1013                         if typ.Elem().Kind() != reflect.Uint8 {
1014                                 s.errorString("can't scan type: " + val.Type().String())
1015                         }
1016                         str := s.convertString(verb)
1017                         v.Set(reflect.MakeSlice(typ, len(str), len(str)))
1018                         for i := 0; i < len(str); i++ {
1019                                 v.Index(i).SetUint(uint64(str[i]))
1020                         }
1021                 case reflect.Float32, reflect.Float64:
1022                         s.skipSpace(false)
1023                         s.notEOF()
1024                         v.SetFloat(s.convertFloat(s.floatToken(), v.Type().Bits()))
1025                 case reflect.Complex64, reflect.Complex128:
1026                         v.SetComplex(s.scanComplex(verb, v.Type().Bits()))
1027                 default:
1028                         s.errorString("can't scan type: " + val.Type().String())
1029                 }
1030         }
1031 }
1032
1033 // errorHandler turns local panics into error returns.
1034 func errorHandler(errp *error) {
1035         if e := recover(); e != nil {
1036                 if se, ok := e.(scanError); ok { // catch local error
1037                         *errp = se.err
1038                 } else if eof, ok := e.(error); ok && eof == io.EOF { // out of input
1039                         *errp = eof
1040                 } else {
1041                         panic(e)
1042                 }
1043         }
1044 }
1045
1046 // doScan does the real work for scanning without a format string.
1047 func (s *ss) doScan(a []interface{}) (numProcessed int, err error) {
1048         defer errorHandler(&err)
1049         for _, arg := range a {
1050                 s.scanOne('v', arg)
1051                 numProcessed++
1052         }
1053         // Check for newline if required.
1054         if !s.nlIsSpace {
1055                 for {
1056                         r := s.getRune()
1057                         if r == '\n' || r == eof {
1058                                 break
1059                         }
1060                         if !isSpace(r) {
1061                                 s.errorString("expected newline")
1062                                 break
1063                         }
1064                 }
1065         }
1066         return
1067 }
1068
1069 // advance determines whether the next characters in the input match
1070 // those of the format.  It returns the number of bytes (sic) consumed
1071 // in the format. Newlines included, all runs of space characters in
1072 // either input or format behave as a single space. This routine also
1073 // handles the %% case.  If the return value is zero, either format
1074 // starts with a % (with no following %) or the input is empty.
1075 // If it is negative, the input did not match the string.
1076 func (s *ss) advance(format string) (i int) {
1077         for i < len(format) {
1078                 fmtc, w := utf8.DecodeRuneInString(format[i:])
1079                 if fmtc == '%' {
1080                         // %% acts like a real percent
1081                         nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
1082                         if nextc != '%' {
1083                                 return
1084                         }
1085                         i += w // skip the first %
1086                 }
1087                 sawSpace := false
1088                 for isSpace(fmtc) && i < len(format) {
1089                         sawSpace = true
1090                         i += w
1091                         fmtc, w = utf8.DecodeRuneInString(format[i:])
1092                 }
1093                 if sawSpace {
1094                         // There was space in the format, so there should be space (EOF)
1095                         // in the input.
1096                         inputc := s.getRune()
1097                         if inputc == eof || inputc == '\n' {
1098                                 // If we've reached a newline, stop now; don't read ahead.
1099                                 return
1100                         }
1101                         if !isSpace(inputc) {
1102                                 // Space in format but not in input: error
1103                                 s.errorString("expected space in input to match format")
1104                         }
1105                         s.skipSpace(true)
1106                         continue
1107                 }
1108                 inputc := s.mustReadRune()
1109                 if fmtc != inputc {
1110                         s.UnreadRune()
1111                         return -1
1112                 }
1113                 i += w
1114         }
1115         return
1116 }
1117
1118 // doScanf does the real work when scanning with a format string.
1119 //  At the moment, it handles only pointers to basic types.
1120 func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err error) {
1121         defer errorHandler(&err)
1122         end := len(format) - 1
1123         // We process one item per non-trivial format
1124         for i := 0; i <= end; {
1125                 w := s.advance(format[i:])
1126                 if w > 0 {
1127                         i += w
1128                         continue
1129                 }
1130                 // Either we failed to advance, we have a percent character, or we ran out of input.
1131                 if format[i] != '%' {
1132                         // Can't advance format.  Why not?
1133                         if w < 0 {
1134                                 s.errorString("input does not match format")
1135                         }
1136                         // Otherwise at EOF; "too many operands" error handled below
1137                         break
1138                 }
1139                 i++ // % is one byte
1140
1141                 // do we have 20 (width)?
1142                 var widPresent bool
1143                 s.maxWid, widPresent, i = parsenum(format, i, end)
1144                 if !widPresent {
1145                         s.maxWid = hugeWid
1146                 }
1147                 s.argLimit = s.limit
1148                 if f := s.count + s.maxWid; f < s.argLimit {
1149                         s.argLimit = f
1150                 }
1151
1152                 c, w := utf8.DecodeRuneInString(format[i:])
1153                 i += w
1154
1155                 if numProcessed >= len(a) { // out of operands
1156                         s.errorString("too few operands for format %" + format[i-w:])
1157                         break
1158                 }
1159                 arg := a[numProcessed]
1160
1161                 s.scanOne(c, arg)
1162                 numProcessed++
1163                 s.argLimit = s.limit
1164         }
1165         if numProcessed < len(a) {
1166                 s.errorString("too many operands")
1167         }
1168         return
1169 }