libgo/go/fmt/scan.go

   1 // Copyright 2010 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package fmt
   6
   7 import (
   8         "errors"
   9         "io"
  10         "math"
  11         "os"
  12         "reflect"
  13         "strconv"
  14         "sync"
  15         "unicode/utf8"
  16 )
  17
// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
type ScanState interface {
	// ReadRune reads the next rune (Unicode code point) from the input
	// and returns it together with its size and any error.
	// If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
	// return EOF after returning the first '\n' or when reading beyond
	// the specified width.
	ReadRune() (r rune, size int, err error)
	// UnreadRune causes the next call to ReadRune to return the same rune.
	UnreadRune() error
	// SkipSpace skips space in the input. Newlines are treated appropriately
	// for the operation being performed; see the package documentation
	// for more information.
	SkipSpace()
	// Token skips space in the input if skipSpace is true, then returns the
	// run of Unicode code points c satisfying f(c).  If f is nil,
	// !unicode.IsSpace(c) is used; that is, the token will hold non-space
	// characters. Newlines are treated appropriately for the operation being
	// performed; see the package documentation for more information.
	// The returned slice points to shared data that may be overwritten
	// by the next call to Token, a call to a Scan function using the ScanState
	// as input, or when the calling Scan method returns. Callers that need
	// to keep the token must copy it.
	Token(skipSpace bool, f func(rune) bool) (token []byte, err error)
	// Width returns the value of the width option and whether it has been set.
	// The unit is Unicode code points.
	Width() (wid int, ok bool)
	// Read is present only so that a ScanState also satisfies io.Reader.
	// Because ReadRune is implemented by the interface, Read should never be
	// called by the scanning routines and a valid implementation of
	// ScanState may choose always to return an error from Read.
	Read(buf []byte) (n int, err error)
}
  50
// Scanner is implemented by any value that has a Scan method, which scans
// the input for the representation of a value and stores the result in the
// receiver, which must be a pointer to be useful. The Scan method is called
// for any argument to Scan, Scanf, or Scanln that implements it.
type Scanner interface {
	// Scan reads the value's representation from state, interpreting it
	// according to verb, and returns a non-nil error on failure.
	Scan(state ScanState, verb rune) error
}
  58
  59 // Scan scans text read from standard input, storing successive
  60 // space-separated values into successive arguments. Newlines count
  61 // as space. It returns the number of items successfully scanned.
  62 // If that is less than the number of arguments, err will report why.
  63 func Scan(a ...interface{}) (n int, err error) {
  64         return Fscan(os.Stdin, a...)
  65 }
  66
  67 // Scanln is similar to Scan, but stops scanning at a newline and
  68 // after the final item there must be a newline or EOF.
  69 func Scanln(a ...interface{}) (n int, err error) {
  70         return Fscanln(os.Stdin, a...)
  71 }
  72
  73 // Scanf scans text read from standard input, storing successive
  74 // space-separated values into successive arguments as determined by
  75 // the format. It returns the number of items successfully scanned.
  76 // If that is less than the number of arguments, err will report why.
  77 // Newlines in the input must match newlines in the format.
  78 // The one exception: the verb %c always scans the next rune in the
  79 // input, even if it is a space (or tab etc.) or newline.
  80 func Scanf(format string, a ...interface{}) (n int, err error) {
  81         return Fscanf(os.Stdin, format, a...)
  82 }
  83
  84 type stringReader string
  85
  86 func (r *stringReader) Read(b []byte) (n int, err error) {
  87         n = copy(b, *r)
  88         *r = (*r)[n:]
  89         if n == 0 {
  90                 err = io.EOF
  91         }
  92         return
  93 }
  94
  95 // Sscan scans the argument string, storing successive space-separated
  96 // values into successive arguments. Newlines count as space. It
  97 // returns the number of items successfully scanned. If that is less
  98 // than the number of arguments, err will report why.
  99 func Sscan(str string, a ...interface{}) (n int, err error) {
 100         return Fscan((*stringReader)(&str), a...)
 101 }
 102
 103 // Sscanln is similar to Sscan, but stops scanning at a newline and
 104 // after the final item there must be a newline or EOF.
 105 func Sscanln(str string, a ...interface{}) (n int, err error) {
 106         return Fscanln((*stringReader)(&str), a...)
 107 }
 108
 109 // Sscanf scans the argument string, storing successive space-separated
 110 // values into successive arguments as determined by the format. It
 111 // returns the number of items successfully parsed.
 112 // Newlines in the input must match newlines in the format.
 113 func Sscanf(str string, format string, a ...interface{}) (n int, err error) {
 114         return Fscanf((*stringReader)(&str), format, a...)
 115 }
 116
 117 // Fscan scans text read from r, storing successive space-separated
 118 // values into successive arguments. Newlines count as space. It
 119 // returns the number of items successfully scanned. If that is less
 120 // than the number of arguments, err will report why.
 121 func Fscan(r io.Reader, a ...interface{}) (n int, err error) {
 122         s, old := newScanState(r, true, false)
 123         n, err = s.doScan(a)
 124         s.free(old)
 125         return
 126 }
 127
 128 // Fscanln is similar to Fscan, but stops scanning at a newline and
 129 // after the final item there must be a newline or EOF.
 130 func Fscanln(r io.Reader, a ...interface{}) (n int, err error) {
 131         s, old := newScanState(r, false, true)
 132         n, err = s.doScan(a)
 133         s.free(old)
 134         return
 135 }
 136
 137 // Fscanf scans text read from r, storing successive space-separated
 138 // values into successive arguments as determined by the format. It
 139 // returns the number of items successfully parsed.
 140 // Newlines in the input must match newlines in the format.
 141 func Fscanf(r io.Reader, format string, a ...interface{}) (n int, err error) {
 142         s, old := newScanState(r, false, false)
 143         n, err = s.doScanf(format, a)
 144         s.free(old)
 145         return
 146 }
 147
// scanError represents an error generated by the scanning software.
// It's used as a unique signature to identify such errors when recovering:
// ss.error and ss.errorString panic with a scanError, and recovery code such
// as Token's deferred function unwraps this type and re-panics anything else.
type scanError struct {
	err error // the underlying error being reported
}
 153
// eof is the sentinel value getRune returns in place of a rune when the
// input is exhausted. It is negative, so it never matches a rune read
// from the input.
const eof = -1
 155
// ss is the internal implementation of ScanState.
// The embedded ssave holds the mode and width limits that are saved and
// restored around recursive scans.
type ss struct {
	rs    io.RuneScanner // where to read input
	buf   buffer         // token accumulator
	count int            // runes consumed so far.
	atEOF bool           // already read EOF
	ssave
}
 164
// ssave holds the parts of ss that need to be
// saved and restored on recursive scans.
// The three width fields are set to hugeWid by newScanState; Width treats
// maxWid == hugeWid as "no width set".
type ssave struct {
	validSave bool // is or was a part of an actual ss.
	nlIsEnd   bool // whether newline terminates scan
	nlIsSpace bool // whether newline counts as white space
	argLimit  int  // max value of ss.count for this arg; argLimit <= limit
	limit     int  // max value of ss.count.
	maxWid    int  // width of this arg.
}
 175
 176 // The Read method is only in ScanState so that ScanState
 177 // satisfies io.Reader. It will never be called when used as
 178 // intended, so there is no need to make it actually work.
 179 func (s *ss) Read(buf []byte) (n int, err error) {
 180         return 0, errors.New("ScanState's Read should not be called. Use ReadRune")
 181 }
 182
 183 func (s *ss) ReadRune() (r rune, size int, err error) {
 184         if s.atEOF || s.count >= s.argLimit {
 185                 err = io.EOF
 186                 return
 187         }
 188
 189         r, size, err = s.rs.ReadRune()
 190         if err == nil {
 191                 s.count++
 192                 if s.nlIsEnd && r == '\n' {
 193                         s.atEOF = true
 194                 }
 195         } else if err == io.EOF {
 196                 s.atEOF = true
 197         }
 198         return
 199 }
 200
 201 func (s *ss) Width() (wid int, ok bool) {
 202         if s.maxWid == hugeWid {
 203                 return 0, false
 204         }
 205         return s.maxWid, true
 206 }
 207
 208 // The public method returns an error; this private one panics.
 209 // If getRune reaches EOF, the return value is EOF (-1).
 210 func (s *ss) getRune() (r rune) {
 211         r, _, err := s.ReadRune()
 212         if err != nil {
 213                 if err == io.EOF {
 214                         return eof
 215                 }
 216                 s.error(err)
 217         }
 218         return
 219 }
 220
 221 // mustReadRune turns io.EOF into a panic(io.ErrUnexpectedEOF).
 222 // It is called in cases such as string scanning where an EOF is a
 223 // syntax error.
 224 func (s *ss) mustReadRune() (r rune) {
 225         r = s.getRune()
 226         if r == eof {
 227                 s.error(io.ErrUnexpectedEOF)
 228         }
 229         return
 230 }
 231
 232 func (s *ss) UnreadRune() error {
 233         s.rs.UnreadRune()
 234         s.atEOF = false
 235         s.count--
 236         return nil
 237 }
 238
 239 func (s *ss) error(err error) {
 240         panic(scanError{err})
 241 }
 242
 243 func (s *ss) errorString(err string) {
 244         panic(scanError{errors.New(err)})
 245 }
 246
 247 func (s *ss) Token(skipSpace bool, f func(rune) bool) (tok []byte, err error) {
 248         defer func() {
 249                 if e := recover(); e != nil {
 250                         if se, ok := e.(scanError); ok {
 251                                 err = se.err
 252                         } else {
 253                                 panic(e)
 254                         }
 255                 }
 256         }()
 257         if f == nil {
 258                 f = notSpace
 259         }
 260         s.buf = s.buf[:0]
 261         tok = s.token(skipSpace, f)
 262         return
 263 }
 264
 265 // space is a copy of the unicode.White_Space ranges,
 266 // to avoid depending on package unicode.
 267 var space = [][2]uint16{
 268         {0x0009, 0x000d},
 269         {0x0020, 0x0020},
 270         {0x0085, 0x0085},
 271         {0x00a0, 0x00a0},
 272         {0x1680, 0x1680},
 273         {0x2000, 0x200a},
 274         {0x2028, 0x2029},
 275         {0x202f, 0x202f},
 276         {0x205f, 0x205f},
 277         {0x3000, 0x3000},
 278 }
 279
 280 func isSpace(r rune) bool {
 281         if r >= 1<<16 {
 282                 return false
 283         }
 284         rx := uint16(r)
 285         for _, rng := range space {
 286                 if rx < rng[0] {
 287                         return false
 288                 }
 289                 if rx <= rng[1] {
 290                         return true
 291                 }
 292         }
 293         return false
 294 }
 295
 296 // notSpace is the default scanning function used in Token.
 297 func notSpace(r rune) bool {
 298         return !isSpace(r)
 299 }
 300
 301 // SkipSpace provides Scan methods the ability to skip space and newline
 302 // characters in keeping with the current scanning mode set by format strings
 303 // and Scan/Scanln.
 304 func (s *ss) SkipSpace() {
 305         s.skipSpace(false)
 306 }
 307
 308 // readRune is a structure to enable reading UTF-8 encoded code points
 309 // from an io.Reader. It is used if the Reader given to the scanner does
 310 // not already implement io.RuneScanner.
 311 type readRune struct {
 312         reader   io.Reader
 313         buf      [utf8.UTFMax]byte // used only inside ReadRune
 314         pending  int               // number of bytes in pendBuf; only >0 for bad UTF-8
 315         pendBuf  [utf8.UTFMax]byte // bytes left over
 316         peekRune rune              // if >=0 next rune; when <0 is ^(previous Rune)
 317 }
 318
 319 // readByte returns the next byte from the input, which may be
 320 // left over from a previous read if the UTF-8 was ill-formed.
 321 func (r *readRune) readByte() (b byte, err error) {
 322         if r.pending > 0 {
 323                 b = r.pendBuf[0]
 324                 copy(r.pendBuf[0:], r.pendBuf[1:])
 325                 r.pending--
 326                 return
 327         }
 328         n, err := io.ReadFull(r.reader, r.pendBuf[:1])
 329         if n != 1 {
 330                 return 0, err
 331         }
 332         return r.pendBuf[0], err
 333 }
 334
 335 // ReadRune returns the next UTF-8 encoded code point from the
 336 // io.Reader inside r.
 337 func (r *readRune) ReadRune() (rr rune, size int, err error) {
 338         if r.peekRune >= 0 {
 339                 rr = r.peekRune
 340                 r.peekRune = ^r.peekRune
 341                 size = utf8.RuneLen(rr)
 342                 return
 343         }
 344         r.buf[0], err = r.readByte()
 345         if err != nil {
 346                 return
 347         }
 348         if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
 349                 rr = rune(r.buf[0])
 350                 size = 1 // Known to be 1.
 351                 // Flip the bits of the rune so it's available to UnreadRune.
 352                 r.peekRune = ^rr
 353                 return
 354         }
 355         var n int
 356         for n = 1; !utf8.FullRune(r.buf[:n]); n++ {
 357                 r.buf[n], err = r.readByte()
 358                 if err != nil {
 359                         if err == io.EOF {
 360                                 err = nil
 361                                 break
 362                         }
 363                         return
 364                 }
 365         }
 366         rr, size = utf8.DecodeRune(r.buf[:n])
 367         if size < n { // an error, save the bytes for the next read
 368                 copy(r.pendBuf[r.pending:], r.buf[size:n])
 369                 r.pending += n - size
 370         }
 371         // Flip the bits of the rune so it's available to UnreadRune.
 372         r.peekRune = ^rr
 373         return
 374 }
 375
 376 func (r *readRune) UnreadRune() error {
 377         if r.peekRune >= 0 {
 378                 return errors.New("fmt: scanning called UnreadRune with no rune available")
 379         }
 380         // Reverse bit flip of previously read rune to obtain valid >=0 state.
 381         r.peekRune = ^r.peekRune
 382         return nil
 383 }
 384
// ssFree is a pool of ss values for reuse across scans: newScanState takes
// one out with Get and free hands it back with Put. New allocates a fresh,
// zeroed ss when the pool has nothing to offer.
var ssFree = sync.Pool{
	New: func() interface{} { return new(ss) },
}
 388
 389 // newScanState allocates a new ss struct or grab a cached one.
 390 func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
 391         s = ssFree.Get().(*ss)
 392         if rs, ok := r.(io.RuneScanner); ok {
 393                 s.rs = rs
 394         } else {
 395                 s.rs = &readRune{reader: r, peekRune: -1}
 396         }
 397         s.nlIsSpace = nlIsSpace
 398         s.nlIsEnd = nlIsEnd
 399         s.atEOF = false
 400         s.limit = hugeWid
 401         s.argLimit = hugeWid
 402         s.maxWid = hugeWid
 403         s.validSave = true
 404         s.count = 0
 405         return
 406 }
 407
 408 // free saves used ss structs in ssFree; avoid an allocation per invocation.
 409 func (s *ss) free(old ssave) {
 410         // If it was used recursively, just restore the old state.
 411         if old.validSave {
 412                 s.ssave = old
 413                 return
 414         }
 415         // Don't hold on to ss structs with large buffers.
 416         if cap(s.buf) > 1024 {
 417                 return
 418         }
 419         s.buf = s.buf[:0]
 420         s.rs = nil
 421         ssFree.Put(s)
 422 }
 423
 424 // skipSpace skips spaces and maybe newlines.
 425 func (s *ss) skipSpace(stopAtNewline bool) {
 426         for {
 427                 r := s.getRune()
 428                 if r == eof {
 429                         return
 430                 }
 431                 if r == '\r' && s.peek("\n") {
 432                         continue
 433                 }
 434                 if r == '\n' {
 435                         if stopAtNewline {
 436                                 break
 437                         }
 438                         if s.nlIsSpace {
 439                                 continue
 440                         }
 441                         s.errorString("unexpected newline")
 442                         return
 443                 }
 444                 if !isSpace(r) {
 445                         s.UnreadRune()
 446                         break
 447                 }
 448         }
 449 }
 450
 451 // token returns the next space-delimited string from the input. It
 452 // skips white space. For Scanln, it stops at newlines. For Scan,
 453 // newlines are treated as spaces.
 454 func (s *ss) token(skipSpace bool, f func(rune) bool) []byte {
 455         if skipSpace {
 456                 s.skipSpace(false)
 457         }
 458         // read until white space or newline
 459         for {
 460                 r := s.getRune()
 461                 if r == eof {
 462                         break
 463                 }
 464                 if !f(r) {
 465                         s.UnreadRune()
 466                         break
 467                 }
 468                 s.buf.WriteRune(r)
 469         }
 470         return s.buf
 471 }
 472
// complexError is the scan error raised by complexTokens when the input
// does not have the expected complex-number form.
var complexError = errors.New("syntax error scanning complex number")

// boolError is the scan error raised by scanBool when a word that starts
// like "true" or "false" is not completed correctly.
var boolError = errors.New("syntax error scanning boolean")
 475
 476 func indexRune(s string, r rune) int {
 477         for i, c := range s {
 478                 if c == r {
 479                         return i
 480                 }
 481         }
 482         return -1
 483 }
 484
 485 // consume reads the next rune in the input and reports whether it is in the ok string.
 486 // If accept is true, it puts the character into the input token.
 487 func (s *ss) consume(ok string, accept bool) bool {
 488         r := s.getRune()
 489         if r == eof {
 490                 return false
 491         }
 492         if indexRune(ok, r) >= 0 {
 493                 if accept {
 494                         s.buf.WriteRune(r)
 495                 }
 496                 return true
 497         }
 498         if r != eof && accept {
 499                 s.UnreadRune()
 500         }
 501         return false
 502 }
 503
 504 // peek reports whether the next character is in the ok string, without consuming it.
 505 func (s *ss) peek(ok string) bool {
 506         r := s.getRune()
 507         if r != eof {
 508                 s.UnreadRune()
 509         }
 510         return indexRune(ok, r) >= 0
 511 }
 512
 513 func (s *ss) notEOF() {
 514         // Guarantee there is data to be read.
 515         if r := s.getRune(); r == eof {
 516                 panic(io.EOF)
 517         }
 518         s.UnreadRune()
 519 }
 520
 521 // accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
 522 // buffer and returns true. Otherwise it return false.
 523 func (s *ss) accept(ok string) bool {
 524         return s.consume(ok, true)
 525 }
 526
 527 // okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
 528 func (s *ss) okVerb(verb rune, okVerbs, typ string) bool {
 529         for _, v := range okVerbs {
 530                 if v == verb {
 531                         return true
 532                 }
 533         }
 534         s.errorString("bad verb '%" + string(verb) + "' for " + typ)
 535         return false
 536 }
 537
 538 // scanBool returns the value of the boolean represented by the next token.
 539 func (s *ss) scanBool(verb rune) bool {
 540         s.skipSpace(false)
 541         s.notEOF()
 542         if !s.okVerb(verb, "tv", "boolean") {
 543                 return false
 544         }
 545         // Syntax-checking a boolean is annoying. We're not fastidious about case.
 546         switch s.getRune() {
 547         case '0':
 548                 return false
 549         case '1':
 550                 return true
 551         case 't', 'T':
 552                 if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
 553                         s.error(boolError)
 554                 }
 555                 return true
 556         case 'f', 'F':
 557                 if s.accept("aA") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
 558                         s.error(boolError)
 559                 }
 560                 return false
 561         }
 562         return false
 563 }
 564
// Numerical elements: the sets of runes accepted while scanning numbers.
// Each constant is passed to accept/peek/consume as the "ok" string.
const (
	binaryDigits      = "01"
	octalDigits       = "01234567"
	decimalDigits     = "0123456789"
	hexadecimalDigits = "0123456789aAbBcCdDeEfF"
	sign              = "+-"
	period            = "."
	exponent          = "eEp" // 'p' introduces a power-of-two exponent; see convertFloat
)
 575
 576 // getBase returns the numeric base represented by the verb and its digit string.
 577 func (s *ss) getBase(verb rune) (base int, digits string) {
 578         s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
 579         base = 10
 580         digits = decimalDigits
 581         switch verb {
 582         case 'b':
 583                 base = 2
 584                 digits = binaryDigits
 585         case 'o':
 586                 base = 8
 587                 digits = octalDigits
 588         case 'x', 'X', 'U':
 589                 base = 16
 590                 digits = hexadecimalDigits
 591         }
 592         return
 593 }
 594
 595 // scanNumber returns the numerical string with specified digits starting here.
 596 func (s *ss) scanNumber(digits string, haveDigits bool) string {
 597         if !haveDigits {
 598                 s.notEOF()
 599                 if !s.accept(digits) {
 600                         s.errorString("expected integer")
 601                 }
 602         }
 603         for s.accept(digits) {
 604         }
 605         return string(s.buf)
 606 }
 607
 608 // scanRune returns the next rune value in the input.
 609 func (s *ss) scanRune(bitSize int) int64 {
 610         s.notEOF()
 611         r := int64(s.getRune())
 612         n := uint(bitSize)
 613         x := (r << (64 - n)) >> (64 - n)
 614         if x != r {
 615                 s.errorString("overflow on character value " + string(r))
 616         }
 617         return r
 618 }
 619
 620 // scanBasePrefix reports whether the integer begins with a 0 or 0x,
 621 // and returns the base, digit string, and whether a zero was found.
 622 // It is called only if the verb is %v.
 623 func (s *ss) scanBasePrefix() (base int, digits string, found bool) {
 624         if !s.peek("0") {
 625                 return 10, decimalDigits, false
 626         }
 627         s.accept("0")
 628         found = true // We've put a digit into the token buffer.
 629         // Special cases for '0' && '0x'
 630         base, digits = 8, octalDigits
 631         if s.peek("xX") {
 632                 s.consume("xX", false)
 633                 base, digits = 16, hexadecimalDigits
 634         }
 635         return
 636 }
 637
 638 // scanInt returns the value of the integer represented by the next
 639 // token, checking for overflow. Any error is stored in s.err.
 640 func (s *ss) scanInt(verb rune, bitSize int) int64 {
 641         if verb == 'c' {
 642                 return s.scanRune(bitSize)
 643         }
 644         s.skipSpace(false)
 645         s.notEOF()
 646         base, digits := s.getBase(verb)
 647         haveDigits := false
 648         if verb == 'U' {
 649                 if !s.consume("U", false) || !s.consume("+", false) {
 650                         s.errorString("bad unicode format ")
 651                 }
 652         } else {
 653                 s.accept(sign) // If there's a sign, it will be left in the token buffer.
 654                 if verb == 'v' {
 655                         base, digits, haveDigits = s.scanBasePrefix()
 656                 }
 657         }
 658         tok := s.scanNumber(digits, haveDigits)
 659         i, err := strconv.ParseInt(tok, base, 64)
 660         if err != nil {
 661                 s.error(err)
 662         }
 663         n := uint(bitSize)
 664         x := (i << (64 - n)) >> (64 - n)
 665         if x != i {
 666                 s.errorString("integer overflow on token " + tok)
 667         }
 668         return i
 669 }
 670
 671 // scanUint returns the value of the unsigned integer represented
 672 // by the next token, checking for overflow. Any error is stored in s.err.
 673 func (s *ss) scanUint(verb rune, bitSize int) uint64 {
 674         if verb == 'c' {
 675                 return uint64(s.scanRune(bitSize))
 676         }
 677         s.skipSpace(false)
 678         s.notEOF()
 679         base, digits := s.getBase(verb)
 680         haveDigits := false
 681         if verb == 'U' {
 682                 if !s.consume("U", false) || !s.consume("+", false) {
 683                         s.errorString("bad unicode format ")
 684                 }
 685         } else if verb == 'v' {
 686                 base, digits, haveDigits = s.scanBasePrefix()
 687         }
 688         tok := s.scanNumber(digits, haveDigits)
 689         i, err := strconv.ParseUint(tok, base, 64)
 690         if err != nil {
 691                 s.error(err)
 692         }
 693         n := uint(bitSize)
 694         x := (i << (64 - n)) >> (64 - n)
 695         if x != i {
 696                 s.errorString("unsigned integer overflow on token " + tok)
 697         }
 698         return i
 699 }
 700
 701 // floatToken returns the floating-point number starting here, no longer than swid
 702 // if the width is specified. It's not rigorous about syntax because it doesn't check that
 703 // we have at least some digits, but Atof will do that.
 704 func (s *ss) floatToken() string {
 705         s.buf = s.buf[:0]
 706         // NaN?
 707         if s.accept("nN") && s.accept("aA") && s.accept("nN") {
 708                 return string(s.buf)
 709         }
 710         // leading sign?
 711         s.accept(sign)
 712         // Inf?
 713         if s.accept("iI") && s.accept("nN") && s.accept("fF") {
 714                 return string(s.buf)
 715         }
 716         // digits?
 717         for s.accept(decimalDigits) {
 718         }
 719         // decimal point?
 720         if s.accept(period) {
 721                 // fraction?
 722                 for s.accept(decimalDigits) {
 723                 }
 724         }
 725         // exponent?
 726         if s.accept(exponent) {
 727                 // leading sign?
 728                 s.accept(sign)
 729                 // digits?
 730                 for s.accept(decimalDigits) {
 731                 }
 732         }
 733         return string(s.buf)
 734 }
 735
 736 // complexTokens returns the real and imaginary parts of the complex number starting here.
 737 // The number might be parenthesized and has the format (N+Ni) where N is a floating-point
 738 // number and there are no spaces within.
 739 func (s *ss) complexTokens() (real, imag string) {
 740         // TODO: accept N and Ni independently?
 741         parens := s.accept("(")
 742         real = s.floatToken()
 743         s.buf = s.buf[:0]
 744         // Must now have a sign.
 745         if !s.accept("+-") {
 746                 s.error(complexError)
 747         }
 748         // Sign is now in buffer
 749         imagSign := string(s.buf)
 750         imag = s.floatToken()
 751         if !s.accept("i") {
 752                 s.error(complexError)
 753         }
 754         if parens && !s.accept(")") {
 755                 s.error(complexError)
 756         }
 757         return real, imagSign + imag
 758 }
 759
 760 // convertFloat converts the string to a float64value.
 761 func (s *ss) convertFloat(str string, n int) float64 {
 762         if p := indexRune(str, 'p'); p >= 0 {
 763                 // Atof doesn't handle power-of-2 exponents,
 764                 // but they're easy to evaluate.
 765                 f, err := strconv.ParseFloat(str[:p], n)
 766                 if err != nil {
 767                         // Put full string into error.
 768                         if e, ok := err.(*strconv.NumError); ok {
 769                                 e.Num = str
 770                         }
 771                         s.error(err)
 772                 }
 773                 m, err := strconv.Atoi(str[p+1:])
 774                 if err != nil {
 775                         // Put full string into error.
 776                         if e, ok := err.(*strconv.NumError); ok {
 777                                 e.Num = str
 778                         }
 779                         s.error(err)
 780                 }
 781                 return math.Ldexp(f, m)
 782         }
 783         f, err := strconv.ParseFloat(str, n)
 784         if err != nil {
 785                 s.error(err)
 786         }
 787         return f
 788 }
 789
 790 // convertComplex converts the next token to a complex128 value.
 791 // The atof argument is a type-specific reader for the underlying type.
 792 // If we're reading complex64, atof will parse float32s and convert them
 793 // to float64's to avoid reproducing this code for each complex type.
 794 func (s *ss) scanComplex(verb rune, n int) complex128 {
 795         if !s.okVerb(verb, floatVerbs, "complex") {
 796                 return 0
 797         }
 798         s.skipSpace(false)
 799         s.notEOF()
 800         sreal, simag := s.complexTokens()
 801         real := s.convertFloat(sreal, n/2)
 802         imag := s.convertFloat(simag, n/2)
 803         return complex(real, imag)
 804 }
 805
 806 // convertString returns the string represented by the next input characters.
 807 // The format of the input is determined by the verb.
 808 func (s *ss) convertString(verb rune) (str string) {
 809         if !s.okVerb(verb, "svqxX", "string") {
 810                 return ""
 811         }
 812         s.skipSpace(false)
 813         s.notEOF()
 814         switch verb {
 815         case 'q':
 816                 str = s.quotedString()
 817         case 'x', 'X':
 818                 str = s.hexString()
 819         default:
 820                 str = string(s.token(true, notSpace)) // %s and %v just return the next word
 821         }
 822         return
 823 }
 824
 825 // quotedString returns the double- or back-quoted string represented by the next input characters.
 826 func (s *ss) quotedString() string {
 827         s.notEOF()
 828         quote := s.getRune()
 829         switch quote {
 830         case '`':
 831                 // Back-quoted: Anything goes until EOF or back quote.
 832                 for {
 833                         r := s.mustReadRune()
 834                         if r == quote {
 835                                 break
 836                         }
 837                         s.buf.WriteRune(r)
 838                 }
 839                 return string(s.buf)
 840         case '"':
 841                 // Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
 842                 s.buf.WriteByte('"')
 843                 for {
 844                         r := s.mustReadRune()
 845                         s.buf.WriteRune(r)
 846                         if r == '\\' {
 847                                 // In a legal backslash escape, no matter how long, only the character
 848                                 // immediately after the escape can itself be a backslash or quote.
 849                                 // Thus we only need to protect the first character after the backslash.
 850                                 s.buf.WriteRune(s.mustReadRune())
 851                         } else if r == '"' {
 852                                 break
 853                         }
 854                 }
 855                 result, err := strconv.Unquote(string(s.buf))
 856                 if err != nil {
 857                         s.error(err)
 858                 }
 859                 return result
 860         default:
 861                 s.errorString("expected quoted string")
 862         }
 863         return ""
 864 }
 865
 866 // hexDigit returns the value of the hexadecimal digit.
 867 func hexDigit(d rune) (int, bool) {
 868         digit := int(d)
 869         switch digit {
 870         case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 871                 return digit - '0', true
 872         case 'a', 'b', 'c', 'd', 'e', 'f':
 873                 return 10 + digit - 'a', true
 874         case 'A', 'B', 'C', 'D', 'E', 'F':
 875                 return 10 + digit - 'A', true
 876         }
 877         return -1, false
 878 }
 879
 880 // hexByte returns the next hex-encoded (two-character) byte from the input.
 881 // It returns ok==false if the next bytes in the input do not encode a hex byte.
 882 // If the first byte is hex and the second is not, processing stops.
 883 func (s *ss) hexByte() (b byte, ok bool) {
 884         rune1 := s.getRune()
 885         if rune1 == eof {
 886                 return
 887         }
 888         value1, ok := hexDigit(rune1)
 889         if !ok {
 890                 s.UnreadRune()
 891                 return
 892         }
 893         value2, ok := hexDigit(s.mustReadRune())
 894         if !ok {
 895                 s.errorString("illegal hex digit")
 896                 return
 897         }
 898         return byte(value1<<4 | value2), true
 899 }
 900
 901 // hexString returns the space-delimited hexpair-encoded string.
 902 func (s *ss) hexString() string {
 903         s.notEOF()
 904         for {
 905                 b, ok := s.hexByte()
 906                 if !ok {
 907                         break
 908                 }
 909                 s.buf.WriteByte(b)
 910         }
 911         if len(s.buf) == 0 {
 912                 s.errorString("no hex data for %x string")
 913                 return ""
 914         }
 915         return string(s.buf)
 916 }
 917
const (
        // floatVerbs is the set of verbs accepted when scanning
        // floating-point and complex values.
        floatVerbs = "beEfFgGv"

        // hugeWid is the maximum width doScanf uses when the format
        // does not specify an explicit width.
        hugeWid = 1 << 30

        // intBits and uintptrBits are the sizes in bits of int and uintptr.
        // Shifting the all-ones value right by 63 yields 1 for a 64-bit
        // type and 0 for a narrower one, so each expression is 64 or 32.
        intBits     = 32 << (^uint(0) >> 63)
        uintptrBits = 32 << (^uintptr(0) >> 63)
)
 926
 927 // scanOne scans a single value, deriving the scanner from the type of the argument.
 928 func (s *ss) scanOne(verb rune, arg interface{}) {
 929         s.buf = s.buf[:0]
 930         var err error
 931         // If the parameter has its own Scan method, use that.
 932         if v, ok := arg.(Scanner); ok {
 933                 err = v.Scan(s, verb)
 934                 if err != nil {
 935                         if err == io.EOF {
 936                                 err = io.ErrUnexpectedEOF
 937                         }
 938                         s.error(err)
 939                 }
 940                 return
 941         }
 942
 943         switch v := arg.(type) {
 944         case *bool:
 945                 *v = s.scanBool(verb)
 946         case *complex64:
 947                 *v = complex64(s.scanComplex(verb, 64))
 948         case *complex128:
 949                 *v = s.scanComplex(verb, 128)
 950         case *int:
 951                 *v = int(s.scanInt(verb, intBits))
 952         case *int8:
 953                 *v = int8(s.scanInt(verb, 8))
 954         case *int16:
 955                 *v = int16(s.scanInt(verb, 16))
 956         case *int32:
 957                 *v = int32(s.scanInt(verb, 32))
 958         case *int64:
 959                 *v = s.scanInt(verb, 64)
 960         case *uint:
 961                 *v = uint(s.scanUint(verb, intBits))
 962         case *uint8:
 963                 *v = uint8(s.scanUint(verb, 8))
 964         case *uint16:
 965                 *v = uint16(s.scanUint(verb, 16))
 966         case *uint32:
 967                 *v = uint32(s.scanUint(verb, 32))
 968         case *uint64:
 969                 *v = s.scanUint(verb, 64)
 970         case *uintptr:
 971                 *v = uintptr(s.scanUint(verb, uintptrBits))
 972         // Floats are tricky because you want to scan in the precision of the result, not
 973         // scan in high precision and convert, in order to preserve the correct error condition.
 974         case *float32:
 975                 if s.okVerb(verb, floatVerbs, "float32") {
 976                         s.skipSpace(false)
 977                         s.notEOF()
 978                         *v = float32(s.convertFloat(s.floatToken(), 32))
 979                 }
 980         case *float64:
 981                 if s.okVerb(verb, floatVerbs, "float64") {
 982                         s.skipSpace(false)
 983                         s.notEOF()
 984                         *v = s.convertFloat(s.floatToken(), 64)
 985                 }
 986         case *string:
 987                 *v = s.convertString(verb)
 988         case *[]byte:
 989                 // We scan to string and convert so we get a copy of the data.
 990                 // If we scanned to bytes, the slice would point at the buffer.
 991                 *v = []byte(s.convertString(verb))
 992         default:
 993                 val := reflect.ValueOf(v)
 994                 ptr := val
 995                 if ptr.Kind() != reflect.Ptr {
 996                         s.errorString("type not a pointer: " + val.Type().String())
 997                         return
 998                 }
 999                 switch v := ptr.Elem(); v.Kind() {
1000                 case reflect.Bool:
1001                         v.SetBool(s.scanBool(verb))
1002                 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
1003                         v.SetInt(s.scanInt(verb, v.Type().Bits()))
1004                 case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
1005                         v.SetUint(s.scanUint(verb, v.Type().Bits()))
1006                 case reflect.String:
1007                         v.SetString(s.convertString(verb))
1008                 case reflect.Slice:
1009                         // For now, can only handle (renamed) []byte.
1010                         typ := v.Type()
1011                         if typ.Elem().Kind() != reflect.Uint8 {
1012                                 s.errorString("can't scan type: " + val.Type().String())
1013                         }
1014                         str := s.convertString(verb)
1015                         v.Set(reflect.MakeSlice(typ, len(str), len(str)))
1016                         for i := 0; i < len(str); i++ {
1017                                 v.Index(i).SetUint(uint64(str[i]))
1018                         }
1019                 case reflect.Float32, reflect.Float64:
1020                         s.skipSpace(false)
1021                         s.notEOF()
1022                         v.SetFloat(s.convertFloat(s.floatToken(), v.Type().Bits()))
1023                 case reflect.Complex64, reflect.Complex128:
1024                         v.SetComplex(s.scanComplex(verb, v.Type().Bits()))
1025                 default:
1026                         s.errorString("can't scan type: " + val.Type().String())
1027                 }
1028         }
1029 }
1030
1031 // errorHandler turns local panics into error returns.
1032 func errorHandler(errp *error) {
1033         if e := recover(); e != nil {
1034                 if se, ok := e.(scanError); ok { // catch local error
1035                         *errp = se.err
1036                 } else if eof, ok := e.(error); ok && eof == io.EOF { // out of input
1037                         *errp = eof
1038                 } else {
1039                         panic(e)
1040                 }
1041         }
1042 }
1043
1044 // doScan does the real work for scanning without a format string.
1045 func (s *ss) doScan(a []interface{}) (numProcessed int, err error) {
1046         defer errorHandler(&err)
1047         for _, arg := range a {
1048                 s.scanOne('v', arg)
1049                 numProcessed++
1050         }
1051         // Check for newline (or EOF) if required (Scanln etc.).
1052         if s.nlIsEnd {
1053                 for {
1054                         r := s.getRune()
1055                         if r == '\n' || r == eof {
1056                                 break
1057                         }
1058                         if !isSpace(r) {
1059                                 s.errorString("expected newline")
1060                                 break
1061                         }
1062                 }
1063         }
1064         return
1065 }
1066
// advance determines whether the next characters in the input match
// those of the format. It returns the number of bytes (sic) consumed
// in the format. All runs of space characters in either input or
// format behave as a single space. Newlines are special, though:
// newlines in the format must match those in the input and vice versa.
// This routine also handles the %% case. If the return value is zero,
// either format starts with a % (with no following %) or the input
// is empty. If it is negative, the input did not match the string.
func (s *ss) advance(format string) (i int) {
        for i < len(format) {
                // fmtc is the format rune at byte offset i and w its width in bytes.
                fmtc, w := utf8.DecodeRuneInString(format[i:])

                // Space processing.
                // In the rest of this comment "space" means spaces other than newline.
                // Newline in the format matches input of zero or more spaces and then newline or end-of-input.
                // Spaces in the format before the newline are collapsed into the newline.
                // Spaces in the format after the newline match zero or more spaces after the corresponding input newline.
                // Other spaces in the format match input of one or more spaces or end-of-input.
                if isSpace(fmtc) {
                        // First consume the whole run of space in the format, counting its
                        // newlines and noting whether any space follows the last newline.
                        newlines := 0
                        trailingSpace := false
                        for isSpace(fmtc) && i < len(format) {
                                if fmtc == '\n' {
                                        newlines++
                                        trailingSpace = false
                                } else {
                                        trailingSpace = true
                                }
                                i += w
                                fmtc, w = utf8.DecodeRuneInString(format[i:])
                        }
                        // Each newline in the format must be matched in the input by
                        // optional spaces followed by a newline or end-of-input.
                        for j := 0; j < newlines; j++ {
                                inputc := s.getRune()
                                for isSpace(inputc) && inputc != '\n' {
                                        inputc = s.getRune()
                                }
                                if inputc != '\n' && inputc != eof {
                                        s.errorString("newline in format does not match input")
                                }
                        }
                        if trailingSpace {
                                inputc := s.getRune()
                                if newlines == 0 {
                                        // If the trailing space stood alone (did not follow a newline),
                                        // it must find at least one space to consume.
                                        if !isSpace(inputc) && inputc != eof {
                                                s.errorString("expected space in input to match format")
                                        }
                                        if inputc == '\n' {
                                                s.errorString("newline in input does not match format")
                                        }
                                }
                                // Skip the rest of the input spaces, stopping at a newline.
                                for isSpace(inputc) && inputc != '\n' {
                                        inputc = s.getRune()
                                }
                                // The rune that ended the run was read but not matched; push it back.
                                if inputc != eof {
                                        s.UnreadRune()
                                }
                        }
                        continue
                }

                // Verbs.
                if fmtc == '%' {
                        // % at end of string is an error.
                        if i+w == len(format) {
                                s.errorString("missing verb: % at end of format string")
                        }
                        // %% acts like a real percent
                        nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
                        if nextc != '%' {
                                // A real verb starts here: return the bytes consumed so far
                                // and leave the verb to the caller.
                                return
                        }
                        i += w // skip the first %
                }

                // Literals.
                // The format rune (including the second % of %%) must equal the next input rune.
                inputc := s.mustReadRune()
                if fmtc != inputc {
                        // Mismatch: leave the input rune unread and signal failure.
                        s.UnreadRune()
                        return -1
                }
                i += w
        }
        return
}
1153
1154 // doScanf does the real work when scanning with a format string.
1155 // At the moment, it handles only pointers to basic types.
1156 func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err error) {
1157         defer errorHandler(&err)
1158         end := len(format) - 1
1159         // We process one item per non-trivial format
1160         for i := 0; i <= end; {
1161                 w := s.advance(format[i:])
1162                 if w > 0 {
1163                         i += w
1164                         continue
1165                 }
1166                 // Either we failed to advance, we have a percent character, or we ran out of input.
1167                 if format[i] != '%' {
1168                         // Can't advance format. Why not?
1169                         if w < 0 {
1170                                 s.errorString("input does not match format")
1171                         }
1172                         // Otherwise at EOF; "too many operands" error handled below
1173                         break
1174                 }
1175                 i++ // % is one byte
1176
1177                 // do we have 20 (width)?
1178                 var widPresent bool
1179                 s.maxWid, widPresent, i = parsenum(format, i, end)
1180                 if !widPresent {
1181                         s.maxWid = hugeWid
1182                 }
1183
1184                 c, w := utf8.DecodeRuneInString(format[i:])
1185                 i += w
1186
1187                 if c != 'c' {
1188                         s.SkipSpace()
1189                 }
1190                 s.argLimit = s.limit
1191                 if f := s.count + s.maxWid; f < s.argLimit {
1192                         s.argLimit = f
1193                 }
1194
1195                 if numProcessed >= len(a) { // out of operands
1196                         s.errorString("too few operands for format '%" + format[i-w:] + "'")
1197                         break
1198                 }
1199                 arg := a[numProcessed]
1200
1201                 s.scanOne(c, arg)
1202                 numProcessed++
1203                 s.argLimit = s.limit
1204         }
1205         if numProcessed < len(a) {
1206                 s.errorString("too many operands")
1207         }
1208         return
1209 }