libgo/go/fmt/scan.go

   1 // Copyright 2010 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package fmt
   6
   7 import (
   8         "errors"
   9         "io"
  10         "math"
  11         "os"
  12         "reflect"
  13         "strconv"
  14         "sync"
  15         "unicode/utf8"
  16 )
  17
// runeUnreader is the interface to something that can unread runes.
// If the object provided to Scan does not satisfy this interface,
// a local buffer will be used to back up the input, but its contents
// will be lost when Scan returns.
//
// (*ss).UnreadRune type-asserts its rune source against this interface
// and falls back to its own one-rune lookahead when the assertion fails.
type runeUnreader interface {
	// UnreadRune asks the source to hand out its most recent rune again.
	UnreadRune() error
}
  25
// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
// Within this package the interface is implemented by the ss type.
type ScanState interface {
	// ReadRune reads the next rune (Unicode code point) from the input.
	// If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
	// return EOF after returning the first '\n' or when reading beyond
	// the specified width.
	ReadRune() (r rune, size int, err error)
	// UnreadRune causes the next call to ReadRune to return the same rune.
	UnreadRune() error
	// SkipSpace skips space in the input. Newlines are treated appropriately
	// for the operation being performed; see the package documentation
	// for more information.
	SkipSpace()
	// Token skips space in the input if skipSpace is true, then returns the
	// run of Unicode code points c satisfying f(c).  If f is nil,
	// !unicode.IsSpace(c) is used; that is, the token will hold non-space
	// characters.  Newlines are treated appropriately for the operation being
	// performed; see the package documentation for more information.
	// The returned slice points to shared data that may be overwritten
	// by the next call to Token, a call to a Scan function using the ScanState
	// as input, or when the calling Scan method returns.
	Token(skipSpace bool, f func(rune) bool) (token []byte, err error)
	// Width returns the value of the width option and whether it has been set.
	// The unit is Unicode code points.
	Width() (wid int, ok bool)
	// Read is present only so that a ScanState also satisfies io.Reader.
	// Because ReadRune is implemented by the interface, Read should never be
	// called by the scanning routines and a valid implementation of
	// ScanState may choose always to return an error from Read.
	Read(buf []byte) (n int, err error)
}
  58
// Scanner is implemented by any value that has a Scan method, which scans
// the input for the representation of a value and stores the result in the
// receiver, which must be a pointer to be useful.  The Scan method is called
// for any argument to Scan, Scanf, or Scanln that implements it.
type Scanner interface {
	// Scan reads the value's textual form from state; verb is the format
	// verb under which the value is being scanned.
	Scan(state ScanState, verb rune) error
}
  66
  67 // Scan scans text read from standard input, storing successive
  68 // space-separated values into successive arguments.  Newlines count
  69 // as space.  It returns the number of items successfully scanned.
  70 // If that is less than the number of arguments, err will report why.
  71 func Scan(a ...interface{}) (n int, err error) {
  72         return Fscan(os.Stdin, a...)
  73 }
  74
  75 // Scanln is similar to Scan, but stops scanning at a newline and
  76 // after the final item there must be a newline or EOF.
  77 func Scanln(a ...interface{}) (n int, err error) {
  78         return Fscanln(os.Stdin, a...)
  79 }
  80
  81 // Scanf scans text read from standard input, storing successive
  82 // space-separated values into successive arguments as determined by
  83 // the format.  It returns the number of items successfully scanned.
  84 // If that is less than the number of arguments, err will report why.
  85 // Newlines in the input must match newlines in the format.
  86 // The one exception: the verb %c always scans the next rune in the
  87 // input, even if it is a space (or tab etc.) or newline.
  88 func Scanf(format string, a ...interface{}) (n int, err error) {
  89         return Fscanf(os.Stdin, format, a...)
  90 }
  91
  92 type stringReader string
  93
  94 func (r *stringReader) Read(b []byte) (n int, err error) {
  95         n = copy(b, *r)
  96         *r = (*r)[n:]
  97         if n == 0 {
  98                 err = io.EOF
  99         }
 100         return
 101 }
 102
 103 // Sscan scans the argument string, storing successive space-separated
 104 // values into successive arguments.  Newlines count as space.  It
 105 // returns the number of items successfully scanned.  If that is less
 106 // than the number of arguments, err will report why.
 107 func Sscan(str string, a ...interface{}) (n int, err error) {
 108         return Fscan((*stringReader)(&str), a...)
 109 }
 110
 111 // Sscanln is similar to Sscan, but stops scanning at a newline and
 112 // after the final item there must be a newline or EOF.
 113 func Sscanln(str string, a ...interface{}) (n int, err error) {
 114         return Fscanln((*stringReader)(&str), a...)
 115 }
 116
 117 // Sscanf scans the argument string, storing successive space-separated
 118 // values into successive arguments as determined by the format.  It
 119 // returns the number of items successfully parsed.
 120 // Newlines in the input must match newlines in the format.
 121 func Sscanf(str string, format string, a ...interface{}) (n int, err error) {
 122         return Fscanf((*stringReader)(&str), format, a...)
 123 }
 124
 125 // Fscan scans text read from r, storing successive space-separated
 126 // values into successive arguments.  Newlines count as space.  It
 127 // returns the number of items successfully scanned.  If that is less
 128 // than the number of arguments, err will report why.
 129 func Fscan(r io.Reader, a ...interface{}) (n int, err error) {
 130         s, old := newScanState(r, true, false)
 131         n, err = s.doScan(a)
 132         s.free(old)
 133         return
 134 }
 135
 136 // Fscanln is similar to Fscan, but stops scanning at a newline and
 137 // after the final item there must be a newline or EOF.
 138 func Fscanln(r io.Reader, a ...interface{}) (n int, err error) {
 139         s, old := newScanState(r, false, true)
 140         n, err = s.doScan(a)
 141         s.free(old)
 142         return
 143 }
 144
 145 // Fscanf scans text read from r, storing successive space-separated
 146 // values into successive arguments as determined by the format.  It
 147 // returns the number of items successfully parsed.
 148 // Newlines in the input must match newlines in the format.
 149 func Fscanf(r io.Reader, format string, a ...interface{}) (n int, err error) {
 150         s, old := newScanState(r, false, false)
 151         n, err = s.doScanf(format, a)
 152         s.free(old)
 153         return
 154 }
 155
// scanError represents an error generated by the scanning software.
// It's used as a unique signature to identify such errors when recovering:
// (*ss).error and (*ss).errorString panic with a scanError, and
// (*ss).Token recovers it and returns the wrapped err to its caller.
type scanError struct {
	err error // the underlying error being carried through the panic
}
 161
// eof is the sentinel rune value that (*ss).getRune returns in place of a
// rune when the underlying read reports io.EOF.
const eof = -1
 163
// ss is the internal implementation of ScanState.
// The embedded ssave carries the newline handling flags and width limits.
type ss struct {
	rr       io.RuneReader // where to read input
	buf      buffer        // token accumulator
	peekRune rune          // one-rune lookahead; negative when nothing is pushed back
	prevRune rune          // last rune returned by ReadRune; reset to -1 by UnreadRune
	count    int           // runes consumed so far.
	atEOF    bool          // already read EOF
	ssave
}
 174
// ssave holds the parts of ss that need to be
// saved and restored on recursive scans.
// (*ss).free restores a saved copy when its validSave field is set.
type ssave struct {
	validSave bool // is or was a part of an actual ss.
	nlIsEnd   bool // whether newline terminates scan
	nlIsSpace bool // whether newline counts as white space
	argLimit  int  // max value of ss.count for this arg; argLimit <= limit
	limit     int  // max value of ss.count.
	maxWid    int  // width of this arg; hugeWid means no width was set (see Width).
}
 185
 186 // The Read method is only in ScanState so that ScanState
 187 // satisfies io.Reader. It will never be called when used as
 188 // intended, so there is no need to make it actually work.
 189 func (s *ss) Read(buf []byte) (n int, err error) {
 190         return 0, errors.New("ScanState's Read should not be called. Use ReadRune")
 191 }
 192
 193 func (s *ss) ReadRune() (r rune, size int, err error) {
 194         if s.peekRune >= 0 {
 195                 s.count++
 196                 r = s.peekRune
 197                 size = utf8.RuneLen(r)
 198                 s.prevRune = r
 199                 s.peekRune = -1
 200                 return
 201         }
 202         if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.count >= s.argLimit {
 203                 err = io.EOF
 204                 return
 205         }
 206
 207         r, size, err = s.rr.ReadRune()
 208         if err == nil {
 209                 s.count++
 210                 s.prevRune = r
 211         } else if err == io.EOF {
 212                 s.atEOF = true
 213         }
 214         return
 215 }
 216
 217 func (s *ss) Width() (wid int, ok bool) {
 218         if s.maxWid == hugeWid {
 219                 return 0, false
 220         }
 221         return s.maxWid, true
 222 }
 223
 224 // The public method returns an error; this private one panics.
 225 // If getRune reaches EOF, the return value is EOF (-1).
 226 func (s *ss) getRune() (r rune) {
 227         r, _, err := s.ReadRune()
 228         if err != nil {
 229                 if err == io.EOF {
 230                         return eof
 231                 }
 232                 s.error(err)
 233         }
 234         return
 235 }
 236
 237 // mustReadRune turns io.EOF into a panic(io.ErrUnexpectedEOF).
 238 // It is called in cases such as string scanning where an EOF is a
 239 // syntax error.
 240 func (s *ss) mustReadRune() (r rune) {
 241         r = s.getRune()
 242         if r == eof {
 243                 s.error(io.ErrUnexpectedEOF)
 244         }
 245         return
 246 }
 247
 248 func (s *ss) UnreadRune() error {
 249         if u, ok := s.rr.(runeUnreader); ok {
 250                 u.UnreadRune()
 251         } else {
 252                 s.peekRune = s.prevRune
 253         }
 254         s.prevRune = -1
 255         s.count--
 256         return nil
 257 }
 258
 259 func (s *ss) error(err error) {
 260         panic(scanError{err})
 261 }
 262
 263 func (s *ss) errorString(err string) {
 264         panic(scanError{errors.New(err)})
 265 }
 266
 267 func (s *ss) Token(skipSpace bool, f func(rune) bool) (tok []byte, err error) {
 268         defer func() {
 269                 if e := recover(); e != nil {
 270                         if se, ok := e.(scanError); ok {
 271                                 err = se.err
 272                         } else {
 273                                 panic(e)
 274                         }
 275                 }
 276         }()
 277         if f == nil {
 278                 f = notSpace
 279         }
 280         s.buf = s.buf[:0]
 281         tok = s.token(skipSpace, f)
 282         return
 283 }
 284
 285 // space is a copy of the unicode.White_Space ranges,
 286 // to avoid depending on package unicode.
 287 var space = [][2]uint16{
 288         {0x0009, 0x000d},
 289         {0x0020, 0x0020},
 290         {0x0085, 0x0085},
 291         {0x00a0, 0x00a0},
 292         {0x1680, 0x1680},
 293         {0x2000, 0x200a},
 294         {0x2028, 0x2029},
 295         {0x202f, 0x202f},
 296         {0x205f, 0x205f},
 297         {0x3000, 0x3000},
 298 }
 299
 300 func isSpace(r rune) bool {
 301         if r >= 1<<16 {
 302                 return false
 303         }
 304         rx := uint16(r)
 305         for _, rng := range space {
 306                 if rx < rng[0] {
 307                         return false
 308                 }
 309                 if rx <= rng[1] {
 310                         return true
 311                 }
 312         }
 313         return false
 314 }
 315
 316 // notSpace is the default scanning function used in Token.
 317 func notSpace(r rune) bool {
 318         return !isSpace(r)
 319 }
 320
 321 // SkipSpace provides Scan methods the ability to skip space and newline
 322 // characters in keeping with the current scanning mode set by format strings
 323 // and Scan/Scanln.
 324 func (s *ss) SkipSpace() {
 325         s.skipSpace(false)
 326 }
 327
 328 // readRune is a structure to enable reading UTF-8 encoded code points
 329 // from an io.Reader.  It is used if the Reader given to the scanner does
 330 // not already implement io.RuneReader.
 331 type readRune struct {
 332         reader  io.Reader
 333         buf     [utf8.UTFMax]byte // used only inside ReadRune
 334         pending int               // number of bytes in pendBuf; only >0 for bad UTF-8
 335         pendBuf [utf8.UTFMax]byte // bytes left over
 336 }
 337
 338 // readByte returns the next byte from the input, which may be
 339 // left over from a previous read if the UTF-8 was ill-formed.
 340 func (r *readRune) readByte() (b byte, err error) {
 341         if r.pending > 0 {
 342                 b = r.pendBuf[0]
 343                 copy(r.pendBuf[0:], r.pendBuf[1:])
 344                 r.pending--
 345                 return
 346         }
 347         n, err := io.ReadFull(r.reader, r.pendBuf[0:1])
 348         if n != 1 {
 349                 return 0, err
 350         }
 351         return r.pendBuf[0], err
 352 }
 353
 354 // unread saves the bytes for the next read.
 355 func (r *readRune) unread(buf []byte) {
 356         copy(r.pendBuf[r.pending:], buf)
 357         r.pending += len(buf)
 358 }
 359
 360 // ReadRune returns the next UTF-8 encoded code point from the
 361 // io.Reader inside r.
 362 func (r *readRune) ReadRune() (rr rune, size int, err error) {
 363         r.buf[0], err = r.readByte()
 364         if err != nil {
 365                 return 0, 0, err
 366         }
 367         if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
 368                 rr = rune(r.buf[0])
 369                 size = 1 // Known to be 1.
 370                 return
 371         }
 372         var n int
 373         for n = 1; !utf8.FullRune(r.buf[0:n]); n++ {
 374                 r.buf[n], err = r.readByte()
 375                 if err != nil {
 376                         if err == io.EOF {
 377                                 err = nil
 378                                 break
 379                         }
 380                         return
 381                 }
 382         }
 383         rr, size = utf8.DecodeRune(r.buf[0:n])
 384         if size < n { // an error
 385                 r.unread(r.buf[size:n])
 386         }
 387         return
 388 }
 389
// ssFree is a pool of ss structs.  newScanState takes its scan state from
// the pool, and (*ss).free puts states back so they can be reused.
var ssFree = sync.Pool{
	New: func() interface{} { return new(ss) },
}
 393
 394 // newScanState allocates a new ss struct or grab a cached one.
 395 func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
 396         s = ssFree.Get().(*ss)
 397         if rr, ok := r.(io.RuneReader); ok {
 398                 s.rr = rr
 399         } else {
 400                 s.rr = &readRune{reader: r}
 401         }
 402         s.nlIsSpace = nlIsSpace
 403         s.nlIsEnd = nlIsEnd
 404         s.prevRune = -1
 405         s.peekRune = -1
 406         s.atEOF = false
 407         s.limit = hugeWid
 408         s.argLimit = hugeWid
 409         s.maxWid = hugeWid
 410         s.validSave = true
 411         s.count = 0
 412         return
 413 }
 414
 415 // free saves used ss structs in ssFree; avoid an allocation per invocation.
 416 func (s *ss) free(old ssave) {
 417         // If it was used recursively, just restore the old state.
 418         if old.validSave {
 419                 s.ssave = old
 420                 return
 421         }
 422         // Don't hold on to ss structs with large buffers.
 423         if cap(s.buf) > 1024 {
 424                 return
 425         }
 426         s.buf = s.buf[:0]
 427         s.rr = nil
 428         ssFree.Put(s)
 429 }
 430
 431 // skipSpace skips spaces and maybe newlines.
 432 func (s *ss) skipSpace(stopAtNewline bool) {
 433         for {
 434                 r := s.getRune()
 435                 if r == eof {
 436                         return
 437                 }
 438                 if r == '\r' && s.peek("\n") {
 439                         continue
 440                 }
 441                 if r == '\n' {
 442                         if stopAtNewline {
 443                                 break
 444                         }
 445                         if s.nlIsSpace {
 446                                 continue
 447                         }
 448                         s.errorString("unexpected newline")
 449                         return
 450                 }
 451                 if !isSpace(r) {
 452                         s.UnreadRune()
 453                         break
 454                 }
 455         }
 456 }
 457
 458 // token returns the next space-delimited string from the input.  It
 459 // skips white space.  For Scanln, it stops at newlines.  For Scan,
 460 // newlines are treated as spaces.
 461 func (s *ss) token(skipSpace bool, f func(rune) bool) []byte {
 462         if skipSpace {
 463                 s.skipSpace(false)
 464         }
 465         // read until white space or newline
 466         for {
 467                 r := s.getRune()
 468                 if r == eof {
 469                         break
 470                 }
 471                 if !f(r) {
 472                         s.UnreadRune()
 473                         break
 474                 }
 475                 s.buf.WriteRune(r)
 476         }
 477         return s.buf
 478 }
 479
// complexError is raised by complexTokens when the input does not have the
// expected complex-number shape.
var complexError = errors.New("syntax error scanning complex number")

// boolError is raised by scanBool when a boolean word is started but not
// completed correctly.
var boolError = errors.New("syntax error scanning boolean")
 482
 483 func indexRune(s string, r rune) int {
 484         for i, c := range s {
 485                 if c == r {
 486                         return i
 487                 }
 488         }
 489         return -1
 490 }
 491
 492 // consume reads the next rune in the input and reports whether it is in the ok string.
 493 // If accept is true, it puts the character into the input token.
 494 func (s *ss) consume(ok string, accept bool) bool {
 495         r := s.getRune()
 496         if r == eof {
 497                 return false
 498         }
 499         if indexRune(ok, r) >= 0 {
 500                 if accept {
 501                         s.buf.WriteRune(r)
 502                 }
 503                 return true
 504         }
 505         if r != eof && accept {
 506                 s.UnreadRune()
 507         }
 508         return false
 509 }
 510
 511 // peek reports whether the next character is in the ok string, without consuming it.
 512 func (s *ss) peek(ok string) bool {
 513         r := s.getRune()
 514         if r != eof {
 515                 s.UnreadRune()
 516         }
 517         return indexRune(ok, r) >= 0
 518 }
 519
 520 func (s *ss) notEOF() {
 521         // Guarantee there is data to be read.
 522         if r := s.getRune(); r == eof {
 523                 panic(io.EOF)
 524         }
 525         s.UnreadRune()
 526 }
 527
 528 // accept checks the next rune in the input.  If it's a byte (sic) in the string, it puts it in the
 529 // buffer and returns true. Otherwise it return false.
 530 func (s *ss) accept(ok string) bool {
 531         return s.consume(ok, true)
 532 }
 533
 534 // okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
 535 func (s *ss) okVerb(verb rune, okVerbs, typ string) bool {
 536         for _, v := range okVerbs {
 537                 if v == verb {
 538                         return true
 539                 }
 540         }
 541         s.errorString("bad verb '%" + string(verb) + "' for " + typ)
 542         return false
 543 }
 544
 545 // scanBool returns the value of the boolean represented by the next token.
 546 func (s *ss) scanBool(verb rune) bool {
 547         s.skipSpace(false)
 548         s.notEOF()
 549         if !s.okVerb(verb, "tv", "boolean") {
 550                 return false
 551         }
 552         // Syntax-checking a boolean is annoying.  We're not fastidious about case.
 553         switch s.getRune() {
 554         case '0':
 555                 return false
 556         case '1':
 557                 return true
 558         case 't', 'T':
 559                 if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
 560                         s.error(boolError)
 561                 }
 562                 return true
 563         case 'f', 'F':
 564                 if s.accept("aA") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
 565                         s.error(boolError)
 566                 }
 567                 return false
 568         }
 569         return false
 570 }
 571
// Numerical elements: the sets of runes that the number scanners pass to
// accept, consume and peek.
const (
	binaryDigits      = "01"
	octalDigits       = "01234567"
	decimalDigits     = "0123456789"
	hexadecimalDigits = "0123456789aAbBcCdDeEfF"
	sign              = "+-"
	period            = "."
	exponent          = "eEp" // 'p' introduces a power-of-two exponent (see convertFloat)
)
 582
 583 // getBase returns the numeric base represented by the verb and its digit string.
 584 func (s *ss) getBase(verb rune) (base int, digits string) {
 585         s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
 586         base = 10
 587         digits = decimalDigits
 588         switch verb {
 589         case 'b':
 590                 base = 2
 591                 digits = binaryDigits
 592         case 'o':
 593                 base = 8
 594                 digits = octalDigits
 595         case 'x', 'X', 'U':
 596                 base = 16
 597                 digits = hexadecimalDigits
 598         }
 599         return
 600 }
 601
 602 // scanNumber returns the numerical string with specified digits starting here.
 603 func (s *ss) scanNumber(digits string, haveDigits bool) string {
 604         if !haveDigits {
 605                 s.notEOF()
 606                 if !s.accept(digits) {
 607                         s.errorString("expected integer")
 608                 }
 609         }
 610         for s.accept(digits) {
 611         }
 612         return string(s.buf)
 613 }
 614
 615 // scanRune returns the next rune value in the input.
 616 func (s *ss) scanRune(bitSize int) int64 {
 617         s.notEOF()
 618         r := int64(s.getRune())
 619         n := uint(bitSize)
 620         x := (r << (64 - n)) >> (64 - n)
 621         if x != r {
 622                 s.errorString("overflow on character value " + string(r))
 623         }
 624         return r
 625 }
 626
 627 // scanBasePrefix reports whether the integer begins with a 0 or 0x,
 628 // and returns the base, digit string, and whether a zero was found.
 629 // It is called only if the verb is %v.
 630 func (s *ss) scanBasePrefix() (base int, digits string, found bool) {
 631         if !s.peek("0") {
 632                 return 10, decimalDigits, false
 633         }
 634         s.accept("0")
 635         found = true // We've put a digit into the token buffer.
 636         // Special cases for '0' && '0x'
 637         base, digits = 8, octalDigits
 638         if s.peek("xX") {
 639                 s.consume("xX", false)
 640                 base, digits = 16, hexadecimalDigits
 641         }
 642         return
 643 }
 644
 645 // scanInt returns the value of the integer represented by the next
 646 // token, checking for overflow.  Any error is stored in s.err.
 647 func (s *ss) scanInt(verb rune, bitSize int) int64 {
 648         if verb == 'c' {
 649                 return s.scanRune(bitSize)
 650         }
 651         s.skipSpace(false)
 652         s.notEOF()
 653         base, digits := s.getBase(verb)
 654         haveDigits := false
 655         if verb == 'U' {
 656                 if !s.consume("U", false) || !s.consume("+", false) {
 657                         s.errorString("bad unicode format ")
 658                 }
 659         } else {
 660                 s.accept(sign) // If there's a sign, it will be left in the token buffer.
 661                 if verb == 'v' {
 662                         base, digits, haveDigits = s.scanBasePrefix()
 663                 }
 664         }
 665         tok := s.scanNumber(digits, haveDigits)
 666         i, err := strconv.ParseInt(tok, base, 64)
 667         if err != nil {
 668                 s.error(err)
 669         }
 670         n := uint(bitSize)
 671         x := (i << (64 - n)) >> (64 - n)
 672         if x != i {
 673                 s.errorString("integer overflow on token " + tok)
 674         }
 675         return i
 676 }
 677
 678 // scanUint returns the value of the unsigned integer represented
 679 // by the next token, checking for overflow.  Any error is stored in s.err.
 680 func (s *ss) scanUint(verb rune, bitSize int) uint64 {
 681         if verb == 'c' {
 682                 return uint64(s.scanRune(bitSize))
 683         }
 684         s.skipSpace(false)
 685         s.notEOF()
 686         base, digits := s.getBase(verb)
 687         haveDigits := false
 688         if verb == 'U' {
 689                 if !s.consume("U", false) || !s.consume("+", false) {
 690                         s.errorString("bad unicode format ")
 691                 }
 692         } else if verb == 'v' {
 693                 base, digits, haveDigits = s.scanBasePrefix()
 694         }
 695         tok := s.scanNumber(digits, haveDigits)
 696         i, err := strconv.ParseUint(tok, base, 64)
 697         if err != nil {
 698                 s.error(err)
 699         }
 700         n := uint(bitSize)
 701         x := (i << (64 - n)) >> (64 - n)
 702         if x != i {
 703                 s.errorString("unsigned integer overflow on token " + tok)
 704         }
 705         return i
 706 }
 707
 708 // floatToken returns the floating-point number starting here, no longer than swid
 709 // if the width is specified. It's not rigorous about syntax because it doesn't check that
 710 // we have at least some digits, but Atof will do that.
 711 func (s *ss) floatToken() string {
 712         s.buf = s.buf[:0]
 713         // NaN?
 714         if s.accept("nN") && s.accept("aA") && s.accept("nN") {
 715                 return string(s.buf)
 716         }
 717         // leading sign?
 718         s.accept(sign)
 719         // Inf?
 720         if s.accept("iI") && s.accept("nN") && s.accept("fF") {
 721                 return string(s.buf)
 722         }
 723         // digits?
 724         for s.accept(decimalDigits) {
 725         }
 726         // decimal point?
 727         if s.accept(period) {
 728                 // fraction?
 729                 for s.accept(decimalDigits) {
 730                 }
 731         }
 732         // exponent?
 733         if s.accept(exponent) {
 734                 // leading sign?
 735                 s.accept(sign)
 736                 // digits?
 737                 for s.accept(decimalDigits) {
 738                 }
 739         }
 740         return string(s.buf)
 741 }
 742
 743 // complexTokens returns the real and imaginary parts of the complex number starting here.
 744 // The number might be parenthesized and has the format (N+Ni) where N is a floating-point
 745 // number and there are no spaces within.
 746 func (s *ss) complexTokens() (real, imag string) {
 747         // TODO: accept N and Ni independently?
 748         parens := s.accept("(")
 749         real = s.floatToken()
 750         s.buf = s.buf[:0]
 751         // Must now have a sign.
 752         if !s.accept("+-") {
 753                 s.error(complexError)
 754         }
 755         // Sign is now in buffer
 756         imagSign := string(s.buf)
 757         imag = s.floatToken()
 758         if !s.accept("i") {
 759                 s.error(complexError)
 760         }
 761         if parens && !s.accept(")") {
 762                 s.error(complexError)
 763         }
 764         return real, imagSign + imag
 765 }
 766
 767 // convertFloat converts the string to a float64value.
 768 func (s *ss) convertFloat(str string, n int) float64 {
 769         if p := indexRune(str, 'p'); p >= 0 {
 770                 // Atof doesn't handle power-of-2 exponents,
 771                 // but they're easy to evaluate.
 772                 f, err := strconv.ParseFloat(str[:p], n)
 773                 if err != nil {
 774                         // Put full string into error.
 775                         if e, ok := err.(*strconv.NumError); ok {
 776                                 e.Num = str
 777                         }
 778                         s.error(err)
 779                 }
 780                 m, err := strconv.Atoi(str[p+1:])
 781                 if err != nil {
 782                         // Put full string into error.
 783                         if e, ok := err.(*strconv.NumError); ok {
 784                                 e.Num = str
 785                         }
 786                         s.error(err)
 787                 }
 788                 return math.Ldexp(f, m)
 789         }
 790         f, err := strconv.ParseFloat(str, n)
 791         if err != nil {
 792                 s.error(err)
 793         }
 794         return f
 795 }
 796
 797 // convertComplex converts the next token to a complex128 value.
 798 // The atof argument is a type-specific reader for the underlying type.
 799 // If we're reading complex64, atof will parse float32s and convert them
 800 // to float64's to avoid reproducing this code for each complex type.
 801 func (s *ss) scanComplex(verb rune, n int) complex128 {
 802         if !s.okVerb(verb, floatVerbs, "complex") {
 803                 return 0
 804         }
 805         s.skipSpace(false)
 806         s.notEOF()
 807         sreal, simag := s.complexTokens()
 808         real := s.convertFloat(sreal, n/2)
 809         imag := s.convertFloat(simag, n/2)
 810         return complex(real, imag)
 811 }
 812
 813 // convertString returns the string represented by the next input characters.
 814 // The format of the input is determined by the verb.
 815 func (s *ss) convertString(verb rune) (str string) {
 816         if !s.okVerb(verb, "svqxX", "string") {
 817                 return ""
 818         }
 819         s.skipSpace(false)
 820         s.notEOF()
 821         switch verb {
 822         case 'q':
 823                 str = s.quotedString()
 824         case 'x', 'X':
 825                 str = s.hexString()
 826         default:
 827                 str = string(s.token(true, notSpace)) // %s and %v just return the next word
 828         }
 829         return
 830 }
 831
 832 // quotedString returns the double- or back-quoted string represented by the next input characters.
 833 func (s *ss) quotedString() string {
 834         s.notEOF()
 835         quote := s.getRune()
 836         switch quote {
 837         case '`':
 838                 // Back-quoted: Anything goes until EOF or back quote.
 839                 for {
 840                         r := s.mustReadRune()
 841                         if r == quote {
 842                                 break
 843                         }
 844                         s.buf.WriteRune(r)
 845                 }
 846                 return string(s.buf)
 847         case '"':
 848                 // Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
 849                 s.buf.WriteRune(quote)
 850                 for {
 851                         r := s.mustReadRune()
 852                         s.buf.WriteRune(r)
 853                         if r == '\\' {
 854                                 // In a legal backslash escape, no matter how long, only the character
 855                                 // immediately after the escape can itself be a backslash or quote.
 856                                 // Thus we only need to protect the first character after the backslash.
 857                                 s.buf.WriteRune(s.mustReadRune())
 858                         } else if r == '"' {
 859                                 break
 860                         }
 861                 }
 862                 result, err := strconv.Unquote(string(s.buf))
 863                 if err != nil {
 864                         s.error(err)
 865                 }
 866                 return result
 867         default:
 868                 s.errorString("expected quoted string")
 869         }
 870         return ""
 871 }
 872
 873 // hexDigit returns the value of the hexadecimal digit.
 874 func hexDigit(d rune) (int, bool) {
 875         digit := int(d)
 876         switch digit {
 877         case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 878                 return digit - '0', true
 879         case 'a', 'b', 'c', 'd', 'e', 'f':
 880                 return 10 + digit - 'a', true
 881         case 'A', 'B', 'C', 'D', 'E', 'F':
 882                 return 10 + digit - 'A', true
 883         }
 884         return -1, false
 885 }
 886
 887 // hexByte returns the next hex-encoded (two-character) byte from the input.
 888 // It returns ok==false if the next bytes in the input do not encode a hex byte.
 889 // If the first byte is hex and the second is not, processing stops.
 890 func (s *ss) hexByte() (b byte, ok bool) {
 891         rune1 := s.getRune()
 892         if rune1 == eof {
 893                 return
 894         }
 895         value1, ok := hexDigit(rune1)
 896         if !ok {
 897                 s.UnreadRune()
 898                 return
 899         }
 900         value2, ok := hexDigit(s.mustReadRune())
 901         if !ok {
 902                 s.errorString("illegal hex digit")
 903                 return
 904         }
 905         return byte(value1<<4 | value2), true
 906 }
 907
 908 // hexString returns the space-delimited hexpair-encoded string.
 909 func (s *ss) hexString() string {
 910         s.notEOF()
 911         for {
 912                 b, ok := s.hexByte()
 913                 if !ok {
 914                         break
 915                 }
 916                 s.buf.WriteByte(b)
 917         }
 918         if len(s.buf) == 0 {
 919                 s.errorString("no hex data for %x string")
 920                 return ""
 921         }
 922         return string(s.buf)
 923 }
 924
 925 const floatVerbs = "beEfFgGv"
 926
 927 const hugeWid = 1 << 30
 928
 929 // scanOne scans a single value, deriving the scanner from the type of the argument.
 930 func (s *ss) scanOne(verb rune, arg interface{}) {
 931         s.buf = s.buf[:0]
 932         var err error
 933         // If the parameter has its own Scan method, use that.
 934         if v, ok := arg.(Scanner); ok {
 935                 err = v.Scan(s, verb)
 936                 if err != nil {
 937                         if err == io.EOF {
 938                                 err = io.ErrUnexpectedEOF
 939                         }
 940                         s.error(err)
 941                 }
 942                 return
 943         }
 944
 945         switch v := arg.(type) {
 946         case *bool:
 947                 *v = s.scanBool(verb)
 948         case *complex64:
 949                 *v = complex64(s.scanComplex(verb, 64))
 950         case *complex128:
 951                 *v = s.scanComplex(verb, 128)
 952         case *int:
 953                 *v = int(s.scanInt(verb, intBits))
 954         case *int8:
 955                 *v = int8(s.scanInt(verb, 8))
 956         case *int16:
 957                 *v = int16(s.scanInt(verb, 16))
 958         case *int32:
 959                 *v = int32(s.scanInt(verb, 32))
 960         case *int64:
 961                 *v = s.scanInt(verb, 64)
 962         case *uint:
 963                 *v = uint(s.scanUint(verb, intBits))
 964         case *uint8:
 965                 *v = uint8(s.scanUint(verb, 8))
 966         case *uint16:
 967                 *v = uint16(s.scanUint(verb, 16))
 968         case *uint32:
 969                 *v = uint32(s.scanUint(verb, 32))
 970         case *uint64:
 971                 *v = s.scanUint(verb, 64)
 972         case *uintptr:
 973                 *v = uintptr(s.scanUint(verb, uintptrBits))
 974         // Floats are tricky because you want to scan in the precision of the result, not
 975         // scan in high precision and convert, in order to preserve the correct error condition.
 976         case *float32:
 977                 if s.okVerb(verb, floatVerbs, "float32") {
 978                         s.skipSpace(false)
 979                         s.notEOF()
 980                         *v = float32(s.convertFloat(s.floatToken(), 32))
 981                 }
 982         case *float64:
 983                 if s.okVerb(verb, floatVerbs, "float64") {
 984                         s.skipSpace(false)
 985                         s.notEOF()
 986                         *v = s.convertFloat(s.floatToken(), 64)
 987                 }
 988         case *string:
 989                 *v = s.convertString(verb)
 990         case *[]byte:
 991                 // We scan to string and convert so we get a copy of the data.
 992                 // If we scanned to bytes, the slice would point at the buffer.
 993                 *v = []byte(s.convertString(verb))
 994         default:
 995                 val := reflect.ValueOf(v)
 996                 ptr := val
 997                 if ptr.Kind() != reflect.Ptr {
 998                         s.errorString("type not a pointer: " + val.Type().String())
 999                         return
1000                 }
1001                 switch v := ptr.Elem(); v.Kind() {
1002                 case reflect.Bool:
1003                         v.SetBool(s.scanBool(verb))
1004                 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
1005                         v.SetInt(s.scanInt(verb, v.Type().Bits()))
1006                 case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
1007                         v.SetUint(s.scanUint(verb, v.Type().Bits()))
1008                 case reflect.String:
1009                         v.SetString(s.convertString(verb))
1010                 case reflect.Slice:
1011                         // For now, can only handle (renamed) []byte.
1012                         typ := v.Type()
1013                         if typ.Elem().Kind() != reflect.Uint8 {
1014                                 s.errorString("can't scan type: " + val.Type().String())
1015                         }
1016                         str := s.convertString(verb)
1017                         v.Set(reflect.MakeSlice(typ, len(str), len(str)))
1018                         for i := 0; i < len(str); i++ {
1019                                 v.Index(i).SetUint(uint64(str[i]))
1020                         }
1021                 case reflect.Float32, reflect.Float64:
1022                         s.skipSpace(false)
1023                         s.notEOF()
1024                         v.SetFloat(s.convertFloat(s.floatToken(), v.Type().Bits()))
1025                 case reflect.Complex64, reflect.Complex128:
1026                         v.SetComplex(s.scanComplex(verb, v.Type().Bits()))
1027                 default:
1028                         s.errorString("can't scan type: " + val.Type().String())
1029                 }
1030         }
1031 }
1032
1033 // errorHandler turns local panics into error returns.
1034 func errorHandler(errp *error) {
1035         if e := recover(); e != nil {
1036                 if se, ok := e.(scanError); ok { // catch local error
1037                         *errp = se.err
1038                 } else if eof, ok := e.(error); ok && eof == io.EOF { // out of input
1039                         *errp = eof
1040                 } else {
1041                         panic(e)
1042                 }
1043         }
1044 }
1045
1046 // doScan does the real work for scanning without a format string.
1047 func (s *ss) doScan(a []interface{}) (numProcessed int, err error) {
1048         defer errorHandler(&err)
1049         for _, arg := range a {
1050                 s.scanOne('v', arg)
1051                 numProcessed++
1052         }
1053         // Check for newline (or EOF) if required (Scanln etc.).
1054         if s.nlIsEnd {
1055                 for {
1056                         r := s.getRune()
1057                         if r == '\n' || r == eof {
1058                                 break
1059                         }
1060                         if !isSpace(r) {
1061                                 s.errorString("expected newline")
1062                                 break
1063                         }
1064                 }
1065         }
1066         return
1067 }
1068
1069 // advance determines whether the next characters in the input match
1070 // those of the format. It returns the number of bytes (sic) consumed
1071 // in the format. All runs of space characters in either input or
1072 // format behave as a single space. Newlines are special, though:
1073 // newlines in the format must match those in the input and vice versa.
1074 // This routine also handles the %% case. If the return value is zero,
1075 // either format starts with a % (with no following %) or the input
1076 // is empty. If it is negative, the input did not match the string.
1077 func (s *ss) advance(format string) (i int) {
1078         for i < len(format) {
1079                 fmtc, w := utf8.DecodeRuneInString(format[i:])
1080                 if fmtc == '%' {
1081                         // % at end of string is an error.
1082                         if i+w == len(format) {
1083                                 s.errorString("missing verb: % at end of format string")
1084                         }
1085                         // %% acts like a real percent
1086                         nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
1087                         if nextc != '%' {
1088                                 return
1089                         }
1090                         i += w // skip the first %
1091                 }
1092                 sawSpace := false
1093                 wasNewline := false
1094                 // Skip spaces in format but absorb at most one newline.
1095                 for isSpace(fmtc) && i < len(format) {
1096                         if fmtc == '\n' {
1097                                 if wasNewline { // Already saw one; stop here.
1098                                         break
1099                                 }
1100                                 wasNewline = true
1101                         }
1102                         sawSpace = true
1103                         i += w
1104                         fmtc, w = utf8.DecodeRuneInString(format[i:])
1105                 }
1106                 if sawSpace {
1107                         // There was space in the format, so there should be space
1108                         // in the input.
1109                         inputc := s.getRune()
1110                         if inputc == eof {
1111                                 return
1112                         }
1113                         if !isSpace(inputc) {
1114                                 // Space in format but not in input.
1115                                 s.errorString("expected space in input to match format")
1116                         }
1117                         // Skip spaces but stop at newline.
1118                         for inputc != '\n' && isSpace(inputc) {
1119                                 inputc = s.getRune()
1120                         }
1121                         if inputc == '\n' {
1122                                 if !wasNewline {
1123                                         s.errorString("newline in input does not match format")
1124                                 }
1125                                 // We've reached a newline, stop now; don't read further.
1126                                 return
1127                         }
1128                         s.UnreadRune()
1129                         if wasNewline {
1130                                 s.errorString("newline in format does not match input")
1131                         }
1132                         continue
1133                 }
1134                 inputc := s.mustReadRune()
1135                 if fmtc != inputc {
1136                         s.UnreadRune()
1137                         return -1
1138                 }
1139                 i += w
1140         }
1141         return
1142 }
1143
1144 // doScanf does the real work when scanning with a format string.
1145 //  At the moment, it handles only pointers to basic types.
1146 func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err error) {
1147         defer errorHandler(&err)
1148         end := len(format) - 1
1149         // We process one item per non-trivial format
1150         for i := 0; i <= end; {
1151                 w := s.advance(format[i:])
1152                 if w > 0 {
1153                         i += w
1154                         continue
1155                 }
1156                 // Either we failed to advance, we have a percent character, or we ran out of input.
1157                 if format[i] != '%' {
1158                         // Can't advance format.  Why not?
1159                         if w < 0 {
1160                                 s.errorString("input does not match format")
1161                         }
1162                         // Otherwise at EOF; "too many operands" error handled below
1163                         break
1164                 }
1165                 i++ // % is one byte
1166
1167                 // do we have 20 (width)?
1168                 var widPresent bool
1169                 s.maxWid, widPresent, i = parsenum(format, i, end)
1170                 if !widPresent {
1171                         s.maxWid = hugeWid
1172                 }
1173
1174                 c, w := utf8.DecodeRuneInString(format[i:])
1175                 i += w
1176
1177                 if c != 'c' {
1178                         s.SkipSpace()
1179                 }
1180                 s.argLimit = s.limit
1181                 if f := s.count + s.maxWid; f < s.argLimit {
1182                         s.argLimit = f
1183                 }
1184
1185                 if numProcessed >= len(a) { // out of operands
1186                         s.errorString("too few operands for format '%" + format[i-w:] + "'")
1187                         break
1188                 }
1189                 arg := a[numProcessed]
1190
1191                 s.scanOne(c, arg)
1192                 numProcessed++
1193                 s.argLimit = s.limit
1194         }
1195         if numProcessed < len(a) {
1196                 s.errorString("too many operands")
1197         }
1198         return
1199 }