libgo/go/fmt/scan.go

   1 // Copyright 2010 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package fmt
   6
   7 import (
   8         "errors"
   9         "io"
  10         "math"
  11         "os"
  12         "reflect"
  13         "strconv"
  14         "sync"
  15         "unicode/utf8"
  16 )
  17
// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
type ScanState interface {
	// ReadRune reads the next rune (Unicode code point) from the input.
	// If invoked during Scanln, Fscanln, or Sscanln, ReadRune will
	// return EOF after returning the first '\n' or when reading beyond
	// the specified width.
	ReadRune() (r rune, size int, err error)
	// UnreadRune causes the next call to ReadRune to return the same rune.
	UnreadRune() error
	// SkipSpace skips space in the input. Newlines are treated appropriately
	// for the operation being performed; see the package documentation
	// for more information.
	SkipSpace()
	// Token skips space in the input if skipSpace is true, then returns the
	// run of Unicode code points c satisfying f(c). If f is nil,
	// !unicode.IsSpace(c) is used; that is, the token will hold non-space
	// characters. Newlines are treated appropriately for the operation being
	// performed; see the package documentation for more information.
	// The returned slice points to shared data that may be overwritten
	// by the next call to Token, a call to a Scan function using the ScanState
	// as input, or when the calling Scan method returns.
	Token(skipSpace bool, f func(rune) bool) (token []byte, err error)
	// Width returns the value of the width option and whether it has been set.
	// The unit is Unicode code points.
	Width() (wid int, ok bool)
	// Read is present only so that a ScanState also satisfies io.Reader.
	// Because ReadRune is implemented by the interface, Read should never be
	// called by the scanning routines and a valid implementation of
	// ScanState may choose always to return an error from Read.
	Read(buf []byte) (n int, err error)
}
  50
// Scanner is implemented by any value that has a Scan method, which scans
// the input for the representation of a value and stores the result in the
// receiver, which must be a pointer to be useful. The Scan method is called
// for any argument to Scan, Scanf, or Scanln that implements it.
type Scanner interface {
	// Scan reads the value's textual form from state; verb is the format
	// verb under which the value is being scanned.
	Scan(state ScanState, verb rune) error
}
  58
  59 // Scan scans text read from standard input, storing successive
  60 // space-separated values into successive arguments. Newlines count
  61 // as space. It returns the number of items successfully scanned.
  62 // If that is less than the number of arguments, err will report why.
  63 func Scan(a ...any) (n int, err error) {
  64         return Fscan(os.Stdin, a...)
  65 }
  66
  67 // Scanln is similar to Scan, but stops scanning at a newline and
  68 // after the final item there must be a newline or EOF.
  69 func Scanln(a ...any) (n int, err error) {
  70         return Fscanln(os.Stdin, a...)
  71 }
  72
  73 // Scanf scans text read from standard input, storing successive
  74 // space-separated values into successive arguments as determined by
  75 // the format. It returns the number of items successfully scanned.
  76 // If that is less than the number of arguments, err will report why.
  77 // Newlines in the input must match newlines in the format.
  78 // The one exception: the verb %c always scans the next rune in the
  79 // input, even if it is a space (or tab etc.) or newline.
  80 func Scanf(format string, a ...any) (n int, err error) {
  81         return Fscanf(os.Stdin, format, a...)
  82 }
  83
  84 type stringReader string
  85
  86 func (r *stringReader) Read(b []byte) (n int, err error) {
  87         n = copy(b, *r)
  88         *r = (*r)[n:]
  89         if n == 0 {
  90                 err = io.EOF
  91         }
  92         return
  93 }
  94
  95 // Sscan scans the argument string, storing successive space-separated
  96 // values into successive arguments. Newlines count as space. It
  97 // returns the number of items successfully scanned. If that is less
  98 // than the number of arguments, err will report why.
  99 func Sscan(str string, a ...any) (n int, err error) {
 100         return Fscan((*stringReader)(&str), a...)
 101 }
 102
 103 // Sscanln is similar to Sscan, but stops scanning at a newline and
 104 // after the final item there must be a newline or EOF.
 105 func Sscanln(str string, a ...any) (n int, err error) {
 106         return Fscanln((*stringReader)(&str), a...)
 107 }
 108
 109 // Sscanf scans the argument string, storing successive space-separated
 110 // values into successive arguments as determined by the format. It
 111 // returns the number of items successfully parsed.
 112 // Newlines in the input must match newlines in the format.
 113 func Sscanf(str string, format string, a ...any) (n int, err error) {
 114         return Fscanf((*stringReader)(&str), format, a...)
 115 }
 116
 117 // Fscan scans text read from r, storing successive space-separated
 118 // values into successive arguments. Newlines count as space. It
 119 // returns the number of items successfully scanned. If that is less
 120 // than the number of arguments, err will report why.
 121 func Fscan(r io.Reader, a ...any) (n int, err error) {
 122         s, old := newScanState(r, true, false)
 123         n, err = s.doScan(a)
 124         s.free(old)
 125         return
 126 }
 127
 128 // Fscanln is similar to Fscan, but stops scanning at a newline and
 129 // after the final item there must be a newline or EOF.
 130 func Fscanln(r io.Reader, a ...any) (n int, err error) {
 131         s, old := newScanState(r, false, true)
 132         n, err = s.doScan(a)
 133         s.free(old)
 134         return
 135 }
 136
 137 // Fscanf scans text read from r, storing successive space-separated
 138 // values into successive arguments as determined by the format. It
 139 // returns the number of items successfully parsed.
 140 // Newlines in the input must match newlines in the format.
 141 func Fscanf(r io.Reader, format string, a ...any) (n int, err error) {
 142         s, old := newScanState(r, false, false)
 143         n, err = s.doScanf(format, a)
 144         s.free(old)
 145         return
 146 }
 147
// scanError represents an error generated by the scanning software.
// It's used as a unique signature to identify such errors when recovering:
// the ss error helpers panic with a scanError, and recover sites such as
// Token unwrap it back into an ordinary error.
type scanError struct {
	err error // the underlying error being reported
}
 153
// eof is the sentinel rune value that getRune returns in place of io.EOF.
const eof = -1
 155
// ss is the internal implementation of ScanState.
// The embedded ssave carries the per-scan settings (newline handling and
// width limits).
type ss struct {
	rs    io.RuneScanner // where to read input
	buf   buffer         // token accumulator
	count int            // runes consumed so far.
	atEOF bool           // already read EOF
	ssave
}
 164
// ssave holds the parts of ss that need to be
// saved and restored on recursive scans.
// newScanState initializes all three limits to hugeWid, which Width
// treats as "no width set".
type ssave struct {
	validSave bool // is or was a part of an actual ss.
	nlIsEnd   bool // whether newline terminates scan
	nlIsSpace bool // whether newline counts as white space
	argLimit  int  // max value of ss.count for this arg; argLimit <= limit
	limit     int  // max value of ss.count.
	maxWid    int  // width of this arg.
}
 175
 176 // The Read method is only in ScanState so that ScanState
 177 // satisfies io.Reader. It will never be called when used as
 178 // intended, so there is no need to make it actually work.
 179 func (s *ss) Read(buf []byte) (n int, err error) {
 180         return 0, errors.New("ScanState's Read should not be called. Use ReadRune")
 181 }
 182
 183 func (s *ss) ReadRune() (r rune, size int, err error) {
 184         if s.atEOF || s.count >= s.argLimit {
 185                 err = io.EOF
 186                 return
 187         }
 188
 189         r, size, err = s.rs.ReadRune()
 190         if err == nil {
 191                 s.count++
 192                 if s.nlIsEnd && r == '\n' {
 193                         s.atEOF = true
 194                 }
 195         } else if err == io.EOF {
 196                 s.atEOF = true
 197         }
 198         return
 199 }
 200
 201 func (s *ss) Width() (wid int, ok bool) {
 202         if s.maxWid == hugeWid {
 203                 return 0, false
 204         }
 205         return s.maxWid, true
 206 }
 207
 208 // The public method returns an error; this private one panics.
 209 // If getRune reaches EOF, the return value is EOF (-1).
 210 func (s *ss) getRune() (r rune) {
 211         r, _, err := s.ReadRune()
 212         if err != nil {
 213                 if err == io.EOF {
 214                         return eof
 215                 }
 216                 s.error(err)
 217         }
 218         return
 219 }
 220
 221 // mustReadRune turns io.EOF into a panic(io.ErrUnexpectedEOF).
 222 // It is called in cases such as string scanning where an EOF is a
 223 // syntax error.
 224 func (s *ss) mustReadRune() (r rune) {
 225         r = s.getRune()
 226         if r == eof {
 227                 s.error(io.ErrUnexpectedEOF)
 228         }
 229         return
 230 }
 231
 232 func (s *ss) UnreadRune() error {
 233         s.rs.UnreadRune()
 234         s.atEOF = false
 235         s.count--
 236         return nil
 237 }
 238
 239 func (s *ss) error(err error) {
 240         panic(scanError{err})
 241 }
 242
 243 func (s *ss) errorString(err string) {
 244         panic(scanError{errors.New(err)})
 245 }
 246
 247 func (s *ss) Token(skipSpace bool, f func(rune) bool) (tok []byte, err error) {
 248         defer func() {
 249                 if e := recover(); e != nil {
 250                         if se, ok := e.(scanError); ok {
 251                                 err = se.err
 252                         } else {
 253                                 panic(e)
 254                         }
 255                 }
 256         }()
 257         if f == nil {
 258                 f = notSpace
 259         }
 260         s.buf = s.buf[:0]
 261         tok = s.token(skipSpace, f)
 262         return
 263 }
 264
 265 // space is a copy of the unicode.White_Space ranges,
 266 // to avoid depending on package unicode.
 267 var space = [][2]uint16{
 268         {0x0009, 0x000d},
 269         {0x0020, 0x0020},
 270         {0x0085, 0x0085},
 271         {0x00a0, 0x00a0},
 272         {0x1680, 0x1680},
 273         {0x2000, 0x200a},
 274         {0x2028, 0x2029},
 275         {0x202f, 0x202f},
 276         {0x205f, 0x205f},
 277         {0x3000, 0x3000},
 278 }
 279
 280 func isSpace(r rune) bool {
 281         if r >= 1<<16 {
 282                 return false
 283         }
 284         rx := uint16(r)
 285         for _, rng := range space {
 286                 if rx < rng[0] {
 287                         return false
 288                 }
 289                 if rx <= rng[1] {
 290                         return true
 291                 }
 292         }
 293         return false
 294 }
 295
 296 // notSpace is the default scanning function used in Token.
 297 func notSpace(r rune) bool {
 298         return !isSpace(r)
 299 }
 300
 301 // readRune is a structure to enable reading UTF-8 encoded code points
 302 // from an io.Reader. It is used if the Reader given to the scanner does
 303 // not already implement io.RuneScanner.
 304 type readRune struct {
 305         reader   io.Reader
 306         buf      [utf8.UTFMax]byte // used only inside ReadRune
 307         pending  int               // number of bytes in pendBuf; only >0 for bad UTF-8
 308         pendBuf  [utf8.UTFMax]byte // bytes left over
 309         peekRune rune              // if >=0 next rune; when <0 is ^(previous Rune)
 310 }
 311
 312 // readByte returns the next byte from the input, which may be
 313 // left over from a previous read if the UTF-8 was ill-formed.
 314 func (r *readRune) readByte() (b byte, err error) {
 315         if r.pending > 0 {
 316                 b = r.pendBuf[0]
 317                 copy(r.pendBuf[0:], r.pendBuf[1:])
 318                 r.pending--
 319                 return
 320         }
 321         n, err := io.ReadFull(r.reader, r.pendBuf[:1])
 322         if n != 1 {
 323                 return 0, err
 324         }
 325         return r.pendBuf[0], err
 326 }
 327
 328 // ReadRune returns the next UTF-8 encoded code point from the
 329 // io.Reader inside r.
 330 func (r *readRune) ReadRune() (rr rune, size int, err error) {
 331         if r.peekRune >= 0 {
 332                 rr = r.peekRune
 333                 r.peekRune = ^r.peekRune
 334                 size = utf8.RuneLen(rr)
 335                 return
 336         }
 337         r.buf[0], err = r.readByte()
 338         if err != nil {
 339                 return
 340         }
 341         if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
 342                 rr = rune(r.buf[0])
 343                 size = 1 // Known to be 1.
 344                 // Flip the bits of the rune so it's available to UnreadRune.
 345                 r.peekRune = ^rr
 346                 return
 347         }
 348         var n int
 349         for n = 1; !utf8.FullRune(r.buf[:n]); n++ {
 350                 r.buf[n], err = r.readByte()
 351                 if err != nil {
 352                         if err == io.EOF {
 353                                 err = nil
 354                                 break
 355                         }
 356                         return
 357                 }
 358         }
 359         rr, size = utf8.DecodeRune(r.buf[:n])
 360         if size < n { // an error, save the bytes for the next read
 361                 copy(r.pendBuf[r.pending:], r.buf[size:n])
 362                 r.pending += n - size
 363         }
 364         // Flip the bits of the rune so it's available to UnreadRune.
 365         r.peekRune = ^rr
 366         return
 367 }
 368
 369 func (r *readRune) UnreadRune() error {
 370         if r.peekRune >= 0 {
 371                 return errors.New("fmt: scanning called UnreadRune with no rune available")
 372         }
 373         // Reverse bit flip of previously read rune to obtain valid >=0 state.
 374         r.peekRune = ^r.peekRune
 375         return nil
 376 }
 377
// ssFree is a pool of ss structs: newScanState takes one from it and
// free puts it back, avoiding an allocation per scan.
var ssFree = sync.Pool{
	New: func() any { return new(ss) },
}
 381
 382 // newScanState allocates a new ss struct or grab a cached one.
 383 func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
 384         s = ssFree.Get().(*ss)
 385         if rs, ok := r.(io.RuneScanner); ok {
 386                 s.rs = rs
 387         } else {
 388                 s.rs = &readRune{reader: r, peekRune: -1}
 389         }
 390         s.nlIsSpace = nlIsSpace
 391         s.nlIsEnd = nlIsEnd
 392         s.atEOF = false
 393         s.limit = hugeWid
 394         s.argLimit = hugeWid
 395         s.maxWid = hugeWid
 396         s.validSave = true
 397         s.count = 0
 398         return
 399 }
 400
 401 // free saves used ss structs in ssFree; avoid an allocation per invocation.
 402 func (s *ss) free(old ssave) {
 403         // If it was used recursively, just restore the old state.
 404         if old.validSave {
 405                 s.ssave = old
 406                 return
 407         }
 408         // Don't hold on to ss structs with large buffers.
 409         if cap(s.buf) > 1024 {
 410                 return
 411         }
 412         s.buf = s.buf[:0]
 413         s.rs = nil
 414         ssFree.Put(s)
 415 }
 416
 417 // SkipSpace provides Scan methods the ability to skip space and newline
 418 // characters in keeping with the current scanning mode set by format strings
 419 // and Scan/Scanln.
 420 func (s *ss) SkipSpace() {
 421         for {
 422                 r := s.getRune()
 423                 if r == eof {
 424                         return
 425                 }
 426                 if r == '\r' && s.peek("\n") {
 427                         continue
 428                 }
 429                 if r == '\n' {
 430                         if s.nlIsSpace {
 431                                 continue
 432                         }
 433                         s.errorString("unexpected newline")
 434                         return
 435                 }
 436                 if !isSpace(r) {
 437                         s.UnreadRune()
 438                         break
 439                 }
 440         }
 441 }
 442
 443 // token returns the next space-delimited string from the input. It
 444 // skips white space. For Scanln, it stops at newlines. For Scan,
 445 // newlines are treated as spaces.
 446 func (s *ss) token(skipSpace bool, f func(rune) bool) []byte {
 447         if skipSpace {
 448                 s.SkipSpace()
 449         }
 450         // read until white space or newline
 451         for {
 452                 r := s.getRune()
 453                 if r == eof {
 454                         break
 455                 }
 456                 if !f(r) {
 457                         s.UnreadRune()
 458                         break
 459                 }
 460                 s.buf.writeRune(r)
 461         }
 462         return s.buf
 463 }
 464
// complexError is raised by complexTokens when the input does not have
// the expected complex-number shape.
var complexError = errors.New("syntax error scanning complex number")

// boolError is raised by scanBool when a partially spelled "true" or
// "false" is malformed.
var boolError = errors.New("syntax error scanning boolean")
 467
 468 func indexRune(s string, r rune) int {
 469         for i, c := range s {
 470                 if c == r {
 471                         return i
 472                 }
 473         }
 474         return -1
 475 }
 476
 477 // consume reads the next rune in the input and reports whether it is in the ok string.
 478 // If accept is true, it puts the character into the input token.
 479 func (s *ss) consume(ok string, accept bool) bool {
 480         r := s.getRune()
 481         if r == eof {
 482                 return false
 483         }
 484         if indexRune(ok, r) >= 0 {
 485                 if accept {
 486                         s.buf.writeRune(r)
 487                 }
 488                 return true
 489         }
 490         if r != eof && accept {
 491                 s.UnreadRune()
 492         }
 493         return false
 494 }
 495
 496 // peek reports whether the next character is in the ok string, without consuming it.
 497 func (s *ss) peek(ok string) bool {
 498         r := s.getRune()
 499         if r != eof {
 500                 s.UnreadRune()
 501         }
 502         return indexRune(ok, r) >= 0
 503 }
 504
 505 func (s *ss) notEOF() {
 506         // Guarantee there is data to be read.
 507         if r := s.getRune(); r == eof {
 508                 panic(io.EOF)
 509         }
 510         s.UnreadRune()
 511 }
 512
 513 // accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
 514 // buffer and returns true. Otherwise it return false.
 515 func (s *ss) accept(ok string) bool {
 516         return s.consume(ok, true)
 517 }
 518
 519 // okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
 520 func (s *ss) okVerb(verb rune, okVerbs, typ string) bool {
 521         for _, v := range okVerbs {
 522                 if v == verb {
 523                         return true
 524                 }
 525         }
 526         s.errorString("bad verb '%" + string(verb) + "' for " + typ)
 527         return false
 528 }
 529
 530 // scanBool returns the value of the boolean represented by the next token.
 531 func (s *ss) scanBool(verb rune) bool {
 532         s.SkipSpace()
 533         s.notEOF()
 534         if !s.okVerb(verb, "tv", "boolean") {
 535                 return false
 536         }
 537         // Syntax-checking a boolean is annoying. We're not fastidious about case.
 538         switch s.getRune() {
 539         case '0':
 540                 return false
 541         case '1':
 542                 return true
 543         case 't', 'T':
 544                 if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
 545                         s.error(boolError)
 546                 }
 547                 return true
 548         case 'f', 'F':
 549                 if s.accept("aA") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
 550                         s.error(boolError)
 551                 }
 552                 return false
 553         }
 554         return false
 555 }
 556
// Numerical elements: the character sets handed to accept/consume/peek
// when scanning integers and floating-point numbers.
const (
	binaryDigits      = "01"
	octalDigits       = "01234567"
	decimalDigits     = "0123456789"
	hexadecimalDigits = "0123456789aAbBcCdDeEfF"
	sign              = "+-"
	period            = "."
	exponent          = "eEpP" // decimal (e/E) and binary (p/P) exponent markers
)
 567
 568 // getBase returns the numeric base represented by the verb and its digit string.
 569 func (s *ss) getBase(verb rune) (base int, digits string) {
 570         s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
 571         base = 10
 572         digits = decimalDigits
 573         switch verb {
 574         case 'b':
 575                 base = 2
 576                 digits = binaryDigits
 577         case 'o':
 578                 base = 8
 579                 digits = octalDigits
 580         case 'x', 'X', 'U':
 581                 base = 16
 582                 digits = hexadecimalDigits
 583         }
 584         return
 585 }
 586
 587 // scanNumber returns the numerical string with specified digits starting here.
 588 func (s *ss) scanNumber(digits string, haveDigits bool) string {
 589         if !haveDigits {
 590                 s.notEOF()
 591                 if !s.accept(digits) {
 592                         s.errorString("expected integer")
 593                 }
 594         }
 595         for s.accept(digits) {
 596         }
 597         return string(s.buf)
 598 }
 599
 600 // scanRune returns the next rune value in the input.
 601 func (s *ss) scanRune(bitSize int) int64 {
 602         s.notEOF()
 603         r := s.getRune()
 604         n := uint(bitSize)
 605         x := (int64(r) << (64 - n)) >> (64 - n)
 606         if x != int64(r) {
 607                 s.errorString("overflow on character value " + string(r))
 608         }
 609         return int64(r)
 610 }
 611
 612 // scanBasePrefix reports whether the integer begins with a base prefix
 613 // and returns the base, digit string, and whether a zero was found.
 614 // It is called only if the verb is %v.
 615 func (s *ss) scanBasePrefix() (base int, digits string, zeroFound bool) {
 616         if !s.peek("0") {
 617                 return 0, decimalDigits + "_", false
 618         }
 619         s.accept("0")
 620         // Special cases for 0, 0b, 0o, 0x.
 621         switch {
 622         case s.peek("bB"):
 623                 s.consume("bB", true)
 624                 return 0, binaryDigits + "_", true
 625         case s.peek("oO"):
 626                 s.consume("oO", true)
 627                 return 0, octalDigits + "_", true
 628         case s.peek("xX"):
 629                 s.consume("xX", true)
 630                 return 0, hexadecimalDigits + "_", true
 631         default:
 632                 return 0, octalDigits + "_", true
 633         }
 634 }
 635
 636 // scanInt returns the value of the integer represented by the next
 637 // token, checking for overflow. Any error is stored in s.err.
 638 func (s *ss) scanInt(verb rune, bitSize int) int64 {
 639         if verb == 'c' {
 640                 return s.scanRune(bitSize)
 641         }
 642         s.SkipSpace()
 643         s.notEOF()
 644         base, digits := s.getBase(verb)
 645         haveDigits := false
 646         if verb == 'U' {
 647                 if !s.consume("U", false) || !s.consume("+", false) {
 648                         s.errorString("bad unicode format ")
 649                 }
 650         } else {
 651                 s.accept(sign) // If there's a sign, it will be left in the token buffer.
 652                 if verb == 'v' {
 653                         base, digits, haveDigits = s.scanBasePrefix()
 654                 }
 655         }
 656         tok := s.scanNumber(digits, haveDigits)
 657         i, err := strconv.ParseInt(tok, base, 64)
 658         if err != nil {
 659                 s.error(err)
 660         }
 661         n := uint(bitSize)
 662         x := (i << (64 - n)) >> (64 - n)
 663         if x != i {
 664                 s.errorString("integer overflow on token " + tok)
 665         }
 666         return i
 667 }
 668
 669 // scanUint returns the value of the unsigned integer represented
 670 // by the next token, checking for overflow. Any error is stored in s.err.
 671 func (s *ss) scanUint(verb rune, bitSize int) uint64 {
 672         if verb == 'c' {
 673                 return uint64(s.scanRune(bitSize))
 674         }
 675         s.SkipSpace()
 676         s.notEOF()
 677         base, digits := s.getBase(verb)
 678         haveDigits := false
 679         if verb == 'U' {
 680                 if !s.consume("U", false) || !s.consume("+", false) {
 681                         s.errorString("bad unicode format ")
 682                 }
 683         } else if verb == 'v' {
 684                 base, digits, haveDigits = s.scanBasePrefix()
 685         }
 686         tok := s.scanNumber(digits, haveDigits)
 687         i, err := strconv.ParseUint(tok, base, 64)
 688         if err != nil {
 689                 s.error(err)
 690         }
 691         n := uint(bitSize)
 692         x := (i << (64 - n)) >> (64 - n)
 693         if x != i {
 694                 s.errorString("unsigned integer overflow on token " + tok)
 695         }
 696         return i
 697 }
 698
 699 // floatToken returns the floating-point number starting here, no longer than swid
 700 // if the width is specified. It's not rigorous about syntax because it doesn't check that
 701 // we have at least some digits, but Atof will do that.
 702 func (s *ss) floatToken() string {
 703         s.buf = s.buf[:0]
 704         // NaN?
 705         if s.accept("nN") && s.accept("aA") && s.accept("nN") {
 706                 return string(s.buf)
 707         }
 708         // leading sign?
 709         s.accept(sign)
 710         // Inf?
 711         if s.accept("iI") && s.accept("nN") && s.accept("fF") {
 712                 return string(s.buf)
 713         }
 714         digits := decimalDigits + "_"
 715         exp := exponent
 716         if s.accept("0") && s.accept("xX") {
 717                 digits = hexadecimalDigits + "_"
 718                 exp = "pP"
 719         }
 720         // digits?
 721         for s.accept(digits) {
 722         }
 723         // decimal point?
 724         if s.accept(period) {
 725                 // fraction?
 726                 for s.accept(digits) {
 727                 }
 728         }
 729         // exponent?
 730         if s.accept(exp) {
 731                 // leading sign?
 732                 s.accept(sign)
 733                 // digits?
 734                 for s.accept(decimalDigits + "_") {
 735                 }
 736         }
 737         return string(s.buf)
 738 }
 739
 740 // complexTokens returns the real and imaginary parts of the complex number starting here.
 741 // The number might be parenthesized and has the format (N+Ni) where N is a floating-point
 742 // number and there are no spaces within.
 743 func (s *ss) complexTokens() (real, imag string) {
 744         // TODO: accept N and Ni independently?
 745         parens := s.accept("(")
 746         real = s.floatToken()
 747         s.buf = s.buf[:0]
 748         // Must now have a sign.
 749         if !s.accept("+-") {
 750                 s.error(complexError)
 751         }
 752         // Sign is now in buffer
 753         imagSign := string(s.buf)
 754         imag = s.floatToken()
 755         if !s.accept("i") {
 756                 s.error(complexError)
 757         }
 758         if parens && !s.accept(")") {
 759                 s.error(complexError)
 760         }
 761         return real, imagSign + imag
 762 }
 763
 764 func hasX(s string) bool {
 765         for i := 0; i < len(s); i++ {
 766                 if s[i] == 'x' || s[i] == 'X' {
 767                         return true
 768                 }
 769         }
 770         return false
 771 }
 772
 773 // convertFloat converts the string to a float64value.
 774 func (s *ss) convertFloat(str string, n int) float64 {
 775         // strconv.ParseFloat will handle "+0x1.fp+2",
 776         // but we have to implement our non-standard
 777         // decimal+binary exponent mix (1.2p4) ourselves.
 778         if p := indexRune(str, 'p'); p >= 0 && !hasX(str) {
 779                 // Atof doesn't handle power-of-2 exponents,
 780                 // but they're easy to evaluate.
 781                 f, err := strconv.ParseFloat(str[:p], n)
 782                 if err != nil {
 783                         // Put full string into error.
 784                         if e, ok := err.(*strconv.NumError); ok {
 785                                 e.Num = str
 786                         }
 787                         s.error(err)
 788                 }
 789                 m, err := strconv.Atoi(str[p+1:])
 790                 if err != nil {
 791                         // Put full string into error.
 792                         if e, ok := err.(*strconv.NumError); ok {
 793                                 e.Num = str
 794                         }
 795                         s.error(err)
 796                 }
 797                 return math.Ldexp(f, m)
 798         }
 799         f, err := strconv.ParseFloat(str, n)
 800         if err != nil {
 801                 s.error(err)
 802         }
 803         return f
 804 }
 805
 806 // convertComplex converts the next token to a complex128 value.
 807 // The atof argument is a type-specific reader for the underlying type.
 808 // If we're reading complex64, atof will parse float32s and convert them
 809 // to float64's to avoid reproducing this code for each complex type.
 810 func (s *ss) scanComplex(verb rune, n int) complex128 {
 811         if !s.okVerb(verb, floatVerbs, "complex") {
 812                 return 0
 813         }
 814         s.SkipSpace()
 815         s.notEOF()
 816         sreal, simag := s.complexTokens()
 817         real := s.convertFloat(sreal, n/2)
 818         imag := s.convertFloat(simag, n/2)
 819         return complex(real, imag)
 820 }
 821
 822 // convertString returns the string represented by the next input characters.
 823 // The format of the input is determined by the verb.
 824 func (s *ss) convertString(verb rune) (str string) {
 825         if !s.okVerb(verb, "svqxX", "string") {
 826                 return ""
 827         }
 828         s.SkipSpace()
 829         s.notEOF()
 830         switch verb {
 831         case 'q':
 832                 str = s.quotedString()
 833         case 'x', 'X':
 834                 str = s.hexString()
 835         default:
 836                 str = string(s.token(true, notSpace)) // %s and %v just return the next word
 837         }
 838         return
 839 }
 840
 841 // quotedString returns the double- or back-quoted string represented by the next input characters.
 842 func (s *ss) quotedString() string {
 843         s.notEOF()
 844         quote := s.getRune()
 845         switch quote {
 846         case '`':
 847                 // Back-quoted: Anything goes until EOF or back quote.
 848                 for {
 849                         r := s.mustReadRune()
 850                         if r == quote {
 851                                 break
 852                         }
 853                         s.buf.writeRune(r)
 854                 }
 855                 return string(s.buf)
 856         case '"':
 857                 // Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
 858                 s.buf.writeByte('"')
 859                 for {
 860                         r := s.mustReadRune()
 861                         s.buf.writeRune(r)
 862                         if r == '\\' {
 863                                 // In a legal backslash escape, no matter how long, only the character
 864                                 // immediately after the escape can itself be a backslash or quote.
 865                                 // Thus we only need to protect the first character after the backslash.
 866                                 s.buf.writeRune(s.mustReadRune())
 867                         } else if r == '"' {
 868                                 break
 869                         }
 870                 }
 871                 result, err := strconv.Unquote(string(s.buf))
 872                 if err != nil {
 873                         s.error(err)
 874                 }
 875                 return result
 876         default:
 877                 s.errorString("expected quoted string")
 878         }
 879         return ""
 880 }
 881
 882 // hexDigit returns the value of the hexadecimal digit.
 883 func hexDigit(d rune) (int, bool) {
 884         digit := int(d)
 885         switch digit {
 886         case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 887                 return digit - '0', true
 888         case 'a', 'b', 'c', 'd', 'e', 'f':
 889                 return 10 + digit - 'a', true
 890         case 'A', 'B', 'C', 'D', 'E', 'F':
 891                 return 10 + digit - 'A', true
 892         }
 893         return -1, false
 894 }
 895
 896 // hexByte returns the next hex-encoded (two-character) byte from the input.
 897 // It returns ok==false if the next bytes in the input do not encode a hex byte.
 898 // If the first byte is hex and the second is not, processing stops.
 899 func (s *ss) hexByte() (b byte, ok bool) {
 900         rune1 := s.getRune()
 901         if rune1 == eof {
 902                 return
 903         }
 904         value1, ok := hexDigit(rune1)
 905         if !ok {
 906                 s.UnreadRune()
 907                 return
 908         }
 909         value2, ok := hexDigit(s.mustReadRune())
 910         if !ok {
 911                 s.errorString("illegal hex digit")
 912                 return
 913         }
 914         return byte(value1<<4 | value2), true
 915 }
 916
 917 // hexString returns the space-delimited hexpair-encoded string.
 918 func (s *ss) hexString() string {
 919         s.notEOF()
 920         for {
 921                 b, ok := s.hexByte()
 922                 if !ok {
 923                         break
 924                 }
 925                 s.buf.writeByte(b)
 926         }
 927         if len(s.buf) == 0 {
 928                 s.errorString("no hex data for %x string")
 929                 return ""
 930         }
 931         return string(s.buf)
 932 }
 933
 934 const (
 935         floatVerbs = "beEfFgGv"
 936
 937         hugeWid = 1 << 30
 938
 939         intBits     = 32 << (^uint(0) >> 63)
 940         uintptrBits = 32 << (^uintptr(0) >> 63)
 941 )
 942
 943 // scanPercent scans a literal percent character.
 944 func (s *ss) scanPercent() {
 945         s.SkipSpace()
 946         s.notEOF()
 947         if !s.accept("%") {
 948                 s.errorString("missing literal %")
 949         }
 950 }
 951
 952 // scanOne scans a single value, deriving the scanner from the type of the argument.
 953 func (s *ss) scanOne(verb rune, arg any) {
 954         s.buf = s.buf[:0]
 955         var err error
 956         // If the parameter has its own Scan method, use that.
 957         if v, ok := arg.(Scanner); ok {
 958                 err = v.Scan(s, verb)
 959                 if err != nil {
 960                         if err == io.EOF {
 961                                 err = io.ErrUnexpectedEOF
 962                         }
 963                         s.error(err)
 964                 }
 965                 return
 966         }
 967
 968         switch v := arg.(type) {
 969         case *bool:
 970                 *v = s.scanBool(verb)
 971         case *complex64:
 972                 *v = complex64(s.scanComplex(verb, 64))
 973         case *complex128:
 974                 *v = s.scanComplex(verb, 128)
 975         case *int:
 976                 *v = int(s.scanInt(verb, intBits))
 977         case *int8:
 978                 *v = int8(s.scanInt(verb, 8))
 979         case *int16:
 980                 *v = int16(s.scanInt(verb, 16))
 981         case *int32:
 982                 *v = int32(s.scanInt(verb, 32))
 983         case *int64:
 984                 *v = s.scanInt(verb, 64)
 985         case *uint:
 986                 *v = uint(s.scanUint(verb, intBits))
 987         case *uint8:
 988                 *v = uint8(s.scanUint(verb, 8))
 989         case *uint16:
 990                 *v = uint16(s.scanUint(verb, 16))
 991         case *uint32:
 992                 *v = uint32(s.scanUint(verb, 32))
 993         case *uint64:
 994                 *v = s.scanUint(verb, 64)
 995         case *uintptr:
 996                 *v = uintptr(s.scanUint(verb, uintptrBits))
 997         // Floats are tricky because you want to scan in the precision of the result, not
 998         // scan in high precision and convert, in order to preserve the correct error condition.
 999         case *float32:
1000                 if s.okVerb(verb, floatVerbs, "float32") {
1001                         s.SkipSpace()
1002                         s.notEOF()
1003                         *v = float32(s.convertFloat(s.floatToken(), 32))
1004                 }
1005         case *float64:
1006                 if s.okVerb(verb, floatVerbs, "float64") {
1007                         s.SkipSpace()
1008                         s.notEOF()
1009                         *v = s.convertFloat(s.floatToken(), 64)
1010                 }
1011         case *string:
1012                 *v = s.convertString(verb)
1013         case *[]byte:
1014                 // We scan to string and convert so we get a copy of the data.
1015                 // If we scanned to bytes, the slice would point at the buffer.
1016                 *v = []byte(s.convertString(verb))
1017         default:
1018                 val := reflect.ValueOf(v)
1019                 ptr := val
1020                 if ptr.Kind() != reflect.Pointer {
1021                         s.errorString("type not a pointer: " + val.Type().String())
1022                         return
1023                 }
1024                 switch v := ptr.Elem(); v.Kind() {
1025                 case reflect.Bool:
1026                         v.SetBool(s.scanBool(verb))
1027                 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
1028                         v.SetInt(s.scanInt(verb, v.Type().Bits()))
1029                 case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
1030                         v.SetUint(s.scanUint(verb, v.Type().Bits()))
1031                 case reflect.String:
1032                         v.SetString(s.convertString(verb))
1033                 case reflect.Slice:
1034                         // For now, can only handle (renamed) []byte.
1035                         typ := v.Type()
1036                         if typ.Elem().Kind() != reflect.Uint8 {
1037                                 s.errorString("can't scan type: " + val.Type().String())
1038                         }
1039                         str := s.convertString(verb)
1040                         v.Set(reflect.MakeSlice(typ, len(str), len(str)))
1041                         for i := 0; i < len(str); i++ {
1042                                 v.Index(i).SetUint(uint64(str[i]))
1043                         }
1044                 case reflect.Float32, reflect.Float64:
1045                         s.SkipSpace()
1046                         s.notEOF()
1047                         v.SetFloat(s.convertFloat(s.floatToken(), v.Type().Bits()))
1048                 case reflect.Complex64, reflect.Complex128:
1049                         v.SetComplex(s.scanComplex(verb, v.Type().Bits()))
1050                 default:
1051                         s.errorString("can't scan type: " + val.Type().String())
1052                 }
1053         }
1054 }
1055
1056 // errorHandler turns local panics into error returns.
1057 func errorHandler(errp *error) {
1058         if e := recover(); e != nil {
1059                 if se, ok := e.(scanError); ok { // catch local error
1060                         *errp = se.err
1061                 } else if eof, ok := e.(error); ok && eof == io.EOF { // out of input
1062                         *errp = eof
1063                 } else {
1064                         panic(e)
1065                 }
1066         }
1067 }
1068
1069 // doScan does the real work for scanning without a format string.
1070 func (s *ss) doScan(a []any) (numProcessed int, err error) {
1071         defer errorHandler(&err)
1072         for _, arg := range a {
1073                 s.scanOne('v', arg)
1074                 numProcessed++
1075         }
1076         // Check for newline (or EOF) if required (Scanln etc.).
1077         if s.nlIsEnd {
1078                 for {
1079                         r := s.getRune()
1080                         if r == '\n' || r == eof {
1081                                 break
1082                         }
1083                         if !isSpace(r) {
1084                                 s.errorString("expected newline")
1085                                 break
1086                         }
1087                 }
1088         }
1089         return
1090 }
1091
1092 // advance determines whether the next characters in the input match
1093 // those of the format. It returns the number of bytes (sic) consumed
1094 // in the format. All runs of space characters in either input or
1095 // format behave as a single space. Newlines are special, though:
1096 // newlines in the format must match those in the input and vice versa.
1097 // This routine also handles the %% case. If the return value is zero,
1098 // either format starts with a % (with no following %) or the input
1099 // is empty. If it is negative, the input did not match the string.
1100 func (s *ss) advance(format string) (i int) {
1101         for i < len(format) {
1102                 fmtc, w := utf8.DecodeRuneInString(format[i:])
1103
1104                 // Space processing.
1105                 // In the rest of this comment "space" means spaces other than newline.
1106                 // Newline in the format matches input of zero or more spaces and then newline or end-of-input.
1107                 // Spaces in the format before the newline are collapsed into the newline.
1108                 // Spaces in the format after the newline match zero or more spaces after the corresponding input newline.
1109                 // Other spaces in the format match input of one or more spaces or end-of-input.
1110                 if isSpace(fmtc) {
1111                         newlines := 0
1112                         trailingSpace := false
1113                         for isSpace(fmtc) && i < len(format) {
1114                                 if fmtc == '\n' {
1115                                         newlines++
1116                                         trailingSpace = false
1117                                 } else {
1118                                         trailingSpace = true
1119                                 }
1120                                 i += w
1121                                 fmtc, w = utf8.DecodeRuneInString(format[i:])
1122                         }
1123                         for j := 0; j < newlines; j++ {
1124                                 inputc := s.getRune()
1125                                 for isSpace(inputc) && inputc != '\n' {
1126                                         inputc = s.getRune()
1127                                 }
1128                                 if inputc != '\n' && inputc != eof {
1129                                         s.errorString("newline in format does not match input")
1130                                 }
1131                         }
1132                         if trailingSpace {
1133                                 inputc := s.getRune()
1134                                 if newlines == 0 {
1135                                         // If the trailing space stood alone (did not follow a newline),
1136                                         // it must find at least one space to consume.
1137                                         if !isSpace(inputc) && inputc != eof {
1138                                                 s.errorString("expected space in input to match format")
1139                                         }
1140                                         if inputc == '\n' {
1141                                                 s.errorString("newline in input does not match format")
1142                                         }
1143                                 }
1144                                 for isSpace(inputc) && inputc != '\n' {
1145                                         inputc = s.getRune()
1146                                 }
1147                                 if inputc != eof {
1148                                         s.UnreadRune()
1149                                 }
1150                         }
1151                         continue
1152                 }
1153
1154                 // Verbs.
1155                 if fmtc == '%' {
1156                         // % at end of string is an error.
1157                         if i+w == len(format) {
1158                                 s.errorString("missing verb: % at end of format string")
1159                         }
1160                         // %% acts like a real percent
1161                         nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
1162                         if nextc != '%' {
1163                                 return
1164                         }
1165                         i += w // skip the first %
1166                 }
1167
1168                 // Literals.
1169                 inputc := s.mustReadRune()
1170                 if fmtc != inputc {
1171                         s.UnreadRune()
1172                         return -1
1173                 }
1174                 i += w
1175         }
1176         return
1177 }
1178
1179 // doScanf does the real work when scanning with a format string.
1180 // At the moment, it handles only pointers to basic types.
1181 func (s *ss) doScanf(format string, a []any) (numProcessed int, err error) {
1182         defer errorHandler(&err)
1183         end := len(format) - 1
1184         // We process one item per non-trivial format
1185         for i := 0; i <= end; {
1186                 w := s.advance(format[i:])
1187                 if w > 0 {
1188                         i += w
1189                         continue
1190                 }
1191                 // Either we failed to advance, we have a percent character, or we ran out of input.
1192                 if format[i] != '%' {
1193                         // Can't advance format. Why not?
1194                         if w < 0 {
1195                                 s.errorString("input does not match format")
1196                         }
1197                         // Otherwise at EOF; "too many operands" error handled below
1198                         break
1199                 }
1200                 i++ // % is one byte
1201
1202                 // do we have 20 (width)?
1203                 var widPresent bool
1204                 s.maxWid, widPresent, i = parsenum(format, i, end)
1205                 if !widPresent {
1206                         s.maxWid = hugeWid
1207                 }
1208
1209                 c, w := utf8.DecodeRuneInString(format[i:])
1210                 i += w
1211
1212                 if c != 'c' {
1213                         s.SkipSpace()
1214                 }
1215                 if c == '%' {
1216                         s.scanPercent()
1217                         continue // Do not consume an argument.
1218                 }
1219                 s.argLimit = s.limit
1220                 if f := s.count + s.maxWid; f < s.argLimit {
1221                         s.argLimit = f
1222                 }
1223
1224                 if numProcessed >= len(a) { // out of operands
1225                         s.errorString("too few operands for format '%" + format[i-w:] + "'")
1226                         break
1227                 }
1228                 arg := a[numProcessed]
1229
1230                 s.scanOne(c, arg)
1231                 numProcessed++
1232                 s.argLimit = s.limit
1233         }
1234         if numProcessed < len(a) {
1235                 s.errorString("too many operands")
1236         }
1237         return
1238 }