libgo/go/go/scanner/scanner_test.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package scanner
   6
   7 import (
   8         "go/token"
   9         "io/ioutil"
  10         "os"
  11         "path/filepath"
  12         "runtime"
  13         "testing"
  14 )
  15
// fset is the file set that the tests in this file add their sources to;
// checkPos resolves token positions through it.
var fset = token.NewFileSet()
  17
  18 const /* class */ (
  19         special = iota
  20         literal
  21         operator
  22         keyword
  23 )
  24
  25 func tokenclass(tok token.Token) int {
  26         switch {
  27         case tok.IsLiteral():
  28                 return literal
  29         case tok.IsOperator():
  30                 return operator
  31         case tok.IsKeyword():
  32                 return keyword
  33         }
  34         return special
  35 }
  36
// elt describes one token of the scanner test input together with the
// results expected from scanning it.
type elt struct {
	tok   token.Token // token the scanner is expected to return
	lit   string      // source text of the token
	class int         // expected result of tokenclass for the scanned token
}

// tokens lists the test tokens in the order in which they appear in source.
// TestScan scans source and expects to get these tokens back one by one,
// followed by EOF.
var tokens = [...]elt{
	// Special tokens
	{token.COMMENT, "/* a comment */", special},
	{token.COMMENT, "// a comment \n", special},
	{token.COMMENT, "/*\r*/", special},
	{token.COMMENT, "//\r\n", special},

	// Identifiers and basic type literals
	{token.IDENT, "foobar", literal},
	{token.IDENT, "a۰۱۸", literal},
	{token.IDENT, "foo६४", literal},
	{token.IDENT, "bar９８７６", literal},
	{token.IDENT, "ŝ", literal},    // was bug (issue 4000)
	{token.IDENT, "ŝfoo", literal}, // was bug (issue 4000)
	{token.INT, "0", literal},
	{token.INT, "1", literal},
	{token.INT, "123456789012345678890", literal},
	{token.INT, "01234567", literal},
	{token.INT, "0xcafebabe", literal},
	{token.FLOAT, "0.", literal},
	{token.FLOAT, ".0", literal},
	{token.FLOAT, "3.14159265", literal},
	{token.FLOAT, "1e0", literal},
	{token.FLOAT, "1e+100", literal},
	{token.FLOAT, "1e-100", literal},
	{token.FLOAT, "2.71828e-1000", literal},
	{token.IMAG, "0i", literal},
	{token.IMAG, "1i", literal},
	{token.IMAG, "012345678901234567889i", literal},
	{token.IMAG, "123456789012345678890i", literal},
	{token.IMAG, "0.i", literal},
	{token.IMAG, ".0i", literal},
	{token.IMAG, "3.14159265i", literal},
	{token.IMAG, "1e0i", literal},
	{token.IMAG, "1e+100i", literal},
	{token.IMAG, "1e-100i", literal},
	{token.IMAG, "2.71828e-1000i", literal},
	{token.CHAR, "'a'", literal},
	{token.CHAR, "'\\000'", literal},
	{token.CHAR, "'\\xFF'", literal},
	{token.CHAR, "'\\uff16'", literal},
	{token.CHAR, "'\\U0000ff16'", literal},
	{token.STRING, "`foobar`", literal},
	// raw string literal spanning two source lines
	{token.STRING, "`" + `foo
                                bar` +
		"`",
		literal,
	},
	{token.STRING, "`\r`", literal},
	{token.STRING, "`foo\r\nbar`", literal},

	// Operators and delimiters
	{token.ADD, "+", operator},
	{token.SUB, "-", operator},
	{token.MUL, "*", operator},
	{token.QUO, "/", operator},
	{token.REM, "%", operator},

	{token.AND, "&", operator},
	{token.OR, "|", operator},
	{token.XOR, "^", operator},
	{token.SHL, "<<", operator},
	{token.SHR, ">>", operator},
	{token.AND_NOT, "&^", operator},

	{token.ADD_ASSIGN, "+=", operator},
	{token.SUB_ASSIGN, "-=", operator},
	{token.MUL_ASSIGN, "*=", operator},
	{token.QUO_ASSIGN, "/=", operator},
	{token.REM_ASSIGN, "%=", operator},

	{token.AND_ASSIGN, "&=", operator},
	{token.OR_ASSIGN, "|=", operator},
	{token.XOR_ASSIGN, "^=", operator},
	{token.SHL_ASSIGN, "<<=", operator},
	{token.SHR_ASSIGN, ">>=", operator},
	{token.AND_NOT_ASSIGN, "&^=", operator},

	{token.LAND, "&&", operator},
	{token.LOR, "||", operator},
	{token.ARROW, "<-", operator},
	{token.INC, "++", operator},
	{token.DEC, "--", operator},

	{token.EQL, "==", operator},
	{token.LSS, "<", operator},
	{token.GTR, ">", operator},
	{token.ASSIGN, "=", operator},
	{token.NOT, "!", operator},

	{token.NEQ, "!=", operator},
	{token.LEQ, "<=", operator},
	{token.GEQ, ">=", operator},
	{token.DEFINE, ":=", operator},
	{token.ELLIPSIS, "...", operator},

	{token.LPAREN, "(", operator},
	{token.LBRACK, "[", operator},
	{token.LBRACE, "{", operator},
	{token.COMMA, ",", operator},
	{token.PERIOD, ".", operator},

	{token.RPAREN, ")", operator},
	{token.RBRACK, "]", operator},
	{token.RBRACE, "}", operator},
	{token.SEMICOLON, ";", operator},
	{token.COLON, ":", operator},

	// Keywords
	{token.BREAK, "break", keyword},
	{token.CASE, "case", keyword},
	{token.CHAN, "chan", keyword},
	{token.CONST, "const", keyword},
	{token.CONTINUE, "continue", keyword},

	{token.DEFAULT, "default", keyword},
	{token.DEFER, "defer", keyword},
	{token.ELSE, "else", keyword},
	{token.FALLTHROUGH, "fallthrough", keyword},
	{token.FOR, "for", keyword},

	{token.FUNC, "func", keyword},
	{token.GO, "go", keyword},
	{token.GOTO, "goto", keyword},
	{token.IF, "if", keyword},
	{token.IMPORT, "import", keyword},

	{token.INTERFACE, "interface", keyword},
	{token.MAP, "map", keyword},
	{token.PACKAGE, "package", keyword},
	{token.RANGE, "range", keyword},
	{token.RETURN, "return", keyword},

	{token.SELECT, "select", keyword},
	{token.STRUCT, "struct", keyword},
	{token.SWITCH, "switch", keyword},
	{token.TYPE, "type", keyword},
	{token.VAR, "var", keyword},
}
 182
 183 const whitespace = "  \t  \n\n\n" // to separate tokens
 184
 185 var source = func() []byte {
 186         var src []byte
 187         for _, t := range tokens {
 188                 src = append(src, t.lit...)
 189                 src = append(src, whitespace...)
 190         }
 191         return src
 192 }()
 193
 194 func newlineCount(s string) int {
 195         n := 0
 196         for i := 0; i < len(s); i++ {
 197                 if s[i] == '\n' {
 198                         n++
 199                 }
 200         }
 201         return n
 202 }
 203
 204 func checkPos(t *testing.T, lit string, p token.Pos, expected token.Position) {
 205         pos := fset.Position(p)
 206         if pos.Filename != expected.Filename {
 207                 t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename)
 208         }
 209         if pos.Offset != expected.Offset {
 210                 t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset)
 211         }
 212         if pos.Line != expected.Line {
 213                 t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line)
 214         }
 215         if pos.Column != expected.Column {
 216                 t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column)
 217         }
 218 }
 219
 220 // Verify that calling Scan() provides the correct results.
 221 func TestScan(t *testing.T) {
 222         whitespace_linecount := newlineCount(whitespace)
 223
 224         // error handler
 225         eh := func(_ token.Position, msg string) {
 226                 t.Errorf("error handler called (msg = %s)", msg)
 227         }
 228
 229         // verify scan
 230         var s Scanner
 231         s.Init(fset.AddFile("", fset.Base(), len(source)), source, eh, ScanComments|dontInsertSemis)
 232
 233         // set up expected position
 234         epos := token.Position{
 235                 Filename: "",
 236                 Offset:   0,
 237                 Line:     1,
 238                 Column:   1,
 239         }
 240
 241         index := 0
 242         for {
 243                 pos, tok, lit := s.Scan()
 244
 245                 // check position
 246                 if tok == token.EOF {
 247                         // correction for EOF
 248                         epos.Line = newlineCount(string(source))
 249                         epos.Column = 2
 250                 }
 251                 checkPos(t, lit, pos, epos)
 252
 253                 // check token
 254                 e := elt{token.EOF, "", special}
 255                 if index < len(tokens) {
 256                         e = tokens[index]
 257                         index++
 258                 }
 259                 if tok != e.tok {
 260                         t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok)
 261                 }
 262
 263                 // check token class
 264                 if tokenclass(tok) != e.class {
 265                         t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
 266                 }
 267
 268                 // check literal
 269                 elit := ""
 270                 switch e.tok {
 271                 case token.COMMENT:
 272                         // no CRs in comments
 273                         elit = string(stripCR([]byte(e.lit)))
 274                         //-style comment literal doesn't contain newline
 275                         if elit[1] == '/' {
 276                                 elit = elit[0 : len(elit)-1]
 277                         }
 278                 case token.IDENT:
 279                         elit = e.lit
 280                 case token.SEMICOLON:
 281                         elit = ";"
 282                 default:
 283                         if e.tok.IsLiteral() {
 284                                 // no CRs in raw string literals
 285                                 elit = e.lit
 286                                 if elit[0] == '`' {
 287                                         elit = string(stripCR([]byte(elit)))
 288                                 }
 289                         } else if e.tok.IsKeyword() {
 290                                 elit = e.lit
 291                         }
 292                 }
 293                 if lit != elit {
 294                         t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit)
 295                 }
 296
 297                 if tok == token.EOF {
 298                         break
 299                 }
 300
 301                 // update position
 302                 epos.Offset += len(e.lit) + len(whitespace)
 303                 epos.Line += newlineCount(e.lit) + whitespace_linecount
 304
 305         }
 306
 307         if s.ErrorCount != 0 {
 308                 t.Errorf("found %d errors", s.ErrorCount)
 309         }
 310 }
 311
 312 func checkSemi(t *testing.T, line string, mode Mode) {
 313         var S Scanner
 314         file := fset.AddFile("TestSemis", fset.Base(), len(line))
 315         S.Init(file, []byte(line), nil, mode)
 316         pos, tok, lit := S.Scan()
 317         for tok != token.EOF {
 318                 if tok == token.ILLEGAL {
 319                         // the illegal token literal indicates what
 320                         // kind of semicolon literal to expect
 321                         semiLit := "\n"
 322                         if lit[0] == '#' {
 323                                 semiLit = ";"
 324                         }
 325                         // next token must be a semicolon
 326                         semiPos := file.Position(pos)
 327                         semiPos.Offset++
 328                         semiPos.Column++
 329                         pos, tok, lit = S.Scan()
 330                         if tok == token.SEMICOLON {
 331                                 if lit != semiLit {
 332                                         t.Errorf(`bad literal for %q: got %q, expected %q`, line, lit, semiLit)
 333                                 }
 334                                 checkPos(t, line, pos, semiPos)
 335                         } else {
 336                                 t.Errorf("bad token for %q: got %s, expected ;", line, tok)
 337                         }
 338                 } else if tok == token.SEMICOLON {
 339                         t.Errorf("bad token for %q: got ;, expected no ;", line)
 340                 }
 341                 pos, tok, lit = S.Scan()
 342         }
 343 }
 344
// lines holds the inputs for TestSemis. The characters '#' and '$' are
// markers interpreted by checkSemi: each marks the place where the next
// token must be a semicolon.
var lines = []string{
	// # indicates a semicolon present in the source
	// $ indicates an automatically inserted semicolon
	"",
	"\ufeff#;", // first BOM is ignored
	"#;",
	"foo$\n",
	"123$\n",
	"1.2$\n",
	"'x'$\n",
	`"x"` + "$\n",
	"`x`$\n",

	"+\n",
	"-\n",
	"*\n",
	"/\n",
	"%\n",

	"&\n",
	"|\n",
	"^\n",
	"<<\n",
	">>\n",
	"&^\n",

	"+=\n",
	"-=\n",
	"*=\n",
	"/=\n",
	"%=\n",

	"&=\n",
	"|=\n",
	"^=\n",
	"<<=\n",
	">>=\n",
	"&^=\n",

	"&&\n",
	"||\n",
	"<-\n",
	"++$\n",
	"--$\n",

	"==\n",
	"<\n",
	">\n",
	"=\n",
	"!\n",

	"!=\n",
	"<=\n",
	">=\n",
	":=\n",
	"...\n",

	"(\n",
	"[\n",
	"{\n",
	",\n",
	".\n",

	")$\n",
	"]$\n",
	"}$\n",
	"#;\n",
	":\n",

	"break$\n",
	"case\n",
	"chan\n",
	"const\n",
	"continue$\n",

	"default\n",
	"defer\n",
	"else\n",
	"fallthrough$\n",
	"for\n",

	"func\n",
	"go\n",
	"goto\n",
	"if\n",
	"import\n",

	"interface\n",
	"map\n",
	"package\n",
	"range\n",
	"return$\n",

	"select\n",
	"struct\n",
	"switch\n",
	"type\n",
	"var\n",

	"foo$//comment\n",
	"foo$//comment",
	"foo$/*comment*/\n",
	"foo$/*\n*/",
	"foo$/*comment*/    \n",
	"foo$/*\n*/    ",

	"foo    $// comment\n",
	"foo    $// comment",
	"foo    $/*comment*/\n",
	"foo    $/*\n*/",
	"foo    $/*  */ /* \n */ bar$/**/\n",
	"foo    $/*0*/ /*1*/ /*2*/\n",

	"foo    $/*comment*/    \n",
	"foo    $/*0*/ /*1*/ /*2*/    \n",
	"foo    $/**/ /*-------------*/       /*----\n*/bar       $/*  \n*/baa$\n",
	"foo    $/* an EOF terminates a line */",
	"foo    $/* an EOF terminates a line */ /*",
	"foo    $/* an EOF terminates a line */ //",

	"package main$\n\nfunc main() {\n\tif {\n\t\treturn /* */ }$\n}$\n",
	"package main$",
}
 468
 469 func TestSemis(t *testing.T) {
 470         for _, line := range lines {
 471                 checkSemi(t, line, 0)
 472                 checkSemi(t, line, ScanComments)
 473
 474                 // if the input ended in newlines, the input must tokenize the
 475                 // same with or without those newlines
 476                 for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- {
 477                         checkSemi(t, line[0:i], 0)
 478                         checkSemi(t, line[0:i], ScanComments)
 479                 }
 480         }
 481 }
 482
// segment is one piece of the TestLineComments input together with the
// filename and line number expected for the token scanned from it.
type segment struct {
	srcline  string // a line of source text
	filename string // filename for current token
	line     int    // line number for current token
}

// segments are the TestLineComments inputs used on every operating system.
var segments = []segment{
	// exactly one token per line since the test consumes one token per segment
	{"  line1", filepath.Join("dir", "TestLineComments"), 1},
	{"\nline2", filepath.Join("dir", "TestLineComments"), 2},
	{"\nline3  //line File1.go:100", filepath.Join("dir", "TestLineComments"), 3}, // bad line comment, ignored
	{"\nline4", filepath.Join("dir", "TestLineComments"), 4},
	{"\n//line File1.go:100\n  line100", filepath.Join("dir", "File1.go"), 100},
	{"\n//line  \t :42\n  line1", "", 42},
	{"\n//line File2.go:200\n  line200", filepath.Join("dir", "File2.go"), 200},
	{"\n//line foo\t:42\n  line42", filepath.Join("dir", "foo"), 42},
	{"\n //line foo:42\n  line44", filepath.Join("dir", "foo"), 44},           // bad line comment, ignored
	{"\n//line foo 42\n  line46", filepath.Join("dir", "foo"), 46},            // bad line comment, ignored
	{"\n//line foo:42 extra text\n  line48", filepath.Join("dir", "foo"), 48}, // bad line comment, ignored
	{"\n//line ./foo:42\n  line42", filepath.Join("dir", "foo"), 42},
	{"\n//line a/b/c/File1.go:100\n  line100", filepath.Join("dir", "a", "b", "c", "File1.go"), 100},
}

// unixsegments are appended to segments by TestLineComments when
// runtime.GOOS is not "windows".
var unixsegments = []segment{
	{"\n//line /bar:42\n  line42", "/bar", 42},
}

// winsegments are appended to segments by TestLineComments when
// runtime.GOOS is "windows".
var winsegments = []segment{
	{"\n//line c:\\bar:42\n  line42", "c:\\bar", 42},
	{"\n//line c:\\dir\\File1.go:100\n  line100", "c:\\dir\\File1.go", 100},
}
 514
 515 // Verify that comments of the form "//line filename:line" are interpreted correctly.
 516 func TestLineComments(t *testing.T) {
 517         segs := segments
 518         if runtime.GOOS == "windows" {
 519                 segs = append(segs, winsegments...)
 520         } else {
 521                 segs = append(segs, unixsegments...)
 522         }
 523
 524         // make source
 525         var src string
 526         for _, e := range segs {
 527                 src += e.srcline
 528         }
 529
 530         // verify scan
 531         var S Scanner
 532         file := fset.AddFile(filepath.Join("dir", "TestLineComments"), fset.Base(), len(src))
 533         S.Init(file, []byte(src), nil, dontInsertSemis)
 534         for _, s := range segs {
 535                 p, _, lit := S.Scan()
 536                 pos := file.Position(p)
 537                 checkPos(t, lit, p, token.Position{
 538                         Filename: s.filename,
 539                         Offset:   pos.Offset,
 540                         Line:     s.line,
 541                         Column:   pos.Column,
 542                 })
 543         }
 544
 545         if S.ErrorCount != 0 {
 546                 t.Errorf("found %d errors", S.ErrorCount)
 547         }
 548 }
 549
 550 // Verify that initializing the same scanner more than once works correctly.
 551 func TestInit(t *testing.T) {
 552         var s Scanner
 553
 554         // 1st init
 555         src1 := "if true { }"
 556         f1 := fset.AddFile("src1", fset.Base(), len(src1))
 557         s.Init(f1, []byte(src1), nil, dontInsertSemis)
 558         if f1.Size() != len(src1) {
 559                 t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1))
 560         }
 561         s.Scan()              // if
 562         s.Scan()              // true
 563         _, tok, _ := s.Scan() // {
 564         if tok != token.LBRACE {
 565                 t.Errorf("bad token: got %s, expected %s", tok, token.LBRACE)
 566         }
 567
 568         // 2nd init
 569         src2 := "go true { ]"
 570         f2 := fset.AddFile("src2", fset.Base(), len(src2))
 571         s.Init(f2, []byte(src2), nil, dontInsertSemis)
 572         if f2.Size() != len(src2) {
 573                 t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2))
 574         }
 575         _, tok, _ = s.Scan() // go
 576         if tok != token.GO {
 577                 t.Errorf("bad token: got %s, expected %s", tok, token.GO)
 578         }
 579
 580         if s.ErrorCount != 0 {
 581                 t.Errorf("found %d errors", s.ErrorCount)
 582         }
 583 }
 584
 585 func TestStdErrorHander(t *testing.T) {
 586         const src = "@\n" + // illegal character, cause an error
 587                 "@ @\n" + // two errors on the same line
 588                 "//line File2:20\n" +
 589                 "@\n" + // different file, but same line
 590                 "//line File2:1\n" +
 591                 "@ @\n" + // same file, decreasing line number
 592                 "//line File1:1\n" +
 593                 "@ @ @" // original file, line 1 again
 594
 595         var list ErrorList
 596         eh := func(pos token.Position, msg string) { list.Add(pos, msg) }
 597
 598         var s Scanner
 599         s.Init(fset.AddFile("File1", fset.Base(), len(src)), []byte(src), eh, dontInsertSemis)
 600         for {
 601                 if _, tok, _ := s.Scan(); tok == token.EOF {
 602                         break
 603                 }
 604         }
 605
 606         if len(list) != s.ErrorCount {
 607                 t.Errorf("found %d errors, expected %d", len(list), s.ErrorCount)
 608         }
 609
 610         if len(list) != 9 {
 611                 t.Errorf("found %d raw errors, expected 9", len(list))
 612                 PrintError(os.Stderr, list)
 613         }
 614
 615         list.Sort()
 616         if len(list) != 9 {
 617                 t.Errorf("found %d sorted errors, expected 9", len(list))
 618                 PrintError(os.Stderr, list)
 619         }
 620
 621         list.RemoveMultiples()
 622         if len(list) != 4 {
 623                 t.Errorf("found %d one-per-line errors, expected 4", len(list))
 624                 PrintError(os.Stderr, list)
 625         }
 626 }
 627
 628 type errorCollector struct {
 629         cnt int            // number of errors encountered
 630         msg string         // last error message encountered
 631         pos token.Position // last error position encountered
 632 }
 633
 634 func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err string) {
 635         var s Scanner
 636         var h errorCollector
 637         eh := func(pos token.Position, msg string) {
 638                 h.cnt++
 639                 h.msg = msg
 640                 h.pos = pos
 641         }
 642         s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), eh, ScanComments|dontInsertSemis)
 643         _, tok0, lit0 := s.Scan()
 644         if tok0 != tok {
 645                 t.Errorf("%q: got %s, expected %s", src, tok0, tok)
 646         }
 647         if tok0 != token.ILLEGAL && lit0 != lit {
 648                 t.Errorf("%q: got literal %q, expected %q", src, lit0, lit)
 649         }
 650         cnt := 0
 651         if err != "" {
 652                 cnt = 1
 653         }
 654         if h.cnt != cnt {
 655                 t.Errorf("%q: got cnt %d, expected %d", src, h.cnt, cnt)
 656         }
 657         if h.msg != err {
 658                 t.Errorf("%q: got msg %q, expected %q", src, h.msg, err)
 659         }
 660         if h.pos.Offset != pos {
 661                 t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset, pos)
 662         }
 663 }
 664
// errors lists the test cases for TestScanErrors; the fields of each entry
// are passed to checkError.
var errors = []struct {
	src string      // source text handed to the scanner
	tok token.Token // expected first token
	pos int         // expected offset of the reported error
	lit string      // expected literal; not checked if the scanned token is ILLEGAL
	err string      // expected error message; "" if no error is expected
}{
	{"\a", token.ILLEGAL, 0, "", "illegal character U+0007"},
	{`#`, token.ILLEGAL, 0, "", "illegal character U+0023 '#'"},
	{`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"},
	{`' '`, token.CHAR, 0, `' '`, ""},
	{`''`, token.CHAR, 0, `''`, "illegal rune literal"},
	{`'12'`, token.CHAR, 0, `'12'`, "illegal rune literal"},
	{`'123'`, token.CHAR, 0, `'123'`, "illegal rune literal"},
	{`'\0'`, token.CHAR, 3, `'\0'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\07'`, token.CHAR, 4, `'\07'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\8'`, token.CHAR, 2, `'\8'`, "unknown escape sequence"},
	{`'\08'`, token.CHAR, 3, `'\08'`, "illegal character U+0038 '8' in escape sequence"},
	{`'\x'`, token.CHAR, 3, `'\x'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\x0'`, token.CHAR, 4, `'\x0'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\x0g'`, token.CHAR, 4, `'\x0g'`, "illegal character U+0067 'g' in escape sequence"},
	{`'\u'`, token.CHAR, 3, `'\u'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\u0'`, token.CHAR, 4, `'\u0'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\u00'`, token.CHAR, 5, `'\u00'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\u000'`, token.CHAR, 6, `'\u000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\u000`, token.CHAR, 6, `'\u000`, "escape sequence not terminated"},
	{`'\u0000'`, token.CHAR, 0, `'\u0000'`, ""},
	{`'\U'`, token.CHAR, 3, `'\U'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U0'`, token.CHAR, 4, `'\U0'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U00'`, token.CHAR, 5, `'\U00'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U000'`, token.CHAR, 6, `'\U000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U0000'`, token.CHAR, 7, `'\U0000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U00000'`, token.CHAR, 8, `'\U00000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U000000'`, token.CHAR, 9, `'\U000000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U0000000'`, token.CHAR, 10, `'\U0000000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U0000000`, token.CHAR, 10, `'\U0000000`, "escape sequence not terminated"},
	{`'\U00000000'`, token.CHAR, 0, `'\U00000000'`, ""},
	{`'\Uffffffff'`, token.CHAR, 2, `'\Uffffffff'`, "escape sequence is invalid Unicode code point"},
	{`'`, token.CHAR, 0, `'`, "rune literal not terminated"},
	{`'\`, token.CHAR, 2, `'\`, "escape sequence not terminated"},
	{"'\n", token.CHAR, 0, "'", "rune literal not terminated"},
	{"'\n   ", token.CHAR, 0, "'", "rune literal not terminated"},
	{`""`, token.STRING, 0, `""`, ""},
	{`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"},
	{"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"},
	{"\"abc\n   ", token.STRING, 0, `"abc`, "string literal not terminated"},
	{"``", token.STRING, 0, "``", ""},
	{"`", token.STRING, 0, "`", "raw string literal not terminated"},
	{"/**/", token.COMMENT, 0, "/**/", ""},
	{"/*", token.COMMENT, 0, "/*", "comment not terminated"},
	{"077", token.INT, 0, "077", ""},
	{"078.", token.FLOAT, 0, "078.", ""},
	{"07801234567.", token.FLOAT, 0, "07801234567.", ""},
	{"078e0", token.FLOAT, 0, "078e0", ""},
	{"0E", token.FLOAT, 0, "0E", "illegal floating-point exponent"}, // issue 17621
	{"078", token.INT, 0, "078", "illegal octal number"},
	{"07800000009", token.INT, 0, "07800000009", "illegal octal number"},
	{"0x", token.INT, 0, "0x", "illegal hexadecimal number"},
	{"0X", token.INT, 0, "0X", "illegal hexadecimal number"},
	{"\"abc\x00def\"", token.STRING, 4, "\"abc\x00def\"", "illegal character NUL"},
	{"\"abc\x80def\"", token.STRING, 4, "\"abc\x80def\"", "illegal UTF-8 encoding"},
	{"\ufeff\ufeff", token.ILLEGAL, 3, "\ufeff\ufeff", "illegal byte order mark"},                        // only first BOM is ignored
	{"//\ufeff", token.COMMENT, 2, "//\ufeff", "illegal byte order mark"},                                // only first BOM is ignored
	{"'\ufeff" + `'`, token.CHAR, 1, "'\ufeff" + `'`, "illegal byte order mark"},                         // only first BOM is ignored
	{`"` + "abc\ufeffdef" + `"`, token.STRING, 4, `"` + "abc\ufeffdef" + `"`, "illegal byte order mark"}, // only first BOM is ignored
}
 731
 732 func TestScanErrors(t *testing.T) {
 733         for _, e := range errors {
 734                 checkError(t, e.src, e.tok, e.pos, e.lit, e.err)
 735         }
 736 }
 737
 738 // Verify that no comments show up as literal values when skipping comments.
 739 func TestIssue10213(t *testing.T) {
 740         var src = `
 741                 var (
 742                         A = 1 // foo
 743                 )
 744
 745                 var (
 746                         B = 2
 747                         // foo
 748                 )
 749
 750                 var C = 3 // foo
 751
 752                 var D = 4
 753                 // foo
 754
 755                 func anycode() {
 756                 // foo
 757                 }
 758         `
 759         var s Scanner
 760         s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), nil, 0)
 761         for {
 762                 pos, tok, lit := s.Scan()
 763                 class := tokenclass(tok)
 764                 if lit != "" && class != keyword && class != literal && tok != token.SEMICOLON {
 765                         t.Errorf("%s: tok = %s, lit = %q", fset.Position(pos), tok, lit)
 766                 }
 767                 if tok <= token.EOF {
 768                         break
 769                 }
 770         }
 771 }
 772
 773 func BenchmarkScan(b *testing.B) {
 774         b.StopTimer()
 775         fset := token.NewFileSet()
 776         file := fset.AddFile("", fset.Base(), len(source))
 777         var s Scanner
 778         b.StartTimer()
 779         for i := 0; i < b.N; i++ {
 780                 s.Init(file, source, nil, ScanComments)
 781                 for {
 782                         _, tok, _ := s.Scan()
 783                         if tok == token.EOF {
 784                                 break
 785                         }
 786                 }
 787         }
 788 }
 789
 790 func BenchmarkScanFile(b *testing.B) {
 791         b.StopTimer()
 792         const filename = "scanner.go"
 793         src, err := ioutil.ReadFile(filename)
 794         if err != nil {
 795                 panic(err)
 796         }
 797         fset := token.NewFileSet()
 798         file := fset.AddFile(filename, fset.Base(), len(src))
 799         b.SetBytes(int64(len(src)))
 800         var s Scanner
 801         b.StartTimer()
 802         for i := 0; i < b.N; i++ {
 803                 s.Init(file, src, nil, ScanComments)
 804                 for {
 805                         _, tok, _ := s.Scan()
 806                         if tok == token.EOF {
 807                                 break
 808                         }
 809                 }
 810         }
 811 }