1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
16 // A StringReader delivers its data one string segment at a time via Read.
17 type StringReader
struct {
22 func (r
*StringReader
) Read(p
[]byte) (n
int, err error
) {
23 if r
.step
< len(r
.data
) {
33 func readRuneSegments(t
*testing
.T
, segments
[]string) {
35 want
:= strings
.Join(segments
, "")
36 s
:= new(Scanner
).Init(&StringReader
{data
: segments
})
45 t
.Errorf("segments=%v got=%s want=%s", segments
, got
, want
)
49 var segmentList
= [][]string{
53 {"\u65e5", "\u672c", "\u8a9e"},
54 {"\U000065e5", " ", "\U0000672c", "\U00008a9e"},
55 {"\xe6", "\x97\xa5\xe6", "\x9c\xac\xe8\xaa\x9e"},
56 {"Hello", ", ", "World", "!"},
57 {"Hello", ", ", "", "World", "!"},
60 func TestNext(t
*testing
.T
) {
61 for _
, s
:= range segmentList
{
62 readRuneSegments(t
, s
)
// f100 is a run of one hundred 'f' characters, used to build long tokens.
var f100 = strings.Repeat("f", 100)
73 var tokenList
= []token
{
74 {Comment
, "// line comments"},
77 {Comment
, "// comment"},
78 {Comment
, "// /* comment */"},
79 {Comment
, "// // comment //"},
80 {Comment
, "//" + f100
},
82 {Comment
, "// general comments"},
85 {Comment
, "/* comment */"},
86 {Comment
, "/* // comment */"},
87 {Comment
, "/* /* comment */"},
88 {Comment
, "/*\n comment\n*/"},
89 {Comment
, "/*" + f100
+ "*/"},
91 {Comment
, "// identifiers"},
100 {Ident
, "_abc_123_"},
110 {Comment
, "// decimal ints"},
117 {Comment
, "// octal ints"},
124 {Comment
, "// hexadecimal ints"},
129 {Int
, "0x123456789abcDEF"},
135 {Int
, "0X123456789abcDEF"},
138 {Comment
, "// floats"},
142 {Float
, "01234567890."},
146 {Float
, ".0123456789"},
150 {Float
, "01234567890.0"},
154 {Float
, "01234567890e0"},
158 {Float
, "01234567890E0"},
162 {Float
, "01234567890e-10"},
166 {Float
, "01234567890E-10"},
168 {Comment
, "// chars"},
186 {Char
, `'\U00000000'`},
187 {Char
, `'\U0000ffAB'`},
189 {Comment
, "// strings"},
205 {String
, `"\u0000"`},
206 {String
, `"\ufA16"`},
207 {String
, `"\U00000000"`},
208 {String
, `"\U0000ffAB"`},
209 {String
, `"` + f100
+ `"`},
211 {Comment
, "// raw strings"},
214 {String
, "`" + "\n\n/* foobar */\n\n" + "`"},
215 {String
, "`" + f100
+ "`"},
217 {Comment
, "// individual characters"},
218 // NUL character is not allowed
220 {' ' - 1, string(' ' - 1)},
228 func makeSource(pattern
string) *bytes
.Buffer
{
230 for _
, k
:= range tokenList
{
231 fmt
.Fprintf(&buf
, pattern
, k
.text
)
236 func checkTok(t
*testing
.T
, s
*Scanner
, line
int, got
, want rune
, text
string) {
238 t
.Fatalf("tok = %s, want %s for %q", TokenString(got
), TokenString(want
), text
)
241 t
.Errorf("line = %d, want %d for %q", s
.Line
, line
, text
)
243 stext
:= s
.TokenText()
245 t
.Errorf("text = %q, want %q", stext
, text
)
247 // check idempotency of TokenText() call
248 stext
= s
.TokenText()
250 t
.Errorf("text = %q, want %q (idempotency check)", stext
, text
)
255 func countNewlines(s
string) int {
257 for _
, ch
:= range s
{
265 func testScan(t
*testing
.T
, mode
uint) {
266 s
:= new(Scanner
).Init(makeSource(" \t%s\n"))
270 for _
, k
:= range tokenList
{
271 if mode
&SkipComments
== 0 || k
.tok
!= Comment
{
272 checkTok(t
, s
, line
, tok
, k
.tok
, k
.text
)
275 line
+= countNewlines(k
.text
) + 1 // each token is on a new line
277 checkTok(t
, s
, line
, tok
, EOF
, "")
280 func TestScan(t
*testing
.T
) {
281 testScan(t
, GoTokens
)
282 testScan(t
, GoTokens
&^SkipComments
)
285 func TestPosition(t
*testing
.T
) {
286 src
:= makeSource("\t\t\t\t%s\n")
287 s
:= new(Scanner
).Init(src
)
288 s
.Mode
= GoTokens
&^ SkipComments
290 pos
:= Position
{"", 4, 1, 5}
291 for _
, k
:= range tokenList
{
292 if s
.Offset
!= pos
.Offset
{
293 t
.Errorf("offset = %d, want %d for %q", s
.Offset
, pos
.Offset
, k
.text
)
295 if s
.Line
!= pos
.Line
{
296 t
.Errorf("line = %d, want %d for %q", s
.Line
, pos
.Line
, k
.text
)
298 if s
.Column
!= pos
.Column
{
299 t
.Errorf("column = %d, want %d for %q", s
.Column
, pos
.Column
, k
.text
)
301 pos
.Offset
+= 4 + len(k
.text
) + 1 // 4 tabs + token bytes + newline
302 pos
.Line
+= countNewlines(k
.text
) + 1 // each token is on a new line
305 // make sure there were no token-internal errors reported by scanner
306 if s
.ErrorCount
!= 0 {
307 t
.Errorf("%d errors", s
.ErrorCount
)
311 func TestScanZeroMode(t
*testing
.T
) {
312 src
:= makeSource("%s\n")
314 s
:= new(Scanner
).Init(src
)
315 s
.Mode
= 0 // don't recognize any token classes
316 s
.Whitespace
= 0 // don't skip any whitespace
318 for i
, ch
:= range str
{
320 t
.Fatalf("%d. tok = %s, want %s", i
, TokenString(tok
), TokenString(ch
))
325 t
.Fatalf("tok = %s, want EOF", TokenString(tok
))
327 if s
.ErrorCount
!= 0 {
328 t
.Errorf("%d errors", s
.ErrorCount
)
332 func testScanSelectedMode(t
*testing
.T
, mode
uint, class rune
) {
333 src
:= makeSource("%s\n")
334 s
:= new(Scanner
).Init(src
)
338 if tok
< 0 && tok
!= class
{
339 t
.Fatalf("tok = %s, want %s", TokenString(tok
), TokenString(class
))
343 if s
.ErrorCount
!= 0 {
344 t
.Errorf("%d errors", s
.ErrorCount
)
348 func TestScanSelectedMask(t
*testing
.T
) {
349 testScanSelectedMode(t
, 0, 0)
350 testScanSelectedMode(t
, ScanIdents
, Ident
)
351 // Don't test ScanInts and ScanNumbers since some parts of
352 // the floats in the source look like (illegal) octal ints
353 // and ScanNumbers may return either Int or Float.
354 testScanSelectedMode(t
, ScanChars
, Char
)
355 testScanSelectedMode(t
, ScanStrings
, String
)
356 testScanSelectedMode(t
, SkipComments
, 0)
357 testScanSelectedMode(t
, ScanComments
, Comment
)
360 func TestScanNext(t
*testing
.T
) {
363 s
:= new(Scanner
).Init(strings
.NewReader(BOMs
+ "if a == bcd /* com" + BOMs
+ "ment */ {\n\ta += c\n}" + BOMs
+ "// line comment ending in eof"))
364 checkTok(t
, s
, 1, s
.Scan(), Ident
, "if") // the first BOM is ignored
365 checkTok(t
, s
, 1, s
.Scan(), Ident
, "a")
366 checkTok(t
, s
, 1, s
.Scan(), '=', "=")
367 checkTok(t
, s
, 0, s
.Next(), '=', "")
368 checkTok(t
, s
, 0, s
.Next(), ' ', "")
369 checkTok(t
, s
, 0, s
.Next(), 'b', "")
370 checkTok(t
, s
, 1, s
.Scan(), Ident
, "cd")
371 checkTok(t
, s
, 1, s
.Scan(), '{', "{")
372 checkTok(t
, s
, 2, s
.Scan(), Ident
, "a")
373 checkTok(t
, s
, 2, s
.Scan(), '+', "+")
374 checkTok(t
, s
, 0, s
.Next(), '=', "")
375 checkTok(t
, s
, 2, s
.Scan(), Ident
, "c")
376 checkTok(t
, s
, 3, s
.Scan(), '}', "}")
377 checkTok(t
, s
, 3, s
.Scan(), BOM
, BOMs
)
378 checkTok(t
, s
, 3, s
.Scan(), -1, "")
379 if s
.ErrorCount
!= 0 {
380 t
.Errorf("%d errors", s
.ErrorCount
)
384 func TestScanWhitespace(t
*testing
.T
) {
387 // start at 1, NUL character is not allowed
388 for ch
:= byte(1); ch
< ' '; ch
++ {
395 s
:= new(Scanner
).Init(&buf
)
400 t
.Errorf("tok = %s, want %s", TokenString(tok
), TokenString(orig
))
404 func testError(t
*testing
.T
, src
, pos
, msg
string, tok rune
) {
405 s
:= new(Scanner
).Init(strings
.NewReader(src
))
407 s
.Error
= func(s
*Scanner
, m
string) {
409 // only look at first error
410 if p
:= s
.Pos().String(); p
!= pos
{
411 t
.Errorf("pos = %q, want %q for %q", p
, pos
, src
)
414 t
.Errorf("msg = %q, want %q for %q", m
, msg
, src
)
421 t
.Errorf("tok = %s, want %s for %q", TokenString(tk
), TokenString(tok
), src
)
424 t
.Errorf("error handler not called for %q", src
)
426 if s
.ErrorCount
== 0 {
427 t
.Errorf("count = %d, want > 0 for %q", s
.ErrorCount
, src
)
431 func TestError(t
*testing
.T
) {
432 testError(t
, "\x00", "1:1", "illegal character NUL", 0)
433 testError(t
, "\x80", "1:1", "illegal UTF-8 encoding", utf8
.RuneError
)
434 testError(t
, "\xff", "1:1", "illegal UTF-8 encoding", utf8
.RuneError
)
436 testError(t
, "a\x00", "1:2", "illegal character NUL", Ident
)
437 testError(t
, "ab\x80", "1:3", "illegal UTF-8 encoding", Ident
)
438 testError(t
, "abc\xff", "1:4", "illegal UTF-8 encoding", Ident
)
440 testError(t
, `"a`+"\x00", "1:3", "illegal character NUL", String
)
441 testError(t
, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", String
)
442 testError(t
, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", String
)
444 testError(t
, "`a"+"\x00", "1:3", "illegal character NUL", String
)
445 testError(t
, "`ab"+"\x80", "1:4", "illegal UTF-8 encoding", String
)
446 testError(t
, "`abc"+"\xff", "1:5", "illegal UTF-8 encoding", String
)
448 testError(t
, `'\"'`, "1:3", "illegal char escape", Char
)
449 testError(t
, `"\'"`, "1:3", "illegal char escape", String
)
451 testError(t
, `01238`, "1:6", "illegal octal number", Int
)
452 testError(t
, `01238123`, "1:9", "illegal octal number", Int
)
453 testError(t
, `0x`, "1:3", "illegal hexadecimal number", Int
)
454 testError(t
, `0xg`, "1:3", "illegal hexadecimal number", Int
)
455 testError(t
, `'aa'`, "1:4", "illegal char literal", Char
)
457 testError(t
, `'`, "1:2", "literal not terminated", Char
)
458 testError(t
, `'`+"\n", "1:2", "literal not terminated", Char
)
459 testError(t
, `"abc`, "1:5", "literal not terminated", String
)
460 testError(t
, `"abc`+"\n", "1:5", "literal not terminated", String
)
461 testError(t
, "`abc\n", "2:1", "literal not terminated", String
)
462 testError(t
, `/*/`, "1:4", "comment not terminated", EOF
)
// errReader is a reader that never delivers data: every Read reports zero
// bytes together with an error that is not io.EOF.
type errReader struct{}

// Read ignores b and always returns (0, io.ErrNoProgress).
func (errReader) Read(b []byte) (int, error) {
	notEOF := io.ErrNoProgress // any error other than io.EOF serves the purpose
	return 0, notEOF
}
472 func TestIOError(t
*testing
.T
) {
473 s
:= new(Scanner
).Init(errReader
{})
475 s
.Error
= func(s
*Scanner
, msg
string) {
477 if want
:= io
.ErrNoProgress
.Error(); msg
!= want
{
478 t
.Errorf("msg = %q, want %q", msg
, want
)
485 t
.Errorf("tok = %s, want EOF", TokenString(tok
))
488 t
.Errorf("error handler not called")
492 func checkPos(t
*testing
.T
, got
, want Position
) {
493 if got
.Offset
!= want
.Offset || got
.Line
!= want
.Line || got
.Column
!= want
.Column
{
494 t
.Errorf("got offset, line, column = %d, %d, %d; want %d, %d, %d",
495 got
.Offset
, got
.Line
, got
.Column
, want
.Offset
, want
.Line
, want
.Column
)
499 func checkNextPos(t
*testing
.T
, s
*Scanner
, offset
, line
, column
int, char rune
) {
500 if ch
:= s
.Next(); ch
!= char
{
501 t
.Errorf("ch = %s, want %s", TokenString(ch
), TokenString(char
))
503 want
:= Position
{Offset
: offset
, Line
: line
, Column
: column
}
504 checkPos(t
, s
.Pos(), want
)
507 func checkScanPos(t
*testing
.T
, s
*Scanner
, offset
, line
, column
int, char rune
) {
508 want
:= Position
{Offset
: offset
, Line
: line
, Column
: column
}
509 checkPos(t
, s
.Pos(), want
)
510 if ch
:= s
.Scan(); ch
!= char
{
511 t
.Errorf("ch = %s, want %s", TokenString(ch
), TokenString(char
))
512 if string(ch
) != s
.TokenText() {
513 t
.Errorf("tok = %q, want %q", s
.TokenText(), string(ch
))
516 checkPos(t
, s
.Position
, want
)
519 func TestPos(t
*testing
.T
) {
520 // corner case: empty source
521 s
:= new(Scanner
).Init(strings
.NewReader(""))
522 checkPos(t
, s
.Pos(), Position
{Offset
: 0, Line
: 1, Column
: 1})
523 s
.Peek() // peek doesn't affect the position
524 checkPos(t
, s
.Pos(), Position
{Offset
: 0, Line
: 1, Column
: 1})
526 // corner case: source with only a newline
527 s
= new(Scanner
).Init(strings
.NewReader("\n"))
528 checkPos(t
, s
.Pos(), Position
{Offset
: 0, Line
: 1, Column
: 1})
529 checkNextPos(t
, s
, 1, 2, 1, '\n')
530 // after EOF position doesn't change
531 for i
:= 10; i
> 0; i
-- {
532 checkScanPos(t
, s
, 1, 2, 1, EOF
)
534 if s
.ErrorCount
!= 0 {
535 t
.Errorf("%d errors", s
.ErrorCount
)
538 // corner case: source with only a single character
539 s
= new(Scanner
).Init(strings
.NewReader("本"))
540 checkPos(t
, s
.Pos(), Position
{Offset
: 0, Line
: 1, Column
: 1})
541 checkNextPos(t
, s
, 3, 1, 2, '本')
542 // after EOF position doesn't change
543 for i
:= 10; i
> 0; i
-- {
544 checkScanPos(t
, s
, 3, 1, 2, EOF
)
546 if s
.ErrorCount
!= 0 {
547 t
.Errorf("%d errors", s
.ErrorCount
)
550 // positions after calling Next
551 s
= new(Scanner
).Init(strings
.NewReader(" foo६४ \n\n本語\n"))
552 checkNextPos(t
, s
, 1, 1, 2, ' ')
553 s
.Peek() // peek doesn't affect the position
554 checkNextPos(t
, s
, 2, 1, 3, ' ')
555 checkNextPos(t
, s
, 3, 1, 4, 'f')
556 checkNextPos(t
, s
, 4, 1, 5, 'o')
557 checkNextPos(t
, s
, 5, 1, 6, 'o')
558 checkNextPos(t
, s
, 8, 1, 7, '६')
559 checkNextPos(t
, s
, 11, 1, 8, '४')
560 checkNextPos(t
, s
, 12, 1, 9, ' ')
561 checkNextPos(t
, s
, 13, 1, 10, ' ')
562 checkNextPos(t
, s
, 14, 2, 1, '\n')
563 checkNextPos(t
, s
, 15, 3, 1, '\n')
564 checkNextPos(t
, s
, 18, 3, 2, '本')
565 checkNextPos(t
, s
, 21, 3, 3, '語')
566 checkNextPos(t
, s
, 22, 4, 1, '\n')
567 // after EOF position doesn't change
568 for i
:= 10; i
> 0; i
-- {
569 checkScanPos(t
, s
, 22, 4, 1, EOF
)
571 if s
.ErrorCount
!= 0 {
572 t
.Errorf("%d errors", s
.ErrorCount
)
575 // positions after calling Scan
576 s
= new(Scanner
).Init(strings
.NewReader("abc\n本語\n\nx"))
579 checkScanPos(t
, s
, 0, 1, 1, 'a')
580 s
.Peek() // peek doesn't affect the position
581 checkScanPos(t
, s
, 1, 1, 2, 'b')
582 checkScanPos(t
, s
, 2, 1, 3, 'c')
583 checkScanPos(t
, s
, 3, 1, 4, '\n')
584 checkScanPos(t
, s
, 4, 2, 1, '本')
585 checkScanPos(t
, s
, 7, 2, 2, '語')
586 checkScanPos(t
, s
, 10, 2, 3, '\n')
587 checkScanPos(t
, s
, 11, 3, 1, '\n')
588 checkScanPos(t
, s
, 12, 4, 1, 'x')
589 // after EOF position doesn't change
590 for i
:= 10; i
> 0; i
-- {
591 checkScanPos(t
, s
, 13, 4, 2, EOF
)
593 if s
.ErrorCount
!= 0 {
594 t
.Errorf("%d errors", s
.ErrorCount
)