libgo: Update to Go 1.3 release.
[official-gcc.git] / libgo / go / text / scanner / scanner_test.go
blob7d3f597eb9ab074168bd2544d17c8f2f4a673869
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package scanner
7 import (
8 "bytes"
9 "fmt"
10 "io"
11 "strings"
12 "testing"
13 "unicode/utf8"
// A StringReader delivers its data one string segment at a time via Read.
// A single Read call never returns bytes from more than one segment, so a
// consumer sees the chunk boundaries listed in data.
type StringReader struct {
	data []string // segments to deliver, in order
	step int      // index of the segment currently being delivered
	off  int      // number of bytes of data[step] already delivered
}

// Read copies the undelivered part of the current segment into p and returns
// the number of bytes copied. If p is too small to hold the rest of the
// segment, the remainder is kept for the next call instead of being dropped.
// An empty segment yields (0, nil). Once every segment has been delivered,
// Read returns (0, io.EOF).
func (r *StringReader) Read(p []byte) (n int, err error) {
	if r.step >= len(r.data) {
		return 0, io.EOF
	}
	s := r.data[r.step]
	n = copy(p, s[r.off:])
	r.off += n
	if r.off == len(s) {
		// Segment fully delivered (or empty): move on to the next one.
		r.step++
		r.off = 0
	}
	return n, nil
}
33 func readRuneSegments(t *testing.T, segments []string) {
34 got := ""
35 want := strings.Join(segments, "")
36 s := new(Scanner).Init(&StringReader{data: segments})
37 for {
38 ch := s.Next()
39 if ch == EOF {
40 break
42 got += string(ch)
44 if got != want {
45 t.Errorf("segments=%v got=%s want=%s", segments, got, want)
// segmentList holds the inputs for TestNext. Each entry is one source text
// split into the chunks that successive Read calls hand to the Scanner. The
// entries include no segments at all, empty segments, and segments that are
// split in the middle of a multi-byte UTF-8 sequence.
var segmentList = [][]string{
	{},
	{""},
	{"日", "本語"},
	{"\u65e5", "\u672c", "\u8a9e"},
	{"\U000065e5", " ", "\U0000672c", "\U00008a9e"},
	{"\xe6", "\x97\xa5\xe6", "\x9c\xac\xe8\xaa\x9e"},
	{"Hello", ", ", "World", "!"},
	{"Hello", ", ", "", "World", "!"},
}
60 func TestNext(t *testing.T) {
61 for _, s := range segmentList {
62 readRuneSegments(t, s)
// A token pairs the value a scan is expected to return with the source text
// that should produce it. The field order matters: tokenList uses unkeyed
// literals.
type token struct {
	tok  rune   // expected token value
	text string // source text of the token
}
// f100 is a run of 100 'f' characters. tokenList uses it to build long
// comments, identifiers, hexadecimal literals and strings.
var f100 = strings.Repeat("f", 100)
73 var tokenList = []token{
74 {Comment, "// line comments"},
75 {Comment, "//"},
76 {Comment, "////"},
77 {Comment, "// comment"},
78 {Comment, "// /* comment */"},
79 {Comment, "// // comment //"},
80 {Comment, "//" + f100},
82 {Comment, "// general comments"},
83 {Comment, "/**/"},
84 {Comment, "/***/"},
85 {Comment, "/* comment */"},
86 {Comment, "/* // comment */"},
87 {Comment, "/* /* comment */"},
88 {Comment, "/*\n comment\n*/"},
89 {Comment, "/*" + f100 + "*/"},
91 {Comment, "// identifiers"},
92 {Ident, "a"},
93 {Ident, "a0"},
94 {Ident, "foobar"},
95 {Ident, "abc123"},
96 {Ident, "LGTM"},
97 {Ident, "_"},
98 {Ident, "_abc123"},
99 {Ident, "abc123_"},
100 {Ident, "_abc_123_"},
101 {Ident, "_äöü"},
102 {Ident, "_本"},
103 {Ident, "äöü"},
104 {Ident, "本"},
105 {Ident, "a۰۱۸"},
106 {Ident, "foo६४"},
107 {Ident, "bar9876"},
108 {Ident, f100},
110 {Comment, "// decimal ints"},
111 {Int, "0"},
112 {Int, "1"},
113 {Int, "9"},
114 {Int, "42"},
115 {Int, "1234567890"},
117 {Comment, "// octal ints"},
118 {Int, "00"},
119 {Int, "01"},
120 {Int, "07"},
121 {Int, "042"},
122 {Int, "01234567"},
124 {Comment, "// hexadecimal ints"},
125 {Int, "0x0"},
126 {Int, "0x1"},
127 {Int, "0xf"},
128 {Int, "0x42"},
129 {Int, "0x123456789abcDEF"},
130 {Int, "0x" + f100},
131 {Int, "0X0"},
132 {Int, "0X1"},
133 {Int, "0XF"},
134 {Int, "0X42"},
135 {Int, "0X123456789abcDEF"},
136 {Int, "0X" + f100},
138 {Comment, "// floats"},
139 {Float, "0."},
140 {Float, "1."},
141 {Float, "42."},
142 {Float, "01234567890."},
143 {Float, ".0"},
144 {Float, ".1"},
145 {Float, ".42"},
146 {Float, ".0123456789"},
147 {Float, "0.0"},
148 {Float, "1.0"},
149 {Float, "42.0"},
150 {Float, "01234567890.0"},
151 {Float, "0e0"},
152 {Float, "1e0"},
153 {Float, "42e0"},
154 {Float, "01234567890e0"},
155 {Float, "0E0"},
156 {Float, "1E0"},
157 {Float, "42E0"},
158 {Float, "01234567890E0"},
159 {Float, "0e+10"},
160 {Float, "1e-10"},
161 {Float, "42e+10"},
162 {Float, "01234567890e-10"},
163 {Float, "0E+10"},
164 {Float, "1E-10"},
165 {Float, "42E+10"},
166 {Float, "01234567890E-10"},
168 {Comment, "// chars"},
169 {Char, `' '`},
170 {Char, `'a'`},
171 {Char, `'本'`},
172 {Char, `'\a'`},
173 {Char, `'\b'`},
174 {Char, `'\f'`},
175 {Char, `'\n'`},
176 {Char, `'\r'`},
177 {Char, `'\t'`},
178 {Char, `'\v'`},
179 {Char, `'\''`},
180 {Char, `'\000'`},
181 {Char, `'\777'`},
182 {Char, `'\x00'`},
183 {Char, `'\xff'`},
184 {Char, `'\u0000'`},
185 {Char, `'\ufA16'`},
186 {Char, `'\U00000000'`},
187 {Char, `'\U0000ffAB'`},
189 {Comment, "// strings"},
190 {String, `" "`},
191 {String, `"a"`},
192 {String, `"本"`},
193 {String, `"\a"`},
194 {String, `"\b"`},
195 {String, `"\f"`},
196 {String, `"\n"`},
197 {String, `"\r"`},
198 {String, `"\t"`},
199 {String, `"\v"`},
200 {String, `"\""`},
201 {String, `"\000"`},
202 {String, `"\777"`},
203 {String, `"\x00"`},
204 {String, `"\xff"`},
205 {String, `"\u0000"`},
206 {String, `"\ufA16"`},
207 {String, `"\U00000000"`},
208 {String, `"\U0000ffAB"`},
209 {String, `"` + f100 + `"`},
211 {Comment, "// raw strings"},
212 {String, "``"},
213 {String, "`\\`"},
214 {String, "`" + "\n\n/* foobar */\n\n" + "`"},
215 {String, "`" + f100 + "`"},
217 {Comment, "// individual characters"},
218 // NUL character is not allowed
219 {'\x01', "\x01"},
220 {' ' - 1, string(' ' - 1)},
221 {'+', "+"},
222 {'/', "/"},
223 {'.', "."},
224 {'~', "~"},
225 {'(', "("},
228 func makeSource(pattern string) *bytes.Buffer {
229 var buf bytes.Buffer
230 for _, k := range tokenList {
231 fmt.Fprintf(&buf, pattern, k.text)
233 return &buf
// checkTok verifies the outcome of one scanning step. got is the token value
// just returned by the scanner and want the expected one; a mismatch aborts
// the test. It then compares s.Line with line and s.TokenText() with text,
// and when the text matches it calls TokenText a second time to confirm that
// the call is repeatable.
func checkTok(t *testing.T, s *Scanner, line int, got, want rune, text string) {
	if got != want {
		t.Fatalf("tok = %s, want %s for %q", TokenString(got), TokenString(want), text)
	}
	if s.Line != line {
		t.Errorf("line = %d, want %d for %q", s.Line, line, text)
	}
	stext := s.TokenText()
	if stext != text {
		t.Errorf("text = %q, want %q", stext, text)
	} else {
		// check idempotency of TokenText() call
		stext = s.TokenText()
		if stext != text {
			t.Errorf("text = %q, want %q (idempotency check)", stext, text)
		}
	}
}
// countNewlines returns the number of '\n' characters in s.
func countNewlines(s string) int {
	return strings.Count(s, "\n")
}
265 func testScan(t *testing.T, mode uint) {
266 s := new(Scanner).Init(makeSource(" \t%s\n"))
267 s.Mode = mode
268 tok := s.Scan()
269 line := 1
270 for _, k := range tokenList {
271 if mode&SkipComments == 0 || k.tok != Comment {
272 checkTok(t, s, line, tok, k.tok, k.text)
273 tok = s.Scan()
275 line += countNewlines(k.text) + 1 // each token is on a new line
277 checkTok(t, s, line, tok, EOF, "")
280 func TestScan(t *testing.T) {
281 testScan(t, GoTokens)
282 testScan(t, GoTokens&^SkipComments)
285 func TestPosition(t *testing.T) {
286 src := makeSource("\t\t\t\t%s\n")
287 s := new(Scanner).Init(src)
288 s.Mode = GoTokens &^ SkipComments
289 s.Scan()
290 pos := Position{"", 4, 1, 5}
291 for _, k := range tokenList {
292 if s.Offset != pos.Offset {
293 t.Errorf("offset = %d, want %d for %q", s.Offset, pos.Offset, k.text)
295 if s.Line != pos.Line {
296 t.Errorf("line = %d, want %d for %q", s.Line, pos.Line, k.text)
298 if s.Column != pos.Column {
299 t.Errorf("column = %d, want %d for %q", s.Column, pos.Column, k.text)
301 pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline
302 pos.Line += countNewlines(k.text) + 1 // each token is on a new line
303 s.Scan()
305 // make sure there were no token-internal errors reported by scanner
306 if s.ErrorCount != 0 {
307 t.Errorf("%d errors", s.ErrorCount)
311 func TestScanZeroMode(t *testing.T) {
312 src := makeSource("%s\n")
313 str := src.String()
314 s := new(Scanner).Init(src)
315 s.Mode = 0 // don't recognize any token classes
316 s.Whitespace = 0 // don't skip any whitespace
317 tok := s.Scan()
318 for i, ch := range str {
319 if tok != ch {
320 t.Fatalf("%d. tok = %s, want %s", i, TokenString(tok), TokenString(ch))
322 tok = s.Scan()
324 if tok != EOF {
325 t.Fatalf("tok = %s, want EOF", TokenString(tok))
327 if s.ErrorCount != 0 {
328 t.Errorf("%d errors", s.ErrorCount)
332 func testScanSelectedMode(t *testing.T, mode uint, class rune) {
333 src := makeSource("%s\n")
334 s := new(Scanner).Init(src)
335 s.Mode = mode
336 tok := s.Scan()
337 for tok != EOF {
338 if tok < 0 && tok != class {
339 t.Fatalf("tok = %s, want %s", TokenString(tok), TokenString(class))
341 tok = s.Scan()
343 if s.ErrorCount != 0 {
344 t.Errorf("%d errors", s.ErrorCount)
348 func TestScanSelectedMask(t *testing.T) {
349 testScanSelectedMode(t, 0, 0)
350 testScanSelectedMode(t, ScanIdents, Ident)
351 // Don't test ScanInts and ScanNumbers since some parts of
352 // the floats in the source look like (illegal) octal ints
353 // and ScanNumbers may return either Int or Float.
354 testScanSelectedMode(t, ScanChars, Char)
355 testScanSelectedMode(t, ScanStrings, String)
356 testScanSelectedMode(t, SkipComments, 0)
357 testScanSelectedMode(t, ScanComments, Comment)
360 func TestScanNext(t *testing.T) {
361 const BOM = '\uFEFF'
362 BOMs := string(BOM)
363 s := new(Scanner).Init(strings.NewReader(BOMs + "if a == bcd /* com" + BOMs + "ment */ {\n\ta += c\n}" + BOMs + "// line comment ending in eof"))
364 checkTok(t, s, 1, s.Scan(), Ident, "if") // the first BOM is ignored
365 checkTok(t, s, 1, s.Scan(), Ident, "a")
366 checkTok(t, s, 1, s.Scan(), '=', "=")
367 checkTok(t, s, 0, s.Next(), '=', "")
368 checkTok(t, s, 0, s.Next(), ' ', "")
369 checkTok(t, s, 0, s.Next(), 'b', "")
370 checkTok(t, s, 1, s.Scan(), Ident, "cd")
371 checkTok(t, s, 1, s.Scan(), '{', "{")
372 checkTok(t, s, 2, s.Scan(), Ident, "a")
373 checkTok(t, s, 2, s.Scan(), '+', "+")
374 checkTok(t, s, 0, s.Next(), '=', "")
375 checkTok(t, s, 2, s.Scan(), Ident, "c")
376 checkTok(t, s, 3, s.Scan(), '}', "}")
377 checkTok(t, s, 3, s.Scan(), BOM, BOMs)
378 checkTok(t, s, 3, s.Scan(), -1, "")
379 if s.ErrorCount != 0 {
380 t.Errorf("%d errors", s.ErrorCount)
// TestScanWhitespace builds a source made of every byte from 1 up to (but not
// including) ' ' followed by 'x', marks all of those leading bytes as
// whitespace in the scanner's Whitespace bit set, and expects the first Scan
// to return 'x'.
func TestScanWhitespace(t *testing.T) {
	var buf bytes.Buffer
	var ws uint64
	// start at 1, NUL character is not allowed
	for ch := byte(1); ch < ' '; ch++ {
		buf.WriteByte(ch)
		ws |= 1 << ch
	}
	const orig = 'x'
	buf.WriteByte(orig)

	s := new(Scanner).Init(&buf)
	s.Mode = 0
	s.Whitespace = ws
	tok := s.Scan()
	if tok != orig {
		t.Errorf("tok = %s, want %s", TokenString(tok), TokenString(orig))
	}
}
// testError scans src once with an error handler installed and checks that
// the first reported error carries the position pos (as formatted by
// s.Pos().String()) and the message msg, that Scan returns tok, that the
// handler was called at all, and that ErrorCount is non-zero afterwards.
func testError(t *testing.T, src, pos, msg string, tok rune) {
	s := new(Scanner).Init(strings.NewReader(src))
	errorCalled := false
	s.Error = func(s *Scanner, m string) {
		if errorCalled {
			// only look at first error
			return
		}
		if p := s.Pos().String(); p != pos {
			t.Errorf("pos = %q, want %q for %q", p, pos, src)
		}
		if m != msg {
			t.Errorf("msg = %q, want %q for %q", m, msg, src)
		}
		errorCalled = true
	}
	tk := s.Scan()
	if tk != tok {
		t.Errorf("tok = %s, want %s for %q", TokenString(tk), TokenString(tok), src)
	}
	if !errorCalled {
		t.Errorf("error handler not called for %q", src)
	}
	if s.ErrorCount == 0 {
		t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
	}
}
431 func TestError(t *testing.T) {
432 testError(t, "\x00", "1:1", "illegal character NUL", 0)
433 testError(t, "\x80", "1:1", "illegal UTF-8 encoding", utf8.RuneError)
434 testError(t, "\xff", "1:1", "illegal UTF-8 encoding", utf8.RuneError)
436 testError(t, "a\x00", "1:2", "illegal character NUL", Ident)
437 testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", Ident)
438 testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", Ident)
440 testError(t, `"a`+"\x00", "1:3", "illegal character NUL", String)
441 testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", String)
442 testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", String)
444 testError(t, "`a"+"\x00", "1:3", "illegal character NUL", String)
445 testError(t, "`ab"+"\x80", "1:4", "illegal UTF-8 encoding", String)
446 testError(t, "`abc"+"\xff", "1:5", "illegal UTF-8 encoding", String)
448 testError(t, `'\"'`, "1:3", "illegal char escape", Char)
449 testError(t, `"\'"`, "1:3", "illegal char escape", String)
451 testError(t, `01238`, "1:6", "illegal octal number", Int)
452 testError(t, `01238123`, "1:9", "illegal octal number", Int)
453 testError(t, `0x`, "1:3", "illegal hexadecimal number", Int)
454 testError(t, `0xg`, "1:3", "illegal hexadecimal number", Int)
455 testError(t, `'aa'`, "1:4", "illegal char literal", Char)
457 testError(t, `'`, "1:2", "literal not terminated", Char)
458 testError(t, `'`+"\n", "1:2", "literal not terminated", Char)
459 testError(t, `"abc`, "1:5", "literal not terminated", String)
460 testError(t, `"abc`+"\n", "1:5", "literal not terminated", String)
461 testError(t, "`abc\n", "2:1", "literal not terminated", String)
462 testError(t, `/*/`, "1:4", "comment not terminated", EOF)
// An errReader returns (0, err) where err is not io.EOF.
type errReader struct{}

// Read ignores b and always fails with io.ErrNoProgress without reading
// anything.
func (errReader) Read(b []byte) (int, error) {
	return 0, io.ErrNoProgress // some error that is not io.EOF
}
472 func TestIOError(t *testing.T) {
473 s := new(Scanner).Init(errReader{})
474 errorCalled := false
475 s.Error = func(s *Scanner, msg string) {
476 if !errorCalled {
477 if want := io.ErrNoProgress.Error(); msg != want {
478 t.Errorf("msg = %q, want %q", msg, want)
480 errorCalled = true
483 tok := s.Scan()
484 if tok != EOF {
485 t.Errorf("tok = %s, want EOF", TokenString(tok))
487 if !errorCalled {
488 t.Errorf("error handler not called")
// checkPos reports an error on t if got and want differ in Offset, Line or
// Column. The Filename field is not compared.
func checkPos(t *testing.T, got, want Position) {
	if got.Offset != want.Offset || got.Line != want.Line || got.Column != want.Column {
		t.Errorf("got offset, line, column = %d, %d, %d; want %d, %d, %d",
			got.Offset, got.Line, got.Column, want.Offset, want.Line, want.Column)
	}
}
499 func checkNextPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
500 if ch := s.Next(); ch != char {
501 t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
503 want := Position{Offset: offset, Line: line, Column: column}
504 checkPos(t, s.Pos(), want)
507 func checkScanPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
508 want := Position{Offset: offset, Line: line, Column: column}
509 checkPos(t, s.Pos(), want)
510 if ch := s.Scan(); ch != char {
511 t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
512 if string(ch) != s.TokenText() {
513 t.Errorf("tok = %q, want %q", s.TokenText(), string(ch))
516 checkPos(t, s.Position, want)
519 func TestPos(t *testing.T) {
520 // corner case: empty source
521 s := new(Scanner).Init(strings.NewReader(""))
522 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
523 s.Peek() // peek doesn't affect the position
524 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
526 // corner case: source with only a newline
527 s = new(Scanner).Init(strings.NewReader("\n"))
528 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
529 checkNextPos(t, s, 1, 2, 1, '\n')
530 // after EOF position doesn't change
531 for i := 10; i > 0; i-- {
532 checkScanPos(t, s, 1, 2, 1, EOF)
534 if s.ErrorCount != 0 {
535 t.Errorf("%d errors", s.ErrorCount)
538 // corner case: source with only a single character
539 s = new(Scanner).Init(strings.NewReader("本"))
540 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
541 checkNextPos(t, s, 3, 1, 2, '本')
542 // after EOF position doesn't change
543 for i := 10; i > 0; i-- {
544 checkScanPos(t, s, 3, 1, 2, EOF)
546 if s.ErrorCount != 0 {
547 t.Errorf("%d errors", s.ErrorCount)
550 // positions after calling Next
551 s = new(Scanner).Init(strings.NewReader(" foo६४ \n\n本語\n"))
552 checkNextPos(t, s, 1, 1, 2, ' ')
553 s.Peek() // peek doesn't affect the position
554 checkNextPos(t, s, 2, 1, 3, ' ')
555 checkNextPos(t, s, 3, 1, 4, 'f')
556 checkNextPos(t, s, 4, 1, 5, 'o')
557 checkNextPos(t, s, 5, 1, 6, 'o')
558 checkNextPos(t, s, 8, 1, 7, '६')
559 checkNextPos(t, s, 11, 1, 8, '४')
560 checkNextPos(t, s, 12, 1, 9, ' ')
561 checkNextPos(t, s, 13, 1, 10, ' ')
562 checkNextPos(t, s, 14, 2, 1, '\n')
563 checkNextPos(t, s, 15, 3, 1, '\n')
564 checkNextPos(t, s, 18, 3, 2, '本')
565 checkNextPos(t, s, 21, 3, 3, '語')
566 checkNextPos(t, s, 22, 4, 1, '\n')
567 // after EOF position doesn't change
568 for i := 10; i > 0; i-- {
569 checkScanPos(t, s, 22, 4, 1, EOF)
571 if s.ErrorCount != 0 {
572 t.Errorf("%d errors", s.ErrorCount)
575 // positions after calling Scan
576 s = new(Scanner).Init(strings.NewReader("abc\n本語\n\nx"))
577 s.Mode = 0
578 s.Whitespace = 0
579 checkScanPos(t, s, 0, 1, 1, 'a')
580 s.Peek() // peek doesn't affect the position
581 checkScanPos(t, s, 1, 1, 2, 'b')
582 checkScanPos(t, s, 2, 1, 3, 'c')
583 checkScanPos(t, s, 3, 1, 4, '\n')
584 checkScanPos(t, s, 4, 2, 1, '本')
585 checkScanPos(t, s, 7, 2, 2, '語')
586 checkScanPos(t, s, 10, 2, 3, '\n')
587 checkScanPos(t, s, 11, 3, 1, '\n')
588 checkScanPos(t, s, 12, 4, 1, 'x')
589 // after EOF position doesn't change
590 for i := 10; i > 0; i-- {
591 checkScanPos(t, s, 13, 4, 2, EOF)
593 if s.ErrorCount != 0 {
594 t.Errorf("%d errors", s.ErrorCount)