Merge from mainline (167278:168000).
[official-gcc/graphite-test-results.git] / libgo / go / scanner / scanner_test.go
blob506f434fe7bc9334d6bb05021f852ee80d50024d
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
5 package scanner
7 import (
8 "bytes"
9 "fmt"
10 "os"
11 "strings"
12 "testing"
16 // A StringReader delivers its data one string segment at a time via Read.
17 type StringReader struct {
18 data []string
19 step int
23 func (r *StringReader) Read(p []byte) (n int, err os.Error) {
24 if r.step < len(r.data) {
25 s := r.data[r.step]
26 n = copy(p, s)
27 r.step++
28 } else {
29 err = os.EOF
31 return
35 func readRuneSegments(t *testing.T, segments []string) {
36 got := ""
37 want := strings.Join(segments, "")
38 s := new(Scanner).Init(&StringReader{data: segments})
39 for {
40 ch := s.Next()
41 if ch == EOF {
42 break
44 got += string(ch)
46 if got != want {
47 t.Errorf("segments=%v got=%s want=%s", segments, got, want)
// segmentList holds the inputs for TestNext. Each element is one input,
// given as the sequence of string segments that a StringReader delivers
// one per Read call.
var segmentList = [][]string{
	[]string{},   // no segments at all
	[]string{""}, // a single empty segment
	[]string{"日", "本語"},
	[]string{"\u65e5", "\u672c", "\u8a9e"},
	[]string{"\U000065e5", " ", "\U0000672c", "\U00008a9e"},
	// segment boundaries fall in the middle of multi-byte UTF-8 sequences
	[]string{"\xe6", "\x97\xa5\xe6", "\x9c\xac\xe8\xaa\x9e"},
	[]string{"Hello", ", ", "World", "!"},
	// same text with an empty segment in the middle
	[]string{"Hello", ", ", "", "World", "!"},
}
64 func TestNext(t *testing.T) {
65 for _, s := range segmentList {
66 readRuneSegments(t, s)
// token is one expected scan result: the token value the scanner should
// report together with the exact source text of that token.
type token struct {
	tok  int    // expected token value (a token class or a character)
	text string // source text of the token
}
// f100 is a run of 100 'f' characters, used to build very long comments,
// identifiers, hexadecimal literals and strings in tokenList.
var f100 = strings.Repeat("f", 100)
78 var tokenList = []token{
79 {Comment, "// line comments\n"},
80 {Comment, "//\n"},
81 {Comment, "////\n"},
82 {Comment, "// comment\n"},
83 {Comment, "// /* comment */\n"},
84 {Comment, "// // comment //\n"},
85 {Comment, "//" + f100 + "\n"},
87 {Comment, "// general comments\n"},
88 {Comment, "/**/"},
89 {Comment, "/***/"},
90 {Comment, "/* comment */"},
91 {Comment, "/* // comment */"},
92 {Comment, "/* /* comment */"},
93 {Comment, "/*\n comment\n*/"},
94 {Comment, "/*" + f100 + "*/"},
96 {Comment, "// identifiers\n"},
97 {Ident, "a"},
98 {Ident, "a0"},
99 {Ident, "foobar"},
100 {Ident, "abc123"},
101 {Ident, "LGTM"},
102 {Ident, "_"},
103 {Ident, "_abc123"},
104 {Ident, "abc123_"},
105 {Ident, "_abc_123_"},
106 {Ident, "_äöü"},
107 {Ident, "_本"},
108 // TODO for unknown reasons these fail when checking the literals
110 token{Ident, "äöü"},
111 token{Ident, "本"},
113 {Ident, "a۰۱۸"},
114 {Ident, "foo६४"},
115 {Ident, "bar9876"},
116 {Ident, f100},
118 {Comment, "// decimal ints\n"},
119 {Int, "0"},
120 {Int, "1"},
121 {Int, "9"},
122 {Int, "42"},
123 {Int, "1234567890"},
125 {Comment, "// octal ints\n"},
126 {Int, "00"},
127 {Int, "01"},
128 {Int, "07"},
129 {Int, "042"},
130 {Int, "01234567"},
132 {Comment, "// hexadecimal ints\n"},
133 {Int, "0x0"},
134 {Int, "0x1"},
135 {Int, "0xf"},
136 {Int, "0x42"},
137 {Int, "0x123456789abcDEF"},
138 {Int, "0x" + f100},
139 {Int, "0X0"},
140 {Int, "0X1"},
141 {Int, "0XF"},
142 {Int, "0X42"},
143 {Int, "0X123456789abcDEF"},
144 {Int, "0X" + f100},
146 {Comment, "// floats\n"},
147 {Float, "0."},
148 {Float, "1."},
149 {Float, "42."},
150 {Float, "01234567890."},
151 {Float, ".0"},
152 {Float, ".1"},
153 {Float, ".42"},
154 {Float, ".0123456789"},
155 {Float, "0.0"},
156 {Float, "1.0"},
157 {Float, "42.0"},
158 {Float, "01234567890.0"},
159 {Float, "0e0"},
160 {Float, "1e0"},
161 {Float, "42e0"},
162 {Float, "01234567890e0"},
163 {Float, "0E0"},
164 {Float, "1E0"},
165 {Float, "42E0"},
166 {Float, "01234567890E0"},
167 {Float, "0e+10"},
168 {Float, "1e-10"},
169 {Float, "42e+10"},
170 {Float, "01234567890e-10"},
171 {Float, "0E+10"},
172 {Float, "1E-10"},
173 {Float, "42E+10"},
174 {Float, "01234567890E-10"},
176 {Comment, "// chars\n"},
177 {Char, `' '`},
178 {Char, `'a'`},
179 {Char, `'本'`},
180 {Char, `'\a'`},
181 {Char, `'\b'`},
182 {Char, `'\f'`},
183 {Char, `'\n'`},
184 {Char, `'\r'`},
185 {Char, `'\t'`},
186 {Char, `'\v'`},
187 {Char, `'\''`},
188 {Char, `'\000'`},
189 {Char, `'\777'`},
190 {Char, `'\x00'`},
191 {Char, `'\xff'`},
192 {Char, `'\u0000'`},
193 {Char, `'\ufA16'`},
194 {Char, `'\U00000000'`},
195 {Char, `'\U0000ffAB'`},
197 {Comment, "// strings\n"},
198 {String, `" "`},
199 {String, `"a"`},
200 {String, `"本"`},
201 {String, `"\a"`},
202 {String, `"\b"`},
203 {String, `"\f"`},
204 {String, `"\n"`},
205 {String, `"\r"`},
206 {String, `"\t"`},
207 {String, `"\v"`},
208 {String, `"\""`},
209 {String, `"\000"`},
210 {String, `"\777"`},
211 {String, `"\x00"`},
212 {String, `"\xff"`},
213 {String, `"\u0000"`},
214 {String, `"\ufA16"`},
215 {String, `"\U00000000"`},
216 {String, `"\U0000ffAB"`},
217 {String, `"` + f100 + `"`},
219 {Comment, "// raw strings\n"},
220 {String, "``"},
221 {String, "`\\`"},
222 {String, "`" + "\n\n/* foobar */\n\n" + "`"},
223 {String, "`" + f100 + "`"},
225 {Comment, "// individual characters\n"},
226 // NUL character is not allowed
227 {'\x01', "\x01"},
228 {' ' - 1, string(' ' - 1)},
229 {'+', "+"},
230 {'/', "/"},
231 {'.', "."},
232 {'~', "~"},
233 {'(', "("},
237 func makeSource(pattern string) *bytes.Buffer {
238 var buf bytes.Buffer
239 for _, k := range tokenList {
240 fmt.Fprintf(&buf, pattern, k.text)
242 return &buf
246 func checkTok(t *testing.T, s *Scanner, line, got, want int, text string) {
247 if got != want {
248 t.Fatalf("tok = %s, want %s for %q", TokenString(got), TokenString(want), text)
250 if s.Line != line {
251 t.Errorf("line = %d, want %d for %q", s.Line, line, text)
253 stext := s.TokenText()
254 if stext != text {
255 t.Errorf("text = %q, want %q", stext, text)
256 } else {
257 // check idempotency of TokenText() call
258 stext = s.TokenText()
259 if stext != text {
260 t.Errorf("text = %q, want %q (idempotency check)", stext, text)
// countNewlines returns the number of '\n' characters in s. The tests use it
// to work out how many source lines a token's text spans.
func countNewlines(s string) int {
	return strings.Count(s, "\n")
}
277 func testScan(t *testing.T, mode uint) {
278 s := new(Scanner).Init(makeSource(" \t%s\t\n\r"))
279 s.Mode = mode
280 tok := s.Scan()
281 line := 1
282 for _, k := range tokenList {
283 if mode&SkipComments == 0 || k.tok != Comment {
284 checkTok(t, s, line, tok, k.tok, k.text)
285 tok = s.Scan()
287 line += countNewlines(k.text) + 1 // each token is on a new line
289 checkTok(t, s, line, tok, -1, "")
293 func TestScan(t *testing.T) {
294 testScan(t, GoTokens)
295 testScan(t, GoTokens&^SkipComments)
299 func TestPosition(t *testing.T) {
300 src := makeSource("\t\t\t\t%s\n")
301 s := new(Scanner).Init(src)
302 s.Mode = GoTokens &^ SkipComments
303 s.Scan()
304 pos := Position{"", 4, 1, 5}
305 for _, k := range tokenList {
306 if s.Offset != pos.Offset {
307 t.Errorf("offset = %d, want %d for %q", s.Offset, pos.Offset, k.text)
309 if s.Line != pos.Line {
310 t.Errorf("line = %d, want %d for %q", s.Line, pos.Line, k.text)
312 if s.Column != pos.Column {
313 t.Errorf("column = %d, want %d for %q", s.Column, pos.Column, k.text)
315 pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline
316 pos.Line += countNewlines(k.text) + 1 // each token is on a new line
317 s.Scan()
322 func TestScanZeroMode(t *testing.T) {
323 src := makeSource("%s\n")
324 str := src.String()
325 s := new(Scanner).Init(src)
326 s.Mode = 0 // don't recognize any token classes
327 s.Whitespace = 0 // don't skip any whitespace
328 tok := s.Scan()
329 for i, ch := range str {
330 if tok != ch {
331 t.Fatalf("%d. tok = %s, want %s", i, TokenString(tok), TokenString(ch))
333 tok = s.Scan()
335 if tok != EOF {
336 t.Fatalf("tok = %s, want EOF", TokenString(tok))
341 func testScanSelectedMode(t *testing.T, mode uint, class int) {
342 src := makeSource("%s\n")
343 s := new(Scanner).Init(src)
344 s.Mode = mode
345 tok := s.Scan()
346 for tok != EOF {
347 if tok < 0 && tok != class {
348 t.Fatalf("tok = %s, want %s", TokenString(tok), TokenString(class))
350 tok = s.Scan()
355 func TestScanSelectedMask(t *testing.T) {
356 testScanSelectedMode(t, 0, 0)
357 testScanSelectedMode(t, ScanIdents, Ident)
358 // Don't test ScanInts and ScanNumbers since some parts of
359 // the floats in the source look like (illegal) octal ints
360 // and ScanNumbers may return either Int or Float.
361 testScanSelectedMode(t, ScanChars, Char)
362 testScanSelectedMode(t, ScanStrings, String)
363 testScanSelectedMode(t, SkipComments, 0)
364 testScanSelectedMode(t, ScanComments, Comment)
368 func TestScanNext(t *testing.T) {
369 s := new(Scanner).Init(bytes.NewBufferString("if a == bcd /* comment */ {\n\ta += c\n}"))
370 checkTok(t, s, 1, s.Scan(), Ident, "if")
371 checkTok(t, s, 1, s.Scan(), Ident, "a")
372 checkTok(t, s, 1, s.Scan(), '=', "=")
373 checkTok(t, s, 1, s.Next(), '=', "")
374 checkTok(t, s, 1, s.Next(), ' ', "")
375 checkTok(t, s, 1, s.Next(), 'b', "")
376 checkTok(t, s, 1, s.Scan(), Ident, "cd")
377 checkTok(t, s, 1, s.Scan(), '{', "{")
378 checkTok(t, s, 2, s.Scan(), Ident, "a")
379 checkTok(t, s, 2, s.Scan(), '+', "+")
380 checkTok(t, s, 2, s.Next(), '=', "")
381 checkTok(t, s, 2, s.Scan(), Ident, "c")
382 checkTok(t, s, 3, s.Scan(), '}', "}")
383 checkTok(t, s, 3, s.Scan(), -1, "")
387 func TestScanWhitespace(t *testing.T) {
388 var buf bytes.Buffer
389 var ws uint64
390 // start at 1, NUL character is not allowed
391 for ch := byte(1); ch < ' '; ch++ {
392 buf.WriteByte(ch)
393 ws |= 1 << ch
395 const orig = 'x'
396 buf.WriteByte(orig)
398 s := new(Scanner).Init(&buf)
399 s.Mode = 0
400 s.Whitespace = ws
401 tok := s.Scan()
402 if tok != orig {
403 t.Errorf("tok = %s, want %s", TokenString(tok), TokenString(orig))
408 func testError(t *testing.T, src, msg string, tok int) {
409 s := new(Scanner).Init(bytes.NewBufferString(src))
410 errorCalled := false
411 s.Error = func(s *Scanner, m string) {
412 if !errorCalled {
413 // only look at first error
414 if m != msg {
415 t.Errorf("msg = %q, want %q for %q", m, msg, src)
417 errorCalled = true
420 tk := s.Scan()
421 if tk != tok {
422 t.Errorf("tok = %s, want %s for %q", TokenString(tk), TokenString(tok), src)
424 if !errorCalled {
425 t.Errorf("error handler not called for %q", src)
427 if s.ErrorCount == 0 {
428 t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
433 func TestError(t *testing.T) {
434 testError(t, `01238`, "illegal octal number", Int)
435 testError(t, `'\"'`, "illegal char escape", Char)
436 testError(t, `'aa'`, "illegal char literal", Char)
437 testError(t, `'`, "literal not terminated", Char)
438 testError(t, `"\'"`, "illegal char escape", String)
439 testError(t, `"abc`, "literal not terminated", String)
440 testError(t, "`abc", "literal not terminated", String)
441 testError(t, `//`, "comment not terminated", EOF)
442 testError(t, `/*/`, "comment not terminated", EOF)
443 testError(t, `"abc`+"\x00"+`def"`, "illegal character NUL", String)
444 testError(t, `"abc`+"\xff"+`def"`, "illegal UTF-8 encoding", String)
448 func checkPos(t *testing.T, s *Scanner, offset, line, column, char int) {
449 pos := s.Pos()
450 if pos.Offset != offset {
451 t.Errorf("offset = %d, want %d", pos.Offset, offset)
453 if pos.Line != line {
454 t.Errorf("line = %d, want %d", pos.Line, line)
456 if pos.Column != column {
457 t.Errorf("column = %d, want %d", pos.Column, column)
459 ch := s.Scan()
460 if ch != char {
461 t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
466 func TestPos(t *testing.T) {
467 s := new(Scanner).Init(bytes.NewBufferString("abc\n012\n\nx"))
468 s.Mode = 0
469 s.Whitespace = 0
470 checkPos(t, s, 0, 1, 1, 'a')
471 checkPos(t, s, 1, 1, 2, 'b')
472 checkPos(t, s, 2, 1, 3, 'c')
473 checkPos(t, s, 3, 2, 0, '\n')
474 checkPos(t, s, 4, 2, 1, '0')
475 checkPos(t, s, 5, 2, 2, '1')
476 checkPos(t, s, 6, 2, 3, '2')
477 checkPos(t, s, 7, 3, 0, '\n')
478 checkPos(t, s, 8, 4, 0, '\n')
479 checkPos(t, s, 9, 4, 1, 'x')
480 checkPos(t, s, 9, 4, 1, EOF)
481 checkPos(t, s, 9, 4, 1, EOF) // after EOF, position doesn't change