1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
18 const smallMaxTokenSize
= 256 // Much smaller for more efficient testing.
20 // Test white space table matches the Unicode definition.
21 func TestSpace(t
*testing
.T
) {
22 for r
:= rune(0); r
<= utf8
.MaxRune
; r
++ {
23 if IsSpace(r
) != unicode
.IsSpace(r
) {
24 t
.Fatalf("white space property disagrees: %#U should be %t", r
, unicode
.IsSpace(r
))
29 var scanTests
= []string{
34 "\x81", // UTF-8 error
35 "\uFFFD", // correctly encoded RuneError
38 "abc¼☹\x81\uFFFD日本語\x82abc",
41 func TestScanByte(t
*testing
.T
) {
42 for n
, test
:= range scanTests
{
43 buf
:= strings
.NewReader(test
)
47 for i
= 0; s
.Scan(); i
++ {
48 if b
:= s
.Bytes(); len(b
) != 1 || b
[0] != test
[i
] {
49 t
.Errorf("#%d: %d: expected %q got %q", n
, i
, test
, b
)
53 t
.Errorf("#%d: termination expected at %d; got %d", n
, len(test
), i
)
57 t
.Errorf("#%d: %v", n
, err
)
62 // Test that the rune splitter returns the same sequence of runes (not bytes) as a range loop over the string.
63 func TestScanRune(t
*testing
.T
) {
64 for n
, test
:= range scanTests
{
65 buf
:= strings
.NewReader(test
)
70 // Use a string range loop to validate the sequence of runes.
71 for i
, expect
= range string(test
) {
76 got
, _
:= utf8
.DecodeRune(s
.Bytes())
78 t
.Errorf("#%d: %d: expected %q got %q", n
, i
, expect
, got
)
82 t
.Errorf("#%d: scan ran too long, got %q", n
, s
.Text())
84 testRuneCount
:= utf8
.RuneCountInString(test
)
85 if runeCount
!= testRuneCount
{
86 t
.Errorf("#%d: termination expected at %d; got %d", n
, testRuneCount
, runeCount
)
90 t
.Errorf("#%d: %v", n
, err
)
95 var wordScanTests
= []string{
103 " abc\tdef\nghi\rjkl\fmno\vpqr\u0085stu\u00a0\n",
106 // Test that the word splitter returns the same data as strings.Fields.
107 func TestScanWords(t
*testing
.T
) {
108 for n
, test
:= range wordScanTests
{
109 buf
:= strings
.NewReader(test
)
112 words
:= strings
.Fields(test
)
114 for wordCount
= 0; wordCount
< len(words
); wordCount
++ {
119 if got
!= words
[wordCount
] {
120 t
.Errorf("#%d: %d: expected %q got %q", n
, wordCount
, words
[wordCount
], got
)
124 t
.Errorf("#%d: scan ran too long, got %q", n
, s
.Text())
126 if wordCount
!= len(words
) {
127 t
.Errorf("#%d: termination expected at %d; got %d", n
, len(words
), wordCount
)
131 t
.Errorf("#%d: %v", n
, err
)
136 // slowReader is a reader that returns only a few bytes at a time, to test the incremental
137 // reads in Scanner.Scan.
138 type slowReader
struct {
143 func (sr
*slowReader
) Read(p
[]byte) (n
int, err error
) {
147 return sr
.buf
.Read(p
)
150 // genLine writes to buf a predictable but non-trivial line of text of length
151 // n, including the terminal newline and an occasional carriage return.
152 // If addNewline is false, the \r and \n are not emitted.
153 func genLine(buf
*bytes
.Buffer
, lineNum
, n
int, addNewline
bool) {
155 doCR
:= lineNum%5
== 0
159 for i
:= 0; i
< n
-1; i
++ { // Stop early for \n.
160 c
:= 'a' + byte(lineNum
+i
)
161 if c
== '\n' || c
== '\r' { // Don't confuse us.
175 // Test the line splitter, including some carriage returns but no lines longer than the maximum token size.
176 func TestScanLongLines(t
*testing
.T
) {
177 // Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
178 tmp
:= new(bytes
.Buffer
)
179 buf
:= new(bytes
.Buffer
)
182 for i
:= 0; i
< 2*smallMaxTokenSize
; i
++ {
183 genLine(tmp
, lineNum
, j
, true)
184 if j
< smallMaxTokenSize
{
189 buf
.Write(tmp
.Bytes())
192 s
:= NewScanner(&slowReader
{1, buf
})
194 s
.MaxTokenSize(smallMaxTokenSize
)
196 for lineNum
:= 0; s
.Scan(); lineNum
++ {
197 genLine(tmp
, lineNum
, j
, false)
198 if j
< smallMaxTokenSize
{
203 line
:= tmp
.String() // We use the string-valued token here, for variety.
204 if s
.Text() != line
{
205 t
.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum
, len(s
.Bytes()), len(line
), s
.Text(), line
)
214 // Test that the line splitter errors out on a long line.
215 func TestScanLineTooLong(t
*testing
.T
) {
216 const smallMaxTokenSize
= 256 // Much smaller for more efficient testing.
217 // Build a buffer of lines whose lengths eventually exceed smallMaxTokenSize, so that the scan must fail.
218 tmp
:= new(bytes
.Buffer
)
219 buf
:= new(bytes
.Buffer
)
222 for i
:= 0; i
< 2*smallMaxTokenSize
; i
++ {
223 genLine(tmp
, lineNum
, j
, true)
225 buf
.Write(tmp
.Bytes())
228 s
:= NewScanner(&slowReader
{3, buf
})
230 s
.MaxTokenSize(smallMaxTokenSize
)
232 for lineNum
:= 0; s
.Scan(); lineNum
++ {
233 genLine(tmp
, lineNum
, j
, false)
234 if j
< smallMaxTokenSize
{
240 if !bytes
.Equal(s
.Bytes(), line
) {
241 t
.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum
, len(s
.Bytes()), len(line
), s
.Bytes(), line
)
245 if err
!= ErrTooLong
{
246 t
.Fatalf("expected ErrTooLong; got %s", err
)
250 // testNoNewline is a helper for the final-line tests: it scans text and checks that each token matches the corresponding entry in lines.
251 func testNoNewline(text
string, lines
[]string, t
*testing
.T
) {
252 buf
:= strings
.NewReader(text
)
253 s
:= NewScanner(&slowReader
{7, buf
})
255 for lineNum
:= 0; s
.Scan(); lineNum
++ {
256 line
:= lines
[lineNum
]
257 if s
.Text() != line
{
258 t
.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum
, len(s
.Bytes()), len(line
), s
.Bytes(), line
)
267 // Test that the line splitter handles a final line without a newline.
268 func TestScanLineNoNewline(t
*testing
.T
) {
269 const text
= "abcdefghijklmn\nopqrstuvwxyz"
274 testNoNewline(text
, lines
, t
)
277 // Test that the line splitter handles a final line with a carriage return but no newline.
278 func TestScanLineReturnButNoNewline(t
*testing
.T
) {
279 const text
= "abcdefghijklmn\nopqrstuvwxyz\r"
284 testNoNewline(text
, lines
, t
)
287 // Test that the line splitter handles a final empty line.
288 func TestScanLineEmptyFinalLine(t
*testing
.T
) {
289 const text
= "abcdefghijklmn\nopqrstuvwxyz\n\n"
295 testNoNewline(text
, lines
, t
)
298 // Test that the line splitter handles a final empty line with a carriage return but no newline.
299 func TestScanLineEmptyFinalLineWithCR(t
*testing
.T
) {
300 const text
= "abcdefghijklmn\nopqrstuvwxyz\n\r"
306 testNoNewline(text
, lines
, t
)
309 var testError
= errors
.New("testError")
311 // Test the correct error is returned when the split function errors out.
312 func TestSplitError(t
*testing
.T
) {
313 // Create a split function that delivers a little data, then a predictable error.
316 errorSplit
:= func(data
[]byte, atEOF
bool) (advance
int, token
[]byte, err error
) {
318 panic("didn't get enough data")
320 if numSplits
>= okCount
{
321 return 0, nil, testError
324 return 1, data
[0:1], nil
327 const text
= "abcdefghijklmnopqrstuvwxyz"
328 buf
:= strings
.NewReader(text
)
329 s
:= NewScanner(&slowReader
{1, buf
})
332 for i
= 0; s
.Scan(); i
++ {
333 if len(s
.Bytes()) != 1 || text
[i
] != s
.Bytes()[0] {
334 t
.Errorf("#%d: expected %q got %q", i
, text
[i
], s
.Bytes()[0])
337 // Check correct termination location and error.
339 t
.Errorf("unexpected termination; expected %d tokens got %d", okCount
, i
)
342 if err
!= testError
{
343 t
.Fatalf("expected %q got %v", testError
, err
)
347 // Test that an EOF is overridden by a user-generated scan error.
348 func TestErrAtEOF(t
*testing
.T
) {
349 s
:= NewScanner(strings
.NewReader("1 2 33"))
350 // This splitter will fail on last entry, after s.err==EOF.
351 split
:= func(data
[]byte, atEOF
bool) (advance
int, token
[]byte, err error
) {
352 advance
, token
, err
= ScanWords(data
, atEOF
)
354 if s
.ErrOrEOF() != io
.EOF
{
355 t
.Fatal("not testing EOF")
364 if s
.Err() != testError
{
365 t
.Fatal("wrong error:", s
.Err())
369 // Test for issue 5268.
370 type alwaysError
struct{}
372 func (alwaysError
) Read(p
[]byte) (int, error
) {
373 return 0, io
.ErrUnexpectedEOF
376 func TestNonEOFWithEmptyRead(t
*testing
.T
) {
377 scanner
:= NewScanner(alwaysError
{})
379 t
.Fatal("read should fail")
382 if err
!= io
.ErrUnexpectedEOF
{
383 t
.Errorf("unexpected error: %v", err
)
387 // Test that Scan finishes if we have endless empty reads.
388 type endlessZeros
struct{}
390 func (endlessZeros
) Read(p
[]byte) (int, error
) {
394 func TestBadReader(t
*testing
.T
) {
395 scanner
:= NewScanner(endlessZeros
{})
397 t
.Fatal("read should fail")
400 if err
!= io
.ErrNoProgress
{
401 t
.Errorf("unexpected error: %v", err
)
405 func TestScanWordsExcessiveWhiteSpace(t
*testing
.T
) {
407 s
:= strings
.Repeat(" ", 4*smallMaxTokenSize
) + word
408 scanner
:= NewScanner(strings
.NewReader(s
))
409 scanner
.MaxTokenSize(smallMaxTokenSize
)
410 scanner
.Split(ScanWords
)
412 t
.Fatalf("scan failed: %v", scanner
.Err())
414 if token
:= scanner
.Text(); token
!= word
{
415 t
.Fatalf("unexpected token: %v", token
)
419 // Test that empty tokens, including at end of line or end of file, are found by the scanner.
420 // Issue 8672: Could miss final empty token.
422 func commaSplit(data
[]byte, atEOF
bool) (advance
int, token
[]byte, err error
) {
423 for i
:= 0; i
< len(data
); i
++ {
425 return i
+ 1, data
[:i
], nil
428 return 0, data
, ErrFinalToken
431 func testEmptyTokens(t
*testing
.T
, text
string, values
[]string) {
432 s
:= NewScanner(strings
.NewReader(text
))
435 for i
= 0; s
.Scan(); i
++ {
436 if i
>= len(values
) {
437 t
.Fatalf("got %d fields, expected %d", i
+1, len(values
))
439 if s
.Text() != values
[i
] {
440 t
.Errorf("%d: expected %q got %q", i
, values
[i
], s
.Text())
443 if i
!= len(values
) {
444 t
.Fatalf("got %d fields, expected %d", i
, len(values
))
446 if err
:= s
.Err(); err
!= nil {
451 func TestEmptyTokens(t
*testing
.T
) {
452 testEmptyTokens(t
, "1,2,3,", []string{"1", "2", "3", ""})
455 func TestWithNoEmptyTokens(t
*testing
.T
) {
456 testEmptyTokens(t
, "1,2,3", []string{"1", "2", "3"})
459 func loopAtEOFSplit(data
[]byte, atEOF
bool) (advance
int, token
[]byte, err error
) {
461 return 1, data
[:1], nil
466 func TestDontLoopForever(t
*testing
.T
) {
467 s
:= NewScanner(strings
.NewReader("abc"))
468 s
.Split(loopAtEOFSplit
)
473 t
.Fatal("should have panicked")
475 if msg
, ok
:= err
.(string); !ok ||
!strings
.Contains(msg
, "empty tokens") {
479 for count
:= 0; s
.Scan(); count
++ {
485 t
.Fatal("after scan:", s
.Err())
489 func TestBlankLines(t
*testing
.T
) {
490 s
:= NewScanner(strings
.NewReader(strings
.Repeat("\n", 1000)))
491 for count
:= 0; s
.Scan(); count
++ {
497 t
.Fatal("after scan:", s
.Err())
503 func (c
*countdown
) split(data
[]byte, atEOF
bool) (advance
int, token
[]byte, err error
) {
506 return 1, data
[:1], nil
511 // Check that the looping-at-EOF check doesn't trigger for merely empty tokens.
512 func TestEmptyLinesOK(t
*testing
.T
) {
513 c
:= countdown(10000)
514 s
:= NewScanner(strings
.NewReader(strings
.Repeat("\n", 10000)))
519 t
.Fatal("after scan:", s
.Err())
522 t
.Fatalf("stopped with %d left to process", c
)
526 // Make sure we can read a huge token if a big enough buffer is provided.
527 func TestHugeBuffer(t
*testing
.T
) {
528 text
:= strings
.Repeat("x", 2*MaxScanTokenSize
)
529 s
:= NewScanner(strings
.NewReader(text
+ "\n"))
530 s
.Buffer(make([]byte, 100), 3*MaxScanTokenSize
)
534 t
.Errorf("scan got incorrect token of length %d", len(token
))
538 t
.Fatal("after scan:", s
.Err())