1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// smallMaxTokenSize is the token-size limit that tests in this file hand to
// the scanner through its MaxTokenSize method.
18 const smallMaxTokenSize
= 256 // Much smaller for more efficient testing.
20 // TestSpace checks that the white space table matches the Unicode definition.
// It compares IsSpace with unicode.IsSpace for every rune from 0 through
// utf8.MaxRune and aborts the test on the first rune where they disagree.
21 func TestSpace(t
*testing
.T
) {
// Walk the whole rune range, utf8.MaxRune included.
22 for r
:= rune(0); r
<= utf8
.MaxRune
; r
++ {
23 if IsSpace(r
) != unicode
.IsSpace(r
) {
24 t
.Fatalf("white space property disagrees: %#U should be %t", r
, unicode
.IsSpace(r
))
// scanTests holds the input strings ranged over by TestScanByte and
// TestScanRune. The entries visible here include invalid UTF-8 bytes as well
// as a correctly encoded U+FFFD.
29 var scanTests
= []string{
34 "\x81", // UTF-8 error
35 "\uFFFD", // correctly encoded RuneError
38 "abc¼☹\x81\uFFFD日本語\x82abc",
// TestScanByte runs the scanner over every entry of scanTests and checks that
// each token is exactly one byte, equal to the input byte at the same index.
// It also reports the index at which scanning terminated against len(test).
41 func TestScanByte(t
*testing
.T
) {
42 for n
, test
:= range scanTests
{
43 buf
:= strings
.NewReader(test
)
47 for i
= 0; s
.Scan(); i
++ {
// A token must be a single byte matching the i'th byte of the input.
48 if b
:= s
.Bytes(); len(b
) != 1 || b
[0] != test
[i
] {
49 t
.Errorf("#%d: %d: expected %q got %q", n
, i
, test
, b
)
53 t
.Errorf("#%d: termination expected at %d; got %d", n
, len(test
), i
)
57 t
.Errorf("#%d: %v", n
, err
)
62 // Test that the rune splitter returns the same sequence of runes (not bytes) as a range loop over the string.
// Each token is decoded with utf8.DecodeRune and compared with the rune the
// range loop yields; the expected number of tokens is
// utf8.RuneCountInString(test).
63 func TestScanRune(t
*testing
.T
) {
64 for n
, test
:= range scanTests
{
65 buf
:= strings
.NewReader(test
)
70 // Use a string range loop to validate the sequence of runes.
71 for i
, expect
= range string(test
) {
76 got
, _
:= utf8
.DecodeRune(s
.Bytes())
78 t
.Errorf("#%d: %d: expected %q got %q", n
, i
, expect
, got
)
82 t
.Errorf("#%d: scan ran too long, got %q", n
, s
.Text())
84 testRuneCount
:= utf8
.RuneCountInString(test
)
85 if runeCount
!= testRuneCount
{
86 t
.Errorf("#%d: termination expected at %d; got %d", n
, testRuneCount
, runeCount
)
90 t
.Errorf("#%d: %v", n
, err
)
// wordScanTests holds the inputs ranged over by TestScanWords. The entry
// visible here separates words with several kinds of white space: tab,
// newline, carriage return, form feed, vertical tab, U+0085 and U+00A0.
95 var wordScanTests
= []string{
103 " abc\tdef\nghi\rjkl\fmno\vpqr\u0085stu\u00a0\n",
106 // Test that the word splitter returns the same data as strings.Fields.
// For every entry of wordScanTests the expected words come from
// strings.Fields(test); each scanned token is compared with the word at the
// same index, and the final token count is compared with len(words).
107 func TestScanWords(t
*testing
.T
) {
108 for n
, test
:= range wordScanTests
{
109 buf
:= strings
.NewReader(test
)
// strings.Fields provides the reference split.
112 words
:= strings
.Fields(test
)
114 for wordCount
= 0; wordCount
< len(words
); wordCount
++ {
119 if got
!= words
[wordCount
] {
120 t
.Errorf("#%d: %d: expected %q got %q", n
, wordCount
, words
[wordCount
], got
)
124 t
.Errorf("#%d: scan ran too long, got %q", n
, s
.Text())
126 if wordCount
!= len(words
) {
127 t
.Errorf("#%d: termination expected at %d; got %d", n
, len(words
), wordCount
)
131 t
.Errorf("#%d: %v", n
, err
)
136 // slowReader is a reader that returns only a few bytes at a time, to test the incremental
137 // reads in Scanner.Scan.
// Tests in this file build it positionally, e.g. slowReader{1, buf}: an
// integer followed by the underlying reader.
// NOTE(review): the field declarations are not visible in this excerpt; the
// integer is presumably the maximum number of bytes per Read — confirm.
138 type slowReader
struct {
// Read implements io.Reader by delegating to the wrapped reader sr.buf.
// NOTE(review): the statements between the signature and the return are not
// visible in this excerpt; presumably they shorten p to the reader's
// per-call limit before delegating — confirm.
143 func (sr
*slowReader
) Read(p
[]byte) (n
int, err error
) {
147 return sr
.buf
.Read(p
)
150 // genLine writes to buf a predictable but non-trivial line of text of length
151 // n, including the terminal newline and an occasional carriage return.
152 // If addNewline is false, the \r and \n are not emitted.
// The content depends only on lineNum and n, so a test can regenerate the
// line it expects the scanner to return.
153 func genLine(buf
*bytes
.Buffer
, lineNum
, n
int, addNewline
bool) {
// doCR starts out true for line numbers divisible by 5.
155 doCR
:= lineNum%5
== 0
159 for i
:= 0; i
< n
-1; i
++ { // Stop early for \n.
// The byte is derived from lineNum+i; byte arithmetic wraps around, so a
// line terminator value can come up and is special-cased below.
160 c
:= 'a' + byte(lineNum
+i
)
161 if c
== '\n' || c
== '\r' { // Don't confuse us.
174 // Test the line splitter, including some carriage returns but no long lines.
// The input is produced by genLine with newlines and read back through a
// slowReader; each scanned line is compared with the same line regenerated
// by genLine without its terminator.
175 func TestScanLongLines(t
*testing
.T
) {
176 // Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
177 tmp
:= new(bytes
.Buffer
)
178 buf
:= new(bytes
.Buffer
)
181 for i
:= 0; i
< 2*smallMaxTokenSize
; i
++ {
182 genLine(tmp
, lineNum
, j
, true)
// The line length j is only touched while it is below the limit.
183 if j
< smallMaxTokenSize
{
188 buf
.Write(tmp
.Bytes())
// slowReader{1, buf} feeds the scanner through the slow reader.
191 s
:= NewScanner(&slowReader
{1, buf
})
193 s
.MaxTokenSize(smallMaxTokenSize
)
195 for lineNum
:= 0; s
.Scan(); lineNum
++ {
// Regenerate the expected line, this time without \r and \n.
196 genLine(tmp
, lineNum
, j
, false)
197 if j
< smallMaxTokenSize
{
202 line
:= tmp
.String() // We use the string-valued token here, for variety.
203 if s
.Text() != line
{
204 t
.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum
, len(s
.Bytes()), len(line
), s
.Text(), line
)
213 // Test that the line splitter errors out on a long line.
// Lines are generated with genLine and read back through a slowReader with
// the token limit set to smallMaxTokenSize; the scan must end with
// ErrTooLong.
214 func TestScanLineTooLong(t
*testing
.T
) {
// NOTE(review): this local constant appears to shadow the package-level
// smallMaxTokenSize, which has the same value; it looks redundant.
215 const smallMaxTokenSize
= 256 // Much smaller for more efficient testing.
216 // Build a buffer of many lines of varying length. NOTE(review): this comment previously said the lengths do not exceed smallMaxTokenSize, but the test expects ErrTooLong, so presumably some lines do exceed it; confirm against the loop body.
217 tmp
:= new(bytes
.Buffer
)
218 buf
:= new(bytes
.Buffer
)
221 for i
:= 0; i
< 2*smallMaxTokenSize
; i
++ {
222 genLine(tmp
, lineNum
, j
, true)
224 buf
.Write(tmp
.Bytes())
227 s
:= NewScanner(&slowReader
{3, buf
})
229 s
.MaxTokenSize(smallMaxTokenSize
)
231 for lineNum
:= 0; s
.Scan(); lineNum
++ {
// Regenerate the expected line without its terminator.
232 genLine(tmp
, lineNum
, j
, false)
233 if j
< smallMaxTokenSize
{
239 if !bytes
.Equal(s
.Bytes(), line
) {
240 t
.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum
, len(s
.Bytes()), len(line
), s
.Bytes(), line
)
// The scan must have stopped with ErrTooLong; anything else is fatal.
244 if err
!= ErrTooLong
{
245 t
.Fatalf("expected ErrTooLong; got %s", err
)
249 // testNoNewline is the helper shared by the final-line tests in this file. It scans text through a slowReader and compares each scanned line with the entry of lines at the same index.
250 func testNoNewline(text
string, lines
[]string, t
*testing
.T
) {
251 buf
:= strings
.NewReader(text
)
252 s
:= NewScanner(&slowReader
{7, buf
})
254 for lineNum
:= 0; s
.Scan(); lineNum
++ {
// lines is indexed by scan position.
255 line
:= lines
[lineNum
]
256 if s
.Text() != line
{
257 t
.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum
, len(s
.Bytes()), len(line
), s
.Bytes(), line
)
266 // Test that the line splitter handles a final line without a newline.
// The input ends in "opqrstuvwxyz" with no terminator; the check itself is
// delegated to testNoNewline.
267 func TestScanLineNoNewline(t
*testing
.T
) {
268 const text
= "abcdefghijklmn\nopqrstuvwxyz"
273 testNoNewline(text
, lines
, t
)
276 // Test that the line splitter handles a final line with a carriage return but no newline.
// The input ends in "\r" with no "\n" after it; the check itself is
// delegated to testNoNewline.
277 func TestScanLineReturnButNoNewline(t
*testing
.T
) {
278 const text
= "abcdefghijklmn\nopqrstuvwxyz\r"
283 testNoNewline(text
, lines
, t
)
286 // Test that the line splitter handles a final empty line.
// The input ends in two consecutive newlines; the check itself is delegated
// to testNoNewline.
287 func TestScanLineEmptyFinalLine(t
*testing
.T
) {
288 const text
= "abcdefghijklmn\nopqrstuvwxyz\n\n"
294 testNoNewline(text
, lines
, t
)
297 // Test that the line splitter handles a final empty line with a carriage return but no newline.
// The input ends in "\n\r"; the check itself is delegated to testNoNewline.
298 func TestScanLineEmptyFinalLineWithCR(t
*testing
.T
) {
299 const text
= "abcdefghijklmn\nopqrstuvwxyz\n\r"
305 testNoNewline(text
, lines
, t
)
// testError is the sentinel error that the test split functions in this file
// return, and that the tests then compare the scanner's error against.
308 var testError
= errors
.New("testError")
310 // Test the correct error is returned when the split function errors out.
// The split function hands out one-byte tokens and then returns testError;
// the test checks every token against the input text and that the scanner
// ends up reporting testError.
311 func TestSplitError(t
*testing
.T
) {
312 // Create a split function that delivers a little data, then a predictable error.
315 errorSplit
:= func(data
[]byte, atEOF
bool) (advance
int, token
[]byte, err error
) {
317 panic("didn't get enough data")
// Once okCount splits have been delivered, fail with the sentinel.
319 if numSplits
>= okCount
{
320 return 0, nil, testError
// Otherwise deliver exactly one byte as the token.
323 return 1, data
[0:1], nil
326 const text
= "abcdefghijklmnopqrstuvwxyz"
327 buf
:= strings
.NewReader(text
)
328 s
:= NewScanner(&slowReader
{1, buf
})
331 for i
= 0; s
.Scan(); i
++ {
332 if len(s
.Bytes()) != 1 || text
[i
] != s
.Bytes()[0] {
333 t
.Errorf("#%d: expected %q got %q", i
, text
[i
], s
.Bytes()[0])
336 // Check correct termination location and error.
338 t
.Errorf("unexpected termination; expected %d tokens got %d", okCount
, i
)
341 if err
!= testError
{
342 t
.Fatalf("expected %q got %v", testError
, err
)
346 // Test that an EOF is overridden by a user-generated scan error.
// The custom splitter wraps ScanWords and insists that the scanner's
// internal state is already io.EOF; afterwards s.Err() must be testError.
347 func TestErrAtEOF(t
*testing
.T
) {
348 s
:= NewScanner(strings
.NewReader("1 2 33"))
349 // This splitter will fail on last entry, after s.err==EOF.
350 split
:= func(data
[]byte, atEOF
bool) (advance
int, token
[]byte, err error
) {
351 advance
, token
, err
= ScanWords(data
, atEOF
)
// Guard: the scenario is only meaningful if the scanner has seen EOF.
353 if s
.ErrOrEOF() != io
.EOF
{
354 t
.Fatal("not testing EOF")
363 if s
.Err() != testError
{
364 t
.Fatal("wrong error:", s
.Err())
368 // Test for issue 5268.
// alwaysError is the reader handed to the scanner in TestNonEOFWithEmptyRead.
369 type alwaysError
struct{}
// Read implements io.Reader. It never delivers data: it returns 0 bytes
// together with io.ErrUnexpectedEOF.
371 func (alwaysError
) Read(p
[]byte) (int, error
) {
372 return 0, io
.ErrUnexpectedEOF
// TestNonEOFWithEmptyRead scans from an alwaysError reader. The read is
// expected to fail, and the resulting error is expected to be
// io.ErrUnexpectedEOF.
375 func TestNonEOFWithEmptyRead(t
*testing
.T
) {
376 scanner
:= NewScanner(alwaysError
{})
378 t
.Fatal("read should fail")
381 if err
!= io
.ErrUnexpectedEOF
{
382 t
.Errorf("unexpected error: %v", err
)
386 // Test that Scan finishes if we have endless empty reads.
// endlessZeros is the reader handed to the scanner in TestBadReader.
387 type endlessZeros
struct{}
// Read implements io.Reader for endlessZeros.
// NOTE(review): the body is not visible in this excerpt; going by the type's
// comment it presumably returns (0, nil) on every call — confirm.
389 func (endlessZeros
) Read(p
[]byte) (int, error
) {
// TestBadReader scans from an endlessZeros reader. The read is expected to
// fail, and the resulting error is expected to be io.ErrNoProgress.
393 func TestBadReader(t
*testing
.T
) {
394 scanner
:= NewScanner(endlessZeros
{})
396 t
.Fatal("read should fail")
399 if err
!= io
.ErrNoProgress
{
400 t
.Errorf("unexpected error: %v", err
)
// TestScanWordsExcessiveWhiteSpace checks that ScanWords still yields the
// word when it is preceded by a run of spaces four times as long as the
// token limit passed to MaxTokenSize.
404 func TestScanWordsExcessiveWhiteSpace(t
*testing
.T
) {
// Input: 4*smallMaxTokenSize spaces followed by the word.
406 s
:= strings
.Repeat(" ", 4*smallMaxTokenSize
) + word
407 scanner
:= NewScanner(strings
.NewReader(s
))
408 scanner
.MaxTokenSize(smallMaxTokenSize
)
409 scanner
.Split(ScanWords
)
411 t
.Fatalf("scan failed: %v", scanner
.Err())
413 if token
:= scanner
.Text(); token
!= word
{
414 t
.Fatalf("unexpected token: %v", token
)
418 // Test that empty tokens, including at end of line or end of file, are found by the scanner.
419 // Issue 8672: Could miss final empty token.
// commaSplit is the split function used for these tests. It walks data and,
// at a separator, returns the bytes before it as the token while advancing
// past it; when no separator is found it returns all of data together with
// ErrFinalToken.
// NOTE(review): the separator test itself is not visible in this excerpt;
// presumably it looks for ',' — confirm.
421 func commaSplit(data
[]byte, atEOF
bool) (advance
int, token
[]byte, err error
) {
422 for i
:= 0; i
< len(data
); i
++ {
// Token is everything before index i; advance skips index i as well.
424 return i
+ 1, data
[:i
], nil
427 return 0, data
, ErrFinalToken
// testEmptyTokens scans text and checks that the tokens produced match
// values one for one: a token beyond len(values) is fatal, each token must
// equal values[i], and the final count must equal len(values). The
// scanner's error is inspected at the end.
430 func testEmptyTokens(t
*testing
.T
, text
string, values
[]string) {
431 s
:= NewScanner(strings
.NewReader(text
))
434 for i
= 0; s
.Scan(); i
++ {
// Guard the index before comparing: too many tokens is a hard failure.
435 if i
>= len(values
) {
436 t
.Fatalf("got %d fields, expected %d", i
+1, len(values
))
438 if s
.Text() != values
[i
] {
439 t
.Errorf("%d: expected %q got %q", i
, values
[i
], s
.Text())
442 if i
!= len(values
) {
443 t
.Fatalf("got %d fields, expected %d", i
, len(values
))
445 if err
:= s
.Err(); err
!= nil {
// TestEmptyTokens checks input that ends in a separator: "1,2,3," must
// produce a final empty token after "3".
450 func TestEmptyTokens(t
*testing
.T
) {
451 testEmptyTokens(t
, "1,2,3,", []string{"1", "2", "3", ""})
// TestWithNoEmptyTokens is the counterpart without a trailing separator:
// "1,2,3" must produce exactly the three tokens and no empty one.
454 func TestWithNoEmptyTokens(t
*testing
.T
) {
455 testEmptyTokens(t
, "1,2,3", []string{"1", "2", "3"})
// loopAtEOFSplit is the split function installed by TestDontLoopForever.
// The one return visible here delivers a single byte as the token.
// NOTE(review): the remaining branches are not visible in this excerpt;
// presumably, once data is exhausted, it returns without advancing or
// reporting an error — confirm.
458 func loopAtEOFSplit(data
[]byte, atEOF
bool) (advance
int, token
[]byte, err error
) {
460 return 1, data
[:1], nil
// TestDontLoopForever installs loopAtEOFSplit and expects scanning to panic
// rather than run indefinitely. The recovered value must be a string that
// contains "empty tokens".
465 func TestDontLoopForever(t
*testing
.T
) {
466 s
:= NewScanner(strings
.NewReader("abc"))
467 s
.Split(loopAtEOFSplit
)
472 t
.Fatal("should have panicked")
// The panic value must be a string mentioning "empty tokens".
474 if msg
, ok
:= err
.(string); !ok ||
!strings
.Contains(msg
, "empty tokens") {
478 for count
:= 0; s
.Scan(); count
++ {
484 t
.Fatal("after scan:", s
.Err())
// TestBlankLines scans an input made of 1000 newline characters, counting
// the tokens as it goes, and reports the scanner's error after the loop.
488 func TestBlankLines(t
*testing
.T
) {
489 s
:= NewScanner(strings
.NewReader(strings
.Repeat("\n", 1000)))
490 for count
:= 0; s
.Scan(); count
++ {
496 t
.Fatal("after scan:", s
.Err())
// split is a split function bound to a countdown value. The one return
// visible here delivers a single byte as the token.
// NOTE(review): the rest of the body is not visible in this excerpt;
// presumably it decrements *c per call and behaves differently once the
// count runs out — confirm.
502 func (c
*countdown
) split(data
[]byte, atEOF
bool) (advance
int, token
[]byte, err error
) {
505 return 1, data
[:1], nil
510 // Check that the looping-at-EOF check doesn't trigger for merely empty tokens.
// The input is 10000 newline characters and the countdown starts at 10000;
// the test fails if the scan ends in an error or if the countdown still has
// items left to process.
511 func TestEmptyLinesOK(t
*testing
.T
) {
512 c
:= countdown(10000)
513 s
:= NewScanner(strings
.NewReader(strings
.Repeat("\n", 10000)))
518 t
.Fatal("after scan:", s
.Err())
521 t
.Fatalf("stopped with %d left to process", c
)
525 // Make sure we can read a huge token if a big enough buffer is provided.
// The token is twice MaxScanTokenSize long; the scanner is given a 100-byte
// initial buffer and a maximum of 3*MaxScanTokenSize.
526 func TestHugeBuffer(t
*testing
.T
) {
527 text
:= strings
.Repeat("x", 2*MaxScanTokenSize
)
528 s
:= NewScanner(strings
.NewReader(text
+ "\n"))
// Small starting buffer, but a maximum above the token's length.
529 s
.Buffer(make([]byte, 100), 3*MaxScanTokenSize
)
533 t
.Errorf("scan got incorrect token of length %d", len(token
))
537 t
.Fatal("after scan:", s
.Err())