1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
17 type readTest
struct {
24 // These fields are copied into the Reader
27 UseFieldsPerRecord
bool // false (default) means FieldsPerRecord is -1
34 // In these tests, the §, ¶ and ∑ characters in readTest.Input are used to denote
35 // the start of a field, a record boundary and the position of an error respectively.
36 // They are removed before parsing and are used to verify the position
37 // information reported by FieldPos.
39 var readTests
= []readTest
{{
42 Output
: [][]string{{"a", "b", "c"}},
45 Input
: "§a,§b\r\n¶§c,§d\r\n",
46 Output
: [][]string{{"a", "b"}, {"c", "d"}},
49 Input
: "§a,§b\rc,§d\r\n",
50 Output
: [][]string{{"a", "b\rc", "d"}},
53 Input
: `§#field1,§field2,§field3
56 ¶§"a,a",§"b""bb",§"ccc"
60 {"#field1", "field2", "field3"},
61 {"aaa", "bb\nb", "ccc"},
62 {"a,a", `b"bb`, "ccc"},
63 {"zzz", "yyy", "xxx"},
65 UseFieldsPerRecord
: true,
70 Output
: [][]string{{"a", "b", "c"}},
74 Output
: [][]string{{"a", "b", "c"}},
79 line",§"one line",§"three
82 Output
: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
85 Input
: "§a,§b,§c\n\n¶§d,§e,§f\n\n",
91 Name
: "BlankLineFieldCount",
92 Input
: "§a,§b,§c\n\n¶§d,§e,§f\n\n",
97 UseFieldsPerRecord
: true,
101 Input
: " §a, §b, §c\n",
102 Output
: [][]string{{"a", "b", "c"}},
103 TrimLeadingSpace
: true,
105 Name
: "LeadingSpace",
106 Input
: "§ a,§ b,§ c\n",
107 Output
: [][]string{{" a", " b", " c"}},
110 Input
: "#1,2,3\n§a,§b,§c\n#comment",
111 Output
: [][]string{{"a", "b", "c"}},
115 Input
: "§#1,§2,§3\n¶§a,§b,§c",
116 Output
: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
119 Input
: `§a "word",§"1"2",§a",§"b`,
120 Output
: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
124 Input
: `§a "word",§"1"2",§a"`,
125 Output
: [][]string{{`a "word"`, `1"2`, `a"`}},
128 Name
: "BareDoubleQuotes",
130 Output
: [][]string{{`a""b`, `c`}},
133 Name
: "BadDoubleQuotes",
135 Errors
: []error
{&ParseError
{Err
: ErrBareQuote
}},
138 Input
: ` §"a",§" b",§c`,
139 Output
: [][]string{{"a", " b", "c"}},
140 TrimLeadingSpace
: true,
142 Name
: "BadBareQuote",
143 Input
: `§a ∑"word","b"`,
144 Errors
: []error
{&ParseError
{Err
: ErrBareQuote
}},
146 Name
: "BadTrailingQuote",
147 Input
: `§"a word",b∑"`,
148 Errors
: []error
{&ParseError
{Err
: ErrBareQuote
}},
150 Name
: "ExtraneousQuote",
151 Input
: `§"a ∑"word","b"`,
152 Errors
: []error
{&ParseError
{Err
: ErrQuote
}},
154 Name
: "BadFieldCount",
155 Input
: "§a,§b,§c\n¶∑§d,§e",
156 Errors
: []error
{nil, &ParseError
{Err
: ErrFieldCount
}},
157 Output
: [][]string{{"a", "b", "c"}, {"d", "e"}},
158 UseFieldsPerRecord
: true,
161 Name
: "BadFieldCountMultiple",
162 Input
: "§a,§b,§c\n¶∑§d,§e\n¶∑§f",
163 Errors
: []error
{nil, &ParseError
{Err
: ErrFieldCount
}, &ParseError
{Err
: ErrFieldCount
}},
164 Output
: [][]string{{"a", "b", "c"}, {"d", "e"}, {"f"}},
165 UseFieldsPerRecord
: true,
168 Name
: "BadFieldCount1",
170 Errors
: []error
{&ParseError
{Err
: ErrFieldCount
}},
171 Output
: [][]string{{"a", "b", "c"}},
172 UseFieldsPerRecord
: true,
176 Input
: "§a,§b,§c\n¶§d,§e",
177 Output
: [][]string{{"a", "b", "c"}, {"d", "e"}},
179 Name
: "TrailingCommaEOF",
181 Output
: [][]string{{"a", "b", "c", ""}},
183 Name
: "TrailingCommaEOL",
184 Input
: "§a,§b,§c,§\n",
185 Output
: [][]string{{"a", "b", "c", ""}},
187 Name
: "TrailingCommaSpaceEOF",
188 Input
: "§a,§b,§c, §",
189 Output
: [][]string{{"a", "b", "c", ""}},
190 TrimLeadingSpace
: true,
192 Name
: "TrailingCommaSpaceEOL",
193 Input
: "§a,§b,§c, §\n",
194 Output
: [][]string{{"a", "b", "c", ""}},
195 TrimLeadingSpace
: true,
197 Name
: "TrailingCommaLine3",
198 Input
: "§a,§b,§c\n¶§d,§e,§f\n¶§g,§hi,§",
199 Output
: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
200 TrimLeadingSpace
: true,
202 Name
: "NotTrailingComma3",
203 Input
: "§a,§b,§c,§ \n",
204 Output
: [][]string{{"a", "b", "c", " "}},
206 Name
: "CommaFieldTest",
219 {"x", "y", "z", "w"},
224 {"x", "y", "z", "w"},
231 Name
: "TrailingCommaIneffective1",
232 Input
: "§a,§b,§\n¶§c,§d,§e",
237 TrimLeadingSpace
: true,
239 Name
: "ReadAllReuseRecord",
240 Input
: "§a,§b\n¶§c,§d",
247 Name
: "StartLine1", // Issue 19019
248 Input
: "§a,\"b\nc∑\"d,e",
249 Errors
: []error
{&ParseError
{Err
: ErrQuote
}},
252 Input
: "§a,§b\n¶§\"d\n\n,e∑",
253 Errors
: []error
{nil, &ParseError
{Err
: ErrQuote
}},
254 Output
: [][]string{{"a", "b"}},
256 Name
: "CRLFInQuotedField", // Issue 21201
257 Input
: "§A,§\"Hello\r\nHi\",§B\r\n",
259 {"A", "Hello\nHi", "B"},
262 Name
: "BinaryBlobField", // Issue 19410
263 Input
: "§x09\x41\xb4\x1c,§aktau",
264 Output
: [][]string{{"x09A\xb4\x1c", "aktau"}},
267 Input
: "§field1,§field2\r",
268 Output
: [][]string{{"field1", "field2"}},
270 Name
: "QuotedTrailingCR",
271 Input
: "§\"field\"\r",
272 Output
: [][]string{{"field"}},
274 Name
: "QuotedTrailingCRCR",
275 Input
: "§\"field∑\"\r\r",
276 Errors
: []error
{&ParseError
{Err
: ErrQuote
}},
279 Input
: "§field\rfield\r",
280 Output
: [][]string{{"field\rfield"}},
283 Input
: "§field\r\rfield\r\r",
284 Output
: [][]string{{"field\r\rfield\r"}},
287 Input
: "§field\r\r\n¶§field\r\r\n",
288 Output
: [][]string{{"field\r"}, {"field\r"}},
290 Name
: "FieldCRCRLFCR",
291 Input
: "§field\r\r\n¶§\rfield\r\r\n\r",
292 Output
: [][]string{{"field\r"}, {"\rfield\r"}},
294 Name
: "FieldCRCRLFCRCR",
295 Input
: "§field\r\r\n¶§\r\rfield\r\r\n¶§\r\r",
296 Output
: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
298 Name
: "MultiFieldCRCRLFCRCR",
299 Input
: "§field1,§field2\r\r\n¶§\r\rfield1,§field2\r\r\n¶§\r\r,§",
301 {"field1", "field2\r"},
302 {"\r\rfield1", "field2\r"},
306 Name
: "NonASCIICommaAndComment",
307 Input
: "§a£§b,c£ \t§d,e\n€ comment\n",
308 Output
: [][]string{{"a", "b,c", "d,e"}},
309 TrimLeadingSpace
: true,
313 Name
: "NonASCIICommaAndCommentWithQuotes",
314 Input
: "§a€§\" b,\"€§ c\nλ comment\n",
315 Output
: [][]string{{"a", " b,", " c"}},
319 // λ and θ start with the same byte.
320 // This tests that the parser doesn't confuse such characters.
321 Name
: "NonASCIICommaConfusion",
322 Input
: "§\"abθcd\"λ§efθgh",
323 Output
: [][]string{{"abθcd", "efθgh"}},
327 Name
: "NonASCIICommentConfusion",
328 Input
: "§λ\n¶§λ\nθ\n¶§λ\n",
329 Output
: [][]string{{"λ"}, {"λ"}, {"λ"}},
332 Name
: "QuotedFieldMultipleLF",
333 Input
: "§\"\n\n\n\n\"",
334 Output
: [][]string{{"\n\n\n\n"}},
336 Name
: "MultipleCRLF",
337 Input
: "\r\n\r\n\r\n\r\n",
339 // The implementation may read each line in several chunks if it doesn't fit entirely
340 // in the read buffer, so we should test the code to handle that condition.
342 Input
: strings
.Repeat("#ignore\n", 10000) + "§" + strings
.Repeat("@", 5000) + ",§" + strings
.Repeat("*", 5000),
343 Output
: [][]string{{strings
.Repeat("@", 5000), strings
.Repeat("*", 5000)}},
346 Name
: "QuoteWithTrailingCRLF",
347 Input
: "§\"foo∑\"bar\"\r\n",
348 Errors
: []error
{&ParseError
{Err
: ErrQuote
}},
350 Name
: "LazyQuoteWithTrailingCRLF",
351 Input
: "§\"foo\"bar\"\r\n",
352 Output
: [][]string{{`foo"bar`}},
355 Name
: "DoubleQuoteWithTrailingCRLF",
356 Input
: "§\"foo\"\"bar\"\r\n",
357 Output
: [][]string{{`foo"bar`}},
361 Output
: [][]string{{`"""`}},
365 Errors
: []error
{&ParseError
{Err
: ErrQuote
}},
367 Name
: "LazyOddQuotes",
369 Output
: [][]string{{`"""`}},
374 Errors
: []error
{errInvalidDelim
},
378 Errors
: []error
{errInvalidDelim
},
382 Errors
: []error
{errInvalidDelim
},
385 Comma
: utf8
.RuneError
,
386 Errors
: []error
{errInvalidDelim
},
390 Errors
: []error
{errInvalidDelim
},
394 Errors
: []error
{errInvalidDelim
},
397 Comment
: utf8
.RuneError
,
398 Errors
: []error
{errInvalidDelim
},
400 Name
: "BadCommaComment",
403 Errors
: []error
{errInvalidDelim
},
406 func TestRead(t
*testing
.T
) {
407 newReader
:= func(tt readTest
) (*Reader
, [][][2]int, map[int][2]int) {
408 positions
, errPositions
, input
:= makePositions(tt
.Input
)
409 r
:= NewReader(strings
.NewReader(input
))
414 r
.Comment
= tt
.Comment
415 if tt
.UseFieldsPerRecord
{
416 r
.FieldsPerRecord
= tt
.FieldsPerRecord
418 r
.FieldsPerRecord
= -1
420 r
.LazyQuotes
= tt
.LazyQuotes
421 r
.TrimLeadingSpace
= tt
.TrimLeadingSpace
422 r
.ReuseRecord
= tt
.ReuseRecord
423 return r
, positions
, errPositions
426 for _
, tt
:= range readTests
{
427 t
.Run(tt
.Name
, func(t
*testing
.T
) {
428 r
, positions
, errPositions
:= newReader(tt
)
429 out
, err
:= r
.ReadAll()
430 if wantErr
:= firstError(tt
.Errors
, positions
, errPositions
); wantErr
!= nil {
431 if !reflect
.DeepEqual(err
, wantErr
) {
432 t
.Fatalf("ReadAll() error mismatch:\ngot %v (%#v)\nwant %v (%#v)", err
, err
, wantErr
, wantErr
)
435 t
.Fatalf("ReadAll() output:\ngot %q\nwant nil", out
)
439 t
.Fatalf("unexpected Readall() error: %v", err
)
441 if !reflect
.DeepEqual(out
, tt
.Output
) {
442 t
.Fatalf("ReadAll() output:\ngot %q\nwant %q", out
, tt
.Output
)
446 // Check field and error positions.
447 r
, _
, _
= newReader(tt
)
448 for recNum
:= 0; ; recNum
++ {
451 if recNum
< len(tt
.Errors
) && tt
.Errors
[recNum
] != nil {
452 wantErr
= errorWithPosition(tt
.Errors
[recNum
], recNum
, positions
, errPositions
)
453 } else if recNum
>= len(tt
.Output
) {
456 if !reflect
.DeepEqual(err
, wantErr
) {
457 t
.Fatalf("Read() error at record %d:\ngot %v (%#v)\nwant %v (%#v)", recNum
, err
, err
, wantErr
, wantErr
)
459 // ErrFieldCount is explicitly non-fatal.
460 if err
!= nil && !errors
.Is(err
, ErrFieldCount
) {
461 if recNum
< len(tt
.Output
) {
462 t
.Fatalf("need more records; got %d want %d", recNum
, len(tt
.Output
))
466 if got
, want
:= rec
, tt
.Output
[recNum
]; !reflect
.DeepEqual(got
, want
) {
467 t
.Errorf("Read vs ReadAll mismatch;\ngot %q\nwant %q", got
, want
)
469 pos
:= positions
[recNum
]
470 if len(pos
) != len(rec
) {
471 t
.Fatalf("mismatched position length at record %d", recNum
)
474 line
, col
:= r
.FieldPos(i
)
475 if got
, want
:= [2]int{line
, col
}, pos
[i
]; got
!= want
{
476 t
.Errorf("position mismatch at record %d, field %d;\ngot %v\nwant %v", recNum
, i
, got
, want
)
484 // firstError returns the first non-nil error in errs,
485 // with the position adjusted according to the error's
486 // index inside positions.
487 func firstError(errs
[]error
, positions
[][][2]int, errPositions
map[int][2]int) error
{
488 for i
, err
:= range errs
{
490 return errorWithPosition(err
, i
, positions
, errPositions
)
496 func errorWithPosition(err error
, recNum
int, positions
[][][2]int, errPositions
map[int][2]int) error
{
497 parseErr
, ok
:= err
.(*ParseError
)
501 if recNum
>= len(positions
) {
502 panic(fmt
.Errorf("no positions found for error at record %d", recNum
))
504 errPos
, ok
:= errPositions
[recNum
]
506 panic(fmt
.Errorf("no error position found for error at record %d", recNum
))
508 parseErr1
:= *parseErr
509 parseErr1
.StartLine
= positions
[recNum
][0][0]
510 parseErr1
.Line
= errPos
[0]
511 parseErr1
.Column
= errPos
[1]
// makePositions returns the expected field positions of all
// the fields in text, the positions of any errors, and the text with the position markers
// removed.
//
// The start of each field is marked with a § symbol;
// CSV lines are separated by ¶ symbols;
// Error positions are marked with ∑ symbols.
522 func makePositions(text
string) ([][][2]int, map[int][2]int, string) {
523 buf
:= make([]byte, 0, len(text
))
524 var positions
[][][2]int
525 errPositions
:= make(map[int][2]int)
530 r
, size
:= utf8
.DecodeRuneInString(text
)
535 buf
= append(buf
, '\n')
537 if len(positions
) == 0 {
538 positions
= append(positions
, [][2]int{})
540 positions
[len(positions
)-1] = append(positions
[len(positions
)-1], [2]int{line
, col
})
542 positions
= append(positions
, [][2]int{})
545 errPositions
[recNum
] = [2]int{line
, col
}
547 buf
= append(buf
, text
[:size
]...)
552 return positions
, errPositions
, string(buf
)
555 // nTimes is an io.Reader which yields the string s n times.
562 func (r
*nTimes
) Read(p
[]byte) (n
int, err error
) {
564 if r
.n
<= 0 || r
.s
== "" {
567 n0
:= copy(p
, r
.s
[r
.off
:])
571 if r
.off
== len(r
.s
) {
581 // benchmarkRead measures reading the provided CSV rows data.
582 // initReader, if non-nil, modifies the Reader before it's used.
583 func benchmarkRead(b
*testing
.B
, initReader
func(*Reader
), rows
string) {
585 r
:= NewReader(&nTimes
{s
: rows
, n
: b
.N
})
586 if initReader
!= nil {
600 const benchmarkCSVData
= `x,y,z,w
612 func BenchmarkRead(b
*testing
.B
) {
613 benchmarkRead(b
, nil, benchmarkCSVData
)
616 func BenchmarkReadWithFieldsPerRecord(b
*testing
.B
) {
617 benchmarkRead(b
, func(r
*Reader
) { r
.FieldsPerRecord
= 4 }, benchmarkCSVData
)
620 func BenchmarkReadWithoutFieldsPerRecord(b
*testing
.B
) {
621 benchmarkRead(b
, func(r
*Reader
) { r
.FieldsPerRecord
= -1 }, benchmarkCSVData
)
624 func BenchmarkReadLargeFields(b
*testing
.B
) {
625 benchmarkRead(b
, nil, strings
.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
626 xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
627 ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
628 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
632 func BenchmarkReadReuseRecord(b
*testing
.B
) {
633 benchmarkRead(b
, func(r
*Reader
) { r
.ReuseRecord
= true }, benchmarkCSVData
)
636 func BenchmarkReadReuseRecordWithFieldsPerRecord(b
*testing
.B
) {
637 benchmarkRead(b
, func(r
*Reader
) { r
.ReuseRecord
= true; r
.FieldsPerRecord
= 4 }, benchmarkCSVData
)
640 func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b
*testing
.B
) {
641 benchmarkRead(b
, func(r
*Reader
) { r
.ReuseRecord
= true; r
.FieldsPerRecord
= -1 }, benchmarkCSVData
)
644 func BenchmarkReadReuseRecordLargeFields(b
*testing
.B
) {
645 benchmarkRead(b
, func(r
*Reader
) { r
.ReuseRecord
= true }, strings
.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
646 xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
647 ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
648 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv