1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
15 func TestRead(t
*testing
.T
) {
22 // These fields are copied into the Reader
25 UseFieldsPerRecord
bool // false (default) means FieldsPerRecord is -1
33 Output
: [][]string{{"a", "b", "c"}},
36 Input
: "a,b\r\nc,d\r\n",
37 Output
: [][]string{{"a", "b"}, {"c", "d"}},
40 Input
: "a,b\rc,d\r\n",
41 Output
: [][]string{{"a", "b\rc", "d"}},
44 Input
: `#field1,field2,field3
51 {"#field1", "field2", "field3"},
52 {"aaa", "bb\nb", "ccc"},
53 {"a,a", `b"bb`, "ccc"},
54 {"zzz", "yyy", "xxx"},
56 UseFieldsPerRecord
: true,
61 Output
: [][]string{{"a", "b", "c"}},
65 Output
: [][]string{{"a", "b", "c"}},
70 line","one line","three
73 Output
: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
76 Input
: "a,b,c\n\nd,e,f\n\n",
82 Name
: "BlankLineFieldCount",
83 Input
: "a,b,c\n\nd,e,f\n\n",
88 UseFieldsPerRecord
: true,
93 Output
: [][]string{{"a", "b", "c"}},
94 TrimLeadingSpace
: true,
98 Output
: [][]string{{" a", " b", " c"}},
101 Input
: "#1,2,3\na,b,c\n#comment",
102 Output
: [][]string{{"a", "b", "c"}},
106 Input
: "#1,2,3\na,b,c",
107 Output
: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
110 Input
: `a "word","1"2",a","b`,
111 Output
: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
115 Input
: `a "word","1"2",a"`,
116 Output
: [][]string{{`a "word"`, `1"2`, `a"`}},
119 Name
: "BareDoubleQuotes",
121 Output
: [][]string{{`a""b`, `c`}},
124 Name
: "BadDoubleQuotes",
126 Error
: &ParseError
{StartLine
: 1, Line
: 1, Column
: 1, Err
: ErrBareQuote
},
129 Input
: ` "a"," b",c`,
130 Output
: [][]string{{"a", " b", "c"}},
131 TrimLeadingSpace
: true,
133 Name
: "BadBareQuote",
134 Input
: `a "word","b"`,
135 Error
: &ParseError
{StartLine
: 1, Line
: 1, Column
: 2, Err
: ErrBareQuote
},
137 Name
: "BadTrailingQuote",
138 Input
: `"a word",b"`,
139 Error
: &ParseError
{StartLine
: 1, Line
: 1, Column
: 10, Err
: ErrBareQuote
},
141 Name
: "ExtraneousQuote",
142 Input
: `"a "word","b"`,
143 Error
: &ParseError
{StartLine
: 1, Line
: 1, Column
: 3, Err
: ErrQuote
},
145 Name
: "BadFieldCount",
147 Error
: &ParseError
{StartLine
: 2, Line
: 2, Err
: ErrFieldCount
},
148 UseFieldsPerRecord
: true,
151 Name
: "BadFieldCount1",
153 Error
: &ParseError
{StartLine
: 1, Line
: 1, Err
: ErrFieldCount
},
154 UseFieldsPerRecord
: true,
159 Output
: [][]string{{"a", "b", "c"}, {"d", "e"}},
161 Name
: "TrailingCommaEOF",
163 Output
: [][]string{{"a", "b", "c", ""}},
165 Name
: "TrailingCommaEOL",
167 Output
: [][]string{{"a", "b", "c", ""}},
169 Name
: "TrailingCommaSpaceEOF",
171 Output
: [][]string{{"a", "b", "c", ""}},
172 TrimLeadingSpace
: true,
174 Name
: "TrailingCommaSpaceEOL",
176 Output
: [][]string{{"a", "b", "c", ""}},
177 TrimLeadingSpace
: true,
179 Name
: "TrailingCommaLine3",
180 Input
: "a,b,c\nd,e,f\ng,hi,",
181 Output
: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
182 TrimLeadingSpace
: true,
184 Name
: "NotTrailingComma3",
186 Output
: [][]string{{"a", "b", "c", " "}},
188 Name
: "CommaFieldTest",
201 {"x", "y", "z", "w"},
206 {"x", "y", "z", "w"},
213 Name
: "TrailingCommaIneffective1",
214 Input
: "a,b,\nc,d,e",
219 TrimLeadingSpace
: true,
221 Name
: "ReadAllReuseRecord",
229 Name
: "StartLine1", // Issue 19019
230 Input
: "a,\"b\nc\"d,e",
231 Error
: &ParseError
{StartLine
: 1, Line
: 2, Column
: 1, Err
: ErrQuote
},
234 Input
: "a,b\n\"d\n\n,e",
235 Error
: &ParseError
{StartLine
: 2, Line
: 5, Column
: 0, Err
: ErrQuote
},
237 Name
: "CRLFInQuotedField", // Issue 21201
238 Input
: "A,\"Hello\r\nHi\",B\r\n",
240 {"A", "Hello\nHi", "B"},
243 Name
: "BinaryBlobField", // Issue 19410
244 Input
: "x09\x41\xb4\x1c,aktau",
245 Output
: [][]string{{"x09A\xb4\x1c", "aktau"}},
248 Input
: "field1,field2\r",
249 Output
: [][]string{{"field1", "field2"}},
251 Name
: "QuotedTrailingCR",
252 Input
: "\"field\"\r",
253 Output
: [][]string{{"field"}},
255 Name
: "QuotedTrailingCRCR",
256 Input
: "\"field\"\r\r",
257 Error
: &ParseError
{StartLine
: 1, Line
: 1, Column
: 6, Err
: ErrQuote
},
260 Input
: "field\rfield\r",
261 Output
: [][]string{{"field\rfield"}},
264 Input
: "field\r\rfield\r\r",
265 Output
: [][]string{{"field\r\rfield\r"}},
268 Input
: "field\r\r\nfield\r\r\n",
269 Output
: [][]string{{"field\r"}, {"field\r"}},
271 Name
: "FieldCRCRLFCR",
272 Input
: "field\r\r\n\rfield\r\r\n\r",
273 Output
: [][]string{{"field\r"}, {"\rfield\r"}},
275 Name
: "FieldCRCRLFCRCR",
276 Input
: "field\r\r\n\r\rfield\r\r\n\r\r",
277 Output
: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
279 Name
: "MultiFieldCRCRLFCRCR",
280 Input
: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
282 {"field1", "field2\r"},
283 {"\r\rfield1", "field2\r"},
287 Name
: "NonASCIICommaAndComment",
288 Input
: "a£b,c£ \td,e\n€ comment\n",
289 Output
: [][]string{{"a", "b,c", "d,e"}},
290 TrimLeadingSpace
: true,
294 Name
: "NonASCIICommaAndCommentWithQuotes",
295 Input
: "a€\" b,\"€ c\nλ comment\n",
296 Output
: [][]string{{"a", " b,", " c"}},
300 // λ and θ start with the same byte.
301 // This tests that the parser doesn't confuse such characters.
302 Name
: "NonASCIICommaConfusion",
303 Input
: "\"abθcd\"λefθgh",
304 Output
: [][]string{{"abθcd", "efθgh"}},
308 Name
: "NonASCIICommentConfusion",
309 Input
: "λ\nλ\nθ\nλ\n",
310 Output
: [][]string{{"λ"}, {"λ"}, {"λ"}},
313 Name
: "QuotedFieldMultipleLF",
314 Input
: "\"\n\n\n\n\"",
315 Output
: [][]string{{"\n\n\n\n"}},
317 Name
: "MultipleCRLF",
318 Input
: "\r\n\r\n\r\n\r\n",
320 // The implementation may read each line in several chunks if it doesn't fit entirely
321 // in the read buffer, so we should test the code to handle that condition.
323 Input
: strings
.Repeat("#ignore\n", 10000) + strings
.Repeat("@", 5000) + "," + strings
.Repeat("*", 5000),
324 Output
: [][]string{{strings
.Repeat("@", 5000), strings
.Repeat("*", 5000)}},
327 Name
: "QuoteWithTrailingCRLF",
328 Input
: "\"foo\"bar\"\r\n",
329 Error
: &ParseError
{StartLine
: 1, Line
: 1, Column
: 4, Err
: ErrQuote
},
331 Name
: "LazyQuoteWithTrailingCRLF",
332 Input
: "\"foo\"bar\"\r\n",
333 Output
: [][]string{{`foo"bar`}},
336 Name
: "DoubleQuoteWithTrailingCRLF",
337 Input
: "\"foo\"\"bar\"\r\n",
338 Output
: [][]string{{`foo"bar`}},
342 Output
: [][]string{{`"""`}},
346 Error
: &ParseError
{StartLine
: 1, Line
: 1, Column
: 7, Err
: ErrQuote
},
348 Name
: "LazyOddQuotes",
350 Output
: [][]string{{`"""`}},
355 Error
: errInvalidDelim
,
359 Error
: errInvalidDelim
,
363 Error
: errInvalidDelim
,
366 Comma
: utf8
.RuneError
,
367 Error
: errInvalidDelim
,
371 Error
: errInvalidDelim
,
375 Error
: errInvalidDelim
,
378 Comment
: utf8
.RuneError
,
379 Error
: errInvalidDelim
,
381 Name
: "BadCommaComment",
384 Error
: errInvalidDelim
,
387 for _
, tt
:= range tests
{
388 t
.Run(tt
.Name
, func(t
*testing
.T
) {
389 r
:= NewReader(strings
.NewReader(tt
.Input
))
394 r
.Comment
= tt
.Comment
395 if tt
.UseFieldsPerRecord
{
396 r
.FieldsPerRecord
= tt
.FieldsPerRecord
398 r
.FieldsPerRecord
= -1
400 r
.LazyQuotes
= tt
.LazyQuotes
401 r
.TrimLeadingSpace
= tt
.TrimLeadingSpace
402 r
.ReuseRecord
= tt
.ReuseRecord
404 out
, err
:= r
.ReadAll()
405 if !reflect
.DeepEqual(err
, tt
.Error
) {
406 t
.Errorf("ReadAll() error:\ngot %v\nwant %v", err
, tt
.Error
)
407 } else if !reflect
.DeepEqual(out
, tt
.Output
) {
408 t
.Errorf("ReadAll() output:\ngot %q\nwant %q", out
, tt
.Output
)
414 // nTimes is an io.Reader which yields the string s n times.
421 func (r
*nTimes
) Read(p
[]byte) (n
int, err error
) {
423 if r
.n
<= 0 || r
.s
== "" {
426 n0
:= copy(p
, r
.s
[r
.off
:])
430 if r
.off
== len(r
.s
) {
440 // benchmarkRead measures reading the provided CSV rows data.
441 // initReader, if non-nil, modifies the Reader before it's used.
442 func benchmarkRead(b
*testing
.B
, initReader
func(*Reader
), rows
string) {
444 r
:= NewReader(&nTimes
{s
: rows
, n
: b
.N
})
445 if initReader
!= nil {
459 const benchmarkCSVData
= `x,y,z,w
// BenchmarkRead runs benchmarkRead over benchmarkCSVData with a nil
// initReader, so the Reader is not modified before it is used.
471 func BenchmarkRead(b
*testing
.B
) {
472 benchmarkRead(b
, nil, benchmarkCSVData
)
// BenchmarkReadWithFieldsPerRecord runs benchmarkRead over benchmarkCSVData
// with the Reader's FieldsPerRecord set to 4.
// NOTE(review): 4 presumably matches the column count of benchmarkCSVData
// (its first row is x,y,z,w) — confirm against the full constant.
475 func BenchmarkReadWithFieldsPerRecord(b
*testing
.B
) {
476 benchmarkRead(b
, func(r
*Reader
) { r
.FieldsPerRecord
= 4 }, benchmarkCSVData
)
// BenchmarkReadWithoutFieldsPerRecord runs benchmarkRead over
// benchmarkCSVData with the Reader's FieldsPerRecord set to -1.
// NOTE(review): a negative FieldsPerRecord is expected to disable the
// per-record field-count check — confirm against the Reader documentation.
479 func BenchmarkReadWithoutFieldsPerRecord(b
*testing
.B
) {
480 benchmarkRead(b
, func(r
*Reader
) { r
.FieldsPerRecord
= -1 }, benchmarkCSVData
)
483 func BenchmarkReadLargeFields(b
*testing
.B
) {
484 benchmarkRead(b
, nil, strings
.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
485 xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
486 ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
487 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
// BenchmarkReadReuseRecord runs benchmarkRead over benchmarkCSVData with the
// Reader's ReuseRecord option set to true.
491 func BenchmarkReadReuseRecord(b
*testing
.B
) {
492 benchmarkRead(b
, func(r
*Reader
) { r
.ReuseRecord
= true }, benchmarkCSVData
)
// BenchmarkReadReuseRecordWithFieldsPerRecord runs benchmarkRead over
// benchmarkCSVData with ReuseRecord set to true and FieldsPerRecord set to 4.
// NOTE(review): 4 presumably matches the column count of benchmarkCSVData
// (its first row is x,y,z,w) — confirm against the full constant.
495 func BenchmarkReadReuseRecordWithFieldsPerRecord(b
*testing
.B
) {
496 benchmarkRead(b
, func(r
*Reader
) { r
.ReuseRecord
= true; r
.FieldsPerRecord
= 4 }, benchmarkCSVData
)
// BenchmarkReadReuseRecordWithoutFieldsPerRecord runs benchmarkRead over
// benchmarkCSVData with ReuseRecord set to true and FieldsPerRecord set to -1.
// NOTE(review): a negative FieldsPerRecord is expected to disable the
// per-record field-count check — confirm against the Reader documentation.
499 func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b
*testing
.B
) {
500 benchmarkRead(b
, func(r
*Reader
) { r
.ReuseRecord
= true; r
.FieldsPerRecord
= -1 }, benchmarkCSVData
)
503 func BenchmarkReadReuseRecordLargeFields(b
*testing
.B
) {
504 benchmarkRead(b
, func(r
*Reader
) { r
.ReuseRecord
= true }, strings
.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
505 xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
506 ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
507 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv