libgo: update to Go 1.11
[official-gcc.git] / libgo / go / encoding / csv / reader_test.go
blob5121791cb36005736417730b981a386d38494e86
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package csv
7 import (
8 "io"
9 "reflect"
10 "strings"
11 "testing"
12 "unicode/utf8"
15 func TestRead(t *testing.T) {
16 tests := []struct {
17 Name string
18 Input string
19 Output [][]string
20 Error error
22 // These fields are copied into the Reader
23 Comma rune
24 Comment rune
25 UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
26 FieldsPerRecord int
27 LazyQuotes bool
28 TrimLeadingSpace bool
29 ReuseRecord bool
30 }{{
31 Name: "Simple",
32 Input: "a,b,c\n",
33 Output: [][]string{{"a", "b", "c"}},
34 }, {
35 Name: "CRLF",
36 Input: "a,b\r\nc,d\r\n",
37 Output: [][]string{{"a", "b"}, {"c", "d"}},
38 }, {
39 Name: "BareCR",
40 Input: "a,b\rc,d\r\n",
41 Output: [][]string{{"a", "b\rc", "d"}},
42 }, {
43 Name: "RFC4180test",
44 Input: `#field1,field2,field3
45 "aaa","bb
46 b","ccc"
47 "a,a","b""bb","ccc"
48 zzz,yyy,xxx
50 Output: [][]string{
51 {"#field1", "field2", "field3"},
52 {"aaa", "bb\nb", "ccc"},
53 {"a,a", `b"bb`, "ccc"},
54 {"zzz", "yyy", "xxx"},
56 UseFieldsPerRecord: true,
57 FieldsPerRecord: 0,
58 }, {
59 Name: "NoEOLTest",
60 Input: "a,b,c",
61 Output: [][]string{{"a", "b", "c"}},
62 }, {
63 Name: "Semicolon",
64 Input: "a;b;c\n",
65 Output: [][]string{{"a", "b", "c"}},
66 Comma: ';',
67 }, {
68 Name: "MultiLine",
69 Input: `"two
70 line","one line","three
71 line
72 field"`,
73 Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
74 }, {
75 Name: "BlankLine",
76 Input: "a,b,c\n\nd,e,f\n\n",
77 Output: [][]string{
78 {"a", "b", "c"},
79 {"d", "e", "f"},
81 }, {
82 Name: "BlankLineFieldCount",
83 Input: "a,b,c\n\nd,e,f\n\n",
84 Output: [][]string{
85 {"a", "b", "c"},
86 {"d", "e", "f"},
88 UseFieldsPerRecord: true,
89 FieldsPerRecord: 0,
90 }, {
91 Name: "TrimSpace",
92 Input: " a, b, c\n",
93 Output: [][]string{{"a", "b", "c"}},
94 TrimLeadingSpace: true,
95 }, {
96 Name: "LeadingSpace",
97 Input: " a, b, c\n",
98 Output: [][]string{{" a", " b", " c"}},
99 }, {
100 Name: "Comment",
101 Input: "#1,2,3\na,b,c\n#comment",
102 Output: [][]string{{"a", "b", "c"}},
103 Comment: '#',
104 }, {
105 Name: "NoComment",
106 Input: "#1,2,3\na,b,c",
107 Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
108 }, {
109 Name: "LazyQuotes",
110 Input: `a "word","1"2",a","b`,
111 Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
112 LazyQuotes: true,
113 }, {
114 Name: "BareQuotes",
115 Input: `a "word","1"2",a"`,
116 Output: [][]string{{`a "word"`, `1"2`, `a"`}},
117 LazyQuotes: true,
118 }, {
119 Name: "BareDoubleQuotes",
120 Input: `a""b,c`,
121 Output: [][]string{{`a""b`, `c`}},
122 LazyQuotes: true,
123 }, {
124 Name: "BadDoubleQuotes",
125 Input: `a""b,c`,
126 Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
127 }, {
128 Name: "TrimQuote",
129 Input: ` "a"," b",c`,
130 Output: [][]string{{"a", " b", "c"}},
131 TrimLeadingSpace: true,
132 }, {
133 Name: "BadBareQuote",
134 Input: `a "word","b"`,
135 Error: &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote},
136 }, {
137 Name: "BadTrailingQuote",
138 Input: `"a word",b"`,
139 Error: &ParseError{StartLine: 1, Line: 1, Column: 10, Err: ErrBareQuote},
140 }, {
141 Name: "ExtraneousQuote",
142 Input: `"a "word","b"`,
143 Error: &ParseError{StartLine: 1, Line: 1, Column: 3, Err: ErrQuote},
144 }, {
145 Name: "BadFieldCount",
146 Input: "a,b,c\nd,e",
147 Error: &ParseError{StartLine: 2, Line: 2, Err: ErrFieldCount},
148 UseFieldsPerRecord: true,
149 FieldsPerRecord: 0,
150 }, {
151 Name: "BadFieldCount1",
152 Input: `a,b,c`,
153 Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
154 UseFieldsPerRecord: true,
155 FieldsPerRecord: 2,
156 }, {
157 Name: "FieldCount",
158 Input: "a,b,c\nd,e",
159 Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
160 }, {
161 Name: "TrailingCommaEOF",
162 Input: "a,b,c,",
163 Output: [][]string{{"a", "b", "c", ""}},
164 }, {
165 Name: "TrailingCommaEOL",
166 Input: "a,b,c,\n",
167 Output: [][]string{{"a", "b", "c", ""}},
168 }, {
169 Name: "TrailingCommaSpaceEOF",
170 Input: "a,b,c, ",
171 Output: [][]string{{"a", "b", "c", ""}},
172 TrimLeadingSpace: true,
173 }, {
174 Name: "TrailingCommaSpaceEOL",
175 Input: "a,b,c, \n",
176 Output: [][]string{{"a", "b", "c", ""}},
177 TrimLeadingSpace: true,
178 }, {
179 Name: "TrailingCommaLine3",
180 Input: "a,b,c\nd,e,f\ng,hi,",
181 Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
182 TrimLeadingSpace: true,
183 }, {
184 Name: "NotTrailingComma3",
185 Input: "a,b,c, \n",
186 Output: [][]string{{"a", "b", "c", " "}},
187 }, {
188 Name: "CommaFieldTest",
189 Input: `x,y,z,w
190 x,y,z,
191 x,y,,
192 x,,,
194 "x","y","z","w"
195 "x","y","z",""
196 "x","y","",""
197 "x","","",""
198 "","","",""
200 Output: [][]string{
201 {"x", "y", "z", "w"},
202 {"x", "y", "z", ""},
203 {"x", "y", "", ""},
204 {"x", "", "", ""},
205 {"", "", "", ""},
206 {"x", "y", "z", "w"},
207 {"x", "y", "z", ""},
208 {"x", "y", "", ""},
209 {"x", "", "", ""},
210 {"", "", "", ""},
212 }, {
213 Name: "TrailingCommaIneffective1",
214 Input: "a,b,\nc,d,e",
215 Output: [][]string{
216 {"a", "b", ""},
217 {"c", "d", "e"},
219 TrimLeadingSpace: true,
220 }, {
221 Name: "ReadAllReuseRecord",
222 Input: "a,b\nc,d",
223 Output: [][]string{
224 {"a", "b"},
225 {"c", "d"},
227 ReuseRecord: true,
228 }, {
229 Name: "StartLine1", // Issue 19019
230 Input: "a,\"b\nc\"d,e",
231 Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
232 }, {
233 Name: "StartLine2",
234 Input: "a,b\n\"d\n\n,e",
235 Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
236 }, {
237 Name: "CRLFInQuotedField", // Issue 21201
238 Input: "A,\"Hello\r\nHi\",B\r\n",
239 Output: [][]string{
240 {"A", "Hello\nHi", "B"},
242 }, {
243 Name: "BinaryBlobField", // Issue 19410
244 Input: "x09\x41\xb4\x1c,aktau",
245 Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
246 }, {
247 Name: "TrailingCR",
248 Input: "field1,field2\r",
249 Output: [][]string{{"field1", "field2"}},
250 }, {
251 Name: "QuotedTrailingCR",
252 Input: "\"field\"\r",
253 Output: [][]string{{"field"}},
254 }, {
255 Name: "QuotedTrailingCRCR",
256 Input: "\"field\"\r\r",
257 Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
258 }, {
259 Name: "FieldCR",
260 Input: "field\rfield\r",
261 Output: [][]string{{"field\rfield"}},
262 }, {
263 Name: "FieldCRCR",
264 Input: "field\r\rfield\r\r",
265 Output: [][]string{{"field\r\rfield\r"}},
266 }, {
267 Name: "FieldCRCRLF",
268 Input: "field\r\r\nfield\r\r\n",
269 Output: [][]string{{"field\r"}, {"field\r"}},
270 }, {
271 Name: "FieldCRCRLFCR",
272 Input: "field\r\r\n\rfield\r\r\n\r",
273 Output: [][]string{{"field\r"}, {"\rfield\r"}},
274 }, {
275 Name: "FieldCRCRLFCRCR",
276 Input: "field\r\r\n\r\rfield\r\r\n\r\r",
277 Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
278 }, {
279 Name: "MultiFieldCRCRLFCRCR",
280 Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
281 Output: [][]string{
282 {"field1", "field2\r"},
283 {"\r\rfield1", "field2\r"},
284 {"\r\r", ""},
286 }, {
287 Name: "NonASCIICommaAndComment",
288 Input: "a£b,c£ \td,e\n€ comment\n",
289 Output: [][]string{{"a", "b,c", "d,e"}},
290 TrimLeadingSpace: true,
291 Comma: '£',
292 Comment: '€',
293 }, {
294 Name: "NonASCIICommaAndCommentWithQuotes",
295 Input: "a€\" b,\"€ c\nλ comment\n",
296 Output: [][]string{{"a", " b,", " c"}},
297 Comma: '€',
298 Comment: 'λ',
299 }, {
300 // λ and θ start with the same byte.
301 // This tests that the parser doesn't confuse such characters.
302 Name: "NonASCIICommaConfusion",
303 Input: "\"abθcd\"λefθgh",
304 Output: [][]string{{"abθcd", "efθgh"}},
305 Comma: 'λ',
306 Comment: '€',
307 }, {
308 Name: "NonASCIICommentConfusion",
309 Input: \nλ\nθ\nλ\n",
310 Output: [][]string{{"λ"}, {"λ"}, {"λ"}},
311 Comment: 'θ',
312 }, {
313 Name: "QuotedFieldMultipleLF",
314 Input: "\"\n\n\n\n\"",
315 Output: [][]string{{"\n\n\n\n"}},
316 }, {
317 Name: "MultipleCRLF",
318 Input: "\r\n\r\n\r\n\r\n",
319 }, {
320 // The implementation may read each line in several chunks if it doesn't fit entirely
321 // in the read buffer, so we should test the code to handle that condition.
322 Name: "HugeLines",
323 Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000),
324 Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
325 Comment: '#',
326 }, {
327 Name: "QuoteWithTrailingCRLF",
328 Input: "\"foo\"bar\"\r\n",
329 Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
330 }, {
331 Name: "LazyQuoteWithTrailingCRLF",
332 Input: "\"foo\"bar\"\r\n",
333 Output: [][]string{{`foo"bar`}},
334 LazyQuotes: true,
335 }, {
336 Name: "DoubleQuoteWithTrailingCRLF",
337 Input: "\"foo\"\"bar\"\r\n",
338 Output: [][]string{{`foo"bar`}},
339 }, {
340 Name: "EvenQuotes",
341 Input: `""""""""`,
342 Output: [][]string{{`"""`}},
343 }, {
344 Name: "OddQuotes",
345 Input: `"""""""`,
346 Error: &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote},
347 }, {
348 Name: "LazyOddQuotes",
349 Input: `"""""""`,
350 Output: [][]string{{`"""`}},
351 LazyQuotes: true,
352 }, {
353 Name: "BadComma1",
354 Comma: '\n',
355 Error: errInvalidDelim,
356 }, {
357 Name: "BadComma2",
358 Comma: '\r',
359 Error: errInvalidDelim,
360 }, {
361 Name: "BadComma3",
362 Comma: '"',
363 Error: errInvalidDelim,
364 }, {
365 Name: "BadComma4",
366 Comma: utf8.RuneError,
367 Error: errInvalidDelim,
368 }, {
369 Name: "BadComment1",
370 Comment: '\n',
371 Error: errInvalidDelim,
372 }, {
373 Name: "BadComment2",
374 Comment: '\r',
375 Error: errInvalidDelim,
376 }, {
377 Name: "BadComment3",
378 Comment: utf8.RuneError,
379 Error: errInvalidDelim,
380 }, {
381 Name: "BadCommaComment",
382 Comma: 'X',
383 Comment: 'X',
384 Error: errInvalidDelim,
387 for _, tt := range tests {
388 t.Run(tt.Name, func(t *testing.T) {
389 r := NewReader(strings.NewReader(tt.Input))
391 if tt.Comma != 0 {
392 r.Comma = tt.Comma
394 r.Comment = tt.Comment
395 if tt.UseFieldsPerRecord {
396 r.FieldsPerRecord = tt.FieldsPerRecord
397 } else {
398 r.FieldsPerRecord = -1
400 r.LazyQuotes = tt.LazyQuotes
401 r.TrimLeadingSpace = tt.TrimLeadingSpace
402 r.ReuseRecord = tt.ReuseRecord
404 out, err := r.ReadAll()
405 if !reflect.DeepEqual(err, tt.Error) {
406 t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error)
407 } else if !reflect.DeepEqual(out, tt.Output) {
408 t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output)
// nTimes is an io.Reader which yields the string s n times.
type nTimes struct {
	s   string // text yielded on each repetition
	n   int    // repetitions still to be produced
	off int    // offset into s of the next byte to copy out
}

// Read fills p with as many bytes of the remaining repetitions of s as fit.
// It returns io.EOF, together with any bytes copied during the same call,
// once all n repetitions have been produced or when s is empty.
func (r *nTimes) Read(p []byte) (n int, err error) {
	for {
		if r.n <= 0 || r.s == "" {
			return n, io.EOF
		}
		n0 := copy(p, r.s[r.off:])
		p = p[n0:]
		n += n0
		r.off += n0
		if r.off == len(r.s) {
			// Finished one repetition; rewind for the next.
			r.off = 0
			r.n--
		}
		if len(p) == 0 {
			return n, nil
		}
	}
}
440 // benchmarkRead measures reading the provided CSV rows data.
441 // initReader, if non-nil, modifies the Reader before it's used.
442 func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) {
443 b.ReportAllocs()
444 r := NewReader(&nTimes{s: rows, n: b.N})
445 if initReader != nil {
446 initReader(r)
448 for {
449 _, err := r.Read()
450 if err == io.EOF {
451 break
453 if err != nil {
454 b.Fatal(err)
// benchmarkCSVData is the small CSV fixture shared by the read benchmarks:
// five unquoted rows followed by five quoted rows, four fields each.
const benchmarkCSVData = `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`
471 func BenchmarkRead(b *testing.B) {
472 benchmarkRead(b, nil, benchmarkCSVData)
475 func BenchmarkReadWithFieldsPerRecord(b *testing.B) {
476 benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData)
479 func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) {
480 benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData)
483 func BenchmarkReadLargeFields(b *testing.B) {
484 benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
485 xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
486 ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
487 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
488 `, 3))
491 func BenchmarkReadReuseRecord(b *testing.B) {
492 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData)
495 func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) {
496 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData)
499 func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) {
500 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData)
503 func BenchmarkReadReuseRecordLargeFields(b *testing.B) {
504 benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
505 xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
506 ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
507 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
508 `, 3))