libgo: Merge from revision 18783:00cce3a34d7e of master library.
[official-gcc.git] / libgo / go / encoding / csv / reader.go
blobd9432954ac9d6ec7e92c547971d2f5e5e1593dd0
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package csv reads and writes comma-separated values (CSV) files.
6 //
7 // A csv file contains zero or more records of one or more fields per record.
8 // Each record is separated by the newline character. The final record may
9 // optionally be followed by a newline character.
11 // field1,field2,field3
13 // White space is considered part of a field.
15 // Carriage returns before newline characters are silently removed.
17 // Blank lines are ignored. A line with only whitespace characters (excluding
18 // the ending newline character) is not considered a blank line.
20 // Fields which start and stop with the quote character " are called
21 // quoted-fields. The beginning and ending quote are not part of the
22 // field.
24 // The source:
26 // normal string,"quoted-field"
28 // results in the fields
30 // {`normal string`, `quoted-field`}
32 // Within a quoted-field a quote character followed by a second quote
33 // character is considered a single quote.
35 // "the ""word"" is true","a ""quoted-field"""
37 // results in
39 // {`the "word" is true`, `a "quoted-field"`}
41 // Newlines and commas may be included in a quoted-field:
43 // "Multi-line
44 // field","comma is ,"
46 // results in
48 // {`Multi-line
49 // field`, `comma is ,`}
50 package csv
52 import (
53 "bufio"
54 "bytes"
55 "errors"
56 "fmt"
57 "io"
58 "unicode"
// A ParseError is returned for parsing errors.
// Line numbers start at 1; column numbers start at 0.
type ParseError struct {
	Line   int   // Line where the error occurred
	Column int   // Column (rune index) where the error occurred
	Err    error // The actual error
}

// Error implements the error interface. The message has the form
// "line L, column C: E", where E is the text of the wrapped Err.
func (e *ParseError) Error() string {
	return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Err)
}
// These are the errors that can be returned in ParseError.Err.
var (
	// ErrTrailingComma is no longer used.
	ErrTrailingComma = errors.New("extra delimiter at end of line")

	// ErrBareQuote reports a quote character inside an unquoted field
	// while LazyQuotes is off.
	ErrBareQuote = errors.New("bare \" in non-quoted-field")

	// ErrQuote reports a malformed quoted field while LazyQuotes is off:
	// either the input ended before the closing quote, or a quote inside
	// the field was followed by something other than a second quote, the
	// field delimiter, or a newline.
	ErrQuote = errors.New("extraneous \" in field")

	// ErrFieldCount reports a record whose number of fields differs from
	// a positive FieldsPerRecord.
	ErrFieldCount = errors.New("wrong number of fields in line")
)
// A Reader reads records from a CSV-encoded file.
//
// As returned by NewReader, a Reader expects input conforming to RFC 4180.
// The exported fields can be changed to customize the details before the
// first call to Read or ReadAll.
//
// Comma is the field delimiter. NewReader sets it to ','.
//
// Comment, if not 0, is the comment character. Lines beginning with the
// Comment character are ignored.
//
// If FieldsPerRecord is positive, Read requires each record to
// have the given number of fields. If FieldsPerRecord is 0, Read sets it to
// the number of fields in the first record, so that future records must
// have the same field count. If FieldsPerRecord is negative, no check is
// made and records may have a variable number of fields.
//
// If LazyQuotes is true, a quote may appear in an unquoted field and a
// non-doubled quote may appear in a quoted field.
//
// If TrimLeadingSpace is true, leading white space in a field is ignored.
type Reader struct {
	Comma            rune // field delimiter (set to ',' by NewReader)
	Comment          rune // comment character for start of line
	FieldsPerRecord  int  // number of expected fields per record
	LazyQuotes       bool // allow lazy quotes
	TrailingComma    bool // ignored; here for backwards compatibility
	TrimLeadingSpace bool // trim leading space

	line   int           // current line number; reported in ParseError.Line
	column int           // rune index within the current line; reported in ParseError.Column
	r      *bufio.Reader // buffered source the runes are read from
	field  bytes.Buffer  // scratch buffer holding the field being parsed
}
115 // NewReader returns a new Reader that reads from r.
116 func NewReader(r io.Reader) *Reader {
117 return &Reader{
118 Comma: ',',
119 r: bufio.NewReader(r),
123 // error creates a new ParseError based on err.
124 func (r *Reader) error(err error) error {
125 return &ParseError{
126 Line: r.line,
127 Column: r.column,
128 Err: err,
132 // Read reads one record from r. The record is a slice of strings with each
133 // string representing one field.
134 func (r *Reader) Read() (record []string, err error) {
135 for {
136 record, err = r.parseRecord()
137 if record != nil {
138 break
140 if err != nil {
141 return nil, err
145 if r.FieldsPerRecord > 0 {
146 if len(record) != r.FieldsPerRecord {
147 r.column = 0 // report at start of record
148 return record, r.error(ErrFieldCount)
150 } else if r.FieldsPerRecord == 0 {
151 r.FieldsPerRecord = len(record)
153 return record, nil
156 // ReadAll reads all the remaining records from r.
157 // Each record is a slice of fields.
158 // A successful call returns err == nil, not err == EOF. Because ReadAll is
159 // defined to read until EOF, it does not treat end of file as an error to be
160 // reported.
161 func (r *Reader) ReadAll() (records [][]string, err error) {
162 for {
163 record, err := r.Read()
164 if err == io.EOF {
165 return records, nil
167 if err != nil {
168 return nil, err
170 records = append(records, record)
174 // readRune reads one rune from r, folding \r\n to \n and keeping track
175 // of how far into the line we have read. r.column will point to the start
176 // of this rune, not the end of this rune.
177 func (r *Reader) readRune() (rune, error) {
178 r1, _, err := r.r.ReadRune()
180 // Handle \r\n here. We make the simplifying assumption that
181 // anytime \r is followed by \n that it can be folded to \n.
182 // We will not detect files which contain both \r\n and bare \n.
183 if r1 == '\r' {
184 r1, _, err = r.r.ReadRune()
185 if err == nil {
186 if r1 != '\n' {
187 r.r.UnreadRune()
188 r1 = '\r'
192 r.column++
193 return r1, err
196 // skip reads runes up to and including the rune delim or until error.
197 func (r *Reader) skip(delim rune) error {
198 for {
199 r1, err := r.readRune()
200 if err != nil {
201 return err
203 if r1 == delim {
204 return nil
209 // parseRecord reads and parses a single csv record from r.
210 func (r *Reader) parseRecord() (fields []string, err error) {
211 // Each record starts on a new line. We increment our line
212 // number (lines start at 1, not 0) and set column to -1
213 // so as we increment in readRune it points to the character we read.
214 r.line++
215 r.column = -1
217 // Peek at the first rune. If it is an error we are done.
218 // If we are support comments and it is the comment character
219 // then skip to the end of line.
221 r1, _, err := r.r.ReadRune()
222 if err != nil {
223 return nil, err
226 if r.Comment != 0 && r1 == r.Comment {
227 return nil, r.skip('\n')
229 r.r.UnreadRune()
231 // At this point we have at least one field.
232 for {
233 haveField, delim, err := r.parseField()
234 if haveField {
235 fields = append(fields, r.field.String())
237 if delim == '\n' || err == io.EOF {
238 return fields, err
239 } else if err != nil {
240 return nil, err
245 // parseField parses the next field in the record. The read field is
246 // located in r.field. Delim is the first character not part of the field
247 // (r.Comma or '\n').
248 func (r *Reader) parseField() (haveField bool, delim rune, err error) {
249 r.field.Reset()
251 r1, err := r.readRune()
252 for err == nil && r.TrimLeadingSpace && r1 != '\n' && unicode.IsSpace(r1) {
253 r1, err = r.readRune()
256 if err == io.EOF && r.column != 0 {
257 return true, 0, err
259 if err != nil {
260 return false, 0, err
263 switch r1 {
264 case r.Comma:
265 // will check below
267 case '\n':
268 // We are a trailing empty field or a blank line
269 if r.column == 0 {
270 return false, r1, nil
272 return true, r1, nil
274 case '"':
275 // quoted field
276 Quoted:
277 for {
278 r1, err = r.readRune()
279 if err != nil {
280 if err == io.EOF {
281 if r.LazyQuotes {
282 return true, 0, err
284 return false, 0, r.error(ErrQuote)
286 return false, 0, err
288 switch r1 {
289 case '"':
290 r1, err = r.readRune()
291 if err != nil || r1 == r.Comma {
292 break Quoted
294 if r1 == '\n' {
295 return true, r1, nil
297 if r1 != '"' {
298 if !r.LazyQuotes {
299 r.column--
300 return false, 0, r.error(ErrQuote)
302 // accept the bare quote
303 r.field.WriteRune('"')
305 case '\n':
306 r.line++
307 r.column = -1
309 r.field.WriteRune(r1)
312 default:
313 // unquoted field
314 for {
315 r.field.WriteRune(r1)
316 r1, err = r.readRune()
317 if err != nil || r1 == r.Comma {
318 break
320 if r1 == '\n' {
321 return true, r1, nil
323 if !r.LazyQuotes && r1 == '"' {
324 return false, 0, r.error(ErrBareQuote)
329 if err != nil {
330 if err == io.EOF {
331 return true, 0, err
333 return false, 0, err
336 return true, r1, nil