5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
9 **** For people modifying this code, please note that as of this writing
10 **** (2003-03-23), it is intended that this code should work with Python
15 #define MODULE_VERSION "1.0"
18 #include "structmember.h"
20 #define IS_BASESTRING(o) \
23 static PyObject
*error_obj
; /* CSV exception */
24 static PyObject
*dialects
; /* Dialect registry */
25 static long field_limit
= 128 * 1024; /* max parsed field size */
28 START_RECORD
, START_FIELD
, ESCAPED_CHAR
, IN_FIELD
,
29 IN_QUOTED_FIELD
, ESCAPE_IN_QUOTED_FIELD
, QUOTE_IN_QUOTED_FIELD
,
34 QUOTE_MINIMAL
, QUOTE_ALL
, QUOTE_NONNUMERIC
, QUOTE_NONE
42 static StyleDesc quote_styles
[] = {
43 { QUOTE_MINIMAL
, "QUOTE_MINIMAL" },
44 { QUOTE_ALL
, "QUOTE_ALL" },
45 { QUOTE_NONNUMERIC
, "QUOTE_NONNUMERIC" },
46 { QUOTE_NONE
, "QUOTE_NONE" },
53 int doublequote
; /* is " represented by ""? */
54 Py_UNICODE delimiter
; /* field separator */
55 Py_UNICODE quotechar
; /* quote character */
56 Py_UNICODE escapechar
; /* escape character */
57 int skipinitialspace
; /* ignore spaces following delimiter? */
58 PyObject
*lineterminator
; /* string to write between records */
59 int quoting
; /* style of quoting to write */
61 int strict
; /* raise exception on bad CSV */
64 static PyTypeObject Dialect_Type
;
69 PyObject
*input_iter
; /* iterate over this for input lines */
71 DialectObj
*dialect
; /* parsing dialect */
73 PyObject
*fields
; /* field list for current record */
74 ParserState state
; /* current CSV parse state */
75 Py_UNICODE
*field
; /* build current field in here */
76 int field_size
; /* size of allocated buffer */
77 Py_ssize_t field_len
; /* length of current field */
78 int numeric_field
; /* treat field as numeric */
79 unsigned long line_num
; /* Source-file line number */
82 static PyTypeObject Reader_Type
;
84 #define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
89 PyObject
*writeline
; /* write output lines to this file */
91 DialectObj
*dialect
; /* parsing dialect */
93 Py_UNICODE
*rec
; /* buffer for parser.join */
94 int rec_size
; /* size of allocated record */
95 Py_ssize_t rec_len
; /* length of record */
96 int num_fields
; /* number of fields in record */
99 static PyTypeObject Writer_Type
;
106 get_dialect_from_registry(PyObject
* name_obj
)
108 PyObject
*dialect_obj
;
110 dialect_obj
= PyDict_GetItem(dialects
, name_obj
);
111 if (dialect_obj
== NULL
) {
112 if (!PyErr_Occurred())
113 PyErr_Format(error_obj
, "unknown dialect");
116 Py_INCREF(dialect_obj
);
121 get_string(PyObject
*str
)
128 get_nullchar_as_None(Py_UNICODE c
)
135 return PyUnicode_FromUnicode((Py_UNICODE
*)&c
, 1);
139 Dialect_get_lineterminator(DialectObj
*self
)
141 return get_string(self
->lineterminator
);
145 Dialect_get_delimiter(DialectObj
*self
)
147 return get_nullchar_as_None(self
->delimiter
);
151 Dialect_get_escapechar(DialectObj
*self
)
153 return get_nullchar_as_None(self
->escapechar
);
157 Dialect_get_quotechar(DialectObj
*self
)
159 return get_nullchar_as_None(self
->quotechar
);
163 Dialect_get_quoting(DialectObj
*self
)
165 return PyLong_FromLong(self
->quoting
);
169 _set_bool(const char *name
, int *target
, PyObject
*src
, int dflt
)
174 *target
= PyObject_IsTrue(src
);
179 _set_int(const char *name
, int *target
, PyObject
*src
, int dflt
)
185 if (!PyLong_CheckExact(src
)) {
186 PyErr_Format(PyExc_TypeError
,
187 "\"%s\" must be an integer", name
);
190 value
= PyLong_AsLong(src
);
191 if (value
== -1 && PyErr_Occurred())
193 #if SIZEOF_LONG > SIZEOF_INT
194 if (value
> INT_MAX
|| value
< INT_MIN
) {
195 PyErr_Format(PyExc_ValueError
,
196 "integer out of range for \"%s\"", name
);
200 *target
= (int)value
;
206 _set_char(const char *name
, Py_UNICODE
*target
, PyObject
*src
, Py_UNICODE dflt
)
212 if (src
!= Py_None
) {
215 buf
= PyUnicode_AsUnicode(src
);
216 len
= PyUnicode_GetSize(src
);
217 if (buf
== NULL
|| len
> 1) {
218 PyErr_Format(PyExc_TypeError
,
219 "\"%s\" must be an 1-character string",
231 _set_str(const char *name
, PyObject
**target
, PyObject
*src
, const char *dflt
)
234 *target
= PyUnicode_DecodeASCII(dflt
, strlen(dflt
), NULL
);
238 else if (!IS_BASESTRING(src
)) {
239 PyErr_Format(PyExc_TypeError
,
240 "\"%s\" must be a string", name
);
253 dialect_check_quoting(int quoting
)
255 StyleDesc
*qs
= quote_styles
;
257 for (qs
= quote_styles
; qs
->name
; qs
++) {
258 if (qs
->style
== quoting
)
261 PyErr_Format(PyExc_TypeError
, "bad \"quoting\" value");
265 #define D_OFF(x) offsetof(DialectObj, x)
267 static struct PyMemberDef Dialect_memberlist
[] = {
268 { "skipinitialspace", T_INT
, D_OFF(skipinitialspace
), READONLY
},
269 { "doublequote", T_INT
, D_OFF(doublequote
), READONLY
},
270 { "strict", T_INT
, D_OFF(strict
), READONLY
},
274 static PyGetSetDef Dialect_getsetlist
[] = {
275 { "delimiter", (getter
)Dialect_get_delimiter
},
276 { "escapechar", (getter
)Dialect_get_escapechar
},
277 { "lineterminator", (getter
)Dialect_get_lineterminator
},
278 { "quotechar", (getter
)Dialect_get_quotechar
},
279 { "quoting", (getter
)Dialect_get_quoting
},
284 Dialect_dealloc(DialectObj
*self
)
286 Py_XDECREF(self
->lineterminator
);
287 Py_TYPE(self
)->tp_free((PyObject
*)self
);
290 static char *dialect_kws
[] = {
304 dialect_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwargs
)
307 PyObject
*ret
= NULL
;
308 PyObject
*dialect
= NULL
;
309 PyObject
*delimiter
= NULL
;
310 PyObject
*doublequote
= NULL
;
311 PyObject
*escapechar
= NULL
;
312 PyObject
*lineterminator
= NULL
;
313 PyObject
*quotechar
= NULL
;
314 PyObject
*quoting
= NULL
;
315 PyObject
*skipinitialspace
= NULL
;
316 PyObject
*strict
= NULL
;
318 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
,
319 "|OOOOOOOOO", dialect_kws
,
331 if (dialect
!= NULL
) {
332 if (IS_BASESTRING(dialect
)) {
333 dialect
= get_dialect_from_registry(dialect
);
339 /* Can we reuse this instance? */
340 if (PyObject_TypeCheck(dialect
, &Dialect_Type
) &&
344 lineterminator
== 0 &&
347 skipinitialspace
== 0 &&
352 self
= (DialectObj
*)type
->tp_alloc(type
, 0);
357 self
->lineterminator
= NULL
;
359 Py_XINCREF(delimiter
);
360 Py_XINCREF(doublequote
);
361 Py_XINCREF(escapechar
);
362 Py_XINCREF(lineterminator
);
363 Py_XINCREF(quotechar
);
365 Py_XINCREF(skipinitialspace
);
367 if (dialect
!= NULL
) {
368 #define DIALECT_GETATTR(v, n) \
370 v = PyObject_GetAttrString(dialect, n)
371 DIALECT_GETATTR(delimiter
, "delimiter");
372 DIALECT_GETATTR(doublequote
, "doublequote");
373 DIALECT_GETATTR(escapechar
, "escapechar");
374 DIALECT_GETATTR(lineterminator
, "lineterminator");
375 DIALECT_GETATTR(quotechar
, "quotechar");
376 DIALECT_GETATTR(quoting
, "quoting");
377 DIALECT_GETATTR(skipinitialspace
, "skipinitialspace");
378 DIALECT_GETATTR(strict
, "strict");
382 /* check types and convert to C values */
383 #define DIASET(meth, name, target, src, dflt) \
384 if (meth(name, target, src, dflt)) \
386 DIASET(_set_char
, "delimiter", &self
->delimiter
, delimiter
, ',');
387 DIASET(_set_bool
, "doublequote", &self
->doublequote
, doublequote
, 1);
388 DIASET(_set_char
, "escapechar", &self
->escapechar
, escapechar
, 0);
389 DIASET(_set_str
, "lineterminator", &self
->lineterminator
, lineterminator
, "\r\n");
390 DIASET(_set_char
, "quotechar", &self
->quotechar
, quotechar
, '"');
391 DIASET(_set_int
, "quoting", &self
->quoting
, quoting
, QUOTE_MINIMAL
);
392 DIASET(_set_bool
, "skipinitialspace", &self
->skipinitialspace
, skipinitialspace
, 0);
393 DIASET(_set_bool
, "strict", &self
->strict
, strict
, 0);
395 /* validate options */
396 if (dialect_check_quoting(self
->quoting
))
398 if (self
->delimiter
== 0) {
399 PyErr_SetString(PyExc_TypeError
, "delimiter must be set");
402 if (quotechar
== Py_None
&& quoting
== NULL
)
403 self
->quoting
= QUOTE_NONE
;
404 if (self
->quoting
!= QUOTE_NONE
&& self
->quotechar
== 0) {
405 PyErr_SetString(PyExc_TypeError
,
406 "quotechar must be set if quoting enabled");
409 if (self
->lineterminator
== 0) {
410 PyErr_SetString(PyExc_TypeError
, "lineterminator must be set");
414 ret
= (PyObject
*)self
;
419 Py_XDECREF(delimiter
);
420 Py_XDECREF(doublequote
);
421 Py_XDECREF(escapechar
);
422 Py_XDECREF(lineterminator
);
423 Py_XDECREF(quotechar
);
425 Py_XDECREF(skipinitialspace
);
431 PyDoc_STRVAR(Dialect_Type_doc
,
434 "The Dialect type records CSV parsing and generation options.\n");
436 static PyTypeObject Dialect_Type
= {
437 PyVarObject_HEAD_INIT(NULL
, 0)
438 "_csv.Dialect", /* tp_name */
439 sizeof(DialectObj
), /* tp_basicsize */
442 (destructor
)Dialect_dealloc
, /* tp_dealloc */
443 (printfunc
)0, /* tp_print */
444 (getattrfunc
)0, /* tp_getattr */
445 (setattrfunc
)0, /* tp_setattr */
447 (reprfunc
)0, /* tp_repr */
448 0, /* tp_as_number */
449 0, /* tp_as_sequence */
450 0, /* tp_as_mapping */
451 (hashfunc
)0, /* tp_hash */
452 (ternaryfunc
)0, /* tp_call */
453 (reprfunc
)0, /* tp_str */
456 0, /* tp_as_buffer */
457 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
458 Dialect_Type_doc
, /* tp_doc */
461 0, /* tp_richcompare */
462 0, /* tp_weaklistoffset */
466 Dialect_memberlist
, /* tp_members */
467 Dialect_getsetlist
, /* tp_getset */
470 0, /* tp_descr_get */
471 0, /* tp_descr_set */
472 0, /* tp_dictoffset */
475 dialect_new
, /* tp_new */
480 * Return an instance of the dialect type, given a Python instance or kwarg
481 * description of the dialect
484 _call_dialect(PyObject
*dialect_inst
, PyObject
*kwargs
)
489 ctor_args
= Py_BuildValue(dialect_inst
? "(O)" : "()", dialect_inst
);
490 if (ctor_args
== NULL
)
492 dialect
= PyObject_Call((PyObject
*)&Dialect_Type
, ctor_args
, kwargs
);
493 Py_DECREF(ctor_args
);
501 parse_save_field(ReaderObj
*self
)
505 field
= PyUnicode_FromUnicode(self
->field
, self
->field_len
);
509 if (self
->numeric_field
) {
512 self
->numeric_field
= 0;
513 tmp
= PyNumber_Float(field
);
521 PyList_Append(self
->fields
, field
);
527 parse_grow_buff(ReaderObj
*self
)
529 if (self
->field_size
== 0) {
530 self
->field_size
= 4096;
531 if (self
->field
!= NULL
)
532 PyMem_Free(self
->field
);
533 self
->field
= PyMem_New(Py_UNICODE
, self
->field_size
);
536 if (self
->field_size
> INT_MAX
/ 2) {
540 self
->field_size
*= 2;
541 self
->field
= PyMem_Resize(self
->field
, Py_UNICODE
,
544 if (self
->field
== NULL
) {
552 parse_add_char(ReaderObj
*self
, Py_UNICODE c
)
554 if (self
->field_len
>= field_limit
) {
555 PyErr_Format(error_obj
, "field larger than field limit (%ld)",
559 if (self
->field_len
== self
->field_size
&& !parse_grow_buff(self
))
561 self
->field
[self
->field_len
++] = c
;
566 parse_process_char(ReaderObj
*self
, Py_UNICODE c
)
568 DialectObj
*dialect
= self
->dialect
;
570 switch (self
->state
) {
572 /* start of record */
574 /* empty line - return [] */
576 else if (c
== '\n' || c
== '\r') {
577 self
->state
= EAT_CRNL
;
580 /* normal character - handle as START_FIELD */
581 self
->state
= START_FIELD
;
584 /* expecting field */
585 if (c
== '\n' || c
== '\r' || c
== '\0') {
586 /* save empty field - return [fields] */
587 if (parse_save_field(self
) < 0)
589 self
->state
= (c
== '\0' ? START_RECORD
: EAT_CRNL
);
591 else if (c
== dialect
->quotechar
&&
592 dialect
->quoting
!= QUOTE_NONE
) {
593 /* start quoted field */
594 self
->state
= IN_QUOTED_FIELD
;
596 else if (c
== dialect
->escapechar
) {
597 /* possible escaped character */
598 self
->state
= ESCAPED_CHAR
;
600 else if (c
== ' ' && dialect
->skipinitialspace
)
601 /* ignore space at start of field */
603 else if (c
== dialect
->delimiter
) {
604 /* save empty field */
605 if (parse_save_field(self
) < 0)
609 /* begin new unquoted field */
610 if (dialect
->quoting
== QUOTE_NONNUMERIC
)
611 self
->numeric_field
= 1;
612 if (parse_add_char(self
, c
) < 0)
614 self
->state
= IN_FIELD
;
621 if (parse_add_char(self
, c
) < 0)
623 self
->state
= IN_FIELD
;
627 /* in unquoted field */
628 if (c
== '\n' || c
== '\r' || c
== '\0') {
629 /* end of line - return [fields] */
630 if (parse_save_field(self
) < 0)
632 self
->state
= (c
== '\0' ? START_RECORD
: EAT_CRNL
);
634 else if (c
== dialect
->escapechar
) {
635 /* possible escaped character */
636 self
->state
= ESCAPED_CHAR
;
638 else if (c
== dialect
->delimiter
) {
639 /* save field - wait for new field */
640 if (parse_save_field(self
) < 0)
642 self
->state
= START_FIELD
;
645 /* normal character - save in field */
646 if (parse_add_char(self
, c
) < 0)
651 case IN_QUOTED_FIELD
:
652 /* in quoted field */
655 else if (c
== dialect
->escapechar
) {
656 /* Possible escape character */
657 self
->state
= ESCAPE_IN_QUOTED_FIELD
;
659 else if (c
== dialect
->quotechar
&&
660 dialect
->quoting
!= QUOTE_NONE
) {
661 if (dialect
->doublequote
) {
662 /* doublequote; " represented by "" */
663 self
->state
= QUOTE_IN_QUOTED_FIELD
;
666 /* end of quote part of field */
667 self
->state
= IN_FIELD
;
671 /* normal character - save in field */
672 if (parse_add_char(self
, c
) < 0)
677 case ESCAPE_IN_QUOTED_FIELD
:
680 if (parse_add_char(self
, c
) < 0)
682 self
->state
= IN_QUOTED_FIELD
;
685 case QUOTE_IN_QUOTED_FIELD
:
686 /* doublequote - seen a quote in a quoted field */
687 if (dialect
->quoting
!= QUOTE_NONE
&&
688 c
== dialect
->quotechar
) {
690 if (parse_add_char(self
, c
) < 0)
692 self
->state
= IN_QUOTED_FIELD
;
694 else if (c
== dialect
->delimiter
) {
695 /* save field - wait for new field */
696 if (parse_save_field(self
) < 0)
698 self
->state
= START_FIELD
;
700 else if (c
== '\n' || c
== '\r' || c
== '\0') {
701 /* end of line - return [fields] */
702 if (parse_save_field(self
) < 0)
704 self
->state
= (c
== '\0' ? START_RECORD
: EAT_CRNL
);
706 else if (!dialect
->strict
) {
707 if (parse_add_char(self
, c
) < 0)
709 self
->state
= IN_FIELD
;
713 PyErr_Format(error_obj
, "'%c' expected after '%c'",
721 if (c
== '\n' || c
== '\r')
724 self
->state
= START_RECORD
;
726 PyErr_Format(error_obj
, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
736 parse_reset(ReaderObj
*self
)
738 Py_XDECREF(self
->fields
);
739 self
->fields
= PyList_New(0);
740 if (self
->fields
== NULL
)
743 self
->state
= START_RECORD
;
744 self
->numeric_field
= 0;
749 Reader_iternext(ReaderObj
*self
)
752 PyObject
*fields
= NULL
;
756 if (parse_reset(self
) < 0)
759 lineobj
= PyIter_Next(self
->input_iter
);
760 if (lineobj
== NULL
) {
761 /* End of input OR exception */
762 if (!PyErr_Occurred() && self
->field_len
!= 0)
763 PyErr_Format(error_obj
,
764 "newline inside string");
767 if (!PyUnicode_Check(lineobj
)) {
768 PyErr_Format(error_obj
,
769 "iterator should return strings, "
771 "(did you open the file in text mode?)",
772 lineobj
->ob_type
->tp_name
778 line
= PyUnicode_AsUnicode(lineobj
);
779 linelen
= PyUnicode_GetSize(lineobj
);
780 if (line
== NULL
|| linelen
< 0) {
788 PyErr_Format(error_obj
,
789 "line contains NULL byte");
792 if (parse_process_char(self
, c
) < 0) {
798 if (parse_process_char(self
, 0) < 0)
800 } while (self
->state
!= START_RECORD
);
802 fields
= self
->fields
;
809 Reader_dealloc(ReaderObj
*self
)
811 PyObject_GC_UnTrack(self
);
812 Py_XDECREF(self
->dialect
);
813 Py_XDECREF(self
->input_iter
);
814 Py_XDECREF(self
->fields
);
815 if (self
->field
!= NULL
)
816 PyMem_Free(self
->field
);
817 PyObject_GC_Del(self
);
821 Reader_traverse(ReaderObj
*self
, visitproc visit
, void *arg
)
823 Py_VISIT(self
->dialect
);
824 Py_VISIT(self
->input_iter
);
825 Py_VISIT(self
->fields
);
830 Reader_clear(ReaderObj
*self
)
832 Py_CLEAR(self
->dialect
);
833 Py_CLEAR(self
->input_iter
);
834 Py_CLEAR(self
->fields
);
838 PyDoc_STRVAR(Reader_Type_doc
,
841 "Reader objects are responsible for reading and parsing tabular data\n"
845 static struct PyMethodDef Reader_methods
[] = {
848 #define R_OFF(x) offsetof(ReaderObj, x)
850 static struct PyMemberDef Reader_memberlist
[] = {
851 { "dialect", T_OBJECT
, R_OFF(dialect
), READONLY
},
852 { "line_num", T_ULONG
, R_OFF(line_num
), READONLY
},
857 static PyTypeObject Reader_Type
= {
858 PyVarObject_HEAD_INIT(NULL
, 0)
859 "_csv.reader", /*tp_name*/
860 sizeof(ReaderObj
), /*tp_basicsize*/
863 (destructor
)Reader_dealloc
, /*tp_dealloc*/
864 (printfunc
)0, /*tp_print*/
865 (getattrfunc
)0, /*tp_getattr*/
866 (setattrfunc
)0, /*tp_setattr*/
868 (reprfunc
)0, /*tp_repr*/
870 0, /*tp_as_sequence*/
872 (hashfunc
)0, /*tp_hash*/
873 (ternaryfunc
)0, /*tp_call*/
874 (reprfunc
)0, /*tp_str*/
878 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
879 Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
880 Reader_Type_doc
, /*tp_doc*/
881 (traverseproc
)Reader_traverse
, /*tp_traverse*/
882 (inquiry
)Reader_clear
, /*tp_clear*/
883 0, /*tp_richcompare*/
884 0, /*tp_weaklistoffset*/
885 PyObject_SelfIter
, /*tp_iter*/
886 (getiterfunc
)Reader_iternext
, /*tp_iternext*/
887 Reader_methods
, /*tp_methods*/
888 Reader_memberlist
, /*tp_members*/
894 csv_reader(PyObject
*module
, PyObject
*args
, PyObject
*keyword_args
)
896 PyObject
* iterator
, * dialect
= NULL
;
897 ReaderObj
* self
= PyObject_GC_New(ReaderObj
, &Reader_Type
);
902 self
->dialect
= NULL
;
904 self
->input_iter
= NULL
;
906 self
->field_size
= 0;
909 if (parse_reset(self
) < 0) {
914 if (!PyArg_UnpackTuple(args
, "", 1, 2, &iterator
, &dialect
)) {
918 self
->input_iter
= PyObject_GetIter(iterator
);
919 if (self
->input_iter
== NULL
) {
920 PyErr_SetString(PyExc_TypeError
,
921 "argument 1 must be an iterator");
925 self
->dialect
= (DialectObj
*)_call_dialect(dialect
, keyword_args
);
926 if (self
->dialect
== NULL
) {
931 PyObject_GC_Track(self
);
932 return (PyObject
*)self
;
938 /* ---------------------------------------------------------------- */
940 join_reset(WriterObj
*self
)
943 self
->num_fields
= 0;
946 #define MEM_INCR 32768
948 /* Calculate new record length or append field to record. Return new
952 join_append_data(WriterObj
*self
, Py_UNICODE
*field
, int quote_empty
,
953 int *quoted
, int copy_phase
)
955 DialectObj
*dialect
= self
->dialect
;
958 Py_UNICODE
*lineterm
;
963 self->rec[rec_len] = c;\
967 lineterm
= PyUnicode_AsUnicode(dialect
->lineterminator
);
968 if (lineterm
== NULL
)
971 rec_len
= self
->rec_len
;
973 /* If this is not the first field we need a field separator */
974 if (self
->num_fields
> 0)
975 ADDCH(dialect
->delimiter
);
977 /* Handle preceding quote */
978 if (copy_phase
&& *quoted
)
979 ADDCH(dialect
->quotechar
);
981 /* Copy/count field data */
982 /* If field is null just pass over */
983 for (i
= 0; field
; i
++) {
984 Py_UNICODE c
= field
[i
];
990 if (c
== dialect
->delimiter
||
991 c
== dialect
->escapechar
||
992 c
== dialect
->quotechar
||
993 Py_UNICODE_strchr(lineterm
, c
)) {
994 if (dialect
->quoting
== QUOTE_NONE
)
997 if (c
== dialect
->quotechar
) {
998 if (dialect
->doublequote
)
999 ADDCH(dialect
->quotechar
);
1007 if (!dialect
->escapechar
) {
1008 PyErr_Format(error_obj
,
1009 "need to escape, but no escapechar set");
1012 ADDCH(dialect
->escapechar
);
1015 /* Copy field character into record buffer.
1020 /* If field is empty check if it needs to be quoted.
1022 if (i
== 0 && quote_empty
) {
1023 if (dialect
->quoting
== QUOTE_NONE
) {
1024 PyErr_Format(error_obj
,
1025 "single empty field record must be quoted");
1034 ADDCH(dialect
->quotechar
);
1043 join_check_rec_size(WriterObj
*self
, int rec_len
)
1046 if (rec_len
< 0 || rec_len
> INT_MAX
- MEM_INCR
) {
1051 if (rec_len
> self
->rec_size
) {
1052 if (self
->rec_size
== 0) {
1053 self
->rec_size
= (rec_len
/ MEM_INCR
+ 1) * MEM_INCR
;
1054 if (self
->rec
!= NULL
)
1055 PyMem_Free(self
->rec
);
1056 self
->rec
= PyMem_New(Py_UNICODE
, self
->rec_size
);
1059 Py_UNICODE
* old_rec
= self
->rec
;
1061 self
->rec_size
= (rec_len
/ MEM_INCR
+ 1) * MEM_INCR
;
1062 self
->rec
= PyMem_Resize(self
->rec
, Py_UNICODE
,
1064 if (self
->rec
== NULL
)
1065 PyMem_Free(old_rec
);
1067 if (self
->rec
== NULL
) {
1076 join_append(WriterObj
*self
, Py_UNICODE
*field
, int *quoted
, int quote_empty
)
1080 rec_len
= join_append_data(self
, field
, quote_empty
, quoted
, 0);
1084 /* grow record buffer if necessary */
1085 if (!join_check_rec_size(self
, rec_len
))
1088 self
->rec_len
= join_append_data(self
, field
, quote_empty
, quoted
, 1);
1095 join_append_lineterminator(WriterObj
*self
)
1098 Py_UNICODE
*terminator
;
1100 terminator_len
= PyUnicode_GetSize(self
->dialect
->lineterminator
);
1101 if (terminator_len
== -1)
1104 /* grow record buffer if necessary */
1105 if (!join_check_rec_size(self
, self
->rec_len
+ terminator_len
))
1108 terminator
= PyUnicode_AsUnicode(self
->dialect
->lineterminator
);
1109 if (terminator
== NULL
)
1111 memmove(self
->rec
+ self
->rec_len
, terminator
,
1112 sizeof(Py_UNICODE
)*terminator_len
);
1113 self
->rec_len
+= terminator_len
;
1118 PyDoc_STRVAR(csv_writerow_doc
,
1119 "writerow(sequence)\n"
1121 "Construct and write a CSV record from a sequence of fields. Non-string\n"
1122 "elements will be converted to string.");
1125 csv_writerow(WriterObj
*self
, PyObject
*seq
)
1127 DialectObj
*dialect
= self
->dialect
;
1130 if (!PySequence_Check(seq
))
1131 return PyErr_Format(error_obj
, "sequence expected");
1133 len
= PySequence_Length(seq
);
1137 /* Join all fields in internal buffer.
1140 for (i
= 0; i
< len
; i
++) {
1145 field
= PySequence_GetItem(seq
, i
);
1149 switch (dialect
->quoting
) {
1150 case QUOTE_NONNUMERIC
:
1151 quoted
= !PyNumber_Check(field
);
1161 if (PyUnicode_Check(field
)) {
1162 append_ok
= join_append(self
,
1163 PyUnicode_AS_UNICODE(field
),
1167 else if (field
== Py_None
) {
1168 append_ok
= join_append(self
, NULL
,
1175 str
= PyObject_Str(field
);
1179 append_ok
= join_append(self
,
1180 PyUnicode_AS_UNICODE(str
),
1188 /* Add line terminator.
1190 if (!join_append_lineterminator(self
))
1193 return PyObject_CallFunction(self
->writeline
,
1198 PyDoc_STRVAR(csv_writerows_doc
,
1199 "writerows(sequence of sequences)\n"
1201 "Construct and write a series of sequences to a csv file. Non-string\n"
1202 "elements will be converted to string.");
1205 csv_writerows(WriterObj
*self
, PyObject
*seqseq
)
1207 PyObject
*row_iter
, *row_obj
, *result
;
1209 row_iter
= PyObject_GetIter(seqseq
);
1210 if (row_iter
== NULL
) {
1211 PyErr_SetString(PyExc_TypeError
,
1212 "writerows() argument must be iterable");
1215 while ((row_obj
= PyIter_Next(row_iter
))) {
1216 result
= csv_writerow(self
, row_obj
);
1219 Py_DECREF(row_iter
);
1225 Py_DECREF(row_iter
);
1226 if (PyErr_Occurred())
1232 static struct PyMethodDef Writer_methods
[] = {
1233 { "writerow", (PyCFunction
)csv_writerow
, METH_O
, csv_writerow_doc
},
1234 { "writerows", (PyCFunction
)csv_writerows
, METH_O
, csv_writerows_doc
},
1238 #define W_OFF(x) offsetof(WriterObj, x)
1240 static struct PyMemberDef Writer_memberlist
[] = {
1241 { "dialect", T_OBJECT
, W_OFF(dialect
), READONLY
},
1246 Writer_dealloc(WriterObj
*self
)
1248 PyObject_GC_UnTrack(self
);
1249 Py_XDECREF(self
->dialect
);
1250 Py_XDECREF(self
->writeline
);
1251 if (self
->rec
!= NULL
)
1252 PyMem_Free(self
->rec
);
1253 PyObject_GC_Del(self
);
1257 Writer_traverse(WriterObj
*self
, visitproc visit
, void *arg
)
1259 Py_VISIT(self
->dialect
);
1260 Py_VISIT(self
->writeline
);
1265 Writer_clear(WriterObj
*self
)
1267 Py_CLEAR(self
->dialect
);
1268 Py_CLEAR(self
->writeline
);
1272 PyDoc_STRVAR(Writer_Type_doc
,
1275 "Writer objects are responsible for generating tabular data\n"
1276 "in CSV format from sequence input.\n"
1279 static PyTypeObject Writer_Type
= {
1280 PyVarObject_HEAD_INIT(NULL
, 0)
1281 "_csv.writer", /*tp_name*/
1282 sizeof(WriterObj
), /*tp_basicsize*/
1285 (destructor
)Writer_dealloc
, /*tp_dealloc*/
1286 (printfunc
)0, /*tp_print*/
1287 (getattrfunc
)0, /*tp_getattr*/
1288 (setattrfunc
)0, /*tp_setattr*/
1290 (reprfunc
)0, /*tp_repr*/
1292 0, /*tp_as_sequence*/
1293 0, /*tp_as_mapping*/
1294 (hashfunc
)0, /*tp_hash*/
1295 (ternaryfunc
)0, /*tp_call*/
1296 (reprfunc
)0, /*tp_str*/
1300 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
1301 Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
1303 (traverseproc
)Writer_traverse
, /*tp_traverse*/
1304 (inquiry
)Writer_clear
, /*tp_clear*/
1305 0, /*tp_richcompare*/
1306 0, /*tp_weaklistoffset*/
1307 (getiterfunc
)0, /*tp_iter*/
1308 (getiterfunc
)0, /*tp_iternext*/
1309 Writer_methods
, /*tp_methods*/
1310 Writer_memberlist
, /*tp_members*/
1315 csv_writer(PyObject
*module
, PyObject
*args
, PyObject
*keyword_args
)
1317 PyObject
* output_file
, * dialect
= NULL
;
1318 WriterObj
* self
= PyObject_GC_New(WriterObj
, &Writer_Type
);
1323 self
->dialect
= NULL
;
1324 self
->writeline
= NULL
;
1329 self
->num_fields
= 0;
1331 if (!PyArg_UnpackTuple(args
, "", 1, 2, &output_file
, &dialect
)) {
1335 self
->writeline
= PyObject_GetAttrString(output_file
, "write");
1336 if (self
->writeline
== NULL
|| !PyCallable_Check(self
->writeline
)) {
1337 PyErr_SetString(PyExc_TypeError
,
1338 "argument 1 must have a \"write\" method");
1342 self
->dialect
= (DialectObj
*)_call_dialect(dialect
, keyword_args
);
1343 if (self
->dialect
== NULL
) {
1347 PyObject_GC_Track(self
);
1348 return (PyObject
*)self
;
1355 csv_list_dialects(PyObject
*module
, PyObject
*args
)
1357 return PyDict_Keys(dialects
);
1361 csv_register_dialect(PyObject
*module
, PyObject
*args
, PyObject
*kwargs
)
1363 PyObject
*name_obj
, *dialect_obj
= NULL
;
1366 if (!PyArg_UnpackTuple(args
, "", 1, 2, &name_obj
, &dialect_obj
))
1368 if (!IS_BASESTRING(name_obj
)) {
1369 PyErr_SetString(PyExc_TypeError
,
1370 "dialect name must be a string or unicode");
1373 dialect
= _call_dialect(dialect_obj
, kwargs
);
1374 if (dialect
== NULL
)
1376 if (PyDict_SetItem(dialects
, name_obj
, dialect
) < 0) {
1386 csv_unregister_dialect(PyObject
*module
, PyObject
*name_obj
)
1388 if (PyDict_DelItem(dialects
, name_obj
) < 0)
1389 return PyErr_Format(error_obj
, "unknown dialect");
1395 csv_get_dialect(PyObject
*module
, PyObject
*name_obj
)
1397 return get_dialect_from_registry(name_obj
);
1401 csv_field_size_limit(PyObject
*module
, PyObject
*args
)
1403 PyObject
*new_limit
= NULL
;
1404 long old_limit
= field_limit
;
1406 if (!PyArg_UnpackTuple(args
, "field_size_limit", 0, 1, &new_limit
))
1408 if (new_limit
!= NULL
) {
1409 if (!PyLong_CheckExact(new_limit
)) {
1410 PyErr_Format(PyExc_TypeError
,
1411 "limit must be an integer");
1414 field_limit
= PyLong_AsLong(new_limit
);
1415 if (field_limit
== -1 && PyErr_Occurred()) {
1416 field_limit
= old_limit
;
1420 return PyLong_FromLong(old_limit
);
1427 PyDoc_STRVAR(csv_module_doc
,
1428 "CSV parsing and writing.\n"
1430 "This module provides classes that assist in the reading and writing\n"
1431 "of Comma Separated Value (CSV) files, and implements the interface\n"
1432 "described by PEP 305. Although many CSV files are simple to parse,\n"
1433 "the format is not formally defined by a stable specification and\n"
1434 "is subtle enough that parsing lines of a CSV file with something\n"
1435 "like line.split(\",\") is bound to fail. The module supports three\n"
1436 "basic APIs: reading, writing, and registration of dialects.\n"
1439 "DIALECT REGISTRATION:\n"
1441 "Readers and writers support a dialect argument, which is a convenient\n"
1442 "handle on a group of settings. When the dialect argument is a string,\n"
1443 "it identifies one of the dialects previously registered with the module.\n"
1444 "If it is a class or instance, the attributes of the argument are used as\n"
1445 "the settings for the reader or writer:\n"
1448 " delimiter = ','\n"
1449 " quotechar = '\"'\n"
1450 " escapechar = None\n"
1451 " doublequote = True\n"
1452 " skipinitialspace = False\n"
1453 " lineterminator = '\\r\\n'\n"
1454 " quoting = QUOTE_MINIMAL\n"
1458 " * quotechar - specifies a one-character string to use as the \n"
1459 " quoting character. It defaults to '\"'.\n"
1460 " * delimiter - specifies a one-character string to use as the \n"
1461 " field separator. It defaults to ','.\n"
1462 " * skipinitialspace - specifies how to interpret whitespace which\n"
1463 " immediately follows a delimiter. It defaults to False, which\n"
1464 " means that whitespace immediately following a delimiter is part\n"
1465 " of the following field.\n"
1466 " * lineterminator - specifies the character sequence which should \n"
1467 " terminate rows.\n"
1468 " * quoting - controls when quotes should be generated by the writer.\n"
1469 " It can take on any of the following module constants:\n"
1471 " csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1472 " field contains either the quotechar or the delimiter\n"
1473 " csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1474 " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1475 " fields which do not parse as integers or floating point\n"
1477 " csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1478 " * escapechar - specifies a one-character string used to escape \n"
1479 " the delimiter when quoting is set to QUOTE_NONE.\n"
1480 " * doublequote - controls the handling of quotes inside fields. When\n"
1481 " True, two consecutive quotes are interpreted as one during read,\n"
1482 " and when writing, each quote character embedded in the data is\n"
1483 " written as two quotes\n");
1485 PyDoc_STRVAR(csv_reader_doc
,
1486 " csv_reader = reader(iterable [, dialect='excel']\n"
1487 " [optional keyword args])\n"
1488 " for row in csv_reader:\n"
1491 "The \"iterable\" argument can be any object that returns a line\n"
1492 "of input for each iteration, such as a file object or a list. The\n"
1493 "optional \"dialect\" parameter is discussed below. The function\n"
1494 "also accepts optional keyword arguments which override settings\n"
1495 "provided by the dialect.\n"
1497 "The returned object is an iterator. Each iteration returns a row\n"
1498 "of the CSV file (which can span multiple input lines):\n");
1500 PyDoc_STRVAR(csv_writer_doc
,
1501 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1502 " [optional keyword args])\n"
1503 " for row in sequence:\n"
1504 " csv_writer.writerow(row)\n"
1508 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1509 " [optional keyword args])\n"
1510 " csv_writer.writerows(rows)\n"
1512 "The \"fileobj\" argument can be any object that supports the file API.\n");
1514 PyDoc_STRVAR(csv_list_dialects_doc
,
1515 "Return a list of all know dialect names.\n"
1516 " names = csv.list_dialects()");
1518 PyDoc_STRVAR(csv_get_dialect_doc
,
1519 "Return the dialect instance associated with name.\n"
1520 " dialect = csv.get_dialect(name)");
1522 PyDoc_STRVAR(csv_register_dialect_doc
,
1523 "Create a mapping from a string name to a dialect class.\n"
1524 " dialect = csv.register_dialect(name, dialect)");
1526 PyDoc_STRVAR(csv_unregister_dialect_doc
,
1527 "Delete the name/dialect mapping associated with a string name.\n"
1528 " csv.unregister_dialect(name)");
1530 PyDoc_STRVAR(csv_field_size_limit_doc
,
1531 "Sets an upper limit on parsed fields.\n"
1532 " csv.field_size_limit([limit])\n"
1534 "Returns old limit. If limit is not given, no new limit is set and\n"
1535 "the old limit is returned");
1537 static struct PyMethodDef csv_methods
[] = {
1538 { "reader", (PyCFunction
)csv_reader
,
1539 METH_VARARGS
| METH_KEYWORDS
, csv_reader_doc
},
1540 { "writer", (PyCFunction
)csv_writer
,
1541 METH_VARARGS
| METH_KEYWORDS
, csv_writer_doc
},
1542 { "list_dialects", (PyCFunction
)csv_list_dialects
,
1543 METH_NOARGS
, csv_list_dialects_doc
},
1544 { "register_dialect", (PyCFunction
)csv_register_dialect
,
1545 METH_VARARGS
| METH_KEYWORDS
, csv_register_dialect_doc
},
1546 { "unregister_dialect", (PyCFunction
)csv_unregister_dialect
,
1547 METH_O
, csv_unregister_dialect_doc
},
1548 { "get_dialect", (PyCFunction
)csv_get_dialect
,
1549 METH_O
, csv_get_dialect_doc
},
1550 { "field_size_limit", (PyCFunction
)csv_field_size_limit
,
1551 METH_VARARGS
, csv_field_size_limit_doc
},
1556 static struct PyModuleDef _csvmodule
= {
1557 PyModuleDef_HEAD_INIT
,
1574 if (PyType_Ready(&Dialect_Type
) < 0)
1577 if (PyType_Ready(&Reader_Type
) < 0)
1580 if (PyType_Ready(&Writer_Type
) < 0)
1583 /* Create the module and add the functions */
1584 module
= PyModule_Create(&_csvmodule
);
1588 /* Add version to the module. */
1589 if (PyModule_AddStringConstant(module
, "__version__",
1590 MODULE_VERSION
) == -1)
1593 /* Add _dialects dictionary */
1594 dialects
= PyDict_New();
1595 if (dialects
== NULL
)
1597 if (PyModule_AddObject(module
, "_dialects", dialects
))
1600 /* Add quote styles into dictionary */
1601 for (style
= quote_styles
; style
->name
; style
++) {
1602 if (PyModule_AddIntConstant(module
, style
->name
,
1603 style
->style
) == -1)
1607 /* Add the Dialect type */
1608 Py_INCREF(&Dialect_Type
);
1609 if (PyModule_AddObject(module
, "Dialect", (PyObject
*)&Dialect_Type
))
1612 /* Add the CSV exception object to the module. */
1613 error_obj
= PyErr_NewException("_csv.Error", NULL
, NULL
);
1614 if (error_obj
== NULL
)
1616 PyModule_AddObject(module
, "Error", error_obj
);