5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
9 **** For people modifying this code, please note that as of this writing
10 **** (2003-03-23), it is intended that this code should work with Python
15 #define MODULE_VERSION "1.0"
18 #include "structmember.h"
21 /* begin 2.2 compatibility macros */
23 /* Define macros for inline documentation. */
24 #define PyDoc_VAR(name) static char name[]
25 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
26 #ifdef WITH_DOC_STRINGS
27 #define PyDoc_STR(str) str
29 #define PyDoc_STR(str) ""
31 #endif /* ifndef PyDoc_STRVAR */
33 #ifndef PyMODINIT_FUNC
34 # if defined(__cplusplus)
35 # define PyMODINIT_FUNC extern "C" void
36 # else /* __cplusplus */
37 # define PyMODINIT_FUNC void
38 # endif /* __cplusplus */
42 #define Py_CLEAR(op) \
45 PyObject *tmp = (PyObject *)(op); \
52 #define Py_VISIT(op) \
55 int vret = visit((PyObject *)(op), arg); \
62 /* end 2.2 compatibility macros */
64 #define IS_BASESTRING(o) \
65 PyObject_TypeCheck(o, &PyBaseString_Type)
67 static PyObject
*error_obj
; /* CSV exception */
68 static PyObject
*dialects
; /* Dialect registry */
69 static long field_limit
= 128 * 1024; /* max parsed field size */
72 START_RECORD
, START_FIELD
, ESCAPED_CHAR
, IN_FIELD
,
73 IN_QUOTED_FIELD
, ESCAPE_IN_QUOTED_FIELD
, QUOTE_IN_QUOTED_FIELD
,
78 QUOTE_MINIMAL
, QUOTE_ALL
, QUOTE_NONNUMERIC
, QUOTE_NONE
86 static StyleDesc quote_styles
[] = {
87 { QUOTE_MINIMAL
, "QUOTE_MINIMAL" },
88 { QUOTE_ALL
, "QUOTE_ALL" },
89 { QUOTE_NONNUMERIC
, "QUOTE_NONNUMERIC" },
90 { QUOTE_NONE
, "QUOTE_NONE" },
97 int doublequote
; /* is " represented by ""? */
98 char delimiter
; /* field separator */
99 char quotechar
; /* quote character */
100 char escapechar
; /* escape character */
101 int skipinitialspace
; /* ignore spaces following delimiter? */
102 PyObject
*lineterminator
; /* string to write between records */
103 int quoting
; /* style of quoting to write */
105 int strict
; /* raise exception on bad CSV */
108 staticforward PyTypeObject Dialect_Type
;
113 PyObject
*input_iter
; /* iterate over this for input lines */
115 DialectObj
*dialect
; /* parsing dialect */
117 PyObject
*fields
; /* field list for current record */
118 ParserState state
; /* current CSV parse state */
119 char *field
; /* build current field in here */
120 int field_size
; /* size of allocated buffer */
121 int field_len
; /* length of current field */
122 int numeric_field
; /* treat field as numeric */
123 unsigned long line_num
; /* Source-file line number */
126 staticforward PyTypeObject Reader_Type
;
128 #define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
133 PyObject
*writeline
; /* write output lines to this file */
135 DialectObj
*dialect
; /* parsing dialect */
137 char *rec
; /* buffer for parser.join */
138 int rec_size
; /* size of allocated record */
139 int rec_len
; /* length of record */
140 int num_fields
; /* number of fields in record */
143 staticforward PyTypeObject Writer_Type
;
150 get_dialect_from_registry(PyObject
* name_obj
)
152 PyObject
*dialect_obj
;
154 dialect_obj
= PyDict_GetItem(dialects
, name_obj
);
155 if (dialect_obj
== NULL
) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj
, "unknown dialect");
160 Py_INCREF(dialect_obj
);
165 get_string(PyObject
*str
)
172 get_nullchar_as_None(char c
)
179 return PyBytes_FromStringAndSize((char*)&c
, 1);
183 Dialect_get_lineterminator(DialectObj
*self
)
185 return get_string(self
->lineterminator
);
189 Dialect_get_escapechar(DialectObj
*self
)
191 return get_nullchar_as_None(self
->escapechar
);
195 Dialect_get_quotechar(DialectObj
*self
)
197 return get_nullchar_as_None(self
->quotechar
);
201 Dialect_get_quoting(DialectObj
*self
)
203 return PyInt_FromLong(self
->quoting
);
207 _set_bool(const char *name
, int *target
, PyObject
*src
, int dflt
)
212 *target
= PyObject_IsTrue(src
);
217 _set_int(const char *name
, int *target
, PyObject
*src
, int dflt
)
222 if (!PyInt_Check(src
)) {
223 PyErr_Format(PyExc_TypeError
,
224 "\"%s\" must be an integer", name
);
227 *target
= PyInt_AsLong(src
);
233 _set_char(const char *name
, char *target
, PyObject
*src
, char dflt
)
238 if (src
== Py_None
|| PyBytes_Size(src
) == 0)
240 else if (!PyBytes_Check(src
) || PyBytes_Size(src
) != 1) {
241 PyErr_Format(PyExc_TypeError
,
242 "\"%s\" must be an 1-character string",
247 char *s
= PyBytes_AsString(src
);
257 _set_str(const char *name
, PyObject
**target
, PyObject
*src
, const char *dflt
)
260 *target
= PyBytes_FromString(dflt
);
264 else if (!IS_BASESTRING(src
)) {
265 PyErr_Format(PyExc_TypeError
,
266 "\"%s\" must be an string", name
);
279 dialect_check_quoting(int quoting
)
281 StyleDesc
*qs
= quote_styles
;
283 for (qs
= quote_styles
; qs
->name
; qs
++) {
284 if (qs
->style
== quoting
)
287 PyErr_Format(PyExc_TypeError
, "bad \"quoting\" value");
291 #define D_OFF(x) offsetof(DialectObj, x)
293 static struct PyMemberDef Dialect_memberlist
[] = {
294 { "delimiter", T_CHAR
, D_OFF(delimiter
), READONLY
},
295 { "skipinitialspace", T_INT
, D_OFF(skipinitialspace
), READONLY
},
296 { "doublequote", T_INT
, D_OFF(doublequote
), READONLY
},
297 { "strict", T_INT
, D_OFF(strict
), READONLY
},
301 static PyGetSetDef Dialect_getsetlist
[] = {
302 { "escapechar", (getter
)Dialect_get_escapechar
},
303 { "lineterminator", (getter
)Dialect_get_lineterminator
},
304 { "quotechar", (getter
)Dialect_get_quotechar
},
305 { "quoting", (getter
)Dialect_get_quoting
},
310 Dialect_dealloc(DialectObj
*self
)
312 Py_XDECREF(self
->lineterminator
);
313 Py_TYPE(self
)->tp_free((PyObject
*)self
);
316 static char *dialect_kws
[] = {
330 dialect_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwargs
)
333 PyObject
*ret
= NULL
;
334 PyObject
*dialect
= NULL
;
335 PyObject
*delimiter
= NULL
;
336 PyObject
*doublequote
= NULL
;
337 PyObject
*escapechar
= NULL
;
338 PyObject
*lineterminator
= NULL
;
339 PyObject
*quotechar
= NULL
;
340 PyObject
*quoting
= NULL
;
341 PyObject
*skipinitialspace
= NULL
;
342 PyObject
*strict
= NULL
;
344 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
,
345 "|OOOOOOOOO", dialect_kws
,
357 if (dialect
!= NULL
) {
358 if (IS_BASESTRING(dialect
)) {
359 dialect
= get_dialect_from_registry(dialect
);
365 /* Can we reuse this instance? */
366 if (PyObject_TypeCheck(dialect
, &Dialect_Type
) &&
370 lineterminator
== 0 &&
373 skipinitialspace
== 0 &&
378 self
= (DialectObj
*)type
->tp_alloc(type
, 0);
383 self
->lineterminator
= NULL
;
385 Py_XINCREF(delimiter
);
386 Py_XINCREF(doublequote
);
387 Py_XINCREF(escapechar
);
388 Py_XINCREF(lineterminator
);
389 Py_XINCREF(quotechar
);
391 Py_XINCREF(skipinitialspace
);
393 if (dialect
!= NULL
) {
394 #define DIALECT_GETATTR(v, n) \
396 v = PyObject_GetAttrString(dialect, n)
397 DIALECT_GETATTR(delimiter
, "delimiter");
398 DIALECT_GETATTR(doublequote
, "doublequote");
399 DIALECT_GETATTR(escapechar
, "escapechar");
400 DIALECT_GETATTR(lineterminator
, "lineterminator");
401 DIALECT_GETATTR(quotechar
, "quotechar");
402 DIALECT_GETATTR(quoting
, "quoting");
403 DIALECT_GETATTR(skipinitialspace
, "skipinitialspace");
404 DIALECT_GETATTR(strict
, "strict");
408 /* check types and convert to C values */
409 #define DIASET(meth, name, target, src, dflt) \
410 if (meth(name, target, src, dflt)) \
412 DIASET(_set_char
, "delimiter", &self
->delimiter
, delimiter
, ',');
413 DIASET(_set_bool
, "doublequote", &self
->doublequote
, doublequote
, 1);
414 DIASET(_set_char
, "escapechar", &self
->escapechar
, escapechar
, 0);
415 DIASET(_set_str
, "lineterminator", &self
->lineterminator
, lineterminator
, "\r\n");
416 DIASET(_set_char
, "quotechar", &self
->quotechar
, quotechar
, '"');
417 DIASET(_set_int
, "quoting", &self
->quoting
, quoting
, QUOTE_MINIMAL
);
418 DIASET(_set_bool
, "skipinitialspace", &self
->skipinitialspace
, skipinitialspace
, 0);
419 DIASET(_set_bool
, "strict", &self
->strict
, strict
, 0);
421 /* validate options */
422 if (dialect_check_quoting(self
->quoting
))
424 if (self
->delimiter
== 0) {
425 PyErr_SetString(PyExc_TypeError
, "delimiter must be set");
428 if (quotechar
== Py_None
&& quoting
== NULL
)
429 self
->quoting
= QUOTE_NONE
;
430 if (self
->quoting
!= QUOTE_NONE
&& self
->quotechar
== 0) {
431 PyErr_SetString(PyExc_TypeError
,
432 "quotechar must be set if quoting enabled");
435 if (self
->lineterminator
== 0) {
436 PyErr_SetString(PyExc_TypeError
, "lineterminator must be set");
440 ret
= (PyObject
*)self
;
445 Py_XDECREF(delimiter
);
446 Py_XDECREF(doublequote
);
447 Py_XDECREF(escapechar
);
448 Py_XDECREF(lineterminator
);
449 Py_XDECREF(quotechar
);
451 Py_XDECREF(skipinitialspace
);
457 PyDoc_STRVAR(Dialect_Type_doc
,
460 "The Dialect type records CSV parsing and generation options.\n");
462 static PyTypeObject Dialect_Type
= {
463 PyVarObject_HEAD_INIT(NULL
, 0)
464 "_csv.Dialect", /* tp_name */
465 sizeof(DialectObj
), /* tp_basicsize */
468 (destructor
)Dialect_dealloc
, /* tp_dealloc */
469 (printfunc
)0, /* tp_print */
470 (getattrfunc
)0, /* tp_getattr */
471 (setattrfunc
)0, /* tp_setattr */
472 (cmpfunc
)0, /* tp_compare */
473 (reprfunc
)0, /* tp_repr */
474 0, /* tp_as_number */
475 0, /* tp_as_sequence */
476 0, /* tp_as_mapping */
477 (hashfunc
)0, /* tp_hash */
478 (ternaryfunc
)0, /* tp_call */
479 (reprfunc
)0, /* tp_str */
482 0, /* tp_as_buffer */
483 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
484 Dialect_Type_doc
, /* tp_doc */
487 0, /* tp_richcompare */
488 0, /* tp_weaklistoffset */
492 Dialect_memberlist
, /* tp_members */
493 Dialect_getsetlist
, /* tp_getset */
496 0, /* tp_descr_get */
497 0, /* tp_descr_set */
498 0, /* tp_dictoffset */
501 dialect_new
, /* tp_new */
506 * Return an instance of the dialect type, given a Python instance or kwarg
507 * description of the dialect
510 _call_dialect(PyObject
*dialect_inst
, PyObject
*kwargs
)
515 ctor_args
= Py_BuildValue(dialect_inst
? "(O)" : "()", dialect_inst
);
516 if (ctor_args
== NULL
)
518 dialect
= PyObject_Call((PyObject
*)&Dialect_Type
, ctor_args
, kwargs
);
519 Py_DECREF(ctor_args
);
527 parse_save_field(ReaderObj
*self
)
531 field
= PyBytes_FromStringAndSize(self
->field
, self
->field_len
);
535 if (self
->numeric_field
) {
538 self
->numeric_field
= 0;
539 tmp
= PyNumber_Float(field
);
547 PyList_Append(self
->fields
, field
);
553 parse_grow_buff(ReaderObj
*self
)
555 if (self
->field_size
== 0) {
556 self
->field_size
= 4096;
557 if (self
->field
!= NULL
)
558 PyMem_Free(self
->field
);
559 self
->field
= PyMem_Malloc(self
->field_size
);
562 self
->field_size
*= 2;
563 self
->field
= PyMem_Realloc(self
->field
, self
->field_size
);
565 if (self
->field
== NULL
) {
573 parse_add_char(ReaderObj
*self
, char c
)
575 if (self
->field_len
>= field_limit
) {
576 PyErr_Format(error_obj
, "field larger than field limit (%ld)",
580 if (self
->field_len
== self
->field_size
&& !parse_grow_buff(self
))
582 self
->field
[self
->field_len
++] = c
;
587 parse_process_char(ReaderObj
*self
, char c
)
589 DialectObj
*dialect
= self
->dialect
;
591 switch (self
->state
) {
593 /* start of record */
595 /* empty line - return [] */
597 else if (c
== '\n' || c
== '\r') {
598 self
->state
= EAT_CRNL
;
601 /* normal character - handle as START_FIELD */
602 self
->state
= START_FIELD
;
605 /* expecting field */
606 if (c
== '\n' || c
== '\r' || c
== '\0') {
607 /* save empty field - return [fields] */
608 if (parse_save_field(self
) < 0)
610 self
->state
= (c
== '\0' ? START_RECORD
: EAT_CRNL
);
612 else if (c
== dialect
->quotechar
&&
613 dialect
->quoting
!= QUOTE_NONE
) {
614 /* start quoted field */
615 self
->state
= IN_QUOTED_FIELD
;
617 else if (c
== dialect
->escapechar
) {
618 /* possible escaped character */
619 self
->state
= ESCAPED_CHAR
;
621 else if (c
== ' ' && dialect
->skipinitialspace
)
622 /* ignore space at start of field */
624 else if (c
== dialect
->delimiter
) {
625 /* save empty field */
626 if (parse_save_field(self
) < 0)
630 /* begin new unquoted field */
631 if (dialect
->quoting
== QUOTE_NONNUMERIC
)
632 self
->numeric_field
= 1;
633 if (parse_add_char(self
, c
) < 0)
635 self
->state
= IN_FIELD
;
642 if (parse_add_char(self
, c
) < 0)
644 self
->state
= IN_FIELD
;
648 /* in unquoted field */
649 if (c
== '\n' || c
== '\r' || c
== '\0') {
650 /* end of line - return [fields] */
651 if (parse_save_field(self
) < 0)
653 self
->state
= (c
== '\0' ? START_RECORD
: EAT_CRNL
);
655 else if (c
== dialect
->escapechar
) {
656 /* possible escaped character */
657 self
->state
= ESCAPED_CHAR
;
659 else if (c
== dialect
->delimiter
) {
660 /* save field - wait for new field */
661 if (parse_save_field(self
) < 0)
663 self
->state
= START_FIELD
;
666 /* normal character - save in field */
667 if (parse_add_char(self
, c
) < 0)
672 case IN_QUOTED_FIELD
:
673 /* in quoted field */
676 else if (c
== dialect
->escapechar
) {
677 /* Possible escape character */
678 self
->state
= ESCAPE_IN_QUOTED_FIELD
;
680 else if (c
== dialect
->quotechar
&&
681 dialect
->quoting
!= QUOTE_NONE
) {
682 if (dialect
->doublequote
) {
683 /* doublequote; " represented by "" */
684 self
->state
= QUOTE_IN_QUOTED_FIELD
;
687 /* end of quote part of field */
688 self
->state
= IN_FIELD
;
692 /* normal character - save in field */
693 if (parse_add_char(self
, c
) < 0)
698 case ESCAPE_IN_QUOTED_FIELD
:
701 if (parse_add_char(self
, c
) < 0)
703 self
->state
= IN_QUOTED_FIELD
;
706 case QUOTE_IN_QUOTED_FIELD
:
707 /* doublequote - seen a quote in an quoted field */
708 if (dialect
->quoting
!= QUOTE_NONE
&&
709 c
== dialect
->quotechar
) {
711 if (parse_add_char(self
, c
) < 0)
713 self
->state
= IN_QUOTED_FIELD
;
715 else if (c
== dialect
->delimiter
) {
716 /* save field - wait for new field */
717 if (parse_save_field(self
) < 0)
719 self
->state
= START_FIELD
;
721 else if (c
== '\n' || c
== '\r' || c
== '\0') {
722 /* end of line - return [fields] */
723 if (parse_save_field(self
) < 0)
725 self
->state
= (c
== '\0' ? START_RECORD
: EAT_CRNL
);
727 else if (!dialect
->strict
) {
728 if (parse_add_char(self
, c
) < 0)
730 self
->state
= IN_FIELD
;
734 PyErr_Format(error_obj
, "'%c' expected after '%c'",
742 if (c
== '\n' || c
== '\r')
745 self
->state
= START_RECORD
;
747 PyErr_Format(error_obj
, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
757 parse_reset(ReaderObj
*self
)
759 Py_XDECREF(self
->fields
);
760 self
->fields
= PyList_New(0);
761 if (self
->fields
== NULL
)
764 self
->state
= START_RECORD
;
765 self
->numeric_field
= 0;
770 Reader_iternext(ReaderObj
*self
)
773 PyObject
*fields
= NULL
;
777 if (parse_reset(self
) < 0)
780 lineobj
= PyIter_Next(self
->input_iter
);
781 if (lineobj
== NULL
) {
782 /* End of input OR exception */
783 if (!PyErr_Occurred() && self
->field_len
!= 0)
784 PyErr_Format(error_obj
,
785 "newline inside string");
790 line
= PyBytes_AsString(lineobj
);
791 linelen
= PyBytes_Size(lineobj
);
793 if (line
== NULL
|| linelen
< 0) {
801 PyErr_Format(error_obj
,
802 "line contains NULL byte");
805 if (parse_process_char(self
, c
) < 0) {
811 if (parse_process_char(self
, 0) < 0)
813 } while (self
->state
!= START_RECORD
);
815 fields
= self
->fields
;
822 Reader_dealloc(ReaderObj
*self
)
824 PyObject_GC_UnTrack(self
);
825 Py_XDECREF(self
->dialect
);
826 Py_XDECREF(self
->input_iter
);
827 Py_XDECREF(self
->fields
);
828 if (self
->field
!= NULL
)
829 PyMem_Free(self
->field
);
830 PyObject_GC_Del(self
);
834 Reader_traverse(ReaderObj
*self
, visitproc visit
, void *arg
)
836 Py_VISIT(self
->dialect
);
837 Py_VISIT(self
->input_iter
);
838 Py_VISIT(self
->fields
);
843 Reader_clear(ReaderObj
*self
)
845 Py_CLEAR(self
->dialect
);
846 Py_CLEAR(self
->input_iter
);
847 Py_CLEAR(self
->fields
);
851 PyDoc_STRVAR(Reader_Type_doc
,
854 "Reader objects are responsible for reading and parsing tabular data\n"
858 static struct PyMethodDef Reader_methods
[] = {
861 #define R_OFF(x) offsetof(ReaderObj, x)
863 static struct PyMemberDef Reader_memberlist
[] = {
864 { "dialect", T_OBJECT
, R_OFF(dialect
), RO
},
865 { "line_num", T_ULONG
, R_OFF(line_num
), RO
},
870 static PyTypeObject Reader_Type
= {
871 PyVarObject_HEAD_INIT(NULL
, 0)
872 "_csv.reader", /*tp_name*/
873 sizeof(ReaderObj
), /*tp_basicsize*/
876 (destructor
)Reader_dealloc
, /*tp_dealloc*/
877 (printfunc
)0, /*tp_print*/
878 (getattrfunc
)0, /*tp_getattr*/
879 (setattrfunc
)0, /*tp_setattr*/
880 (cmpfunc
)0, /*tp_compare*/
881 (reprfunc
)0, /*tp_repr*/
883 0, /*tp_as_sequence*/
885 (hashfunc
)0, /*tp_hash*/
886 (ternaryfunc
)0, /*tp_call*/
887 (reprfunc
)0, /*tp_str*/
891 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
892 Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
893 Reader_Type_doc
, /*tp_doc*/
894 (traverseproc
)Reader_traverse
, /*tp_traverse*/
895 (inquiry
)Reader_clear
, /*tp_clear*/
896 0, /*tp_richcompare*/
897 0, /*tp_weaklistoffset*/
898 PyObject_SelfIter
, /*tp_iter*/
899 (getiterfunc
)Reader_iternext
, /*tp_iternext*/
900 Reader_methods
, /*tp_methods*/
901 Reader_memberlist
, /*tp_members*/
907 csv_reader(PyObject
*module
, PyObject
*args
, PyObject
*keyword_args
)
909 PyObject
* iterator
, * dialect
= NULL
;
910 ReaderObj
* self
= PyObject_GC_New(ReaderObj
, &Reader_Type
);
915 self
->dialect
= NULL
;
917 self
->input_iter
= NULL
;
919 self
->field_size
= 0;
922 if (parse_reset(self
) < 0) {
927 if (!PyArg_UnpackTuple(args
, "", 1, 2, &iterator
, &dialect
)) {
931 self
->input_iter
= PyObject_GetIter(iterator
);
932 if (self
->input_iter
== NULL
) {
933 PyErr_SetString(PyExc_TypeError
,
934 "argument 1 must be an iterator");
938 self
->dialect
= (DialectObj
*)_call_dialect(dialect
, keyword_args
);
939 if (self
->dialect
== NULL
) {
944 PyObject_GC_Track(self
);
945 return (PyObject
*)self
;
951 /* ---------------------------------------------------------------- */
953 join_reset(WriterObj
*self
)
956 self
->num_fields
= 0;
959 #define MEM_INCR 32768
961 /* Calculate new record length or append field to record. Return new
965 join_append_data(WriterObj
*self
, char *field
, int quote_empty
,
966 int *quoted
, int copy_phase
)
968 DialectObj
*dialect
= self
->dialect
;
975 self->rec[rec_len] = c;\
979 lineterm
= PyBytes_AsString(dialect
->lineterminator
);
980 if (lineterm
== NULL
)
983 rec_len
= self
->rec_len
;
985 /* If this is not the first field we need a field separator */
986 if (self
->num_fields
> 0)
987 ADDCH(dialect
->delimiter
);
989 /* Handle preceding quote */
990 if (copy_phase
&& *quoted
)
991 ADDCH(dialect
->quotechar
);
993 /* Copy/count field data */
1001 if (c
== dialect
->delimiter
||
1002 c
== dialect
->escapechar
||
1003 c
== dialect
->quotechar
||
1004 strchr(lineterm
, c
)) {
1005 if (dialect
->quoting
== QUOTE_NONE
)
1008 if (c
== dialect
->quotechar
) {
1009 if (dialect
->doublequote
)
1010 ADDCH(dialect
->quotechar
);
1018 if (!dialect
->escapechar
) {
1019 PyErr_Format(error_obj
,
1020 "need to escape, but no escapechar set");
1023 ADDCH(dialect
->escapechar
);
1026 /* Copy field character into record buffer.
1031 /* If field is empty check if it needs to be quoted.
1033 if (i
== 0 && quote_empty
) {
1034 if (dialect
->quoting
== QUOTE_NONE
) {
1035 PyErr_Format(error_obj
,
1036 "single empty field record must be quoted");
1045 ADDCH(dialect
->quotechar
);
1054 join_check_rec_size(WriterObj
*self
, int rec_len
)
1056 if (rec_len
> self
->rec_size
) {
1057 if (self
->rec_size
== 0) {
1058 self
->rec_size
= (rec_len
/ MEM_INCR
+ 1) * MEM_INCR
;
1059 if (self
->rec
!= NULL
)
1060 PyMem_Free(self
->rec
);
1061 self
->rec
= PyMem_Malloc(self
->rec_size
);
1064 char *old_rec
= self
->rec
;
1066 self
->rec_size
= (rec_len
/ MEM_INCR
+ 1) * MEM_INCR
;
1067 self
->rec
= PyMem_Realloc(self
->rec
, self
->rec_size
);
1068 if (self
->rec
== NULL
)
1069 PyMem_Free(old_rec
);
1071 if (self
->rec
== NULL
) {
1080 join_append(WriterObj
*self
, char *field
, int *quoted
, int quote_empty
)
1084 rec_len
= join_append_data(self
, field
, quote_empty
, quoted
, 0);
1088 /* grow record buffer if necessary */
1089 if (!join_check_rec_size(self
, rec_len
))
1092 self
->rec_len
= join_append_data(self
, field
, quote_empty
, quoted
, 1);
1099 join_append_lineterminator(WriterObj
*self
)
1104 terminator_len
= PyBytes_Size(self
->dialect
->lineterminator
);
1105 if (terminator_len
== -1)
1108 /* grow record buffer if necessary */
1109 if (!join_check_rec_size(self
, self
->rec_len
+ terminator_len
))
1112 terminator
= PyBytes_AsString(self
->dialect
->lineterminator
);
1113 if (terminator
== NULL
)
1115 memmove(self
->rec
+ self
->rec_len
, terminator
, terminator_len
);
1116 self
->rec_len
+= terminator_len
;
1121 PyDoc_STRVAR(csv_writerow_doc
,
1122 "writerow(sequence)\n"
1124 "Construct and write a CSV record from a sequence of fields. Non-string\n"
1125 "elements will be converted to string.");
1128 csv_writerow(WriterObj
*self
, PyObject
*seq
)
1130 DialectObj
*dialect
= self
->dialect
;
1133 if (!PySequence_Check(seq
))
1134 return PyErr_Format(error_obj
, "sequence expected");
1136 len
= PySequence_Length(seq
);
1140 /* Join all fields in internal buffer.
1143 for (i
= 0; i
< len
; i
++) {
1148 field
= PySequence_GetItem(seq
, i
);
1152 switch (dialect
->quoting
) {
1153 case QUOTE_NONNUMERIC
:
1154 quoted
= !PyNumber_Check(field
);
1164 if (PyBytes_Check(field
)) {
1165 append_ok
= join_append(self
,
1166 PyBytes_AS_STRING(field
),
1170 else if (field
== Py_None
) {
1171 append_ok
= join_append(self
, "", "ed
, len
== 1);
1177 str
= PyObject_Str(field
);
1182 append_ok
= join_append(self
, PyBytes_AS_STRING(str
),
1190 /* Add line terminator.
1192 if (!join_append_lineterminator(self
))
1195 return PyObject_CallFunction(self
->writeline
,
1196 "(s#)", self
->rec
, self
->rec_len
);
1199 PyDoc_STRVAR(csv_writerows_doc
,
1200 "writerows(sequence of sequences)\n"
1202 "Construct and write a series of sequences to a csv file. Non-string\n"
1203 "elements will be converted to string.");
1206 csv_writerows(WriterObj
*self
, PyObject
*seqseq
)
1208 PyObject
*row_iter
, *row_obj
, *result
;
1210 row_iter
= PyObject_GetIter(seqseq
);
1211 if (row_iter
== NULL
) {
1212 PyErr_SetString(PyExc_TypeError
,
1213 "writerows() argument must be iterable");
1216 while ((row_obj
= PyIter_Next(row_iter
))) {
1217 result
= csv_writerow(self
, row_obj
);
1220 Py_DECREF(row_iter
);
1226 Py_DECREF(row_iter
);
1227 if (PyErr_Occurred())
1233 static struct PyMethodDef Writer_methods
[] = {
1234 { "writerow", (PyCFunction
)csv_writerow
, METH_O
, csv_writerow_doc
},
1235 { "writerows", (PyCFunction
)csv_writerows
, METH_O
, csv_writerows_doc
},
1239 #define W_OFF(x) offsetof(WriterObj, x)
1241 static struct PyMemberDef Writer_memberlist
[] = {
1242 { "dialect", T_OBJECT
, W_OFF(dialect
), RO
},
1247 Writer_dealloc(WriterObj
*self
)
1249 PyObject_GC_UnTrack(self
);
1250 Py_XDECREF(self
->dialect
);
1251 Py_XDECREF(self
->writeline
);
1252 if (self
->rec
!= NULL
)
1253 PyMem_Free(self
->rec
);
1254 PyObject_GC_Del(self
);
1258 Writer_traverse(WriterObj
*self
, visitproc visit
, void *arg
)
1260 Py_VISIT(self
->dialect
);
1261 Py_VISIT(self
->writeline
);
1266 Writer_clear(WriterObj
*self
)
1268 Py_CLEAR(self
->dialect
);
1269 Py_CLEAR(self
->writeline
);
1273 PyDoc_STRVAR(Writer_Type_doc
,
1276 "Writer objects are responsible for generating tabular data\n"
1277 "in CSV format from sequence input.\n"
1280 static PyTypeObject Writer_Type
= {
1281 PyVarObject_HEAD_INIT(NULL
, 0)
1282 "_csv.writer", /*tp_name*/
1283 sizeof(WriterObj
), /*tp_basicsize*/
1286 (destructor
)Writer_dealloc
, /*tp_dealloc*/
1287 (printfunc
)0, /*tp_print*/
1288 (getattrfunc
)0, /*tp_getattr*/
1289 (setattrfunc
)0, /*tp_setattr*/
1290 (cmpfunc
)0, /*tp_compare*/
1291 (reprfunc
)0, /*tp_repr*/
1293 0, /*tp_as_sequence*/
1294 0, /*tp_as_mapping*/
1295 (hashfunc
)0, /*tp_hash*/
1296 (ternaryfunc
)0, /*tp_call*/
1297 (reprfunc
)0, /*tp_str*/
1301 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
1302 Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
1304 (traverseproc
)Writer_traverse
, /*tp_traverse*/
1305 (inquiry
)Writer_clear
, /*tp_clear*/
1306 0, /*tp_richcompare*/
1307 0, /*tp_weaklistoffset*/
1308 (getiterfunc
)0, /*tp_iter*/
1309 (getiterfunc
)0, /*tp_iternext*/
1310 Writer_methods
, /*tp_methods*/
1311 Writer_memberlist
, /*tp_members*/
1316 csv_writer(PyObject
*module
, PyObject
*args
, PyObject
*keyword_args
)
1318 PyObject
* output_file
, * dialect
= NULL
;
1319 WriterObj
* self
= PyObject_GC_New(WriterObj
, &Writer_Type
);
1324 self
->dialect
= NULL
;
1325 self
->writeline
= NULL
;
1330 self
->num_fields
= 0;
1332 if (!PyArg_UnpackTuple(args
, "", 1, 2, &output_file
, &dialect
)) {
1336 self
->writeline
= PyObject_GetAttrString(output_file
, "write");
1337 if (self
->writeline
== NULL
|| !PyCallable_Check(self
->writeline
)) {
1338 PyErr_SetString(PyExc_TypeError
,
1339 "argument 1 must have a \"write\" method");
1343 self
->dialect
= (DialectObj
*)_call_dialect(dialect
, keyword_args
);
1344 if (self
->dialect
== NULL
) {
1348 PyObject_GC_Track(self
);
1349 return (PyObject
*)self
;
1356 csv_list_dialects(PyObject
*module
, PyObject
*args
)
1358 return PyDict_Keys(dialects
);
1362 csv_register_dialect(PyObject
*module
, PyObject
*args
, PyObject
*kwargs
)
1364 PyObject
*name_obj
, *dialect_obj
= NULL
;
1367 if (!PyArg_UnpackTuple(args
, "", 1, 2, &name_obj
, &dialect_obj
))
1369 if (!IS_BASESTRING(name_obj
)) {
1370 PyErr_SetString(PyExc_TypeError
,
1371 "dialect name must be a string or unicode");
1374 dialect
= _call_dialect(dialect_obj
, kwargs
);
1375 if (dialect
== NULL
)
1377 if (PyDict_SetItem(dialects
, name_obj
, dialect
) < 0) {
1387 csv_unregister_dialect(PyObject
*module
, PyObject
*name_obj
)
1389 if (PyDict_DelItem(dialects
, name_obj
) < 0)
1390 return PyErr_Format(error_obj
, "unknown dialect");
1396 csv_get_dialect(PyObject
*module
, PyObject
*name_obj
)
1398 return get_dialect_from_registry(name_obj
);
1402 csv_field_size_limit(PyObject
*module
, PyObject
*args
)
1404 PyObject
*new_limit
= NULL
;
1405 long old_limit
= field_limit
;
1407 if (!PyArg_UnpackTuple(args
, "field_size_limit", 0, 1, &new_limit
))
1409 if (new_limit
!= NULL
) {
1410 if (!PyInt_Check(new_limit
)) {
1411 PyErr_Format(PyExc_TypeError
,
1412 "limit must be an integer");
1415 field_limit
= PyInt_AsLong(new_limit
);
1417 return PyInt_FromLong(old_limit
);
1424 PyDoc_STRVAR(csv_module_doc
,
1425 "CSV parsing and writing.\n"
1427 "This module provides classes that assist in the reading and writing\n"
1428 "of Comma Separated Value (CSV) files, and implements the interface\n"
1429 "described by PEP 305. Although many CSV files are simple to parse,\n"
1430 "the format is not formally defined by a stable specification and\n"
1431 "is subtle enough that parsing lines of a CSV file with something\n"
1432 "like line.split(\",\") is bound to fail. The module supports three\n"
1433 "basic APIs: reading, writing, and registration of dialects.\n"
1436 "DIALECT REGISTRATION:\n"
1438 "Readers and writers support a dialect argument, which is a convenient\n"
1439 "handle on a group of settings. When the dialect argument is a string,\n"
1440 "it identifies one of the dialects previously registered with the module.\n"
1441 "If it is a class or instance, the attributes of the argument are used as\n"
1442 "the settings for the reader or writer:\n"
1445 " delimiter = ','\n"
1446 " quotechar = '\"'\n"
1447 " escapechar = None\n"
1448 " doublequote = True\n"
1449 " skipinitialspace = False\n"
1450 " lineterminator = '\\r\\n'\n"
1451 " quoting = QUOTE_MINIMAL\n"
1455 " * quotechar - specifies a one-character string to use as the \n"
1456 " quoting character. It defaults to '\"'.\n"
1457 " * delimiter - specifies a one-character string to use as the \n"
1458 " field separator. It defaults to ','.\n"
1459 " * skipinitialspace - specifies how to interpret whitespace which\n"
1460 " immediately follows a delimiter. It defaults to False, which\n"
1461 " means that whitespace immediately following a delimiter is part\n"
1462 " of the following field.\n"
1463 " * lineterminator - specifies the character sequence which should \n"
1464 " terminate rows.\n"
1465 " * quoting - controls when quotes should be generated by the writer.\n"
1466 " It can take on any of the following module constants:\n"
1468 " csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1469 " field contains either the quotechar or the delimiter\n"
1470 " csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1471 " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1472 " fields which do not parse as integers or floating point\n"
1474 " csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1475 " * escapechar - specifies a one-character string used to escape \n"
1476 " the delimiter when quoting is set to QUOTE_NONE.\n"
1477 " * doublequote - controls the handling of quotes inside fields. When\n"
1478 " True, two consecutive quotes are interpreted as one during read,\n"
1479 " and when writing, each quote character embedded in the data is\n"
1480 " written as two quotes\n");
1482 PyDoc_STRVAR(csv_reader_doc
,
1483 " csv_reader = reader(iterable [, dialect='excel']\n"
1484 " [optional keyword args])\n"
1485 " for row in csv_reader:\n"
1488 "The \"iterable\" argument can be any object that returns a line\n"
1489 "of input for each iteration, such as a file object or a list. The\n"
1490 "optional \"dialect\" parameter is discussed below. The function\n"
1491 "also accepts optional keyword arguments which override settings\n"
1492 "provided by the dialect.\n"
1494 "The returned object is an iterator. Each iteration returns a row\n"
1495 "of the CSV file (which can span multiple input lines):\n");
1497 PyDoc_STRVAR(csv_writer_doc
,
1498 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1499 " [optional keyword args])\n"
1500 " for row in sequence:\n"
1501 " csv_writer.writerow(row)\n"
1505 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1506 " [optional keyword args])\n"
1507 " csv_writer.writerows(rows)\n"
1509 "The \"fileobj\" argument can be any object that supports the file API.\n");
1511 PyDoc_STRVAR(csv_list_dialects_doc
,
1512 "Return a list of all know dialect names.\n"
1513 " names = csv.list_dialects()");
1515 PyDoc_STRVAR(csv_get_dialect_doc
,
1516 "Return the dialect instance associated with name.\n"
1517 " dialect = csv.get_dialect(name)");
1519 PyDoc_STRVAR(csv_register_dialect_doc
,
1520 "Create a mapping from a string name to a dialect class.\n"
1521 " dialect = csv.register_dialect(name, dialect)");
1523 PyDoc_STRVAR(csv_unregister_dialect_doc
,
1524 "Delete the name/dialect mapping associated with a string name.\n"
1525 " csv.unregister_dialect(name)");
1527 PyDoc_STRVAR(csv_field_size_limit_doc
,
1528 "Sets an upper limit on parsed fields.\n"
1529 " csv.field_size_limit([limit])\n"
1531 "Returns old limit. If limit is not given, no new limit is set and\n"
1532 "the old limit is returned");
1534 static struct PyMethodDef csv_methods
[] = {
1535 { "reader", (PyCFunction
)csv_reader
,
1536 METH_VARARGS
| METH_KEYWORDS
, csv_reader_doc
},
1537 { "writer", (PyCFunction
)csv_writer
,
1538 METH_VARARGS
| METH_KEYWORDS
, csv_writer_doc
},
1539 { "list_dialects", (PyCFunction
)csv_list_dialects
,
1540 METH_NOARGS
, csv_list_dialects_doc
},
1541 { "register_dialect", (PyCFunction
)csv_register_dialect
,
1542 METH_VARARGS
| METH_KEYWORDS
, csv_register_dialect_doc
},
1543 { "unregister_dialect", (PyCFunction
)csv_unregister_dialect
,
1544 METH_O
, csv_unregister_dialect_doc
},
1545 { "get_dialect", (PyCFunction
)csv_get_dialect
,
1546 METH_O
, csv_get_dialect_doc
},
1547 { "field_size_limit", (PyCFunction
)csv_field_size_limit
,
1548 METH_VARARGS
, csv_field_size_limit_doc
},
1558 if (PyType_Ready(&Dialect_Type
) < 0)
1561 if (PyType_Ready(&Reader_Type
) < 0)
1564 if (PyType_Ready(&Writer_Type
) < 0)
1567 /* Create the module and add the functions */
1568 module
= Py_InitModule3("_csv", csv_methods
, csv_module_doc
);
1572 /* Add version to the module. */
1573 if (PyModule_AddStringConstant(module
, "__version__",
1574 MODULE_VERSION
) == -1)
1577 /* Add _dialects dictionary */
1578 dialects
= PyDict_New();
1579 if (dialects
== NULL
)
1581 if (PyModule_AddObject(module
, "_dialects", dialects
))
1584 /* Add quote styles into dictionary */
1585 for (style
= quote_styles
; style
->name
; style
++) {
1586 if (PyModule_AddIntConstant(module
, style
->name
,
1587 style
->style
) == -1)
1591 /* Add the Dialect type */
1592 Py_INCREF(&Dialect_Type
);
1593 if (PyModule_AddObject(module
, "Dialect", (PyObject
*)&Dialect_Type
))
1596 /* Add the CSV exception object to the module. */
1597 error_obj
= PyErr_NewException("_csv.Error", NULL
, NULL
);
1598 if (error_obj
== NULL
)
1600 PyModule_AddObject(module
, "Error", error_obj
);