5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
9 **** For people modifying this code, please note that as of this writing
10 **** (2003-03-23), it is intended that this code should work with Python
15 #define MODULE_VERSION "1.0"
18 #include "structmember.h"
21 /* begin 2.2 compatibility macros */
23 /* Define macros for inline documentation. */
24 #define PyDoc_VAR(name) static char name[]
25 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
26 #ifdef WITH_DOC_STRINGS
27 #define PyDoc_STR(str) str
29 #define PyDoc_STR(str) ""
31 #endif /* ifndef PyDoc_STRVAR */
33 #ifndef PyMODINIT_FUNC
34 # if defined(__cplusplus)
35 # define PyMODINIT_FUNC extern "C" void
36 # else /* __cplusplus */
37 # define PyMODINIT_FUNC void
38 # endif /* __cplusplus */
42 #define Py_CLEAR(op) \
45 PyObject *tmp = (PyObject *)(op); \
52 #define Py_VISIT(op) \
55 int vret = visit((PyObject *)(op), arg); \
62 /* end 2.2 compatibility macros */
64 #define IS_BASESTRING(o) \
65 PyObject_TypeCheck(o, &PyBaseString_Type)
67 static PyObject
*error_obj
; /* CSV exception */
68 static PyObject
*dialects
; /* Dialect registry */
69 static long field_limit
= 128 * 1024; /* max parsed field size */
72 START_RECORD
, START_FIELD
, ESCAPED_CHAR
, IN_FIELD
,
73 IN_QUOTED_FIELD
, ESCAPE_IN_QUOTED_FIELD
, QUOTE_IN_QUOTED_FIELD
,
78 QUOTE_MINIMAL
, QUOTE_ALL
, QUOTE_NONNUMERIC
, QUOTE_NONE
86 static StyleDesc quote_styles
[] = {
87 { QUOTE_MINIMAL
, "QUOTE_MINIMAL" },
88 { QUOTE_ALL
, "QUOTE_ALL" },
89 { QUOTE_NONNUMERIC
, "QUOTE_NONNUMERIC" },
90 { QUOTE_NONE
, "QUOTE_NONE" },
97 int doublequote
; /* is " represented by ""? */
98 char delimiter
; /* field separator */
99 char quotechar
; /* quote character */
100 char escapechar
; /* escape character */
101 int skipinitialspace
; /* ignore spaces following delimiter? */
102 PyObject
*lineterminator
; /* string to write between records */
103 int quoting
; /* style of quoting to write */
105 int strict
; /* raise exception on bad CSV */
108 staticforward PyTypeObject Dialect_Type
;
113 PyObject
*input_iter
; /* iterate over this for input lines */
115 DialectObj
*dialect
; /* parsing dialect */
117 PyObject
*fields
; /* field list for current record */
118 ParserState state
; /* current CSV parse state */
119 char *field
; /* build current field in here */
120 int field_size
; /* size of allocated buffer */
121 int field_len
; /* length of current field */
122 int numeric_field
; /* treat field as numeric */
123 unsigned long line_num
; /* Source-file line number */
126 staticforward PyTypeObject Reader_Type
;
128 #define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
133 PyObject
*writeline
; /* write output lines to this file */
135 DialectObj
*dialect
; /* dialect controlling output formatting */
137 char *rec
; /* buffer for parser.join */
138 int rec_size
; /* size of allocated record */
139 int rec_len
; /* length of record */
140 int num_fields
; /* number of fields in record */
143 staticforward PyTypeObject Writer_Type
;
150 get_dialect_from_registry(PyObject
* name_obj
)
152 PyObject
*dialect_obj
;
154 dialect_obj
= PyDict_GetItem(dialects
, name_obj
);
155 if (dialect_obj
== NULL
) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj
, "unknown dialect");
160 Py_INCREF(dialect_obj
);
165 get_string(PyObject
*str
)
172 get_nullchar_as_None(char c
)
179 return PyString_FromStringAndSize((char*)&c
, 1);
183 Dialect_get_lineterminator(DialectObj
*self
)
185 return get_string(self
->lineterminator
);
189 Dialect_get_escapechar(DialectObj
*self
)
191 return get_nullchar_as_None(self
->escapechar
);
195 Dialect_get_quotechar(DialectObj
*self
)
197 return get_nullchar_as_None(self
->quotechar
);
201 Dialect_get_quoting(DialectObj
*self
)
203 return PyInt_FromLong(self
->quoting
);
207 _set_bool(const char *name
, int *target
, PyObject
*src
, int dflt
)
212 *target
= PyObject_IsTrue(src
);
217 _set_int(const char *name
, int *target
, PyObject
*src
, int dflt
)
222 if (!PyInt_Check(src
)) {
223 PyErr_Format(PyExc_TypeError
,
224 "\"%s\" must be an integer", name
);
227 *target
= PyInt_AsLong(src
);
233 _set_char(const char *name
, char *target
, PyObject
*src
, char dflt
)
238 if (src
== Py_None
|| PyString_Size(src
) == 0)
240 else if (!PyString_Check(src
) || PyString_Size(src
) != 1) {
241 PyErr_Format(PyExc_TypeError
,
242 "\"%s\" must be an 1-character string",
247 char *s
= PyString_AsString(src
);
257 _set_str(const char *name
, PyObject
**target
, PyObject
*src
, const char *dflt
)
260 *target
= PyString_FromString(dflt
);
264 else if (!IS_BASESTRING(src
)) {
265 PyErr_Format(PyExc_TypeError
,
266 "\"%s\" must be an string", name
);
279 dialect_check_quoting(int quoting
)
281 StyleDesc
*qs
= quote_styles
;
283 for (qs
= quote_styles
; qs
->name
; qs
++) {
284 if (qs
->style
== quoting
)
287 PyErr_Format(PyExc_TypeError
, "bad \"quoting\" value");
291 #define D_OFF(x) offsetof(DialectObj, x)
293 static struct PyMemberDef Dialect_memberlist
[] = {
294 { "delimiter", T_CHAR
, D_OFF(delimiter
), READONLY
},
295 { "skipinitialspace", T_INT
, D_OFF(skipinitialspace
), READONLY
},
296 { "doublequote", T_INT
, D_OFF(doublequote
), READONLY
},
297 { "strict", T_INT
, D_OFF(strict
), READONLY
},
301 static PyGetSetDef Dialect_getsetlist
[] = {
302 { "escapechar", (getter
)Dialect_get_escapechar
},
303 { "lineterminator", (getter
)Dialect_get_lineterminator
},
304 { "quotechar", (getter
)Dialect_get_quotechar
},
305 { "quoting", (getter
)Dialect_get_quoting
},
310 Dialect_dealloc(DialectObj
*self
)
312 Py_XDECREF(self
->lineterminator
);
313 self
->ob_type
->tp_free((PyObject
*)self
);
316 static char *dialect_kws
[] = {
330 dialect_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwargs
)
333 PyObject
*ret
= NULL
;
334 PyObject
*dialect
= NULL
;
335 PyObject
*delimiter
= NULL
;
336 PyObject
*doublequote
= NULL
;
337 PyObject
*escapechar
= NULL
;
338 PyObject
*lineterminator
= NULL
;
339 PyObject
*quotechar
= NULL
;
340 PyObject
*quoting
= NULL
;
341 PyObject
*skipinitialspace
= NULL
;
342 PyObject
*strict
= NULL
;
344 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
,
345 "|OOOOOOOOO", dialect_kws
,
357 if (dialect
!= NULL
) {
358 if (IS_BASESTRING(dialect
)) {
359 dialect
= get_dialect_from_registry(dialect
);
365 /* Can we reuse this instance? */
366 if (PyObject_TypeCheck(dialect
, &Dialect_Type
) &&
370 lineterminator
== 0 &&
373 skipinitialspace
== 0 &&
378 self
= (DialectObj
*)type
->tp_alloc(type
, 0);
383 self
->lineterminator
= NULL
;
385 Py_XINCREF(delimiter
);
386 Py_XINCREF(doublequote
);
387 Py_XINCREF(escapechar
);
388 Py_XINCREF(lineterminator
);
389 Py_XINCREF(quotechar
);
391 Py_XINCREF(skipinitialspace
);
393 if (dialect
!= NULL
) {
394 #define DIALECT_GETATTR(v, n) \
396 v = PyObject_GetAttrString(dialect, n)
397 DIALECT_GETATTR(delimiter
, "delimiter");
398 DIALECT_GETATTR(doublequote
, "doublequote");
399 DIALECT_GETATTR(escapechar
, "escapechar");
400 DIALECT_GETATTR(lineterminator
, "lineterminator");
401 DIALECT_GETATTR(quotechar
, "quotechar");
402 DIALECT_GETATTR(quoting
, "quoting");
403 DIALECT_GETATTR(skipinitialspace
, "skipinitialspace");
404 DIALECT_GETATTR(strict
, "strict");
408 /* check types and convert to C values */
409 #define DIASET(meth, name, target, src, dflt) \
410 if (meth(name, target, src, dflt)) \
412 DIASET(_set_char
, "delimiter", &self
->delimiter
, delimiter
, ',');
413 DIASET(_set_bool
, "doublequote", &self
->doublequote
, doublequote
, 1);
414 DIASET(_set_char
, "escapechar", &self
->escapechar
, escapechar
, 0);
415 DIASET(_set_str
, "lineterminator", &self
->lineterminator
, lineterminator
, "\r\n");
416 DIASET(_set_char
, "quotechar", &self
->quotechar
, quotechar
, '"');
417 DIASET(_set_int
, "quoting", &self
->quoting
, quoting
, QUOTE_MINIMAL
);
418 DIASET(_set_bool
, "skipinitialspace", &self
->skipinitialspace
, skipinitialspace
, 0);
419 DIASET(_set_bool
, "strict", &self
->strict
, strict
, 0);
421 /* validate options */
422 if (dialect_check_quoting(self
->quoting
))
424 if (self
->delimiter
== 0) {
425 PyErr_SetString(PyExc_TypeError
, "delimiter must be set");
428 if (quotechar
== Py_None
&& quoting
== NULL
)
429 self
->quoting
= QUOTE_NONE
;
430 if (self
->quoting
!= QUOTE_NONE
&& self
->quotechar
== 0) {
431 PyErr_SetString(PyExc_TypeError
,
432 "quotechar must be set if quoting enabled");
435 if (self
->lineterminator
== 0) {
436 PyErr_SetString(PyExc_TypeError
, "lineterminator must be set");
440 ret
= (PyObject
*)self
;
445 Py_XDECREF(delimiter
);
446 Py_XDECREF(doublequote
);
447 Py_XDECREF(escapechar
);
448 Py_XDECREF(lineterminator
);
449 Py_XDECREF(quotechar
);
451 Py_XDECREF(skipinitialspace
);
457 PyDoc_STRVAR(Dialect_Type_doc
,
460 "The Dialect type records CSV parsing and generation options.\n");
462 static PyTypeObject Dialect_Type
= {
463 PyObject_HEAD_INIT(NULL
)
465 "_csv.Dialect", /* tp_name */
466 sizeof(DialectObj
), /* tp_basicsize */
469 (destructor
)Dialect_dealloc
, /* tp_dealloc */
470 (printfunc
)0, /* tp_print */
471 (getattrfunc
)0, /* tp_getattr */
472 (setattrfunc
)0, /* tp_setattr */
473 (cmpfunc
)0, /* tp_compare */
474 (reprfunc
)0, /* tp_repr */
475 0, /* tp_as_number */
476 0, /* tp_as_sequence */
477 0, /* tp_as_mapping */
478 (hashfunc
)0, /* tp_hash */
479 (ternaryfunc
)0, /* tp_call */
480 (reprfunc
)0, /* tp_str */
483 0, /* tp_as_buffer */
484 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
485 Dialect_Type_doc
, /* tp_doc */
488 0, /* tp_richcompare */
489 0, /* tp_weaklistoffset */
493 Dialect_memberlist
, /* tp_members */
494 Dialect_getsetlist
, /* tp_getset */
497 0, /* tp_descr_get */
498 0, /* tp_descr_set */
499 0, /* tp_dictoffset */
502 dialect_new
, /* tp_new */
507 * Return an instance of the dialect type, given a Python instance or kwarg
508 * description of the dialect
511 _call_dialect(PyObject
*dialect_inst
, PyObject
*kwargs
)
516 ctor_args
= Py_BuildValue(dialect_inst
? "(O)" : "()", dialect_inst
);
517 if (ctor_args
== NULL
)
519 dialect
= PyObject_Call((PyObject
*)&Dialect_Type
, ctor_args
, kwargs
);
520 Py_DECREF(ctor_args
);
528 parse_save_field(ReaderObj
*self
)
532 field
= PyString_FromStringAndSize(self
->field
, self
->field_len
);
536 if (self
->numeric_field
) {
539 self
->numeric_field
= 0;
540 tmp
= PyNumber_Float(field
);
548 PyList_Append(self
->fields
, field
);
554 parse_grow_buff(ReaderObj
*self
)
556 if (self
->field_size
== 0) {
557 self
->field_size
= 4096;
558 if (self
->field
!= NULL
)
559 PyMem_Free(self
->field
);
560 self
->field
= PyMem_Malloc(self
->field_size
);
563 self
->field_size
*= 2;
564 self
->field
= PyMem_Realloc(self
->field
, self
->field_size
);
566 if (self
->field
== NULL
) {
574 parse_add_char(ReaderObj
*self
, char c
)
576 if (self
->field_len
>= field_limit
) {
577 PyErr_Format(error_obj
, "field larger than field limit (%ld)",
581 if (self
->field_len
== self
->field_size
&& !parse_grow_buff(self
))
583 self
->field
[self
->field_len
++] = c
;
588 parse_process_char(ReaderObj
*self
, char c
)
590 DialectObj
*dialect
= self
->dialect
;
592 switch (self
->state
) {
594 /* start of record */
596 /* empty line - return [] */
598 else if (c
== '\n' || c
== '\r') {
599 self
->state
= EAT_CRNL
;
602 /* normal character - handle as START_FIELD */
603 self
->state
= START_FIELD
;
606 /* expecting field */
607 if (c
== '\n' || c
== '\r' || c
== '\0') {
608 /* save empty field - return [fields] */
609 if (parse_save_field(self
) < 0)
611 self
->state
= (c
== '\0' ? START_RECORD
: EAT_CRNL
);
613 else if (c
== dialect
->quotechar
&&
614 dialect
->quoting
!= QUOTE_NONE
) {
615 /* start quoted field */
616 self
->state
= IN_QUOTED_FIELD
;
618 else if (c
== dialect
->escapechar
) {
619 /* possible escaped character */
620 self
->state
= ESCAPED_CHAR
;
622 else if (c
== ' ' && dialect
->skipinitialspace
)
623 /* ignore space at start of field */
625 else if (c
== dialect
->delimiter
) {
626 /* save empty field */
627 if (parse_save_field(self
) < 0)
631 /* begin new unquoted field */
632 if (dialect
->quoting
== QUOTE_NONNUMERIC
)
633 self
->numeric_field
= 1;
634 if (parse_add_char(self
, c
) < 0)
636 self
->state
= IN_FIELD
;
643 if (parse_add_char(self
, c
) < 0)
645 self
->state
= IN_FIELD
;
649 /* in unquoted field */
650 if (c
== '\n' || c
== '\r' || c
== '\0') {
651 /* end of line - return [fields] */
652 if (parse_save_field(self
) < 0)
654 self
->state
= (c
== '\0' ? START_RECORD
: EAT_CRNL
);
656 else if (c
== dialect
->escapechar
) {
657 /* possible escaped character */
658 self
->state
= ESCAPED_CHAR
;
660 else if (c
== dialect
->delimiter
) {
661 /* save field - wait for new field */
662 if (parse_save_field(self
) < 0)
664 self
->state
= START_FIELD
;
667 /* normal character - save in field */
668 if (parse_add_char(self
, c
) < 0)
673 case IN_QUOTED_FIELD
:
674 /* in quoted field */
677 else if (c
== dialect
->escapechar
) {
678 /* Possible escape character */
679 self
->state
= ESCAPE_IN_QUOTED_FIELD
;
681 else if (c
== dialect
->quotechar
&&
682 dialect
->quoting
!= QUOTE_NONE
) {
683 if (dialect
->doublequote
) {
684 /* doublequote; " represented by "" */
685 self
->state
= QUOTE_IN_QUOTED_FIELD
;
688 /* end of quote part of field */
689 self
->state
= IN_FIELD
;
693 /* normal character - save in field */
694 if (parse_add_char(self
, c
) < 0)
699 case ESCAPE_IN_QUOTED_FIELD
:
702 if (parse_add_char(self
, c
) < 0)
704 self
->state
= IN_QUOTED_FIELD
;
707 case QUOTE_IN_QUOTED_FIELD
:
708 /* doublequote - seen a quote in a quoted field */
709 if (dialect
->quoting
!= QUOTE_NONE
&&
710 c
== dialect
->quotechar
) {
712 if (parse_add_char(self
, c
) < 0)
714 self
->state
= IN_QUOTED_FIELD
;
716 else if (c
== dialect
->delimiter
) {
717 /* save field - wait for new field */
718 if (parse_save_field(self
) < 0)
720 self
->state
= START_FIELD
;
722 else if (c
== '\n' || c
== '\r' || c
== '\0') {
723 /* end of line - return [fields] */
724 if (parse_save_field(self
) < 0)
726 self
->state
= (c
== '\0' ? START_RECORD
: EAT_CRNL
);
728 else if (!dialect
->strict
) {
729 if (parse_add_char(self
, c
) < 0)
731 self
->state
= IN_FIELD
;
735 PyErr_Format(error_obj
, "'%c' expected after '%c'",
743 if (c
== '\n' || c
== '\r')
746 self
->state
= START_RECORD
;
748 PyErr_Format(error_obj
, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
758 parse_reset(ReaderObj
*self
)
760 Py_XDECREF(self
->fields
);
761 self
->fields
= PyList_New(0);
762 if (self
->fields
== NULL
)
765 self
->state
= START_RECORD
;
766 self
->numeric_field
= 0;
771 Reader_iternext(ReaderObj
*self
)
774 PyObject
*fields
= NULL
;
778 if (parse_reset(self
) < 0)
781 lineobj
= PyIter_Next(self
->input_iter
);
782 if (lineobj
== NULL
) {
783 /* End of input OR exception */
784 if (!PyErr_Occurred() && self
->field_len
!= 0)
785 PyErr_Format(error_obj
,
786 "newline inside string");
791 line
= PyString_AsString(lineobj
);
792 linelen
= PyString_Size(lineobj
);
794 if (line
== NULL
|| linelen
< 0) {
802 PyErr_Format(error_obj
,
803 "line contains NULL byte");
806 if (parse_process_char(self
, c
) < 0) {
812 if (parse_process_char(self
, 0) < 0)
814 } while (self
->state
!= START_RECORD
);
816 fields
= self
->fields
;
823 Reader_dealloc(ReaderObj
*self
)
825 PyObject_GC_UnTrack(self
);
826 Py_XDECREF(self
->dialect
);
827 Py_XDECREF(self
->input_iter
);
828 Py_XDECREF(self
->fields
);
829 if (self
->field
!= NULL
)
830 PyMem_Free(self
->field
);
831 PyObject_GC_Del(self
);
835 Reader_traverse(ReaderObj
*self
, visitproc visit
, void *arg
)
837 Py_VISIT(self
->dialect
);
838 Py_VISIT(self
->input_iter
);
839 Py_VISIT(self
->fields
);
844 Reader_clear(ReaderObj
*self
)
846 Py_CLEAR(self
->dialect
);
847 Py_CLEAR(self
->input_iter
);
848 Py_CLEAR(self
->fields
);
852 PyDoc_STRVAR(Reader_Type_doc
,
855 "Reader objects are responsible for reading and parsing tabular data\n"
859 static struct PyMethodDef Reader_methods
[] = {
862 #define R_OFF(x) offsetof(ReaderObj, x)
864 static struct PyMemberDef Reader_memberlist
[] = {
865 { "dialect", T_OBJECT
, R_OFF(dialect
), RO
},
866 { "line_num", T_ULONG
, R_OFF(line_num
), RO
},
871 static PyTypeObject Reader_Type
= {
872 PyObject_HEAD_INIT(NULL
)
874 "_csv.reader", /*tp_name*/
875 sizeof(ReaderObj
), /*tp_basicsize*/
878 (destructor
)Reader_dealloc
, /*tp_dealloc*/
879 (printfunc
)0, /*tp_print*/
880 (getattrfunc
)0, /*tp_getattr*/
881 (setattrfunc
)0, /*tp_setattr*/
882 (cmpfunc
)0, /*tp_compare*/
883 (reprfunc
)0, /*tp_repr*/
885 0, /*tp_as_sequence*/
887 (hashfunc
)0, /*tp_hash*/
888 (ternaryfunc
)0, /*tp_call*/
889 (reprfunc
)0, /*tp_str*/
893 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
894 Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
895 Reader_Type_doc
, /*tp_doc*/
896 (traverseproc
)Reader_traverse
, /*tp_traverse*/
897 (inquiry
)Reader_clear
, /*tp_clear*/
898 0, /*tp_richcompare*/
899 0, /*tp_weaklistoffset*/
900 PyObject_SelfIter
, /*tp_iter*/
901 (getiterfunc
)Reader_iternext
, /*tp_iternext*/
902 Reader_methods
, /*tp_methods*/
903 Reader_memberlist
, /*tp_members*/
909 csv_reader(PyObject
*module
, PyObject
*args
, PyObject
*keyword_args
)
911 PyObject
* iterator
, * dialect
= NULL
;
912 ReaderObj
* self
= PyObject_GC_New(ReaderObj
, &Reader_Type
);
917 self
->dialect
= NULL
;
919 self
->input_iter
= NULL
;
921 self
->field_size
= 0;
924 if (parse_reset(self
) < 0) {
929 if (!PyArg_UnpackTuple(args
, "", 1, 2, &iterator
, &dialect
)) {
933 self
->input_iter
= PyObject_GetIter(iterator
);
934 if (self
->input_iter
== NULL
) {
935 PyErr_SetString(PyExc_TypeError
,
936 "argument 1 must be an iterator");
940 self
->dialect
= (DialectObj
*)_call_dialect(dialect
, keyword_args
);
941 if (self
->dialect
== NULL
) {
946 PyObject_GC_Track(self
);
947 return (PyObject
*)self
;
953 /* ---------------------------------------------------------------- */
955 join_reset(WriterObj
*self
)
958 self
->num_fields
= 0;
961 #define MEM_INCR 32768
963 /* Calculate new record length or append field to record. Return new
967 join_append_data(WriterObj
*self
, char *field
, int quote_empty
,
968 int *quoted
, int copy_phase
)
970 DialectObj
*dialect
= self
->dialect
;
977 self->rec[rec_len] = c;\
981 lineterm
= PyString_AsString(dialect
->lineterminator
);
982 if (lineterm
== NULL
)
985 rec_len
= self
->rec_len
;
987 /* If this is not the first field we need a field separator */
988 if (self
->num_fields
> 0)
989 ADDCH(dialect
->delimiter
);
991 /* Handle preceding quote */
992 if (copy_phase
&& *quoted
)
993 ADDCH(dialect
->quotechar
);
995 /* Copy/count field data */
1003 if (c
== dialect
->delimiter
||
1004 c
== dialect
->escapechar
||
1005 c
== dialect
->quotechar
||
1006 strchr(lineterm
, c
)) {
1007 if (dialect
->quoting
== QUOTE_NONE
)
1010 if (c
== dialect
->quotechar
) {
1011 if (dialect
->doublequote
)
1012 ADDCH(dialect
->quotechar
);
1020 if (!dialect
->escapechar
) {
1021 PyErr_Format(error_obj
,
1022 "need to escape, but no escapechar set");
1025 ADDCH(dialect
->escapechar
);
1028 /* Copy field character into record buffer.
1033 /* If field is empty check if it needs to be quoted.
1035 if (i
== 0 && quote_empty
) {
1036 if (dialect
->quoting
== QUOTE_NONE
) {
1037 PyErr_Format(error_obj
,
1038 "single empty field record must be quoted");
1047 ADDCH(dialect
->quotechar
);
1056 join_check_rec_size(WriterObj
*self
, int rec_len
)
1058 if (rec_len
> self
->rec_size
) {
1059 if (self
->rec_size
== 0) {
1060 self
->rec_size
= (rec_len
/ MEM_INCR
+ 1) * MEM_INCR
;
1061 if (self
->rec
!= NULL
)
1062 PyMem_Free(self
->rec
);
1063 self
->rec
= PyMem_Malloc(self
->rec_size
);
1066 char *old_rec
= self
->rec
;
1068 self
->rec_size
= (rec_len
/ MEM_INCR
+ 1) * MEM_INCR
;
1069 self
->rec
= PyMem_Realloc(self
->rec
, self
->rec_size
);
1070 if (self
->rec
== NULL
)
1071 PyMem_Free(old_rec
);
1073 if (self
->rec
== NULL
) {
1082 join_append(WriterObj
*self
, char *field
, int *quoted
, int quote_empty
)
1086 rec_len
= join_append_data(self
, field
, quote_empty
, quoted
, 0);
1090 /* grow record buffer if necessary */
1091 if (!join_check_rec_size(self
, rec_len
))
1094 self
->rec_len
= join_append_data(self
, field
, quote_empty
, quoted
, 1);
1101 join_append_lineterminator(WriterObj
*self
)
1106 terminator_len
= PyString_Size(self
->dialect
->lineterminator
);
1107 if (terminator_len
== -1)
1110 /* grow record buffer if necessary */
1111 if (!join_check_rec_size(self
, self
->rec_len
+ terminator_len
))
1114 terminator
= PyString_AsString(self
->dialect
->lineterminator
);
1115 if (terminator
== NULL
)
1117 memmove(self
->rec
+ self
->rec_len
, terminator
, terminator_len
);
1118 self
->rec_len
+= terminator_len
;
1123 PyDoc_STRVAR(csv_writerow_doc
,
1124 "writerow(sequence)\n"
1126 "Construct and write a CSV record from a sequence of fields. Non-string\n"
1127 "elements will be converted to string.");
1130 csv_writerow(WriterObj
*self
, PyObject
*seq
)
1132 DialectObj
*dialect
= self
->dialect
;
1135 if (!PySequence_Check(seq
))
1136 return PyErr_Format(error_obj
, "sequence expected");
1138 len
= PySequence_Length(seq
);
1142 /* Join all fields in internal buffer.
1145 for (i
= 0; i
< len
; i
++) {
1150 field
= PySequence_GetItem(seq
, i
);
1154 switch (dialect
->quoting
) {
1155 case QUOTE_NONNUMERIC
:
1156 quoted
= !PyNumber_Check(field
);
1166 if (PyString_Check(field
)) {
1167 append_ok
= join_append(self
,
1168 PyString_AS_STRING(field
),
1172 else if (field
== Py_None
) {
1173 append_ok
= join_append(self
, "", "ed
, len
== 1);
1179 str
= PyObject_Str(field
);
1184 append_ok
= join_append(self
, PyString_AS_STRING(str
),
1192 /* Add line terminator.
1194 if (!join_append_lineterminator(self
))
1197 return PyObject_CallFunction(self
->writeline
,
1198 "(s#)", self
->rec
, self
->rec_len
);
1201 PyDoc_STRVAR(csv_writerows_doc
,
1202 "writerows(sequence of sequences)\n"
1204 "Construct and write a series of sequences to a csv file. Non-string\n"
1205 "elements will be converted to string.");
1208 csv_writerows(WriterObj
*self
, PyObject
*seqseq
)
1210 PyObject
*row_iter
, *row_obj
, *result
;
1212 row_iter
= PyObject_GetIter(seqseq
);
1213 if (row_iter
== NULL
) {
1214 PyErr_SetString(PyExc_TypeError
,
1215 "writerows() argument must be iterable");
1218 while ((row_obj
= PyIter_Next(row_iter
))) {
1219 result
= csv_writerow(self
, row_obj
);
1222 Py_DECREF(row_iter
);
1228 Py_DECREF(row_iter
);
1229 if (PyErr_Occurred())
1235 static struct PyMethodDef Writer_methods
[] = {
1236 { "writerow", (PyCFunction
)csv_writerow
, METH_O
, csv_writerow_doc
},
1237 { "writerows", (PyCFunction
)csv_writerows
, METH_O
, csv_writerows_doc
},
1241 #define W_OFF(x) offsetof(WriterObj, x)
1243 static struct PyMemberDef Writer_memberlist
[] = {
1244 { "dialect", T_OBJECT
, W_OFF(dialect
), RO
},
1249 Writer_dealloc(WriterObj
*self
)
1251 PyObject_GC_UnTrack(self
);
1252 Py_XDECREF(self
->dialect
);
1253 Py_XDECREF(self
->writeline
);
1254 if (self
->rec
!= NULL
)
1255 PyMem_Free(self
->rec
);
1256 PyObject_GC_Del(self
);
1260 Writer_traverse(WriterObj
*self
, visitproc visit
, void *arg
)
1262 Py_VISIT(self
->dialect
);
1263 Py_VISIT(self
->writeline
);
1268 Writer_clear(WriterObj
*self
)
1270 Py_CLEAR(self
->dialect
);
1271 Py_CLEAR(self
->writeline
);
1275 PyDoc_STRVAR(Writer_Type_doc
,
1278 "Writer objects are responsible for generating tabular data\n"
1279 "in CSV format from sequence input.\n"
1282 static PyTypeObject Writer_Type
= {
1283 PyObject_HEAD_INIT(NULL
)
1285 "_csv.writer", /*tp_name*/
1286 sizeof(WriterObj
), /*tp_basicsize*/
1289 (destructor
)Writer_dealloc
, /*tp_dealloc*/
1290 (printfunc
)0, /*tp_print*/
1291 (getattrfunc
)0, /*tp_getattr*/
1292 (setattrfunc
)0, /*tp_setattr*/
1293 (cmpfunc
)0, /*tp_compare*/
1294 (reprfunc
)0, /*tp_repr*/
1296 0, /*tp_as_sequence*/
1297 0, /*tp_as_mapping*/
1298 (hashfunc
)0, /*tp_hash*/
1299 (ternaryfunc
)0, /*tp_call*/
1300 (reprfunc
)0, /*tp_str*/
1304 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
1305 Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
1307 (traverseproc
)Writer_traverse
, /*tp_traverse*/
1308 (inquiry
)Writer_clear
, /*tp_clear*/
1309 0, /*tp_richcompare*/
1310 0, /*tp_weaklistoffset*/
1311 (getiterfunc
)0, /*tp_iter*/
1312 (getiterfunc
)0, /*tp_iternext*/
1313 Writer_methods
, /*tp_methods*/
1314 Writer_memberlist
, /*tp_members*/
1319 csv_writer(PyObject
*module
, PyObject
*args
, PyObject
*keyword_args
)
1321 PyObject
* output_file
, * dialect
= NULL
;
1322 WriterObj
* self
= PyObject_GC_New(WriterObj
, &Writer_Type
);
1327 self
->dialect
= NULL
;
1328 self
->writeline
= NULL
;
1333 self
->num_fields
= 0;
1335 if (!PyArg_UnpackTuple(args
, "", 1, 2, &output_file
, &dialect
)) {
1339 self
->writeline
= PyObject_GetAttrString(output_file
, "write");
1340 if (self
->writeline
== NULL
|| !PyCallable_Check(self
->writeline
)) {
1341 PyErr_SetString(PyExc_TypeError
,
1342 "argument 1 must have a \"write\" method");
1346 self
->dialect
= (DialectObj
*)_call_dialect(dialect
, keyword_args
);
1347 if (self
->dialect
== NULL
) {
1351 PyObject_GC_Track(self
);
1352 return (PyObject
*)self
;
1359 csv_list_dialects(PyObject
*module
, PyObject
*args
)
1361 return PyDict_Keys(dialects
);
1365 csv_register_dialect(PyObject
*module
, PyObject
*args
, PyObject
*kwargs
)
1367 PyObject
*name_obj
, *dialect_obj
= NULL
;
1370 if (!PyArg_UnpackTuple(args
, "", 1, 2, &name_obj
, &dialect_obj
))
1372 if (!IS_BASESTRING(name_obj
)) {
1373 PyErr_SetString(PyExc_TypeError
,
1374 "dialect name must be a string or unicode");
1377 dialect
= _call_dialect(dialect_obj
, kwargs
);
1378 if (dialect
== NULL
)
1380 if (PyDict_SetItem(dialects
, name_obj
, dialect
) < 0) {
1390 csv_unregister_dialect(PyObject
*module
, PyObject
*name_obj
)
1392 if (PyDict_DelItem(dialects
, name_obj
) < 0)
1393 return PyErr_Format(error_obj
, "unknown dialect");
1399 csv_get_dialect(PyObject
*module
, PyObject
*name_obj
)
1401 return get_dialect_from_registry(name_obj
);
1405 csv_field_size_limit(PyObject
*module
, PyObject
*args
)
1407 PyObject
*new_limit
= NULL
;
1408 long old_limit
= field_limit
;
1410 if (!PyArg_UnpackTuple(args
, "field_size_limit", 0, 1, &new_limit
))
1412 if (new_limit
!= NULL
) {
1413 if (!PyInt_Check(new_limit
)) {
1414 PyErr_Format(PyExc_TypeError
,
1415 "limit must be an integer");
1418 field_limit
= PyInt_AsLong(new_limit
);
1420 return PyInt_FromLong(old_limit
);
1427 PyDoc_STRVAR(csv_module_doc
,
1428 "CSV parsing and writing.\n"
1430 "This module provides classes that assist in the reading and writing\n"
1431 "of Comma Separated Value (CSV) files, and implements the interface\n"
1432 "described by PEP 305. Although many CSV files are simple to parse,\n"
1433 "the format is not formally defined by a stable specification and\n"
1434 "is subtle enough that parsing lines of a CSV file with something\n"
1435 "like line.split(\",\") is bound to fail. The module supports three\n"
1436 "basic APIs: reading, writing, and registration of dialects.\n"
1439 "DIALECT REGISTRATION:\n"
1441 "Readers and writers support a dialect argument, which is a convenient\n"
1442 "handle on a group of settings. When the dialect argument is a string,\n"
1443 "it identifies one of the dialects previously registered with the module.\n"
1444 "If it is a class or instance, the attributes of the argument are used as\n"
1445 "the settings for the reader or writer:\n"
1448 " delimiter = ','\n"
1449 " quotechar = '\"'\n"
1450 " escapechar = None\n"
1451 " doublequote = True\n"
1452 " skipinitialspace = False\n"
1453 " lineterminator = '\\r\\n'\n"
1454 " quoting = QUOTE_MINIMAL\n"
1458 " * quotechar - specifies a one-character string to use as the \n"
1459 " quoting character. It defaults to '\"'.\n"
1460 " * delimiter - specifies a one-character string to use as the \n"
1461 " field separator. It defaults to ','.\n"
1462 " * skipinitialspace - specifies how to interpret whitespace which\n"
1463 " immediately follows a delimiter. It defaults to False, which\n"
1464 " means that whitespace immediately following a delimiter is part\n"
1465 " of the following field.\n"
1466 " * lineterminator - specifies the character sequence which should \n"
1467 " terminate rows.\n"
1468 " * quoting - controls when quotes should be generated by the writer.\n"
1469 " It can take on any of the following module constants:\n"
1471 " csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1472 " field contains either the quotechar or the delimiter\n"
1473 " csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1474 " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1475 " fields which do not parse as integers or floating point\n"
1477 " csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1478 " * escapechar - specifies a one-character string used to escape \n"
1479 " the delimiter when quoting is set to QUOTE_NONE.\n"
1480 " * doublequote - controls the handling of quotes inside fields. When\n"
1481 " True, two consecutive quotes are interpreted as one during read,\n"
1482 " and when writing, each quote character embedded in the data is\n"
1483 " written as two quotes\n");
1485 PyDoc_STRVAR(csv_reader_doc
,
1486 " csv_reader = reader(iterable [, dialect='excel']\n"
1487 " [optional keyword args])\n"
1488 " for row in csv_reader:\n"
1491 "The \"iterable\" argument can be any object that returns a line\n"
1492 "of input for each iteration, such as a file object or a list. The\n"
1493 "optional \"dialect\" parameter is discussed below. The function\n"
1494 "also accepts optional keyword arguments which override settings\n"
1495 "provided by the dialect.\n"
1497 "The returned object is an iterator. Each iteration returns a row\n"
1498 "of the CSV file (which can span multiple input lines):\n");
1500 PyDoc_STRVAR(csv_writer_doc
,
1501 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1502 " [optional keyword args])\n"
1503 " for row in sequence:\n"
1504 " csv_writer.writerow(row)\n"
1508 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1509 " [optional keyword args])\n"
1510 " csv_writer.writerows(rows)\n"
1512 "The \"fileobj\" argument can be any object that supports the file API.\n");
1514 PyDoc_STRVAR(csv_list_dialects_doc
,
1515 "Return a list of all know dialect names.\n"
1516 " names = csv.list_dialects()");
1518 PyDoc_STRVAR(csv_get_dialect_doc
,
1519 "Return the dialect instance associated with name.\n"
1520 " dialect = csv.get_dialect(name)");
1522 PyDoc_STRVAR(csv_register_dialect_doc
,
1523 "Create a mapping from a string name to a dialect class.\n"
1524 " dialect = csv.register_dialect(name, dialect)");
1526 PyDoc_STRVAR(csv_unregister_dialect_doc
,
1527 "Delete the name/dialect mapping associated with a string name.\n"
1528 " csv.unregister_dialect(name)");
1530 PyDoc_STRVAR(csv_field_size_limit_doc
,
1531 "Sets an upper limit on parsed fields.\n"
1532 " csv.field_size_limit([limit])\n"
1534 "Returns old limit. If limit is not given, no new limit is set and\n"
1535 "the old limit is returned");
1537 static struct PyMethodDef csv_methods
[] = {
1538 { "reader", (PyCFunction
)csv_reader
,
1539 METH_VARARGS
| METH_KEYWORDS
, csv_reader_doc
},
1540 { "writer", (PyCFunction
)csv_writer
,
1541 METH_VARARGS
| METH_KEYWORDS
, csv_writer_doc
},
1542 { "list_dialects", (PyCFunction
)csv_list_dialects
,
1543 METH_NOARGS
, csv_list_dialects_doc
},
1544 { "register_dialect", (PyCFunction
)csv_register_dialect
,
1545 METH_VARARGS
| METH_KEYWORDS
, csv_register_dialect_doc
},
1546 { "unregister_dialect", (PyCFunction
)csv_unregister_dialect
,
1547 METH_O
, csv_unregister_dialect_doc
},
1548 { "get_dialect", (PyCFunction
)csv_get_dialect
,
1549 METH_O
, csv_get_dialect_doc
},
1550 { "field_size_limit", (PyCFunction
)csv_field_size_limit
,
1551 METH_VARARGS
, csv_field_size_limit_doc
},
1561 if (PyType_Ready(&Dialect_Type
) < 0)
1564 if (PyType_Ready(&Reader_Type
) < 0)
1567 if (PyType_Ready(&Writer_Type
) < 0)
1570 /* Create the module and add the functions */
1571 module
= Py_InitModule3("_csv", csv_methods
, csv_module_doc
);
1575 /* Add version to the module. */
1576 if (PyModule_AddStringConstant(module
, "__version__",
1577 MODULE_VERSION
) == -1)
1580 /* Add _dialects dictionary */
1581 dialects
= PyDict_New();
1582 if (dialects
== NULL
)
1584 if (PyModule_AddObject(module
, "_dialects", dialects
))
1587 /* Add quote styles into dictionary */
1588 for (style
= quote_styles
; style
->name
; style
++) {
1589 if (PyModule_AddIntConstant(module
, style
->name
,
1590 style
->style
) == -1)
1594 /* Add the Dialect type */
1595 Py_INCREF(&Dialect_Type
);
1596 if (PyModule_AddObject(module
, "Dialect", (PyObject
*)&Dialect_Type
))
1599 /* Add the CSV exception object to the module. */
1600 error_obj
= PyErr_NewException("_csv.Error", NULL
, NULL
);
1601 if (error_obj
== NULL
)
1603 PyModule_AddObject(module
, "Error", error_obj
);