Modules/_csv.c

   1 /* csv module */
   2
   3 /*
   4
   5 This module provides the low-level underpinnings of a CSV reading/writing
   6 module.  Users should not use this module directly, but import the csv.py
   7 module instead.
   8
   9 **** For people modifying this code, please note that as of this writing
  10 **** (2003-03-23), it is intended that this code should work with Python
  11 **** 2.2.
  12
  13 */
  14
  15 #define MODULE_VERSION "1.0"
  16
  17 #include "Python.h"
  18 #include "structmember.h"
  19
  20
  21 /* begin 2.2 compatibility macros */
  22 #ifndef PyDoc_STRVAR
  23 /* Define macros for inline documentation. */
  24 #define PyDoc_VAR(name) static char name[]
  25 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
  26 #ifdef WITH_DOC_STRINGS
  27 #define PyDoc_STR(str) str
  28 #else
  29 #define PyDoc_STR(str) ""
  30 #endif
  31 #endif /* ifndef PyDoc_STRVAR */
  32
  33 #ifndef PyMODINIT_FUNC
  34 #       if defined(__cplusplus)
  35 #               define PyMODINIT_FUNC extern "C" void
  36 #       else /* __cplusplus */
  37 #               define PyMODINIT_FUNC void
  38 #       endif /* __cplusplus */
  39 #endif
  40
  41 #ifndef Py_CLEAR
  42 #define Py_CLEAR(op)                                            \
  43         do {                                                    \
  44                 if (op) {                                       \
  45                         PyObject *tmp = (PyObject *)(op);       \
  46                         (op) = NULL;                            \
  47                         Py_DECREF(tmp);                         \
  48                 }                                               \
  49         } while (0)
  50 #endif
  51 #ifndef Py_VISIT
  52 #define Py_VISIT(op)                                                    \
  53         do {                                                            \
  54                 if (op) {                                               \
  55                         int vret = visit((PyObject *)(op), arg);        \
  56                         if (vret)                                       \
  57                                 return vret;                            \
  58                 }                                                       \
  59         } while (0)
  60 #endif
  61
  62 /* end 2.2 compatibility macros */
  63
  64 #define IS_BASESTRING(o) \
  65         PyObject_TypeCheck(o, &PyBaseString_Type)
  66
  67 static PyObject *error_obj;     /* CSV exception */
  68 static PyObject *dialects;      /* Dialect registry */
  69 static long field_limit = 128 * 1024;   /* max parsed field size */
  70
  71 typedef enum {
  72         START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
  73         IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
  74         EAT_CRNL
  75 } ParserState;
  76
  77 typedef enum {
  78         QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
  79 } QuoteStyle;
  80
  81 typedef struct {
  82         QuoteStyle style;
  83         char *name;
  84 } StyleDesc;
  85
  86 static StyleDesc quote_styles[] = {
  87         { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
  88         { QUOTE_ALL,        "QUOTE_ALL" },
  89         { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
  90         { QUOTE_NONE,       "QUOTE_NONE" },
  91         { 0 }
  92 };
  93
  94 typedef struct {
  95         PyObject_HEAD
  96
  97         int doublequote;        /* is " represented by ""? */
  98         char delimiter;         /* field separator */
  99         char quotechar;         /* quote character */
 100         char escapechar;        /* escape character */
 101         int skipinitialspace;   /* ignore spaces following delimiter? */
 102         PyObject *lineterminator; /* string to write between records */
 103         int quoting;            /* style of quoting to write */
 104
 105         int strict;             /* raise exception on bad CSV */
 106 } DialectObj;
 107
 108 staticforward PyTypeObject Dialect_Type;
 109
 110 typedef struct {
 111         PyObject_HEAD
 112
 113         PyObject *input_iter;   /* iterate over this for input lines */
 114
 115         DialectObj *dialect;    /* parsing dialect */
 116
 117         PyObject *fields;       /* field list for current record */
 118         ParserState state;      /* current CSV parse state */
 119         char *field;            /* build current field in here */
 120         int field_size;         /* size of allocated buffer */
 121         int field_len;          /* length of current field */
 122         int numeric_field;      /* treat field as numeric */
 123         unsigned long line_num; /* Source-file line number */
 124 } ReaderObj;
 125
 126 staticforward PyTypeObject Reader_Type;
 127
 128 #define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
 129
 130 typedef struct {
 131         PyObject_HEAD
 132
 133         PyObject *writeline;    /* write output lines to this file */
 134
 135         DialectObj *dialect;    /* parsing dialect */
 136
 137         char *rec;              /* buffer for parser.join */
 138         int rec_size;           /* size of allocated record */
 139         int rec_len;            /* length of record */
 140         int num_fields;         /* number of fields in record */
 141 } WriterObj;
 142
 143 staticforward PyTypeObject Writer_Type;
 144
 145 /*
 146  * DIALECT class
 147  */
 148
 149 static PyObject *
 150 get_dialect_from_registry(PyObject * name_obj)
 151 {
 152         PyObject *dialect_obj;
 153
 154         dialect_obj = PyDict_GetItem(dialects, name_obj);
 155         if (dialect_obj == NULL) {
 156                 if (!PyErr_Occurred())
 157                         PyErr_Format(error_obj, "unknown dialect");
 158         }
 159         else
 160                 Py_INCREF(dialect_obj);
 161         return dialect_obj;
 162 }
 163
 164 static PyObject *
 165 get_string(PyObject *str)
 166 {
 167         Py_XINCREF(str);
 168         return str;
 169 }
 170
 171 static PyObject *
 172 get_nullchar_as_None(char c)
 173 {
 174         if (c == '\0') {
 175                 Py_INCREF(Py_None);
 176                 return Py_None;
 177         }
 178         else
 179                 return PyString_FromStringAndSize((char*)&c, 1);
 180 }
 181
 182 static PyObject *
 183 Dialect_get_lineterminator(DialectObj *self)
 184 {
 185         return get_string(self->lineterminator);
 186 }
 187
 188 static PyObject *
 189 Dialect_get_escapechar(DialectObj *self)
 190 {
 191         return get_nullchar_as_None(self->escapechar);
 192 }
 193
 194 static PyObject *
 195 Dialect_get_quotechar(DialectObj *self)
 196 {
 197         return get_nullchar_as_None(self->quotechar);
 198 }
 199
 200 static PyObject *
 201 Dialect_get_quoting(DialectObj *self)
 202 {
 203         return PyInt_FromLong(self->quoting);
 204 }
 205
 206 static int
 207 _set_bool(const char *name, int *target, PyObject *src, int dflt)
 208 {
 209         if (src == NULL)
 210                 *target = dflt;
 211         else
 212                 *target = PyObject_IsTrue(src);
 213         return 0;
 214 }
 215
 216 static int
 217 _set_int(const char *name, int *target, PyObject *src, int dflt)
 218 {
 219         if (src == NULL)
 220                 *target = dflt;
 221         else {
 222                 if (!PyInt_Check(src)) {
 223                         PyErr_Format(PyExc_TypeError,
 224                                      "\"%s\" must be an integer", name);
 225                         return -1;
 226                 }
 227                 *target = PyInt_AsLong(src);
 228         }
 229         return 0;
 230 }
 231
 232 static int
 233 _set_char(const char *name, char *target, PyObject *src, char dflt)
 234 {
 235         if (src == NULL)
 236                 *target = dflt;
 237         else {
 238                 if (src == Py_None || PyString_Size(src) == 0)
 239                         *target = '\0';
 240                 else if (!PyString_Check(src) || PyString_Size(src) != 1) {
 241                         PyErr_Format(PyExc_TypeError,
 242                                      "\"%s\" must be an 1-character string",
 243                                      name);
 244                         return -1;
 245                 }
 246                 else {
 247                         char *s = PyString_AsString(src);
 248                         if (s == NULL)
 249                                 return -1;
 250                         *target = s[0];
 251                 }
 252         }
 253         return 0;
 254 }
 255
 256 static int
 257 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
 258 {
 259         if (src == NULL)
 260                 *target = PyString_FromString(dflt);
 261         else {
 262                 if (src == Py_None)
 263                         *target = NULL;
 264                 else if (!IS_BASESTRING(src)) {
 265                         PyErr_Format(PyExc_TypeError,
 266                                      "\"%s\" must be an string", name);
 267                         return -1;
 268                 }
 269                 else {
 270                         Py_XDECREF(*target);
 271                         Py_INCREF(src);
 272                         *target = src;
 273                 }
 274         }
 275         return 0;
 276 }
 277
 278 static int
 279 dialect_check_quoting(int quoting)
 280 {
 281         StyleDesc *qs = quote_styles;
 282
 283         for (qs = quote_styles; qs->name; qs++) {
 284                 if (qs->style == quoting)
 285                         return 0;
 286         }
 287         PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
 288         return -1;
 289 }
 290
 291 #define D_OFF(x) offsetof(DialectObj, x)
 292
 293 static struct PyMemberDef Dialect_memberlist[] = {
 294         { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY },
 295         { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
 296         { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
 297         { "strict",             T_INT, D_OFF(strict), READONLY },
 298         { NULL }
 299 };
 300
 301 static PyGetSetDef Dialect_getsetlist[] = {
 302         { "escapechar",         (getter)Dialect_get_escapechar},
 303         { "lineterminator",     (getter)Dialect_get_lineterminator},
 304         { "quotechar",          (getter)Dialect_get_quotechar},
 305         { "quoting",            (getter)Dialect_get_quoting},
 306         {NULL},
 307 };
 308
 309 static void
 310 Dialect_dealloc(DialectObj *self)
 311 {
 312         Py_XDECREF(self->lineterminator);
 313         Py_TYPE(self)->tp_free((PyObject *)self);
 314 }
 315
 316 static char *dialect_kws[] = {
 317         "dialect",
 318         "delimiter",
 319         "doublequote",
 320         "escapechar",
 321         "lineterminator",
 322         "quotechar",
 323         "quoting",
 324         "skipinitialspace",
 325         "strict",
 326         NULL
 327 };
 328
 329 static PyObject *
 330 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
 331 {
 332         DialectObj *self;
 333         PyObject *ret = NULL;
 334         PyObject *dialect = NULL;
 335         PyObject *delimiter = NULL;
 336         PyObject *doublequote = NULL;
 337         PyObject *escapechar = NULL;
 338         PyObject *lineterminator = NULL;
 339         PyObject *quotechar = NULL;
 340         PyObject *quoting = NULL;
 341         PyObject *skipinitialspace = NULL;
 342         PyObject *strict = NULL;
 343
 344         if (!PyArg_ParseTupleAndKeywords(args, kwargs,
 345                                          "|OOOOOOOOO", dialect_kws,
 346                                          &dialect,
 347                                          &delimiter,
 348                                          &doublequote,
 349                                          &escapechar,
 350                                          &lineterminator,
 351                                          &quotechar,
 352                                          &quoting,
 353                                          &skipinitialspace,
 354                                          &strict))
 355                 return NULL;
 356
 357         if (dialect != NULL) {
 358                 if (IS_BASESTRING(dialect)) {
 359                         dialect = get_dialect_from_registry(dialect);
 360                         if (dialect == NULL)
 361                                 return NULL;
 362                 }
 363                 else
 364                         Py_INCREF(dialect);
 365                 /* Can we reuse this instance? */
 366                 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
 367                     delimiter == 0 &&
 368                     doublequote == 0 &&
 369                     escapechar == 0 &&
 370                     lineterminator == 0 &&
 371                     quotechar == 0 &&
 372                     quoting == 0 &&
 373                     skipinitialspace == 0 &&
 374                     strict == 0)
 375                         return dialect;
 376         }
 377
 378         self = (DialectObj *)type->tp_alloc(type, 0);
 379         if (self == NULL) {
 380                 Py_XDECREF(dialect);
 381                 return NULL;
 382         }
 383         self->lineterminator = NULL;
 384
 385         Py_XINCREF(delimiter);
 386         Py_XINCREF(doublequote);
 387         Py_XINCREF(escapechar);
 388         Py_XINCREF(lineterminator);
 389         Py_XINCREF(quotechar);
 390         Py_XINCREF(quoting);
 391         Py_XINCREF(skipinitialspace);
 392         Py_XINCREF(strict);
 393         if (dialect != NULL) {
 394 #define DIALECT_GETATTR(v, n) \
 395                 if (v == NULL) \
 396                         v = PyObject_GetAttrString(dialect, n)
 397                 DIALECT_GETATTR(delimiter, "delimiter");
 398                 DIALECT_GETATTR(doublequote, "doublequote");
 399                 DIALECT_GETATTR(escapechar, "escapechar");
 400                 DIALECT_GETATTR(lineterminator, "lineterminator");
 401                 DIALECT_GETATTR(quotechar, "quotechar");
 402                 DIALECT_GETATTR(quoting, "quoting");
 403                 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
 404                 DIALECT_GETATTR(strict, "strict");
 405                 PyErr_Clear();
 406         }
 407
 408         /* check types and convert to C values */
 409 #define DIASET(meth, name, target, src, dflt) \
 410         if (meth(name, target, src, dflt)) \
 411                 goto err
 412         DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
 413         DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
 414         DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
 415         DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
 416         DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
 417         DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
 418         DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
 419         DIASET(_set_bool, "strict", &self->strict, strict, 0);
 420
 421         /* validate options */
 422         if (dialect_check_quoting(self->quoting))
 423                 goto err;
 424         if (self->delimiter == 0) {
 425                 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
 426                 goto err;
 427         }
 428         if (quotechar == Py_None && quoting == NULL)
 429                 self->quoting = QUOTE_NONE;
 430         if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
 431                 PyErr_SetString(PyExc_TypeError,
 432                                 "quotechar must be set if quoting enabled");
 433                 goto err;
 434         }
 435         if (self->lineterminator == 0) {
 436                 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
 437                 goto err;
 438         }
 439
 440         ret = (PyObject *)self;
 441         Py_INCREF(self);
 442 err:
 443         Py_XDECREF(self);
 444         Py_XDECREF(dialect);
 445         Py_XDECREF(delimiter);
 446         Py_XDECREF(doublequote);
 447         Py_XDECREF(escapechar);
 448         Py_XDECREF(lineterminator);
 449         Py_XDECREF(quotechar);
 450         Py_XDECREF(quoting);
 451         Py_XDECREF(skipinitialspace);
 452         Py_XDECREF(strict);
 453         return ret;
 454 }
 455
 456
 457 PyDoc_STRVAR(Dialect_Type_doc,
 458 "CSV dialect\n"
 459 "\n"
 460 "The Dialect type records CSV parsing and generation options.\n");
 461
 462 static PyTypeObject Dialect_Type = {
 463         PyVarObject_HEAD_INIT(NULL, 0)
 464         "_csv.Dialect",                         /* tp_name */
 465         sizeof(DialectObj),                     /* tp_basicsize */
 466         0,                                      /* tp_itemsize */
 467         /*  methods  */
 468         (destructor)Dialect_dealloc,            /* tp_dealloc */
 469         (printfunc)0,                           /* tp_print */
 470         (getattrfunc)0,                         /* tp_getattr */
 471         (setattrfunc)0,                         /* tp_setattr */
 472         (cmpfunc)0,                             /* tp_compare */
 473         (reprfunc)0,                            /* tp_repr */
 474         0,                                      /* tp_as_number */
 475         0,                                      /* tp_as_sequence */
 476         0,                                      /* tp_as_mapping */
 477         (hashfunc)0,                            /* tp_hash */
 478         (ternaryfunc)0,                         /* tp_call */
 479         (reprfunc)0,                            /* tp_str */
 480         0,                                      /* tp_getattro */
 481         0,                                      /* tp_setattro */
 482         0,                                      /* tp_as_buffer */
 483         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
 484         Dialect_Type_doc,                       /* tp_doc */
 485         0,                                      /* tp_traverse */
 486         0,                                      /* tp_clear */
 487         0,                                      /* tp_richcompare */
 488         0,                                      /* tp_weaklistoffset */
 489         0,                                      /* tp_iter */
 490         0,                                      /* tp_iternext */
 491         0,                                      /* tp_methods */
 492         Dialect_memberlist,                     /* tp_members */
 493         Dialect_getsetlist,                     /* tp_getset */
 494         0,                                      /* tp_base */
 495         0,                                      /* tp_dict */
 496         0,                                      /* tp_descr_get */
 497         0,                                      /* tp_descr_set */
 498         0,                                      /* tp_dictoffset */
 499         0,                                      /* tp_init */
 500         0,                                      /* tp_alloc */
 501         dialect_new,                            /* tp_new */
 502         0,                                      /* tp_free */
 503 };
 504
 505 /*
 506  * Return an instance of the dialect type, given a Python instance or kwarg
 507  * description of the dialect
 508  */
 509 static PyObject *
 510 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
 511 {
 512         PyObject *ctor_args;
 513         PyObject *dialect;
 514
 515         ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
 516         if (ctor_args == NULL)
 517                 return NULL;
 518         dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
 519         Py_DECREF(ctor_args);
 520         return dialect;
 521 }
 522
 523 /*
 524  * READER
 525  */
 526 static int
 527 parse_save_field(ReaderObj *self)
 528 {
 529         PyObject *field;
 530
 531         field = PyString_FromStringAndSize(self->field, self->field_len);
 532         if (field == NULL)
 533                 return -1;
 534         self->field_len = 0;
 535         if (self->numeric_field) {
 536                 PyObject *tmp;
 537
 538                 self->numeric_field = 0;
 539                 tmp = PyNumber_Float(field);
 540                 if (tmp == NULL) {
 541                         Py_DECREF(field);
 542                         return -1;
 543                 }
 544                 Py_DECREF(field);
 545                 field = tmp;
 546         }
 547         PyList_Append(self->fields, field);
 548         Py_DECREF(field);
 549         return 0;
 550 }
 551
 552 static int
 553 parse_grow_buff(ReaderObj *self)
 554 {
 555         if (self->field_size == 0) {
 556                 self->field_size = 4096;
 557                 if (self->field != NULL)
 558                         PyMem_Free(self->field);
 559                 self->field = PyMem_Malloc(self->field_size);
 560         }
 561         else {
 562                 if (self->field_size > INT_MAX / 2) {
 563                         PyErr_NoMemory();
 564                         return 0;
 565                 }
 566                 self->field_size *= 2;
 567                 self->field = PyMem_Realloc(self->field, self->field_size);
 568         }
 569         if (self->field == NULL) {
 570                 PyErr_NoMemory();
 571                 return 0;
 572         }
 573         return 1;
 574 }
 575
 576 static int
 577 parse_add_char(ReaderObj *self, char c)
 578 {
 579         if (self->field_len >= field_limit) {
 580                 PyErr_Format(error_obj, "field larger than field limit (%ld)",
 581                              field_limit);
 582                 return -1;
 583         }
 584         if (self->field_len == self->field_size && !parse_grow_buff(self))
 585                 return -1;
 586         self->field[self->field_len++] = c;
 587         return 0;
 588 }
 589
 590 static int
 591 parse_process_char(ReaderObj *self, char c)
 592 {
 593         DialectObj *dialect = self->dialect;
 594
 595         switch (self->state) {
 596         case START_RECORD:
 597                 /* start of record */
 598                 if (c == '\0')
 599                         /* empty line - return [] */
 600                         break;
 601                 else if (c == '\n' || c == '\r') {
 602                         self->state = EAT_CRNL;
 603                         break;
 604                 }
 605                 /* normal character - handle as START_FIELD */
 606                 self->state = START_FIELD;
 607                 /* fallthru */
 608         case START_FIELD:
 609                 /* expecting field */
 610                 if (c == '\n' || c == '\r' || c == '\0') {
 611                         /* save empty field - return [fields] */
 612                         if (parse_save_field(self) < 0)
 613                                 return -1;
 614                         self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
 615                 }
 616                 else if (c == dialect->quotechar &&
 617                          dialect->quoting != QUOTE_NONE) {
 618                         /* start quoted field */
 619                         self->state = IN_QUOTED_FIELD;
 620                 }
 621                 else if (c == dialect->escapechar) {
 622                         /* possible escaped character */
 623                         self->state = ESCAPED_CHAR;
 624                 }
 625                 else if (c == ' ' && dialect->skipinitialspace)
 626                         /* ignore space at start of field */
 627                         ;
 628                 else if (c == dialect->delimiter) {
 629                         /* save empty field */
 630                         if (parse_save_field(self) < 0)
 631                                 return -1;
 632                 }
 633                 else {
 634                         /* begin new unquoted field */
 635                         if (dialect->quoting == QUOTE_NONNUMERIC)
 636                                 self->numeric_field = 1;
 637                         if (parse_add_char(self, c) < 0)
 638                                 return -1;
 639                         self->state = IN_FIELD;
 640                 }
 641                 break;
 642
 643         case ESCAPED_CHAR:
 644                 if (c == '\0')
 645                         c = '\n';
 646                 if (parse_add_char(self, c) < 0)
 647                         return -1;
 648                 self->state = IN_FIELD;
 649                 break;
 650
 651         case IN_FIELD:
 652                 /* in unquoted field */
 653                 if (c == '\n' || c == '\r' || c == '\0') {
 654                         /* end of line - return [fields] */
 655                         if (parse_save_field(self) < 0)
 656                                 return -1;
 657                         self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
 658                 }
 659                 else if (c == dialect->escapechar) {
 660                         /* possible escaped character */
 661                         self->state = ESCAPED_CHAR;
 662                 }
 663                 else if (c == dialect->delimiter) {
 664                         /* save field - wait for new field */
 665                         if (parse_save_field(self) < 0)
 666                                 return -1;
 667                         self->state = START_FIELD;
 668                 }
 669                 else {
 670                         /* normal character - save in field */
 671                         if (parse_add_char(self, c) < 0)
 672                                 return -1;
 673                 }
 674                 break;
 675
 676         case IN_QUOTED_FIELD:
 677                 /* in quoted field */
 678                 if (c == '\0')
 679                         ;
 680                 else if (c == dialect->escapechar) {
 681                         /* Possible escape character */
 682                         self->state = ESCAPE_IN_QUOTED_FIELD;
 683                 }
 684                 else if (c == dialect->quotechar &&
 685                          dialect->quoting != QUOTE_NONE) {
 686                         if (dialect->doublequote) {
 687                                 /* doublequote; " represented by "" */
 688                                 self->state = QUOTE_IN_QUOTED_FIELD;
 689                         }
 690                         else {
 691                                 /* end of quote part of field */
 692                                 self->state = IN_FIELD;
 693                         }
 694                 }
 695                 else {
 696                         /* normal character - save in field */
 697                         if (parse_add_char(self, c) < 0)
 698                                 return -1;
 699                 }
 700                 break;
 701
 702         case ESCAPE_IN_QUOTED_FIELD:
 703                 if (c == '\0')
 704                         c = '\n';
 705                 if (parse_add_char(self, c) < 0)
 706                         return -1;
 707                 self->state = IN_QUOTED_FIELD;
 708                 break;
 709
 710         case QUOTE_IN_QUOTED_FIELD:
 711                 /* doublequote - seen a quote in an quoted field */
 712                 if (dialect->quoting != QUOTE_NONE &&
 713                     c == dialect->quotechar) {
 714                         /* save "" as " */
 715                         if (parse_add_char(self, c) < 0)
 716                                 return -1;
 717                         self->state = IN_QUOTED_FIELD;
 718                 }
 719                 else if (c == dialect->delimiter) {
 720                         /* save field - wait for new field */
 721                         if (parse_save_field(self) < 0)
 722                                 return -1;
 723                         self->state = START_FIELD;
 724                 }
 725                 else if (c == '\n' || c == '\r' || c == '\0') {
 726                         /* end of line - return [fields] */
 727                         if (parse_save_field(self) < 0)
 728                                 return -1;
 729                         self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
 730                 }
 731                 else if (!dialect->strict) {
 732                         if (parse_add_char(self, c) < 0)
 733                                 return -1;
 734                         self->state = IN_FIELD;
 735                 }
 736                 else {
 737                         /* illegal */
 738                         PyErr_Format(error_obj, "'%c' expected after '%c'",
 739                                         dialect->delimiter,
 740                                         dialect->quotechar);
 741                         return -1;
 742                 }
 743                 break;
 744
 745         case EAT_CRNL:
 746                 if (c == '\n' || c == '\r')
 747                         ;
 748                 else if (c == '\0')
 749                         self->state = START_RECORD;
 750                 else {
 751                         PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
 752                         return -1;
 753                 }
 754                 break;
 755
 756         }
 757         return 0;
 758 }
 759
 760 static int
 761 parse_reset(ReaderObj *self)
 762 {
 763         Py_XDECREF(self->fields);
 764         self->fields = PyList_New(0);
 765         if (self->fields == NULL)
 766                 return -1;
 767         self->field_len = 0;
 768         self->state = START_RECORD;
 769         self->numeric_field = 0;
 770         return 0;
 771 }
 772
 773 static PyObject *
 774 Reader_iternext(ReaderObj *self)
 775 {
 776         PyObject *lineobj;
 777         PyObject *fields = NULL;
 778         char *line, c;
 779         int linelen;
 780
 781         if (parse_reset(self) < 0)
 782                 return NULL;
 783         do {
 784                 lineobj = PyIter_Next(self->input_iter);
 785                 if (lineobj == NULL) {
 786                         /* End of input OR exception */
 787                         if (!PyErr_Occurred() && self->field_len != 0)
 788                                 PyErr_Format(error_obj,
 789                                              "newline inside string");
 790                         return NULL;
 791                 }
 792                 ++self->line_num;
 793
 794                 line = PyString_AsString(lineobj);
 795                 linelen = PyString_Size(lineobj);
 796
 797                 if (line == NULL || linelen < 0) {
 798                         Py_DECREF(lineobj);
 799                         return NULL;
 800                 }
 801                 while (linelen--) {
 802                         c = *line++;
 803                         if (c == '\0') {
 804                                 Py_DECREF(lineobj);
 805                                 PyErr_Format(error_obj,
 806                                              "line contains NULL byte");
 807                                 goto err;
 808                         }
 809                         if (parse_process_char(self, c) < 0) {
 810                                 Py_DECREF(lineobj);
 811                                 goto err;
 812                         }
 813                 }
 814                 Py_DECREF(lineobj);
 815                 if (parse_process_char(self, 0) < 0)
 816                         goto err;
 817         } while (self->state != START_RECORD);
 818
 819         fields = self->fields;
 820         self->fields = NULL;
 821 err:
 822         return fields;
 823 }
 824
 825 static void
 826 Reader_dealloc(ReaderObj *self)
 827 {
 828         PyObject_GC_UnTrack(self);
 829         Py_XDECREF(self->dialect);
 830         Py_XDECREF(self->input_iter);
 831         Py_XDECREF(self->fields);
 832         if (self->field != NULL)
 833                 PyMem_Free(self->field);
 834         PyObject_GC_Del(self);
 835 }
 836
/* tp_traverse handler: report the Python objects the reader holds
 * (dialect, input iterator and the current field list) to the garbage
 * collector through the supplied visit callback.  The parameter names
 * `visit` and `arg` are required by the Py_VISIT macro.
 */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
        Py_VISIT(self->dialect);
        Py_VISIT(self->input_iter);
        Py_VISIT(self->fields);
        return 0;
}
 845
/* tp_clear handler: release the references to the dialect, the input
 * iterator and the field list, leaving each member NULL.  Always
 * returns 0.
 */
static int
Reader_clear(ReaderObj *self)
{
        Py_CLEAR(self->dialect);
        Py_CLEAR(self->input_iter);
        Py_CLEAR(self->fields);
        return 0;
}
 854
/* Docstring of the reader type; installed as tp_doc of Reader_Type. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);

/* Reader objects have no methods of their own: the table holds only the
 * terminating sentinel entry.
 */
static struct PyMethodDef Reader_methods[] = {
        { NULL, NULL }
};
/* Byte offset of member `x` inside ReaderObj, for the table below. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Read-only (RO) attributes exposed on reader objects.
 * NOTE(review): T_ULONG presumes ReaderObj.line_num is an unsigned long;
 * the struct is declared outside this section -- confirm.
 */
static struct PyMemberDef Reader_memberlist[] = {
        { "dialect", T_OBJECT, R_OFF(dialect), RO },
        { "line_num", T_ULONG, R_OFF(line_num), RO },
        { NULL }
};
 872
 873
 874 static PyTypeObject Reader_Type = {
 875         PyVarObject_HEAD_INIT(NULL, 0)
 876         "_csv.reader",                          /*tp_name*/
 877         sizeof(ReaderObj),                      /*tp_basicsize*/
 878         0,                                      /*tp_itemsize*/
 879         /* methods */
 880         (destructor)Reader_dealloc,             /*tp_dealloc*/
 881         (printfunc)0,                           /*tp_print*/
 882         (getattrfunc)0,                         /*tp_getattr*/
 883         (setattrfunc)0,                         /*tp_setattr*/
 884         (cmpfunc)0,                             /*tp_compare*/
 885         (reprfunc)0,                            /*tp_repr*/
 886         0,                                      /*tp_as_number*/
 887         0,                                      /*tp_as_sequence*/
 888         0,                                      /*tp_as_mapping*/
 889         (hashfunc)0,                            /*tp_hash*/
 890         (ternaryfunc)0,                         /*tp_call*/
 891         (reprfunc)0,                            /*tp_str*/
 892         0,                                      /*tp_getattro*/
 893         0,                                      /*tp_setattro*/
 894         0,                                      /*tp_as_buffer*/
 895         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
 896                 Py_TPFLAGS_HAVE_GC,             /*tp_flags*/
 897         Reader_Type_doc,                        /*tp_doc*/
 898         (traverseproc)Reader_traverse,          /*tp_traverse*/
 899         (inquiry)Reader_clear,                  /*tp_clear*/
 900         0,                                      /*tp_richcompare*/
 901         0,                                      /*tp_weaklistoffset*/
 902         PyObject_SelfIter,                      /*tp_iter*/
 903         (getiterfunc)Reader_iternext,           /*tp_iternext*/
 904         Reader_methods,                         /*tp_methods*/
 905         Reader_memberlist,                      /*tp_members*/
 906         0,                                      /*tp_getset*/
 907
 908 };
 909
 910 static PyObject *
 911 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
 912 {
 913         PyObject * iterator, * dialect = NULL;
 914         ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
 915
 916         if (!self)
 917                 return NULL;
 918
 919         self->dialect = NULL;
 920         self->fields = NULL;
 921         self->input_iter = NULL;
 922         self->field = NULL;
 923         self->field_size = 0;
 924         self->line_num = 0;
 925
 926         if (parse_reset(self) < 0) {
 927                 Py_DECREF(self);
 928                 return NULL;
 929         }
 930
 931         if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
 932                 Py_DECREF(self);
 933                 return NULL;
 934         }
 935         self->input_iter = PyObject_GetIter(iterator);
 936         if (self->input_iter == NULL) {
 937                 PyErr_SetString(PyExc_TypeError,
 938                                 "argument 1 must be an iterator");
 939                 Py_DECREF(self);
 940                 return NULL;
 941         }
 942         self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
 943         if (self->dialect == NULL) {
 944                 Py_DECREF(self);
 945                 return NULL;
 946         }
 947
 948         PyObject_GC_Track(self);
 949         return (PyObject *)self;
 950 }
 951
 952 /*
 953  * WRITER
 954  */
 955 /* ---------------------------------------------------------------- */
 956 static void
 957 join_reset(WriterObj *self)
 958 {
 959         self->rec_len = 0;
 960         self->num_fields = 0;
 961 }
 962
 963 #define MEM_INCR 32768
 964
/* Calculate new record length or append field to record.
 *
 * The same routine serves two passes over `field`, selected by
 * `copy_phase`:
 *   - copy_phase == 0: nothing is stored; the bytes the field would
 *     occupy are only counted, and *quoted may be switched on when a
 *     character that requires quoting is met.
 *   - copy_phase != 0: the characters are stored into self->rec, which
 *     the caller must already have made large enough.
 *
 * `field` is scanned up to its terminating NUL.  `quote_empty` asks for
 * an empty field to be quoted (which is an error under QUOTE_NONE).
 *
 * Returns the record length after this field, or -1 with an exception
 * set.  self->rec_len and self->num_fields are read but not modified.
 */
static int
join_append_data(WriterObj *self, char *field, int quote_empty,
                 int *quoted, int copy_phase)
{
        DialectObj *dialect = self->dialect;
        int i, rec_len;
        char *lineterm;

/* Account for one output character; store it only in the copy phase. */
#define ADDCH(c) \
        do {\
                if (copy_phase) \
                        self->rec[rec_len] = c;\
                rec_len++;\
        } while(0)

        lineterm = PyString_AsString(dialect->lineterminator);
        if (lineterm == NULL)
                return -1;

        rec_len = self->rec_len;

        /* If this is not the first field we need a field separator */
        if (self->num_fields > 0)
                ADDCH(dialect->delimiter);

        /* Handle preceding quote.  Only done while copying: the counting
         * pass adds both quote characters at the very end instead, once it
         * is known whether the field needs quoting at all.
         */
        if (copy_phase && *quoted)
                ADDCH(dialect->quotechar);

        /* Copy/count field data */
        for (i = 0;; i++) {
                char c = field[i];
                int want_escape = 0;

                if (c == '\0')
                        break;

                /* Special characters: delimiter, escapechar, quotechar and
                 * any character of the line terminator.
                 */
                if (c == dialect->delimiter ||
                    c == dialect->escapechar ||
                    c == dialect->quotechar ||
                    strchr(lineterm, c)) {
                        if (dialect->quoting == QUOTE_NONE)
                                want_escape = 1;
                        else {
                                if (c == dialect->quotechar) {
                                        /* Double the quote, or fall back
                                         * to escaping it.
                                         */
                                        if (dialect->doublequote)
                                                ADDCH(dialect->quotechar);
                                        else
                                                want_escape = 1;
                                }
                                if (!want_escape)
                                        *quoted = 1;
                        }
                        if (want_escape) {
                                if (!dialect->escapechar) {
                                        PyErr_Format(error_obj,
                                                     "need to escape, but no escapechar set");
                                        return -1;
                                }
                                ADDCH(dialect->escapechar);
                        }
                }
                /* Copy field character into record buffer.
                 */
                ADDCH(c);
        }

        /* If field is empty check if it needs to be quoted.
         */
        if (i == 0 && quote_empty) {
                if (dialect->quoting == QUOTE_NONE) {
                        PyErr_Format(error_obj,
                                     "single empty field record must be quoted");
                        return -1;
                }
                else
                        *quoted = 1;
        }

        /* Closing quote when copying; when counting, room for both the
         * opening and the closing quote.
         */
        if (*quoted) {
                if (copy_phase)
                        ADDCH(dialect->quotechar);
                else
                        rec_len += 2;
        }
        return rec_len;
#undef ADDCH
}
1056
1057 static int
1058 join_check_rec_size(WriterObj *self, int rec_len)
1059 {
1060
1061         if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1062                 PyErr_NoMemory();
1063                 return 0;
1064         }
1065
1066         if (rec_len > self->rec_size) {
1067                 if (self->rec_size == 0) {
1068                         self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1069                         if (self->rec != NULL)
1070                                 PyMem_Free(self->rec);
1071                         self->rec = PyMem_Malloc(self->rec_size);
1072                 }
1073                 else {
1074                         char *old_rec = self->rec;
1075
1076                         self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1077                         self->rec = PyMem_Realloc(self->rec, self->rec_size);
1078                         if (self->rec == NULL)
1079                                 PyMem_Free(old_rec);
1080                 }
1081                 if (self->rec == NULL) {
1082                         PyErr_NoMemory();
1083                         return 0;
1084                 }
1085         }
1086         return 1;
1087 }
1088
1089 static int
1090 join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1091 {
1092         int rec_len;
1093
1094         rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1095         if (rec_len < 0)
1096                 return 0;
1097
1098         /* grow record buffer if necessary */
1099         if (!join_check_rec_size(self, rec_len))
1100                 return 0;
1101
1102         self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1103         self->num_fields++;
1104
1105         return 1;
1106 }
1107
1108 static int
1109 join_append_lineterminator(WriterObj *self)
1110 {
1111         int terminator_len;
1112         char *terminator;
1113
1114         terminator_len = PyString_Size(self->dialect->lineterminator);
1115         if (terminator_len == -1)
1116                 return 0;
1117
1118         /* grow record buffer if necessary */
1119         if (!join_check_rec_size(self, self->rec_len + terminator_len))
1120                 return 0;
1121
1122         terminator = PyString_AsString(self->dialect->lineterminator);
1123         if (terminator == NULL)
1124                 return 0;
1125         memmove(self->rec + self->rec_len, terminator, terminator_len);
1126         self->rec_len += terminator_len;
1127
1128         return 1;
1129 }
1130
1131 PyDoc_STRVAR(csv_writerow_doc,
1132 "writerow(sequence)\n"
1133 "\n"
1134 "Construct and write a CSV record from a sequence of fields.  Non-string\n"
1135 "elements will be converted to string.");
1136
1137 static PyObject *
1138 csv_writerow(WriterObj *self, PyObject *seq)
1139 {
1140         DialectObj *dialect = self->dialect;
1141         int len, i;
1142
1143         if (!PySequence_Check(seq))
1144                 return PyErr_Format(error_obj, "sequence expected");
1145
1146         len = PySequence_Length(seq);
1147         if (len < 0)
1148                 return NULL;
1149
1150         /* Join all fields in internal buffer.
1151          */
1152         join_reset(self);
1153         for (i = 0; i < len; i++) {
1154                 PyObject *field;
1155                 int append_ok;
1156                 int quoted;
1157
1158                 field = PySequence_GetItem(seq, i);
1159                 if (field == NULL)
1160                         return NULL;
1161
1162                 switch (dialect->quoting) {
1163                 case QUOTE_NONNUMERIC:
1164                         quoted = !PyNumber_Check(field);
1165                         break;
1166                 case QUOTE_ALL:
1167                         quoted = 1;
1168                         break;
1169                 default:
1170                         quoted = 0;
1171                         break;
1172                 }
1173
1174                 if (PyString_Check(field)) {
1175                         append_ok = join_append(self,
1176                                                 PyString_AS_STRING(field),
1177                                                 &quoted, len == 1);
1178                         Py_DECREF(field);
1179                 }
1180                 else if (field == Py_None) {
1181                         append_ok = join_append(self, "", &quoted, len == 1);
1182                         Py_DECREF(field);
1183                 }
1184                 else {
1185                         PyObject *str;
1186
1187                         str = PyObject_Str(field);
1188                         Py_DECREF(field);
1189                         if (str == NULL)
1190                                 return NULL;
1191
1192                         append_ok = join_append(self, PyString_AS_STRING(str),
1193                                                 &quoted, len == 1);
1194                         Py_DECREF(str);
1195                 }
1196                 if (!append_ok)
1197                         return NULL;
1198         }
1199
1200         /* Add line terminator.
1201          */
1202         if (!join_append_lineterminator(self))
1203                 return 0;
1204
1205         return PyObject_CallFunction(self->writeline,
1206                                      "(s#)", self->rec, self->rec_len);
1207 }
1208
1209 PyDoc_STRVAR(csv_writerows_doc,
1210 "writerows(sequence of sequences)\n"
1211 "\n"
1212 "Construct and write a series of sequences to a csv file.  Non-string\n"
1213 "elements will be converted to string.");
1214
1215 static PyObject *
1216 csv_writerows(WriterObj *self, PyObject *seqseq)
1217 {
1218         PyObject *row_iter, *row_obj, *result;
1219
1220         row_iter = PyObject_GetIter(seqseq);
1221         if (row_iter == NULL) {
1222                 PyErr_SetString(PyExc_TypeError,
1223                                 "writerows() argument must be iterable");
1224                 return NULL;
1225         }
1226         while ((row_obj = PyIter_Next(row_iter))) {
1227                 result = csv_writerow(self, row_obj);
1228                 Py_DECREF(row_obj);
1229                 if (!result) {
1230                         Py_DECREF(row_iter);
1231                         return NULL;
1232                 }
1233                 else
1234                      Py_DECREF(result);
1235         }
1236         Py_DECREF(row_iter);
1237         if (PyErr_Occurred())
1238                 return NULL;
1239         Py_INCREF(Py_None);
1240         return Py_None;
1241 }
1242
/* Methods of writer objects; both take exactly one argument (METH_O). */
static struct PyMethodDef Writer_methods[] = {
        { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
        { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
        { NULL, NULL }
};

/* Byte offset of member `x` inside WriterObj, for the table below. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Read-only (RO) attributes exposed on writer objects. */
static struct PyMemberDef Writer_memberlist[] = {
        { "dialect", T_OBJECT, W_OFF(dialect), RO },
        { NULL }
};
1255
1256 static void
1257 Writer_dealloc(WriterObj *self)
1258 {
1259         PyObject_GC_UnTrack(self);
1260         Py_XDECREF(self->dialect);
1261         Py_XDECREF(self->writeline);
1262         if (self->rec != NULL)
1263                 PyMem_Free(self->rec);
1264         PyObject_GC_Del(self);
1265 }
1266
/* tp_traverse handler: report the Python objects the writer holds (the
 * dialect and the write callable) to the garbage collector through the
 * supplied visit callback.  The parameter names `visit` and `arg` are
 * required by the Py_VISIT macro.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
        Py_VISIT(self->dialect);
        Py_VISIT(self->writeline);
        return 0;
}
1274
/* tp_clear handler: release the references to the dialect and the write
 * callable, leaving both members NULL.  Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
        Py_CLEAR(self->dialect);
        Py_CLEAR(self->writeline);
        return 0;
}
1282
1283 PyDoc_STRVAR(Writer_Type_doc,
1284 "CSV writer\n"
1285 "\n"
1286 "Writer objects are responsible for generating tabular data\n"
1287 "in CSV format from sequence input.\n"
1288 );
1289
1290 static PyTypeObject Writer_Type = {
1291         PyVarObject_HEAD_INIT(NULL, 0)
1292         "_csv.writer",                          /*tp_name*/
1293         sizeof(WriterObj),                      /*tp_basicsize*/
1294         0,                                      /*tp_itemsize*/
1295         /* methods */
1296         (destructor)Writer_dealloc,             /*tp_dealloc*/
1297         (printfunc)0,                           /*tp_print*/
1298         (getattrfunc)0,                         /*tp_getattr*/
1299         (setattrfunc)0,                         /*tp_setattr*/
1300         (cmpfunc)0,                             /*tp_compare*/
1301         (reprfunc)0,                            /*tp_repr*/
1302         0,                                      /*tp_as_number*/
1303         0,                                      /*tp_as_sequence*/
1304         0,                                      /*tp_as_mapping*/
1305         (hashfunc)0,                            /*tp_hash*/
1306         (ternaryfunc)0,                         /*tp_call*/
1307         (reprfunc)0,                            /*tp_str*/
1308         0,                                      /*tp_getattro*/
1309         0,                                      /*tp_setattro*/
1310         0,                                      /*tp_as_buffer*/
1311         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1312                 Py_TPFLAGS_HAVE_GC,             /*tp_flags*/
1313         Writer_Type_doc,
1314         (traverseproc)Writer_traverse,          /*tp_traverse*/
1315         (inquiry)Writer_clear,                  /*tp_clear*/
1316         0,                                      /*tp_richcompare*/
1317         0,                                      /*tp_weaklistoffset*/
1318         (getiterfunc)0,                         /*tp_iter*/
1319         (getiterfunc)0,                         /*tp_iternext*/
1320         Writer_methods,                         /*tp_methods*/
1321         Writer_memberlist,                      /*tp_members*/
1322         0,                                      /*tp_getset*/
1323 };
1324
1325 static PyObject *
1326 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1327 {
1328         PyObject * output_file, * dialect = NULL;
1329         WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1330
1331         if (!self)
1332                 return NULL;
1333
1334         self->dialect = NULL;
1335         self->writeline = NULL;
1336
1337         self->rec = NULL;
1338         self->rec_size = 0;
1339         self->rec_len = 0;
1340         self->num_fields = 0;
1341
1342         if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1343                 Py_DECREF(self);
1344                 return NULL;
1345         }
1346         self->writeline = PyObject_GetAttrString(output_file, "write");
1347         if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1348                 PyErr_SetString(PyExc_TypeError,
1349                                 "argument 1 must have a \"write\" method");
1350                 Py_DECREF(self);
1351                 return NULL;
1352         }
1353         self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1354         if (self->dialect == NULL) {
1355                 Py_DECREF(self);
1356                 return NULL;
1357         }
1358         PyObject_GC_Track(self);
1359         return (PyObject *)self;
1360 }
1361
1362 /*
1363  * DIALECT REGISTRY
1364  */
/* _csv.list_dialects(): return the keys of the module-level `dialects`
 * dictionary, i.e. the names of all registered dialects.  `args` is
 * unused (the function is registered as METH_NOARGS).
 */
static PyObject *
csv_list_dialects(PyObject *module, PyObject *args)
{
        return PyDict_Keys(dialects);
}
1370
1371 static PyObject *
1372 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1373 {
1374         PyObject *name_obj, *dialect_obj = NULL;
1375         PyObject *dialect;
1376
1377         if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1378                 return NULL;
1379         if (!IS_BASESTRING(name_obj)) {
1380                 PyErr_SetString(PyExc_TypeError,
1381                                 "dialect name must be a string or unicode");
1382                 return NULL;
1383         }
1384         dialect = _call_dialect(dialect_obj, kwargs);
1385         if (dialect == NULL)
1386                 return NULL;
1387         if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1388                 Py_DECREF(dialect);
1389                 return NULL;
1390         }
1391         Py_DECREF(dialect);
1392         Py_INCREF(Py_None);
1393         return Py_None;
1394 }
1395
1396 static PyObject *
1397 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1398 {
1399         if (PyDict_DelItem(dialects, name_obj) < 0)
1400                 return PyErr_Format(error_obj, "unknown dialect");
1401         Py_INCREF(Py_None);
1402         return Py_None;
1403 }
1404
/* _csv.get_dialect(name): thin wrapper that forwards `name` to
 * get_dialect_from_registry() and returns its result unchanged.
 */
static PyObject *
csv_get_dialect(PyObject *module, PyObject *name_obj)
{
        return get_dialect_from_registry(name_obj);
}
1410
1411 static PyObject *
1412 csv_field_size_limit(PyObject *module, PyObject *args)
1413 {
1414         PyObject *new_limit = NULL;
1415         long old_limit = field_limit;
1416
1417         if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1418                 return NULL;
1419         if (new_limit != NULL) {
1420                 if (!PyInt_Check(new_limit)) {
1421                         PyErr_Format(PyExc_TypeError,
1422                                      "limit must be an integer");
1423                         return NULL;
1424                 }
1425                 field_limit = PyInt_AsLong(new_limit);
1426         }
1427         return PyInt_FromLong(old_limit);
1428 }
1429
1430 /*
1431  * MODULE
1432  */
1433
/* Docstring of the _csv module itself; handed to Py_InitModule3() in
 * init_csv().  It gives an overview of the module and describes the
 * dialect settings understood by readers and writers.
 */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305.  Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail.  The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings.  When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
"    class excel:\n"
"        delimiter = ','\n"
"        quotechar = '\"'\n"
"        escapechar = None\n"
"        doublequote = True\n"
"        skipinitialspace = False\n"
"        lineterminator = '\\r\\n'\n"
"        quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
"    * quotechar - specifies a one-character string to use as the \n"
"        quoting character.  It defaults to '\"'.\n"
"    * delimiter - specifies a one-character string to use as the \n"
"        field separator.  It defaults to ','.\n"
"    * skipinitialspace - specifies how to interpret whitespace which\n"
"        immediately follows a delimiter.  It defaults to False, which\n"
"        means that whitespace immediately following a delimiter is part\n"
"        of the following field.\n"
"    * lineterminator -  specifies the character sequence which should \n"
"        terminate rows.\n"
"    * quoting - controls when quotes should be generated by the writer.\n"
"        It can take on any of the following module constants:\n"
"\n"
"        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
"            field contains either the quotechar or the delimiter\n"
"        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
"        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
"            fields which do not parse as integers or floating point\n"
"            numbers.\n"
"        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
"    * escapechar - specifies a one-character string used to escape \n"
"        the delimiter when quoting is set to QUOTE_NONE.\n"
"    * doublequote - controls the handling of quotes inside fields.  When\n"
"        True, two consecutive quotes are interpreted as one during read,\n"
"        and when writing, each quote character embedded in the data is\n"
"        written as two quotes\n");
1491
/* Docstring of the module-level reader() function (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
"    csv_reader = reader(iterable [, dialect='excel']\n"
"                        [optional keyword args])\n"
"    for row in csv_reader:\n"
"        process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list.  The\n"
"optional \"dialect\" parameter is discussed below.  The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator.  Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines):\n");

/* Docstring of the module-level writer() function (see csv_methods). */
PyDoc_STRVAR(csv_writer_doc,
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    for row in sequence:\n"
"        csv_writer.writerow(row)\n"
"\n"
"    [or]\n"
"\n"
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1520
1521 PyDoc_STRVAR(csv_list_dialects_doc,
1522 "Return a list of all know dialect names.\n"
1523 "    names = csv.list_dialects()");
1524
/* Docstring of the module-level get_dialect() function. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
"    dialect = csv.get_dialect(name)");

/* Docstring of the module-level register_dialect() function. */
PyDoc_STRVAR(csv_register_dialect_doc,
"Create a mapping from a string name to a dialect class.\n"
"    dialect = csv.register_dialect(name, dialect)");

/* Docstring of the module-level unregister_dialect() function. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
"    csv.unregister_dialect(name)");

/* Docstring of the module-level field_size_limit() function. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
"    csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1543
/* Module-level function table passed to Py_InitModule3() in init_csv().
 * Each entry gives the Python name, the C implementation, its calling
 * convention and its docstring; the table ends with a NULL sentinel.
 */
static struct PyMethodDef csv_methods[] = {
        { "reader", (PyCFunction)csv_reader,
                METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
        { "writer", (PyCFunction)csv_writer,
                METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
        { "list_dialects", (PyCFunction)csv_list_dialects,
                METH_NOARGS, csv_list_dialects_doc},
        { "register_dialect", (PyCFunction)csv_register_dialect,
                METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
        { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
                METH_O, csv_unregister_dialect_doc},
        { "get_dialect", (PyCFunction)csv_get_dialect,
                METH_O, csv_get_dialect_doc},
        { "field_size_limit", (PyCFunction)csv_field_size_limit,
                METH_VARARGS, csv_field_size_limit_doc},
        { NULL, NULL }
};
1561
1562 PyMODINIT_FUNC
1563 init_csv(void)
1564 {
1565         PyObject *module;
1566         StyleDesc *style;
1567
1568         if (PyType_Ready(&Dialect_Type) < 0)
1569                 return;
1570
1571         if (PyType_Ready(&Reader_Type) < 0)
1572                 return;
1573
1574         if (PyType_Ready(&Writer_Type) < 0)
1575                 return;
1576
1577         /* Create the module and add the functions */
1578         module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1579         if (module == NULL)
1580                 return;
1581
1582         /* Add version to the module. */
1583         if (PyModule_AddStringConstant(module, "__version__",
1584                                        MODULE_VERSION) == -1)
1585                 return;
1586
1587         /* Add _dialects dictionary */
1588         dialects = PyDict_New();
1589         if (dialects == NULL)
1590                 return;
1591         if (PyModule_AddObject(module, "_dialects", dialects))
1592                 return;
1593
1594         /* Add quote styles into dictionary */
1595         for (style = quote_styles; style->name; style++) {
1596                 if (PyModule_AddIntConstant(module, style->name,
1597                                             style->style) == -1)
1598                         return;
1599         }
1600
1601         /* Add the Dialect type */
1602         Py_INCREF(&Dialect_Type);
1603         if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1604                 return;
1605
1606         /* Add the CSV exception object to the module. */
1607         error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1608         if (error_obj == NULL)
1609                 return;
1610         PyModule_AddObject(module, "Error", error_obj);
1611 }