Modules/_csv.c

   1 /* csv module */
   2
   3 /*
   4
   5 This module provides the low-level underpinnings of a CSV reading/writing
   6 module.  Users should not use this module directly, but import the csv.py
   7 module instead.
   8
   9 **** For people modifying this code, please note that as of this writing
  10 **** (2003-03-23), it is intended that this code should work with Python
  11 **** 2.2.
  12
  13 */
  14
  15 #define MODULE_VERSION "1.0"
  16
  17 #include "Python.h"
  18 #include "structmember.h"
  19
  20
  21 /* begin 2.2 compatibility macros */
  22 #ifndef PyDoc_STRVAR
  23 /* Define macros for inline documentation.
      *
      * These fallbacks are only compiled when Python.h did not already
      * provide PyDoc_STRVAR:
      *   PyDoc_VAR(name)        declares a static char array called name.
      *   PyDoc_STRVAR(name,str) declares that array and initializes it with
      *                          PyDoc_STR(str).
      *   PyDoc_STR(str)         expands to str when WITH_DOC_STRINGS is
      *                          defined and to "" otherwise.
      */
  24 #define PyDoc_VAR(name) static char name[]
  25 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
  26 #ifdef WITH_DOC_STRINGS
  27 #define PyDoc_STR(str) str
  28 #else
  29 #define PyDoc_STR(str) ""
  30 #endif
  31 #endif /* ifndef PyDoc_STRVAR */
  32
     /* Fallback for PyMODINIT_FUNC when Python.h does not define it: a plain
      * void return type, given C linkage when compiled as C++. */
  33 #ifndef PyMODINIT_FUNC
  34 #       if defined(__cplusplus)
  35 #               define PyMODINIT_FUNC extern "C" void
  36 #       else /* __cplusplus */
  37 #               define PyMODINIT_FUNC void
  38 #       endif /* __cplusplus */
  39 #endif
  40
     /* Fallback Py_CLEAR(op): when op is non-NULL, copy it to a temporary,
      * set op to NULL, and only then drop the reference held by the
      * temporary.  op is therefore already NULL by the time Py_DECREF runs.
      * Note that op is evaluated more than once. */
  41 #ifndef Py_CLEAR
  42 #define Py_CLEAR(op)                                            \
  43         do {                                                    \
  44                 if (op) {                                       \
  45                         PyObject *tmp = (PyObject *)(op);       \
  46                         (op) = NULL;                            \
  47                         Py_DECREF(tmp);                         \
  48                 }                                               \
  49         } while (0)
  50 #endif
     /* Fallback Py_VISIT(op): when op is non-NULL, call visit(op, arg) and
      * return its result from the *enclosing function* if it is nonzero.
      * The identifiers `visit` and `arg` are not macro parameters; they must
      * be in scope wherever the macro is expanded (see Reader_traverse). */
  51 #ifndef Py_VISIT
  52 #define Py_VISIT(op)                                                    \
  53         do {                                                            \
  54                 if (op) {                                               \
  55                         int vret = visit((PyObject *)(op), arg);        \
  56                         if (vret)                                       \
  57                                 return vret;                            \
  58                 }                                                       \
  59         } while (0)
  60 #endif
  61
  62 /* end 2.2 compatibility macros */
  63
     /* True when o is an instance of PyBaseString_Type or of a subtype. */
  64 #define IS_BASESTRING(o) \
  65         PyObject_TypeCheck(o, &PyBaseString_Type)
  66
     /* Module-level state shared by the functions below.  error_obj and
      * dialects are only read in this part of the file; where they are
      * created is not visible here. */
  67 static PyObject *error_obj;     /* CSV exception */
  68 static PyObject *dialects;      /* Dialect registry */
     /* parse_add_char refuses to grow a field once its length reaches this */
  69 static long field_limit = 128 * 1024;   /* max parsed field size */
  70
     /* States of the reader's character-at-a-time state machine; the
      * transitions between them are implemented in parse_process_char. */
  71 typedef enum {
  72         START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
  73         IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
  74         EAT_CRNL
  75 } ParserState;
  76
     /* Quoting styles a dialect may select; the value is stored in
      * DialectObj.quoting and validated by dialect_check_quoting. */
  77 typedef enum {
  78         QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
  79 } QuoteStyle;
  80
     /* Pairs a QuoteStyle value with its symbolic name. */
  81 typedef struct {
  82         QuoteStyle style;
  83         char *name;
  84 } StyleDesc;
  85
     /* Table of every known quoting style.  The final zeroed entry has a
      * NULL name, which is what dialect_check_quoting uses as the end
      * marker when scanning the table. */
  86 static StyleDesc quote_styles[] = {
  87         { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
  88         { QUOTE_ALL,        "QUOTE_ALL" },
  89         { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
  90         { QUOTE_NONE,       "QUOTE_NONE" },
  91         { 0 }
  92 };
  93
     /* Instance layout of the Dialect type: the C-level form of the CSV
      * formatting options.  A value of '\0' in quotechar or escapechar is
      * reported to Python as None (see get_nullchar_as_None).
      * lineterminator is the only object reference held and is released in
      * Dialect_dealloc. */
  94 typedef struct {
  95         PyObject_HEAD
  96
  97         int doublequote;        /* is " represented by ""? */
  98         char delimiter;         /* field separator */
  99         char quotechar;         /* quote character */
 100         char escapechar;        /* escape character */
 101         int skipinitialspace;   /* ignore spaces following delimiter? */
 102         PyObject *lineterminator; /* string to write between records */
 103         int quoting;            /* style of quoting to write */
 104
 105         int strict;             /* raise exception on bad CSV */
 106 } DialectObj;
 107
 108 staticforward PyTypeObject Dialect_Type;
 109
     /* Instance layout of the reader type.  `field` is a PyMem-allocated
      * scratch buffer of field_size bytes of which the first field_len are
      * in use; it is grown by parse_grow_buff and freed in Reader_dealloc.
      * dialect, input_iter and fields are object references that
      * Reader_traverse visits and Reader_clear releases. */
 110 typedef struct {
 111         PyObject_HEAD
 112
 113         PyObject *input_iter;   /* iterate over this for input lines */
 114
 115         DialectObj *dialect;    /* parsing dialect */
 116
 117         PyObject *fields;       /* field list for current record */
 118         ParserState state;      /* current CSV parse state */
 119         char *field;            /* build current field in here */
 120         int field_size;         /* size of allocated buffer */
 121         int field_len;          /* length of current field */
 122         int numeric_field;      /* treat field as numeric */
 123         unsigned long line_num; /* Source-file line number */
 124 } ReaderObj;
 125
 126 staticforward PyTypeObject Reader_Type;
 127
     /* Exact type test: true only when v's type is Reader_Type itself. */
 128 #define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
 129
     /* Instance layout of the writer type.  The code that uses these fields
      * is outside this part of the file; the per-field comments are the
      * only description available here. */
 130 typedef struct {
 131         PyObject_HEAD
 132
 133         PyObject *writeline;    /* write output lines to this file */
 134
 135         DialectObj *dialect;    /* parsing dialect */
 136
 137         char *rec;              /* buffer for parser.join */
 138         int rec_size;           /* size of allocated record */
 139         int rec_len;            /* length of record */
 140         int num_fields;         /* number of fields in record */
 141 } WriterObj;
 142
 143 staticforward PyTypeObject Writer_Type;
 144
 145 /*
 146  * DIALECT class
 147  */
 148
 149 static PyObject *
 150 get_dialect_from_registry(PyObject * name_obj)
 151 {
 152         PyObject *dialect_obj;
 153
 154         dialect_obj = PyDict_GetItem(dialects, name_obj);
 155         if (dialect_obj == NULL) {
 156                 if (!PyErr_Occurred())
 157                         PyErr_Format(error_obj, "unknown dialect");
 158         }
 159         else
 160                 Py_INCREF(dialect_obj);
 161         return dialect_obj;
 162 }
 163
 164 static PyObject *
 165 get_string(PyObject *str)
 166 {
 167         Py_XINCREF(str);
 168         return str;
 169 }
 170
 171 static PyObject *
 172 get_nullchar_as_None(char c)
 173 {
 174         if (c == '\0') {
 175                 Py_INCREF(Py_None);
 176                 return Py_None;
 177         }
 178         else
 179                 return PyBytes_FromStringAndSize((char*)&c, 1);
 180 }
 181
 182 static PyObject *
 183 Dialect_get_lineterminator(DialectObj *self)
 184 {
 185         return get_string(self->lineterminator);
 186 }
 187
 188 static PyObject *
 189 Dialect_get_escapechar(DialectObj *self)
 190 {
 191         return get_nullchar_as_None(self->escapechar);
 192 }
 193
 194 static PyObject *
 195 Dialect_get_quotechar(DialectObj *self)
 196 {
 197         return get_nullchar_as_None(self->quotechar);
 198 }
 199
 200 static PyObject *
 201 Dialect_get_quoting(DialectObj *self)
 202 {
 203         return PyInt_FromLong(self->quoting);
 204 }
 205
 206 static int
 207 _set_bool(const char *name, int *target, PyObject *src, int dflt)
 208 {
 209         if (src == NULL)
 210                 *target = dflt;
 211         else
 212                 *target = PyObject_IsTrue(src);
 213         return 0;
 214 }
 215
 216 static int
 217 _set_int(const char *name, int *target, PyObject *src, int dflt)
 218 {
 219         if (src == NULL)
 220                 *target = dflt;
 221         else {
 222                 if (!PyInt_Check(src)) {
 223                         PyErr_Format(PyExc_TypeError,
 224                                      "\"%s\" must be an integer", name);
 225                         return -1;
 226                 }
 227                 *target = PyInt_AsLong(src);
 228         }
 229         return 0;
 230 }
 231
 232 static int
 233 _set_char(const char *name, char *target, PyObject *src, char dflt)
 234 {
 235         if (src == NULL)
 236                 *target = dflt;
 237         else {
 238                 if (src == Py_None || PyBytes_Size(src) == 0)
 239                         *target = '\0';
 240                 else if (!PyBytes_Check(src) || PyBytes_Size(src) != 1) {
 241                         PyErr_Format(PyExc_TypeError,
 242                                      "\"%s\" must be an 1-character string",
 243                                      name);
 244                         return -1;
 245                 }
 246                 else {
 247                         char *s = PyBytes_AsString(src);
 248                         if (s == NULL)
 249                                 return -1;
 250                         *target = s[0];
 251                 }
 252         }
 253         return 0;
 254 }
 255
 256 static int
 257 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
 258 {
 259         if (src == NULL)
 260                 *target = PyBytes_FromString(dflt);
 261         else {
 262                 if (src == Py_None)
 263                         *target = NULL;
 264                 else if (!IS_BASESTRING(src)) {
 265                         PyErr_Format(PyExc_TypeError,
 266                                      "\"%s\" must be an string", name);
 267                         return -1;
 268                 }
 269                 else {
 270                         Py_XDECREF(*target);
 271                         Py_INCREF(src);
 272                         *target = src;
 273                 }
 274         }
 275         return 0;
 276 }
 277
 278 static int
 279 dialect_check_quoting(int quoting)
 280 {
 281         StyleDesc *qs = quote_styles;
 282
 283         for (qs = quote_styles; qs->name; qs++) {
 284                 if (qs->style == quoting)
 285                         return 0;
 286         }
 287         PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
 288         return -1;
 289 }
 290
     /* Byte offset of field x inside DialectObj. */
 291 #define D_OFF(x) offsetof(DialectObj, x)
 292
     /* Dialect attributes exposed directly from the C struct, all read-only.
      * The remaining attributes are served by Dialect_getsetlist. */
 293 static struct PyMemberDef Dialect_memberlist[] = {
 294         { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY },
 295         { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
 296         { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
 297         { "strict",             T_INT, D_OFF(strict), READONLY },
 298         { NULL }
 299 };
 300
     /* Dialect attributes that need conversion on read.  Only getters are
      * supplied; the setter slots are left zero-initialized. */
 301 static PyGetSetDef Dialect_getsetlist[] = {
 302         { "escapechar",         (getter)Dialect_get_escapechar},
 303         { "lineterminator",     (getter)Dialect_get_lineterminator},
 304         { "quotechar",          (getter)Dialect_get_quotechar},
 305         { "quoting",            (getter)Dialect_get_quoting},
 306         {NULL},
 307 };
 308
 309 static void
 310 Dialect_dealloc(DialectObj *self)
 311 {
 312         Py_XDECREF(self->lineterminator);
 313         Py_TYPE(self)->tp_free((PyObject *)self);
 314 }
 315
     /* Keyword names accepted by dialect_new.  The order must match the
      * order of the output pointers passed to PyArg_ParseTupleAndKeywords
      * there. */
 316 static char *dialect_kws[] = {
 317         "dialect",
 318         "delimiter",
 319         "doublequote",
 320         "escapechar",
 321         "lineterminator",
 322         "quotechar",
 323         "quoting",
 324         "skipinitialspace",
 325         "strict",
 326         NULL
 327 };
 328
     /*
      * tp_new for the Dialect type.
      *
      * Accepts an optional `dialect` (a registered name or any object whose
      * attributes describe a dialect) plus optional keyword overrides for
      * each individual option.  Each option is resolved in this order:
      * explicit argument, then attribute of `dialect`, then built-in default.
      *
      * If `dialect` is already a Dialect instance and no overrides were
      * given, that instance is returned instead of allocating a new one.
      *
      * Returns a new reference, or NULL with an exception set.
      */
 329 static PyObject *
 330 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
 331 {
 332         DialectObj *self;
 333         PyObject *ret = NULL;
 334         PyObject *dialect = NULL;
 335         PyObject *delimiter = NULL;
 336         PyObject *doublequote = NULL;
 337         PyObject *escapechar = NULL;
 338         PyObject *lineterminator = NULL;
 339         PyObject *quotechar = NULL;
 340         PyObject *quoting = NULL;
 341         PyObject *skipinitialspace = NULL;
 342         PyObject *strict = NULL;
 343
 344         if (!PyArg_ParseTupleAndKeywords(args, kwargs,
 345                                          "|OOOOOOOOO", dialect_kws,
 346                                          &dialect,
 347                                          &delimiter,
 348                                          &doublequote,
 349                                          &escapechar,
 350                                          &lineterminator,
 351                                          &quotechar,
 352                                          &quoting,
 353                                          &skipinitialspace,
 354                                          &strict))
 355                 return NULL;
 356
             /* From here on `dialect`, when non-NULL, is an owned reference:
              * either the new reference from the registry lookup or the
              * argument with an extra reference taken. */
 357         if (dialect != NULL) {
 358                 if (IS_BASESTRING(dialect)) {
 359                         dialect = get_dialect_from_registry(dialect);
 360                         if (dialect == NULL)
 361                                 return NULL;
 362                 }
 363                 else
 364                         Py_INCREF(dialect);
 365                 /* Can we reuse this instance? */
 366                 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
 367                     delimiter == 0 &&
 368                     doublequote == 0 &&
 369                     escapechar == 0 &&
 370                     lineterminator == 0 &&
 371                     quotechar == 0 &&
 372                     quoting == 0 &&
 373                     skipinitialspace == 0 &&
 374                     strict == 0)
 375                         return dialect;
 376         }
 377
 378         self = (DialectObj *)type->tp_alloc(type, 0);
 379         if (self == NULL) {
 380                 Py_XDECREF(dialect);
 381                 return NULL;
 382         }
 383         self->lineterminator = NULL;
 384
             /* Take a reference to every explicitly supplied option so that
              * the cleanup at `err` can release all option variables the same
              * way, whether they came from the arguments or from the
              * attribute lookups below. */
 385         Py_XINCREF(delimiter);
 386         Py_XINCREF(doublequote);
 387         Py_XINCREF(escapechar);
 388         Py_XINCREF(lineterminator);
 389         Py_XINCREF(quotechar);
 390         Py_XINCREF(quoting);
 391         Py_XINCREF(skipinitialspace);
 392         Py_XINCREF(strict);
 393         if (dialect != NULL) {
     /* Fill in an option from the dialect object only when it was not given
      * explicitly. */
 394 #define DIALECT_GETATTR(v, n) \
 395                 if (v == NULL) \
 396                         v = PyObject_GetAttrString(dialect, n)
 397                 DIALECT_GETATTR(delimiter, "delimiter");
 398                 DIALECT_GETATTR(doublequote, "doublequote");
 399                 DIALECT_GETATTR(escapechar, "escapechar");
 400                 DIALECT_GETATTR(lineterminator, "lineterminator");
 401                 DIALECT_GETATTR(quotechar, "quotechar");
 402                 DIALECT_GETATTR(quoting, "quoting");
 403                 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
 404                 DIALECT_GETATTR(strict, "strict");
                     /* A failed lookup leaves the variable NULL, which the
                      * _set_* helpers treat as "use the default"; any error
                      * raised by the lookups is discarded here. */
 405                 PyErr_Clear();
 406         }
 407
 408         /* check types and convert to C values */
 409 #define DIASET(meth, name, target, src, dflt) \
 410         if (meth(name, target, src, dflt)) \
 411                 goto err
 412         DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
 413         DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
 414         DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
 415         DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
 416         DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
 417         DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
 418         DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
 419         DIASET(_set_bool, "strict", &self->strict, strict, 0);
 420
 421         /* validate options */
 422         if (dialect_check_quoting(self->quoting))
 423                 goto err;
 424         if (self->delimiter == 0) {
 425                 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
 426                 goto err;
 427         }
             /* quotechar None with no quoting value at all selects QUOTE_NONE */
 428         if (quotechar == Py_None && quoting == NULL)
 429                 self->quoting = QUOTE_NONE;
 430         if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
 431                 PyErr_SetString(PyExc_TypeError,
 432                                 "quotechar must be set if quoting enabled");
 433                 goto err;
 434         }
 435         if (self->lineterminator == 0) {
 436                 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
 437                 goto err;
 438         }
 439
             /* Success: the extra reference taken here is balanced by the
              * Py_XDECREF(self) below, so the caller ends up with exactly the
              * reference from tp_alloc.  On the failure paths ret stays NULL
              * and that same decref releases the half-built object. */
 440         ret = (PyObject *)self;
 441         Py_INCREF(self);
 442 err:
 443         Py_XDECREF(self);
 444         Py_XDECREF(dialect);
 445         Py_XDECREF(delimiter);
 446         Py_XDECREF(doublequote);
 447         Py_XDECREF(escapechar);
 448         Py_XDECREF(lineterminator);
 449         Py_XDECREF(quotechar);
 450         Py_XDECREF(quoting);
 451         Py_XDECREF(skipinitialspace);
 452         Py_XDECREF(strict);
 453         return ret;
 454 }
 455
 456
     /* Docstring attached to the Dialect type through tp_doc. */
 457 PyDoc_STRVAR(Dialect_Type_doc,
 458 "CSV dialect\n"
 459 "\n"
 460 "The Dialect type records CSV parsing and generation options.\n");
 461
     /* Type object for _csv.Dialect.  It can be subclassed
      * (Py_TPFLAGS_BASETYPE), is constructed by dialect_new, released by
      * Dialect_dealloc, and exposes its attributes through
      * Dialect_memberlist and Dialect_getsetlist.  All other slots are left
      * empty here. */
 462 static PyTypeObject Dialect_Type = {
 463         PyVarObject_HEAD_INIT(NULL, 0)
 464         "_csv.Dialect",                         /* tp_name */
 465         sizeof(DialectObj),                     /* tp_basicsize */
 466         0,                                      /* tp_itemsize */
 467         /*  methods  */
 468         (destructor)Dialect_dealloc,            /* tp_dealloc */
 469         (printfunc)0,                           /* tp_print */
 470         (getattrfunc)0,                         /* tp_getattr */
 471         (setattrfunc)0,                         /* tp_setattr */
 472         (cmpfunc)0,                             /* tp_compare */
 473         (reprfunc)0,                            /* tp_repr */
 474         0,                                      /* tp_as_number */
 475         0,                                      /* tp_as_sequence */
 476         0,                                      /* tp_as_mapping */
 477         (hashfunc)0,                            /* tp_hash */
 478         (ternaryfunc)0,                         /* tp_call */
 479         (reprfunc)0,                            /* tp_str */
 480         0,                                      /* tp_getattro */
 481         0,                                      /* tp_setattro */
 482         0,                                      /* tp_as_buffer */
 483         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
 484         Dialect_Type_doc,                       /* tp_doc */
 485         0,                                      /* tp_traverse */
 486         0,                                      /* tp_clear */
 487         0,                                      /* tp_richcompare */
 488         0,                                      /* tp_weaklistoffset */
 489         0,                                      /* tp_iter */
 490         0,                                      /* tp_iternext */
 491         0,                                      /* tp_methods */
 492         Dialect_memberlist,                     /* tp_members */
 493         Dialect_getsetlist,                     /* tp_getset */
 494         0,                                      /* tp_base */
 495         0,                                      /* tp_dict */
 496         0,                                      /* tp_descr_get */
 497         0,                                      /* tp_descr_set */
 498         0,                                      /* tp_dictoffset */
 499         0,                                      /* tp_init */
 500         0,                                      /* tp_alloc */
 501         dialect_new,                            /* tp_new */
 502         0,                                      /* tp_free */
 503 };
 504
 505 /*
 506  * Return an instance of the dialect type, given a Python instance or kwarg
 507  * description of the dialect
 508  */
 509 static PyObject *
 510 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
 511 {
 512         PyObject *ctor_args;
 513         PyObject *dialect;
 514
 515         ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
 516         if (ctor_args == NULL)
 517                 return NULL;
 518         dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
 519         Py_DECREF(ctor_args);
 520         return dialect;
 521 }
 522
 523 /*
 524  * READER
 525  */
 526 static int
 527 parse_save_field(ReaderObj *self)
 528 {
 529         PyObject *field;
 530
 531         field = PyBytes_FromStringAndSize(self->field, self->field_len);
 532         if (field == NULL)
 533                 return -1;
 534         self->field_len = 0;
 535         if (self->numeric_field) {
 536                 PyObject *tmp;
 537
 538                 self->numeric_field = 0;
 539                 tmp = PyNumber_Float(field);
 540                 if (tmp == NULL) {
 541                         Py_DECREF(field);
 542                         return -1;
 543                 }
 544                 Py_DECREF(field);
 545                 field = tmp;
 546         }
 547         PyList_Append(self->fields, field);
 548         Py_DECREF(field);
 549         return 0;
 550 }
 551
 552 static int
 553 parse_grow_buff(ReaderObj *self)
 554 {
 555         if (self->field_size == 0) {
 556                 self->field_size = 4096;
 557                 if (self->field != NULL)
 558                         PyMem_Free(self->field);
 559                 self->field = PyMem_Malloc(self->field_size);
 560         }
 561         else {
 562                 self->field_size *= 2;
 563                 self->field = PyMem_Realloc(self->field, self->field_size);
 564         }
 565         if (self->field == NULL) {
 566                 PyErr_NoMemory();
 567                 return 0;
 568         }
 569         return 1;
 570 }
 571
 572 static int
 573 parse_add_char(ReaderObj *self, char c)
 574 {
 575         if (self->field_len >= field_limit) {
 576                 PyErr_Format(error_obj, "field larger than field limit (%ld)",
 577                              field_limit);
 578                 return -1;
 579         }
 580         if (self->field_len == self->field_size && !parse_grow_buff(self))
 581                 return -1;
 582         self->field[self->field_len++] = c;
 583         return 0;
 584 }
 585
     /*
      * Feed one character to the reader's state machine.
      *
      * c is the next input character.  The value '\0' is not data: the
      * caller (Reader_iternext) passes 0 after the last character of each
      * input line, and the states below treat it as the end-of-line marker.
      * A record is complete when the state returns to START_RECORD.
      *
      * Completed fields are appended to self->fields via parse_save_field;
      * field characters are accumulated via parse_add_char.
      *
      * Returns 0 on success, -1 with an exception set on failure.
      */
 586 static int
 587 parse_process_char(ReaderObj *self, char c)
 588 {
 589         DialectObj *dialect = self->dialect;
 590
 591         switch (self->state) {
 592         case START_RECORD:
 593                 /* start of record */
 594                 if (c == '\0')
 595                         /* empty line - return [] */
 596                         break;
 597                 else if (c == '\n' || c == '\r') {
 598                         self->state = EAT_CRNL;
 599                         break;
 600                 }
 601                 /* normal character - handle as START_FIELD */
 602                 self->state = START_FIELD;
 603                 /* fallthru */
 604         case START_FIELD:
 605                 /* expecting field */
 606                 if (c == '\n' || c == '\r' || c == '\0') {
 607                         /* save empty field - return [fields] */
 608                         if (parse_save_field(self) < 0)
 609                                 return -1;
 610                         self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
 611                 }
 612                 else if (c == dialect->quotechar &&
 613                          dialect->quoting != QUOTE_NONE) {
 614                         /* start quoted field */
 615                         self->state = IN_QUOTED_FIELD;
 616                 }
 617                 else if (c == dialect->escapechar) {
 618                         /* possible escaped character */
 619                         self->state = ESCAPED_CHAR;
 620                 }
 621                 else if (c == ' ' && dialect->skipinitialspace)
 622                         /* ignore space at start of field */
 623                         ;
 624                 else if (c == dialect->delimiter) {
 625                         /* save empty field */
 626                         if (parse_save_field(self) < 0)
 627                                 return -1;
 628                 }
 629                 else {
 630                         /* begin new unquoted field */
                             /* under QUOTE_NONNUMERIC an unquoted field is
                              * flagged so parse_save_field converts it to a
                              * float */
 631                         if (dialect->quoting == QUOTE_NONNUMERIC)
 632                                 self->numeric_field = 1;
 633                         if (parse_add_char(self, c) < 0)
 634                                 return -1;
 635                         self->state = IN_FIELD;
 636                 }
 637                 break;
 638
 639         case ESCAPED_CHAR:
                     /* end of line right after the escape character: store a
                      * newline in the field */
 640                 if (c == '\0')
 641                         c = '\n';
 642                 if (parse_add_char(self, c) < 0)
 643                         return -1;
 644                 self->state = IN_FIELD;
 645                 break;
 646
 647         case IN_FIELD:
 648                 /* in unquoted field */
 649                 if (c == '\n' || c == '\r' || c == '\0') {
 650                         /* end of line - return [fields] */
 651                         if (parse_save_field(self) < 0)
 652                                 return -1;
 653                         self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
 654                 }
 655                 else if (c == dialect->escapechar) {
 656                         /* possible escaped character */
 657                         self->state = ESCAPED_CHAR;
 658                 }
 659                 else if (c == dialect->delimiter) {
 660                         /* save field - wait for new field */
 661                         if (parse_save_field(self) < 0)
 662                                 return -1;
 663                         self->state = START_FIELD;
 664                 }
 665                 else {
 666                         /* normal character - save in field */
 667                         if (parse_add_char(self, c) < 0)
 668                                 return -1;
 669                 }
 670                 break;
 671
 672         case IN_QUOTED_FIELD:
 673                 /* in quoted field */
                     /* end of line inside quotes: stay in this state, so the
                      * caller's loop fetches another line for this record */
 674                 if (c == '\0')
 675                         ;
 676                 else if (c == dialect->escapechar) {
 677                         /* Possible escape character */
 678                         self->state = ESCAPE_IN_QUOTED_FIELD;
 679                 }
 680                 else if (c == dialect->quotechar &&
 681                          dialect->quoting != QUOTE_NONE) {
 682                         if (dialect->doublequote) {
 683                                 /* doublequote; " represented by "" */
 684                                 self->state = QUOTE_IN_QUOTED_FIELD;
 685                         }
 686                         else {
 687                                 /* end of quote part of field */
 688                                 self->state = IN_FIELD;
 689                         }
 690                 }
 691                 else {
 692                         /* normal character - save in field */
 693                         if (parse_add_char(self, c) < 0)
 694                                 return -1;
 695                 }
 696                 break;
 697
 698         case ESCAPE_IN_QUOTED_FIELD:
                     /* same newline substitution as ESCAPED_CHAR, but return
                      * to the quoted-field state afterwards */
 699                 if (c == '\0')
 700                         c = '\n';
 701                 if (parse_add_char(self, c) < 0)
 702                         return -1;
 703                 self->state = IN_QUOTED_FIELD;
 704                 break;
 705
 706         case QUOTE_IN_QUOTED_FIELD:
 707                 /* doublequote - seen a quote in an quoted field */
 708                 if (dialect->quoting != QUOTE_NONE &&
 709                     c == dialect->quotechar) {
 710                         /* save "" as " */
 711                         if (parse_add_char(self, c) < 0)
 712                                 return -1;
 713                         self->state = IN_QUOTED_FIELD;
 714                 }
 715                 else if (c == dialect->delimiter) {
 716                         /* save field - wait for new field */
 717                         if (parse_save_field(self) < 0)
 718                                 return -1;
 719                         self->state = START_FIELD;
 720                 }
 721                 else if (c == '\n' || c == '\r' || c == '\0') {
 722                         /* end of line - return [fields] */
 723                         if (parse_save_field(self) < 0)
 724                                 return -1;
 725                         self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
 726                 }
 727                 else if (!dialect->strict) {
                             /* non-strict: keep the stray character and
                              * continue as an unquoted field */
 728                         if (parse_add_char(self, c) < 0)
 729                                 return -1;
 730                         self->state = IN_FIELD;
 731                 }
 732                 else {
 733                         /* illegal */
 734                         PyErr_Format(error_obj, "'%c' expected after '%c'",
 735                                         dialect->delimiter,
 736                                         dialect->quotechar);
 737                         return -1;
 738                 }
 739                 break;
 740
 741         case EAT_CRNL:
                     /* after a line terminator only further terminator
                      * characters or the end-of-line marker are accepted */
 742                 if (c == '\n' || c == '\r')
 743                         ;
 744                 else if (c == '\0')
 745                         self->state = START_RECORD;
 746                 else {
 747                         PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
 748                         return -1;
 749                 }
 750                 break;
 751
 752         }
 753         return 0;
 754 }
 755
 756 static int
 757 parse_reset(ReaderObj *self)
 758 {
 759         Py_XDECREF(self->fields);
 760         self->fields = PyList_New(0);
 761         if (self->fields == NULL)
 762                 return -1;
 763         self->field_len = 0;
 764         self->state = START_RECORD;
 765         self->numeric_field = 0;
 766         return 0;
 767 }
 768
 769 static PyObject *
 770 Reader_iternext(ReaderObj *self)
 771 {
 772         PyObject *lineobj;
 773         PyObject *fields = NULL;
 774         char *line, c;
 775         int linelen;
 776
 777         if (parse_reset(self) < 0)
 778                 return NULL;
 779         do {
 780                 lineobj = PyIter_Next(self->input_iter);
 781                 if (lineobj == NULL) {
 782                         /* End of input OR exception */
 783                         if (!PyErr_Occurred() && self->field_len != 0)
 784                                 PyErr_Format(error_obj,
 785                                              "newline inside string");
 786                         return NULL;
 787                 }
 788                 ++self->line_num;
 789
 790                 line = PyBytes_AsString(lineobj);
 791                 linelen = PyBytes_Size(lineobj);
 792
 793                 if (line == NULL || linelen < 0) {
 794                         Py_DECREF(lineobj);
 795                         return NULL;
 796                 }
 797                 while (linelen--) {
 798                         c = *line++;
 799                         if (c == '\0') {
 800                                 Py_DECREF(lineobj);
 801                                 PyErr_Format(error_obj,
 802                                              "line contains NULL byte");
 803                                 goto err;
 804                         }
 805                         if (parse_process_char(self, c) < 0) {
 806                                 Py_DECREF(lineobj);
 807                                 goto err;
 808                         }
 809                 }
 810                 Py_DECREF(lineobj);
 811                 if (parse_process_char(self, 0) < 0)
 812                         goto err;
 813         } while (self->state != START_RECORD);
 814
 815         fields = self->fields;
 816         self->fields = NULL;
 817 err:
 818         return fields;
 819 }
 820
 821 static void
 822 Reader_dealloc(ReaderObj *self)
 823 {
 824         PyObject_GC_UnTrack(self);
 825         Py_XDECREF(self->dialect);
 826         Py_XDECREF(self->input_iter);
 827         Py_XDECREF(self->fields);
 828         if (self->field != NULL)
 829                 PyMem_Free(self->field);
 830         PyObject_GC_Del(self);
 831 }
 832
 833 static int
 834 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
 835 {
 836         Py_VISIT(self->dialect);
 837         Py_VISIT(self->input_iter);
 838         Py_VISIT(self->fields);
 839         return 0;
 840 }
 841
 842 static int
 843 Reader_clear(ReaderObj *self)
 844 {
 845         Py_CLEAR(self->dialect);
 846         Py_CLEAR(self->input_iter);
 847         Py_CLEAR(self->fields);
 848         return 0;
 849 }
 850
 851 PyDoc_STRVAR(Reader_Type_doc,
 852 "CSV reader\n"
 853 "\n"
 854 "Reader objects are responsible for reading and parsing tabular data\n"
 855 "in CSV format.\n"
 856 );
 857
 858 static struct PyMethodDef Reader_methods[] = {
 859         { NULL, NULL }
 860 };
 861 #define R_OFF(x) offsetof(ReaderObj, x)
 862
 863 static struct PyMemberDef Reader_memberlist[] = {
 864         { "dialect", T_OBJECT, R_OFF(dialect), RO },
 865         { "line_num", T_ULONG, R_OFF(line_num), RO },
 866         { NULL }
 867 };
 868
 869
 870 static PyTypeObject Reader_Type = {
 871         PyVarObject_HEAD_INIT(NULL, 0)
 872         "_csv.reader",                          /*tp_name*/
 873         sizeof(ReaderObj),                      /*tp_basicsize*/
 874         0,                                      /*tp_itemsize*/
 875         /* methods */
 876         (destructor)Reader_dealloc,             /*tp_dealloc*/
 877         (printfunc)0,                           /*tp_print*/
 878         (getattrfunc)0,                         /*tp_getattr*/
 879         (setattrfunc)0,                         /*tp_setattr*/
 880         (cmpfunc)0,                             /*tp_compare*/
 881         (reprfunc)0,                            /*tp_repr*/
 882         0,                                      /*tp_as_number*/
 883         0,                                      /*tp_as_sequence*/
 884         0,                                      /*tp_as_mapping*/
 885         (hashfunc)0,                            /*tp_hash*/
 886         (ternaryfunc)0,                         /*tp_call*/
 887         (reprfunc)0,                            /*tp_str*/
 888         0,                                      /*tp_getattro*/
 889         0,                                      /*tp_setattro*/
 890         0,                                      /*tp_as_buffer*/
 891         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
 892                 Py_TPFLAGS_HAVE_GC,             /*tp_flags*/
 893         Reader_Type_doc,                        /*tp_doc*/
 894         (traverseproc)Reader_traverse,          /*tp_traverse*/
 895         (inquiry)Reader_clear,                  /*tp_clear*/
 896         0,                                      /*tp_richcompare*/
 897         0,                                      /*tp_weaklistoffset*/
 898         PyObject_SelfIter,                      /*tp_iter*/
 899         (getiterfunc)Reader_iternext,           /*tp_iternext*/
 900         Reader_methods,                         /*tp_methods*/
 901         Reader_memberlist,                      /*tp_members*/
 902         0,                                      /*tp_getset*/
 903
 904 };
 905
 906 static PyObject *
 907 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
 908 {
 909         PyObject * iterator, * dialect = NULL;
 910         ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
 911
 912         if (!self)
 913                 return NULL;
 914
 915         self->dialect = NULL;
 916         self->fields = NULL;
 917         self->input_iter = NULL;
 918         self->field = NULL;
 919         self->field_size = 0;
 920         self->line_num = 0;
 921
 922         if (parse_reset(self) < 0) {
 923                 Py_DECREF(self);
 924                 return NULL;
 925         }
 926
 927         if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
 928                 Py_DECREF(self);
 929                 return NULL;
 930         }
 931         self->input_iter = PyObject_GetIter(iterator);
 932         if (self->input_iter == NULL) {
 933                 PyErr_SetString(PyExc_TypeError,
 934                                 "argument 1 must be an iterator");
 935                 Py_DECREF(self);
 936                 return NULL;
 937         }
 938         self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
 939         if (self->dialect == NULL) {
 940                 Py_DECREF(self);
 941                 return NULL;
 942         }
 943
 944         PyObject_GC_Track(self);
 945         return (PyObject *)self;
 946 }
 947
 948 /*
 949  * WRITER
 950  */
 951 /* ---------------------------------------------------------------- */
 952 static void
 953 join_reset(WriterObj *self)
 954 {
 955         self->rec_len = 0;
 956         self->num_fields = 0;
 957 }
 958
 959 #define MEM_INCR 32768
 960
 961 /* Calculate new record length or append field to record.  Return new
 962  * record length.
 963  */
 964 static int
 965 join_append_data(WriterObj *self, char *field, int quote_empty,
 966                  int *quoted, int copy_phase)
 967 {
 968         DialectObj *dialect = self->dialect;
 969         int i, rec_len;
 970         char *lineterm;
 971
 972 #define ADDCH(c) \
 973         do {\
 974                 if (copy_phase) \
 975                         self->rec[rec_len] = c;\
 976                 rec_len++;\
 977         } while(0)
 978
 979         lineterm = PyBytes_AsString(dialect->lineterminator);
 980         if (lineterm == NULL)
 981                 return -1;
 982
 983         rec_len = self->rec_len;
 984
 985         /* If this is not the first field we need a field separator */
 986         if (self->num_fields > 0)
 987                 ADDCH(dialect->delimiter);
 988
 989         /* Handle preceding quote */
 990         if (copy_phase && *quoted)
 991                 ADDCH(dialect->quotechar);
 992
 993         /* Copy/count field data */
 994         for (i = 0;; i++) {
 995                 char c = field[i];
 996                 int want_escape = 0;
 997
 998                 if (c == '\0')
 999                         break;
1000
1001                 if (c == dialect->delimiter ||
1002                     c == dialect->escapechar ||
1003                     c == dialect->quotechar ||
1004                     strchr(lineterm, c)) {
1005                         if (dialect->quoting == QUOTE_NONE)
1006                                 want_escape = 1;
1007                         else {
1008                                 if (c == dialect->quotechar) {
1009                                         if (dialect->doublequote)
1010                                                 ADDCH(dialect->quotechar);
1011                                         else
1012                                                 want_escape = 1;
1013                                 }
1014                                 if (!want_escape)
1015                                         *quoted = 1;
1016                         }
1017                         if (want_escape) {
1018                                 if (!dialect->escapechar) {
1019                                         PyErr_Format(error_obj,
1020                                                      "need to escape, but no escapechar set");
1021                                         return -1;
1022                                 }
1023                                 ADDCH(dialect->escapechar);
1024                         }
1025                 }
1026                 /* Copy field character into record buffer.
1027                  */
1028                 ADDCH(c);
1029         }
1030
1031         /* If field is empty check if it needs to be quoted.
1032          */
1033         if (i == 0 && quote_empty) {
1034                 if (dialect->quoting == QUOTE_NONE) {
1035                         PyErr_Format(error_obj,
1036                                      "single empty field record must be quoted");
1037                         return -1;
1038                 }
1039                 else
1040                         *quoted = 1;
1041         }
1042
1043         if (*quoted) {
1044                 if (copy_phase)
1045                         ADDCH(dialect->quotechar);
1046                 else
1047                         rec_len += 2;
1048         }
1049         return rec_len;
1050 #undef ADDCH
1051 }
1052
1053 static int
1054 join_check_rec_size(WriterObj *self, int rec_len)
1055 {
1056         if (rec_len > self->rec_size) {
1057                 if (self->rec_size == 0) {
1058                         self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1059                         if (self->rec != NULL)
1060                                 PyMem_Free(self->rec);
1061                         self->rec = PyMem_Malloc(self->rec_size);
1062                 }
1063                 else {
1064                         char *old_rec = self->rec;
1065
1066                         self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1067                         self->rec = PyMem_Realloc(self->rec, self->rec_size);
1068                         if (self->rec == NULL)
1069                                 PyMem_Free(old_rec);
1070                 }
1071                 if (self->rec == NULL) {
1072                         PyErr_NoMemory();
1073                         return 0;
1074                 }
1075         }
1076         return 1;
1077 }
1078
1079 static int
1080 join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1081 {
1082         int rec_len;
1083
1084         rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1085         if (rec_len < 0)
1086                 return 0;
1087
1088         /* grow record buffer if necessary */
1089         if (!join_check_rec_size(self, rec_len))
1090                 return 0;
1091
1092         self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1093         self->num_fields++;
1094
1095         return 1;
1096 }
1097
1098 static int
1099 join_append_lineterminator(WriterObj *self)
1100 {
1101         int terminator_len;
1102         char *terminator;
1103
1104         terminator_len = PyBytes_Size(self->dialect->lineterminator);
1105         if (terminator_len == -1)
1106                 return 0;
1107
1108         /* grow record buffer if necessary */
1109         if (!join_check_rec_size(self, self->rec_len + terminator_len))
1110                 return 0;
1111
1112         terminator = PyBytes_AsString(self->dialect->lineterminator);
1113         if (terminator == NULL)
1114                 return 0;
1115         memmove(self->rec + self->rec_len, terminator, terminator_len);
1116         self->rec_len += terminator_len;
1117
1118         return 1;
1119 }
1120
1121 PyDoc_STRVAR(csv_writerow_doc,
1122 "writerow(sequence)\n"
1123 "\n"
1124 "Construct and write a CSV record from a sequence of fields.  Non-string\n"
1125 "elements will be converted to string.");
1126
1127 static PyObject *
1128 csv_writerow(WriterObj *self, PyObject *seq)
1129 {
1130         DialectObj *dialect = self->dialect;
1131         int len, i;
1132
1133         if (!PySequence_Check(seq))
1134                 return PyErr_Format(error_obj, "sequence expected");
1135
1136         len = PySequence_Length(seq);
1137         if (len < 0)
1138                 return NULL;
1139
1140         /* Join all fields in internal buffer.
1141          */
1142         join_reset(self);
1143         for (i = 0; i < len; i++) {
1144                 PyObject *field;
1145                 int append_ok;
1146                 int quoted;
1147
1148                 field = PySequence_GetItem(seq, i);
1149                 if (field == NULL)
1150                         return NULL;
1151
1152                 switch (dialect->quoting) {
1153                 case QUOTE_NONNUMERIC:
1154                         quoted = !PyNumber_Check(field);
1155                         break;
1156                 case QUOTE_ALL:
1157                         quoted = 1;
1158                         break;
1159                 default:
1160                         quoted = 0;
1161                         break;
1162                 }
1163
1164                 if (PyBytes_Check(field)) {
1165                         append_ok = join_append(self,
1166                                                 PyBytes_AS_STRING(field),
1167                                                 &quoted, len == 1);
1168                         Py_DECREF(field);
1169                 }
1170                 else if (field == Py_None) {
1171                         append_ok = join_append(self, "", &quoted, len == 1);
1172                         Py_DECREF(field);
1173                 }
1174                 else {
1175                         PyObject *str;
1176
1177                         str = PyObject_Str(field);
1178                         Py_DECREF(field);
1179                         if (str == NULL)
1180                                 return NULL;
1181
1182                         append_ok = join_append(self, PyBytes_AS_STRING(str),
1183                                                 &quoted, len == 1);
1184                         Py_DECREF(str);
1185                 }
1186                 if (!append_ok)
1187                         return NULL;
1188         }
1189
1190         /* Add line terminator.
1191          */
1192         if (!join_append_lineterminator(self))
1193                 return 0;
1194
1195         return PyObject_CallFunction(self->writeline,
1196                                      "(s#)", self->rec, self->rec_len);
1197 }
1198
1199 PyDoc_STRVAR(csv_writerows_doc,
1200 "writerows(sequence of sequences)\n"
1201 "\n"
1202 "Construct and write a series of sequences to a csv file.  Non-string\n"
1203 "elements will be converted to string.");
1204
1205 static PyObject *
1206 csv_writerows(WriterObj *self, PyObject *seqseq)
1207 {
1208         PyObject *row_iter, *row_obj, *result;
1209
1210         row_iter = PyObject_GetIter(seqseq);
1211         if (row_iter == NULL) {
1212                 PyErr_SetString(PyExc_TypeError,
1213                                 "writerows() argument must be iterable");
1214                 return NULL;
1215         }
1216         while ((row_obj = PyIter_Next(row_iter))) {
1217                 result = csv_writerow(self, row_obj);
1218                 Py_DECREF(row_obj);
1219                 if (!result) {
1220                         Py_DECREF(row_iter);
1221                         return NULL;
1222                 }
1223                 else
1224                      Py_DECREF(result);
1225         }
1226         Py_DECREF(row_iter);
1227         if (PyErr_Occurred())
1228                 return NULL;
1229         Py_INCREF(Py_None);
1230         return Py_None;
1231 }
1232
1233 static struct PyMethodDef Writer_methods[] = {
1234         { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1235         { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1236         { NULL, NULL }
1237 };
1238
1239 #define W_OFF(x) offsetof(WriterObj, x)
1240
1241 static struct PyMemberDef Writer_memberlist[] = {
1242         { "dialect", T_OBJECT, W_OFF(dialect), RO },
1243         { NULL }
1244 };
1245
1246 static void
1247 Writer_dealloc(WriterObj *self)
1248 {
1249         PyObject_GC_UnTrack(self);
1250         Py_XDECREF(self->dialect);
1251         Py_XDECREF(self->writeline);
1252         if (self->rec != NULL)
1253                 PyMem_Free(self->rec);
1254         PyObject_GC_Del(self);
1255 }
1256
1257 static int
1258 Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1259 {
1260         Py_VISIT(self->dialect);
1261         Py_VISIT(self->writeline);
1262         return 0;
1263 }
1264
1265 static int
1266 Writer_clear(WriterObj *self)
1267 {
1268         Py_CLEAR(self->dialect);
1269         Py_CLEAR(self->writeline);
1270         return 0;
1271 }
1272
1273 PyDoc_STRVAR(Writer_Type_doc,
1274 "CSV writer\n"
1275 "\n"
1276 "Writer objects are responsible for generating tabular data\n"
1277 "in CSV format from sequence input.\n"
1278 );
1279
1280 static PyTypeObject Writer_Type = {
1281         PyVarObject_HEAD_INIT(NULL, 0)
1282         "_csv.writer",                          /*tp_name*/
1283         sizeof(WriterObj),                      /*tp_basicsize*/
1284         0,                                      /*tp_itemsize*/
1285         /* methods */
1286         (destructor)Writer_dealloc,             /*tp_dealloc*/
1287         (printfunc)0,                           /*tp_print*/
1288         (getattrfunc)0,                         /*tp_getattr*/
1289         (setattrfunc)0,                         /*tp_setattr*/
1290         (cmpfunc)0,                             /*tp_compare*/
1291         (reprfunc)0,                            /*tp_repr*/
1292         0,                                      /*tp_as_number*/
1293         0,                                      /*tp_as_sequence*/
1294         0,                                      /*tp_as_mapping*/
1295         (hashfunc)0,                            /*tp_hash*/
1296         (ternaryfunc)0,                         /*tp_call*/
1297         (reprfunc)0,                            /*tp_str*/
1298         0,                                      /*tp_getattro*/
1299         0,                                      /*tp_setattro*/
1300         0,                                      /*tp_as_buffer*/
1301         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1302                 Py_TPFLAGS_HAVE_GC,             /*tp_flags*/
1303         Writer_Type_doc,
1304         (traverseproc)Writer_traverse,          /*tp_traverse*/
1305         (inquiry)Writer_clear,                  /*tp_clear*/
1306         0,                                      /*tp_richcompare*/
1307         0,                                      /*tp_weaklistoffset*/
1308         (getiterfunc)0,                         /*tp_iter*/
1309         (getiterfunc)0,                         /*tp_iternext*/
1310         Writer_methods,                         /*tp_methods*/
1311         Writer_memberlist,                      /*tp_members*/
1312         0,                                      /*tp_getset*/
1313 };
1314
1315 static PyObject *
1316 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1317 {
1318         PyObject * output_file, * dialect = NULL;
1319         WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1320
1321         if (!self)
1322                 return NULL;
1323
1324         self->dialect = NULL;
1325         self->writeline = NULL;
1326
1327         self->rec = NULL;
1328         self->rec_size = 0;
1329         self->rec_len = 0;
1330         self->num_fields = 0;
1331
1332         if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1333                 Py_DECREF(self);
1334                 return NULL;
1335         }
1336         self->writeline = PyObject_GetAttrString(output_file, "write");
1337         if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1338                 PyErr_SetString(PyExc_TypeError,
1339                                 "argument 1 must have a \"write\" method");
1340                 Py_DECREF(self);
1341                 return NULL;
1342         }
1343         self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1344         if (self->dialect == NULL) {
1345                 Py_DECREF(self);
1346                 return NULL;
1347         }
1348         PyObject_GC_Track(self);
1349         return (PyObject *)self;
1350 }
1351
1352 /*
1353  * DIALECT REGISTRY
1354  */
1355 static PyObject *
1356 csv_list_dialects(PyObject *module, PyObject *args)
1357 {
1358         return PyDict_Keys(dialects);
1359 }
1360
1361 static PyObject *
1362 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1363 {
1364         PyObject *name_obj, *dialect_obj = NULL;
1365         PyObject *dialect;
1366
1367         if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1368                 return NULL;
1369         if (!IS_BASESTRING(name_obj)) {
1370                 PyErr_SetString(PyExc_TypeError,
1371                                 "dialect name must be a string or unicode");
1372                 return NULL;
1373         }
1374         dialect = _call_dialect(dialect_obj, kwargs);
1375         if (dialect == NULL)
1376                 return NULL;
1377         if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1378                 Py_DECREF(dialect);
1379                 return NULL;
1380         }
1381         Py_DECREF(dialect);
1382         Py_INCREF(Py_None);
1383         return Py_None;
1384 }
1385
1386 static PyObject *
1387 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1388 {
1389         if (PyDict_DelItem(dialects, name_obj) < 0)
1390                 return PyErr_Format(error_obj, "unknown dialect");
1391         Py_INCREF(Py_None);
1392         return Py_None;
1393 }
1394
1395 static PyObject *
1396 csv_get_dialect(PyObject *module, PyObject *name_obj)
1397 {
1398         return get_dialect_from_registry(name_obj);
1399 }
1400
1401 static PyObject *
1402 csv_field_size_limit(PyObject *module, PyObject *args)
1403 {
1404         PyObject *new_limit = NULL;
1405         long old_limit = field_limit;
1406
1407         if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1408                 return NULL;
1409         if (new_limit != NULL) {
1410                 if (!PyInt_Check(new_limit)) {
1411                         PyErr_Format(PyExc_TypeError,
1412                                      "limit must be an integer");
1413                         return NULL;
1414                 }
1415                 field_limit = PyInt_AsLong(new_limit);
1416         }
1417         return PyInt_FromLong(old_limit);
1418 }
1419
1420 /*
1421  * MODULE
1422  */
1423
1424 PyDoc_STRVAR(csv_module_doc,
1425 "CSV parsing and writing.\n"
1426 "\n"
1427 "This module provides classes that assist in the reading and writing\n"
1428 "of Comma Separated Value (CSV) files, and implements the interface\n"
1429 "described by PEP 305.  Although many CSV files are simple to parse,\n"
1430 "the format is not formally defined by a stable specification and\n"
1431 "is subtle enough that parsing lines of a CSV file with something\n"
1432 "like line.split(\",\") is bound to fail.  The module supports three\n"
1433 "basic APIs: reading, writing, and registration of dialects.\n"
1434 "\n"
1435 "\n"
1436 "DIALECT REGISTRATION:\n"
1437 "\n"
1438 "Readers and writers support a dialect argument, which is a convenient\n"
1439 "handle on a group of settings.  When the dialect argument is a string,\n"
1440 "it identifies one of the dialects previously registered with the module.\n"
1441 "If it is a class or instance, the attributes of the argument are used as\n"
1442 "the settings for the reader or writer:\n"
1443 "\n"
1444 "    class excel:\n"
1445 "        delimiter = ','\n"
1446 "        quotechar = '\"'\n"
1447 "        escapechar = None\n"
1448 "        doublequote = True\n"
1449 "        skipinitialspace = False\n"
1450 "        lineterminator = '\\r\\n'\n"
1451 "        quoting = QUOTE_MINIMAL\n"
1452 "\n"
1453 "SETTINGS:\n"
1454 "\n"
1455 "    * quotechar - specifies a one-character string to use as the \n"
1456 "        quoting character.  It defaults to '\"'.\n"
1457 "    * delimiter - specifies a one-character string to use as the \n"
1458 "        field separator.  It defaults to ','.\n"
1459 "    * skipinitialspace - specifies how to interpret whitespace which\n"
1460 "        immediately follows a delimiter.  It defaults to False, which\n"
1461 "        means that whitespace immediately following a delimiter is part\n"
1462 "        of the following field.\n"
1463 "    * lineterminator -  specifies the character sequence which should \n"
1464 "        terminate rows.\n"
1465 "    * quoting - controls when quotes should be generated by the writer.\n"
1466 "        It can take on any of the following module constants:\n"
1467 "\n"
1468 "        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1469 "            field contains either the quotechar or the delimiter\n"
1470 "        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1471 "        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1472 "            fields which do not parse as integers or floating point\n"
1473 "            numbers.\n"
1474 "        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1475 "    * escapechar - specifies a one-character string used to escape \n"
1476 "        the delimiter when quoting is set to QUOTE_NONE.\n"
1477 "    * doublequote - controls the handling of quotes inside fields.  When\n"
1478 "        True, two consecutive quotes are interpreted as one during read,\n"
1479 "        and when writing, each quote character embedded in the data is\n"
1480 "        written as two quotes\n");
1481
1482 PyDoc_STRVAR(csv_reader_doc,
1483 "    csv_reader = reader(iterable [, dialect='excel']\n"
1484 "                        [optional keyword args])\n"
1485 "    for row in csv_reader:\n"
1486 "        process(row)\n"
1487 "\n"
1488 "The \"iterable\" argument can be any object that returns a line\n"
1489 "of input for each iteration, such as a file object or a list.  The\n"
1490 "optional \"dialect\" parameter is discussed below.  The function\n"
1491 "also accepts optional keyword arguments which override settings\n"
1492 "provided by the dialect.\n"
1493 "\n"
1494 "The returned object is an iterator.  Each iteration returns a row\n"
1495 "of the CSV file (which can span multiple input lines):\n");
1496
1497 PyDoc_STRVAR(csv_writer_doc,
1498 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1499 "                            [optional keyword args])\n"
1500 "    for row in sequence:\n"
1501 "        csv_writer.writerow(row)\n"
1502 "\n"
1503 "    [or]\n"
1504 "\n"
1505 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1506 "                            [optional keyword args])\n"
1507 "    csv_writer.writerows(rows)\n"
1508 "\n"
1509 "The \"fileobj\" argument can be any object that supports the file API.\n");
1510
1511 PyDoc_STRVAR(csv_list_dialects_doc,
1512 "Return a list of all know dialect names.\n"
1513 "    names = csv.list_dialects()");
1514
1515 PyDoc_STRVAR(csv_get_dialect_doc,
1516 "Return the dialect instance associated with name.\n"
1517 "    dialect = csv.get_dialect(name)");
1518
1519 PyDoc_STRVAR(csv_register_dialect_doc,
1520 "Create a mapping from a string name to a dialect class.\n"
1521 "    dialect = csv.register_dialect(name, dialect)");
1522
1523 PyDoc_STRVAR(csv_unregister_dialect_doc,
1524 "Delete the name/dialect mapping associated with a string name.\n"
1525 "    csv.unregister_dialect(name)");
1526
1527 PyDoc_STRVAR(csv_field_size_limit_doc,
1528 "Sets an upper limit on parsed fields.\n"
1529 "    csv.field_size_limit([limit])\n"
1530 "\n"
1531 "Returns old limit. If limit is not given, no new limit is set and\n"
1532 "the old limit is returned");
1533
1534 static struct PyMethodDef csv_methods[] = {
1535         { "reader", (PyCFunction)csv_reader,
1536                 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1537         { "writer", (PyCFunction)csv_writer,
1538                 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1539         { "list_dialects", (PyCFunction)csv_list_dialects,
1540                 METH_NOARGS, csv_list_dialects_doc},
1541         { "register_dialect", (PyCFunction)csv_register_dialect,
1542                 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1543         { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1544                 METH_O, csv_unregister_dialect_doc},
1545         { "get_dialect", (PyCFunction)csv_get_dialect,
1546                 METH_O, csv_get_dialect_doc},
1547         { "field_size_limit", (PyCFunction)csv_field_size_limit,
1548                 METH_VARARGS, csv_field_size_limit_doc},
1549         { NULL, NULL }
1550 };
1551
1552 PyMODINIT_FUNC
1553 init_csv(void)
1554 {
1555         PyObject *module;
1556         StyleDesc *style;
1557
1558         if (PyType_Ready(&Dialect_Type) < 0)
1559                 return;
1560
1561         if (PyType_Ready(&Reader_Type) < 0)
1562                 return;
1563
1564         if (PyType_Ready(&Writer_Type) < 0)
1565                 return;
1566
1567         /* Create the module and add the functions */
1568         module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1569         if (module == NULL)
1570                 return;
1571
1572         /* Add version to the module. */
1573         if (PyModule_AddStringConstant(module, "__version__",
1574                                        MODULE_VERSION) == -1)
1575                 return;
1576
1577         /* Add _dialects dictionary */
1578         dialects = PyDict_New();
1579         if (dialects == NULL)
1580                 return;
1581         if (PyModule_AddObject(module, "_dialects", dialects))
1582                 return;
1583
1584         /* Add quote styles into dictionary */
1585         for (style = quote_styles; style->name; style++) {
1586                 if (PyModule_AddIntConstant(module, style->name,
1587                                             style->style) == -1)
1588                         return;
1589         }
1590
1591         /* Add the Dialect type */
1592         Py_INCREF(&Dialect_Type);
1593         if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1594                 return;
1595
1596         /* Add the CSV exception object to the module. */
1597         error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1598         if (error_obj == NULL)
1599                 return;
1600         PyModule_AddObject(module, "Error", error_obj);
1601 }