Modules/_csv.c

   1 /* csv module */
   2
   3 /*
   4
   5 This module provides the low-level underpinnings of a CSV reading/writing
   6 module.  Users should not use this module directly, but import the csv.py
   7 module instead.
   8
   9 **** For people modifying this code, please note that as of this writing
  10 **** (2003-03-23), it is intended that this code should work with Python
  11 **** 2.2.
  12
  13 */
  14
  15 #define MODULE_VERSION "1.0"
  16
  17 #include "Python.h"
  18 #include "structmember.h"
  19
  20
  21 /* begin 2.2 compatibility macros */
  22 #ifndef PyDoc_STRVAR
  23 /* Define macros for inline documentation. */
  24 #define PyDoc_VAR(name) static char name[]
  25 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
  26 #ifdef WITH_DOC_STRINGS
  27 #define PyDoc_STR(str) str
  28 #else
  29 #define PyDoc_STR(str) ""
  30 #endif
  31 #endif /* ifndef PyDoc_STRVAR */
  32
  33 #ifndef PyMODINIT_FUNC
  34 #       if defined(__cplusplus)
  35 #               define PyMODINIT_FUNC extern "C" void
  36 #       else /* __cplusplus */
  37 #               define PyMODINIT_FUNC void
  38 #       endif /* __cplusplus */
  39 #endif
  40
  41 #ifndef Py_CLEAR
  42 #define Py_CLEAR(op)                                            \
  43         do {                                                    \
  44                 if (op) {                                       \
  45                         PyObject *tmp = (PyObject *)(op);       \
  46                         (op) = NULL;                            \
  47                         Py_DECREF(tmp);                         \
  48                 }                                               \
  49         } while (0)
  50 #endif
  51 #ifndef Py_VISIT
  52 #define Py_VISIT(op)                                                    \
  53         do {                                                            \
  54                 if (op) {                                               \
  55                         int vret = visit((PyObject *)(op), arg);        \
  56                         if (vret)                                       \
  57                                 return vret;                            \
  58                 }                                                       \
  59         } while (0)
  60 #endif
  61
  62 /* end 2.2 compatibility macros */
  63
  64 #define IS_BASESTRING(o) \
  65         PyObject_TypeCheck(o, &PyBaseString_Type)
  66
  67 static PyObject *error_obj;     /* CSV exception */
  68 static PyObject *dialects;      /* Dialect registry */
  69 static long field_limit = 128 * 1024;   /* max parsed field size */
  70
  71 typedef enum {
  72         START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
  73         IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
  74         EAT_CRNL
  75 } ParserState;
  76
  77 typedef enum {
  78         QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
  79 } QuoteStyle;
  80
  81 typedef struct {
  82         QuoteStyle style;
  83         char *name;
  84 } StyleDesc;
  85
  86 static StyleDesc quote_styles[] = {
  87         { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
  88         { QUOTE_ALL,        "QUOTE_ALL" },
  89         { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
  90         { QUOTE_NONE,       "QUOTE_NONE" },
  91         { 0 }
  92 };
  93
  94 typedef struct {
  95         PyObject_HEAD
  96
  97         int doublequote;        /* is " represented by ""? */
  98         char delimiter;         /* field separator */
  99         char quotechar;         /* quote character */
 100         char escapechar;        /* escape character */
 101         int skipinitialspace;   /* ignore spaces following delimiter? */
 102         PyObject *lineterminator; /* string to write between records */
 103         int quoting;            /* style of quoting to write */
 104
 105         int strict;             /* raise exception on bad CSV */
 106 } DialectObj;
 107
 108 staticforward PyTypeObject Dialect_Type;
 109
 110 typedef struct {
 111         PyObject_HEAD
 112
 113         PyObject *input_iter;   /* iterate over this for input lines */
 114
 115         DialectObj *dialect;    /* parsing dialect */
 116
 117         PyObject *fields;       /* field list for current record */
 118         ParserState state;      /* current CSV parse state */
 119         char *field;            /* build current field in here */
 120         int field_size;         /* size of allocated buffer */
 121         int field_len;          /* length of current field */
 122         int numeric_field;      /* treat field as numeric */
 123         unsigned long line_num; /* Source-file line number */
 124 } ReaderObj;
 125
 126 staticforward PyTypeObject Reader_Type;
 127
 128 #define ReaderObject_Check(v)   ((v)->ob_type == &Reader_Type)
 129
 130 typedef struct {
 131         PyObject_HEAD
 132
 133         PyObject *writeline;    /* write output lines to this file */
 134
 135         DialectObj *dialect;    /* parsing dialect */
 136
 137         char *rec;              /* buffer for parser.join */
 138         int rec_size;           /* size of allocated record */
 139         int rec_len;            /* length of record */
 140         int num_fields;         /* number of fields in record */
 141 } WriterObj;
 142
 143 staticforward PyTypeObject Writer_Type;
 144
 145 /*
 146  * DIALECT class
 147  */
 148
 149 static PyObject *
 150 get_dialect_from_registry(PyObject * name_obj)
 151 {
 152         PyObject *dialect_obj;
 153
 154         dialect_obj = PyDict_GetItem(dialects, name_obj);
 155         if (dialect_obj == NULL) {
 156                 if (!PyErr_Occurred())
 157                         PyErr_Format(error_obj, "unknown dialect");
 158         }
 159         else
 160                 Py_INCREF(dialect_obj);
 161         return dialect_obj;
 162 }
 163
 164 static PyObject *
 165 get_string(PyObject *str)
 166 {
 167         Py_XINCREF(str);
 168         return str;
 169 }
 170
 171 static PyObject *
 172 get_nullchar_as_None(char c)
 173 {
 174         if (c == '\0') {
 175                 Py_INCREF(Py_None);
 176                 return Py_None;
 177         }
 178         else
 179                 return PyString_FromStringAndSize((char*)&c, 1);
 180 }
 181
 182 static PyObject *
 183 Dialect_get_lineterminator(DialectObj *self)
 184 {
 185         return get_string(self->lineterminator);
 186 }
 187
 188 static PyObject *
 189 Dialect_get_escapechar(DialectObj *self)
 190 {
 191         return get_nullchar_as_None(self->escapechar);
 192 }
 193
 194 static PyObject *
 195 Dialect_get_quotechar(DialectObj *self)
 196 {
 197         return get_nullchar_as_None(self->quotechar);
 198 }
 199
 200 static PyObject *
 201 Dialect_get_quoting(DialectObj *self)
 202 {
 203         return PyInt_FromLong(self->quoting);
 204 }
 205
 206 static int
 207 _set_bool(const char *name, int *target, PyObject *src, int dflt)
 208 {
 209         if (src == NULL)
 210                 *target = dflt;
 211         else
 212                 *target = PyObject_IsTrue(src);
 213         return 0;
 214 }
 215
 216 static int
 217 _set_int(const char *name, int *target, PyObject *src, int dflt)
 218 {
 219         if (src == NULL)
 220                 *target = dflt;
 221         else {
 222                 if (!PyInt_Check(src)) {
 223                         PyErr_Format(PyExc_TypeError,
 224                                      "\"%s\" must be an integer", name);
 225                         return -1;
 226                 }
 227                 *target = PyInt_AsLong(src);
 228         }
 229         return 0;
 230 }
 231
 232 static int
 233 _set_char(const char *name, char *target, PyObject *src, char dflt)
 234 {
 235         if (src == NULL)
 236                 *target = dflt;
 237         else {
 238                 if (src == Py_None || PyString_Size(src) == 0)
 239                         *target = '\0';
 240                 else if (!PyString_Check(src) || PyString_Size(src) != 1) {
 241                         PyErr_Format(PyExc_TypeError,
 242                                      "\"%s\" must be an 1-character string",
 243                                      name);
 244                         return -1;
 245                 }
 246                 else {
 247                         char *s = PyString_AsString(src);
 248                         if (s == NULL)
 249                                 return -1;
 250                         *target = s[0];
 251                 }
 252         }
 253         return 0;
 254 }
 255
 256 static int
 257 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
 258 {
 259         if (src == NULL)
 260                 *target = PyString_FromString(dflt);
 261         else {
 262                 if (src == Py_None)
 263                         *target = NULL;
 264                 else if (!IS_BASESTRING(src)) {
 265                         PyErr_Format(PyExc_TypeError,
 266                                      "\"%s\" must be an string", name);
 267                         return -1;
 268                 }
 269                 else {
 270                         Py_XDECREF(*target);
 271                         Py_INCREF(src);
 272                         *target = src;
 273                 }
 274         }
 275         return 0;
 276 }
 277
 278 static int
 279 dialect_check_quoting(int quoting)
 280 {
 281         StyleDesc *qs = quote_styles;
 282
 283         for (qs = quote_styles; qs->name; qs++) {
 284                 if (qs->style == quoting)
 285                         return 0;
 286         }
 287         PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
 288         return -1;
 289 }
 290
 291 #define D_OFF(x) offsetof(DialectObj, x)
 292
 293 static struct PyMemberDef Dialect_memberlist[] = {
 294         { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY },
 295         { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
 296         { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
 297         { "strict",             T_INT, D_OFF(strict), READONLY },
 298         { NULL }
 299 };
 300
 301 static PyGetSetDef Dialect_getsetlist[] = {
 302         { "escapechar",         (getter)Dialect_get_escapechar},
 303         { "lineterminator",     (getter)Dialect_get_lineterminator},
 304         { "quotechar",          (getter)Dialect_get_quotechar},
 305         { "quoting",            (getter)Dialect_get_quoting},
 306         {NULL},
 307 };
 308
 309 static void
 310 Dialect_dealloc(DialectObj *self)
 311 {
 312         Py_XDECREF(self->lineterminator);
 313         self->ob_type->tp_free((PyObject *)self);
 314 }
 315
 316 static char *dialect_kws[] = {
 317         "dialect",
 318         "delimiter",
 319         "doublequote",
 320         "escapechar",
 321         "lineterminator",
 322         "quotechar",
 323         "quoting",
 324         "skipinitialspace",
 325         "strict",
 326         NULL
 327 };
 328
 329 static PyObject *
 330 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
 331 {
 332         DialectObj *self;
 333         PyObject *ret = NULL;
 334         PyObject *dialect = NULL;
 335         PyObject *delimiter = NULL;
 336         PyObject *doublequote = NULL;
 337         PyObject *escapechar = NULL;
 338         PyObject *lineterminator = NULL;
 339         PyObject *quotechar = NULL;
 340         PyObject *quoting = NULL;
 341         PyObject *skipinitialspace = NULL;
 342         PyObject *strict = NULL;
 343
 344         if (!PyArg_ParseTupleAndKeywords(args, kwargs,
 345                                          "|OOOOOOOOO", dialect_kws,
 346                                          &dialect,
 347                                          &delimiter,
 348                                          &doublequote,
 349                                          &escapechar,
 350                                          &lineterminator,
 351                                          &quotechar,
 352                                          &quoting,
 353                                          &skipinitialspace,
 354                                          &strict))
 355                 return NULL;
 356
 357         if (dialect != NULL) {
 358                 if (IS_BASESTRING(dialect)) {
 359                         dialect = get_dialect_from_registry(dialect);
 360                         if (dialect == NULL)
 361                                 return NULL;
 362                 }
 363                 else
 364                         Py_INCREF(dialect);
 365                 /* Can we reuse this instance? */
 366                 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
 367                     delimiter == 0 &&
 368                     doublequote == 0 &&
 369                     escapechar == 0 &&
 370                     lineterminator == 0 &&
 371                     quotechar == 0 &&
 372                     quoting == 0 &&
 373                     skipinitialspace == 0 &&
 374                     strict == 0)
 375                         return dialect;
 376         }
 377
 378         self = (DialectObj *)type->tp_alloc(type, 0);
 379         if (self == NULL) {
 380                 Py_XDECREF(dialect);
 381                 return NULL;
 382         }
 383         self->lineterminator = NULL;
 384
 385         Py_XINCREF(delimiter);
 386         Py_XINCREF(doublequote);
 387         Py_XINCREF(escapechar);
 388         Py_XINCREF(lineterminator);
 389         Py_XINCREF(quotechar);
 390         Py_XINCREF(quoting);
 391         Py_XINCREF(skipinitialspace);
 392         Py_XINCREF(strict);
 393         if (dialect != NULL) {
 394 #define DIALECT_GETATTR(v, n) \
 395                 if (v == NULL) \
 396                         v = PyObject_GetAttrString(dialect, n)
 397                 DIALECT_GETATTR(delimiter, "delimiter");
 398                 DIALECT_GETATTR(doublequote, "doublequote");
 399                 DIALECT_GETATTR(escapechar, "escapechar");
 400                 DIALECT_GETATTR(lineterminator, "lineterminator");
 401                 DIALECT_GETATTR(quotechar, "quotechar");
 402                 DIALECT_GETATTR(quoting, "quoting");
 403                 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
 404                 DIALECT_GETATTR(strict, "strict");
 405                 PyErr_Clear();
 406         }
 407
 408         /* check types and convert to C values */
 409 #define DIASET(meth, name, target, src, dflt) \
 410         if (meth(name, target, src, dflt)) \
 411                 goto err
 412         DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
 413         DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
 414         DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
 415         DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
 416         DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
 417         DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
 418         DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
 419         DIASET(_set_bool, "strict", &self->strict, strict, 0);
 420
 421         /* validate options */
 422         if (dialect_check_quoting(self->quoting))
 423                 goto err;
 424         if (self->delimiter == 0) {
 425                 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
 426                 goto err;
 427         }
 428         if (quotechar == Py_None && quoting == NULL)
 429                 self->quoting = QUOTE_NONE;
 430         if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
 431                 PyErr_SetString(PyExc_TypeError,
 432                                 "quotechar must be set if quoting enabled");
 433                 goto err;
 434         }
 435         if (self->lineterminator == 0) {
 436                 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
 437                 goto err;
 438         }
 439
 440         ret = (PyObject *)self;
 441         Py_INCREF(self);
 442 err:
 443         Py_XDECREF(self);
 444         Py_XDECREF(dialect);
 445         Py_XDECREF(delimiter);
 446         Py_XDECREF(doublequote);
 447         Py_XDECREF(escapechar);
 448         Py_XDECREF(lineterminator);
 449         Py_XDECREF(quotechar);
 450         Py_XDECREF(quoting);
 451         Py_XDECREF(skipinitialspace);
 452         Py_XDECREF(strict);
 453         return ret;
 454 }
 455
 456
 457 PyDoc_STRVAR(Dialect_Type_doc,
 458 "CSV dialect\n"
 459 "\n"
 460 "The Dialect type records CSV parsing and generation options.\n");
 461
 462 static PyTypeObject Dialect_Type = {
 463         PyObject_HEAD_INIT(NULL)
 464         0,                                      /* ob_size */
 465         "_csv.Dialect",                         /* tp_name */
 466         sizeof(DialectObj),                     /* tp_basicsize */
 467         0,                                      /* tp_itemsize */
 468         /*  methods  */
 469         (destructor)Dialect_dealloc,            /* tp_dealloc */
 470         (printfunc)0,                           /* tp_print */
 471         (getattrfunc)0,                         /* tp_getattr */
 472         (setattrfunc)0,                         /* tp_setattr */
 473         (cmpfunc)0,                             /* tp_compare */
 474         (reprfunc)0,                            /* tp_repr */
 475         0,                                      /* tp_as_number */
 476         0,                                      /* tp_as_sequence */
 477         0,                                      /* tp_as_mapping */
 478         (hashfunc)0,                            /* tp_hash */
 479         (ternaryfunc)0,                         /* tp_call */
 480         (reprfunc)0,                            /* tp_str */
 481         0,                                      /* tp_getattro */
 482         0,                                      /* tp_setattro */
 483         0,                                      /* tp_as_buffer */
 484         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
 485         Dialect_Type_doc,                       /* tp_doc */
 486         0,                                      /* tp_traverse */
 487         0,                                      /* tp_clear */
 488         0,                                      /* tp_richcompare */
 489         0,                                      /* tp_weaklistoffset */
 490         0,                                      /* tp_iter */
 491         0,                                      /* tp_iternext */
 492         0,                                      /* tp_methods */
 493         Dialect_memberlist,                     /* tp_members */
 494         Dialect_getsetlist,                     /* tp_getset */
 495         0,                                      /* tp_base */
 496         0,                                      /* tp_dict */
 497         0,                                      /* tp_descr_get */
 498         0,                                      /* tp_descr_set */
 499         0,                                      /* tp_dictoffset */
 500         0,                                      /* tp_init */
 501         0,                                      /* tp_alloc */
 502         dialect_new,                            /* tp_new */
 503         0,                                      /* tp_free */
 504 };
 505
 506 /*
 507  * Return an instance of the dialect type, given a Python instance or kwarg
 508  * description of the dialect
 509  */
 510 static PyObject *
 511 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
 512 {
 513         PyObject *ctor_args;
 514         PyObject *dialect;
 515
 516         ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
 517         if (ctor_args == NULL)
 518                 return NULL;
 519         dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
 520         Py_DECREF(ctor_args);
 521         return dialect;
 522 }
 523
 524 /*
 525  * READER
 526  */
 527 static int
 528 parse_save_field(ReaderObj *self)
 529 {
 530         PyObject *field;
 531
 532         field = PyString_FromStringAndSize(self->field, self->field_len);
 533         if (field == NULL)
 534                 return -1;
 535         self->field_len = 0;
 536         if (self->numeric_field) {
 537                 PyObject *tmp;
 538
 539                 self->numeric_field = 0;
 540                 tmp = PyNumber_Float(field);
 541                 if (tmp == NULL) {
 542                         Py_DECREF(field);
 543                         return -1;
 544                 }
 545                 Py_DECREF(field);
 546                 field = tmp;
 547         }
 548         PyList_Append(self->fields, field);
 549         Py_DECREF(field);
 550         return 0;
 551 }
 552
 553 static int
 554 parse_grow_buff(ReaderObj *self)
 555 {
 556         if (self->field_size == 0) {
 557                 self->field_size = 4096;
 558                 if (self->field != NULL)
 559                         PyMem_Free(self->field);
 560                 self->field = PyMem_Malloc(self->field_size);
 561         }
 562         else {
 563                 self->field_size *= 2;
 564                 self->field = PyMem_Realloc(self->field, self->field_size);
 565         }
 566         if (self->field == NULL) {
 567                 PyErr_NoMemory();
 568                 return 0;
 569         }
 570         return 1;
 571 }
 572
 573 static int
 574 parse_add_char(ReaderObj *self, char c)
 575 {
 576         if (self->field_len >= field_limit) {
 577                 PyErr_Format(error_obj, "field larger than field limit (%ld)",
 578                              field_limit);
 579                 return -1;
 580         }
 581         if (self->field_len == self->field_size && !parse_grow_buff(self))
 582                 return -1;
 583         self->field[self->field_len++] = c;
 584         return 0;
 585 }
 586
 587 static int
 588 parse_process_char(ReaderObj *self, char c)
 589 {
 590         DialectObj *dialect = self->dialect;
 591
 592         switch (self->state) {
 593         case START_RECORD:
 594                 /* start of record */
 595                 if (c == '\0')
 596                         /* empty line - return [] */
 597                         break;
 598                 else if (c == '\n' || c == '\r') {
 599                         self->state = EAT_CRNL;
 600                         break;
 601                 }
 602                 /* normal character - handle as START_FIELD */
 603                 self->state = START_FIELD;
 604                 /* fallthru */
 605         case START_FIELD:
 606                 /* expecting field */
 607                 if (c == '\n' || c == '\r' || c == '\0') {
 608                         /* save empty field - return [fields] */
 609                         if (parse_save_field(self) < 0)
 610                                 return -1;
 611                         self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
 612                 }
 613                 else if (c == dialect->quotechar &&
 614                          dialect->quoting != QUOTE_NONE) {
 615                         /* start quoted field */
 616                         self->state = IN_QUOTED_FIELD;
 617                 }
 618                 else if (c == dialect->escapechar) {
 619                         /* possible escaped character */
 620                         self->state = ESCAPED_CHAR;
 621                 }
 622                 else if (c == ' ' && dialect->skipinitialspace)
 623                         /* ignore space at start of field */
 624                         ;
 625                 else if (c == dialect->delimiter) {
 626                         /* save empty field */
 627                         if (parse_save_field(self) < 0)
 628                                 return -1;
 629                 }
 630                 else {
 631                         /* begin new unquoted field */
 632                         if (dialect->quoting == QUOTE_NONNUMERIC)
 633                                 self->numeric_field = 1;
 634                         if (parse_add_char(self, c) < 0)
 635                                 return -1;
 636                         self->state = IN_FIELD;
 637                 }
 638                 break;
 639
 640         case ESCAPED_CHAR:
 641                 if (c == '\0')
 642                         c = '\n';
 643                 if (parse_add_char(self, c) < 0)
 644                         return -1;
 645                 self->state = IN_FIELD;
 646                 break;
 647
 648         case IN_FIELD:
 649                 /* in unquoted field */
 650                 if (c == '\n' || c == '\r' || c == '\0') {
 651                         /* end of line - return [fields] */
 652                         if (parse_save_field(self) < 0)
 653                                 return -1;
 654                         self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
 655                 }
 656                 else if (c == dialect->escapechar) {
 657                         /* possible escaped character */
 658                         self->state = ESCAPED_CHAR;
 659                 }
 660                 else if (c == dialect->delimiter) {
 661                         /* save field - wait for new field */
 662                         if (parse_save_field(self) < 0)
 663                                 return -1;
 664                         self->state = START_FIELD;
 665                 }
 666                 else {
 667                         /* normal character - save in field */
 668                         if (parse_add_char(self, c) < 0)
 669                                 return -1;
 670                 }
 671                 break;
 672
 673         case IN_QUOTED_FIELD:
 674                 /* in quoted field */
 675                 if (c == '\0')
 676                         ;
 677                 else if (c == dialect->escapechar) {
 678                         /* Possible escape character */
 679                         self->state = ESCAPE_IN_QUOTED_FIELD;
 680                 }
 681                 else if (c == dialect->quotechar &&
 682                          dialect->quoting != QUOTE_NONE) {
 683                         if (dialect->doublequote) {
 684                                 /* doublequote; " represented by "" */
 685                                 self->state = QUOTE_IN_QUOTED_FIELD;
 686                         }
 687                         else {
 688                                 /* end of quote part of field */
 689                                 self->state = IN_FIELD;
 690                         }
 691                 }
 692                 else {
 693                         /* normal character - save in field */
 694                         if (parse_add_char(self, c) < 0)
 695                                 return -1;
 696                 }
 697                 break;
 698
 699         case ESCAPE_IN_QUOTED_FIELD:
 700                 if (c == '\0')
 701                         c = '\n';
 702                 if (parse_add_char(self, c) < 0)
 703                         return -1;
 704                 self->state = IN_QUOTED_FIELD;
 705                 break;
 706
 707         case QUOTE_IN_QUOTED_FIELD:
 708                 /* doublequote - seen a quote in an quoted field */
 709                 if (dialect->quoting != QUOTE_NONE &&
 710                     c == dialect->quotechar) {
 711                         /* save "" as " */
 712                         if (parse_add_char(self, c) < 0)
 713                                 return -1;
 714                         self->state = IN_QUOTED_FIELD;
 715                 }
 716                 else if (c == dialect->delimiter) {
 717                         /* save field - wait for new field */
 718                         if (parse_save_field(self) < 0)
 719                                 return -1;
 720                         self->state = START_FIELD;
 721                 }
 722                 else if (c == '\n' || c == '\r' || c == '\0') {
 723                         /* end of line - return [fields] */
 724                         if (parse_save_field(self) < 0)
 725                                 return -1;
 726                         self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
 727                 }
 728                 else if (!dialect->strict) {
 729                         if (parse_add_char(self, c) < 0)
 730                                 return -1;
 731                         self->state = IN_FIELD;
 732                 }
 733                 else {
 734                         /* illegal */
 735                         PyErr_Format(error_obj, "'%c' expected after '%c'",
 736                                         dialect->delimiter,
 737                                         dialect->quotechar);
 738                         return -1;
 739                 }
 740                 break;
 741
 742         case EAT_CRNL:
 743                 if (c == '\n' || c == '\r')
 744                         ;
 745                 else if (c == '\0')
 746                         self->state = START_RECORD;
 747                 else {
 748                         PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
 749                         return -1;
 750                 }
 751                 break;
 752
 753         }
 754         return 0;
 755 }
 756
 757 static int
 758 parse_reset(ReaderObj *self)
 759 {
 760         Py_XDECREF(self->fields);
 761         self->fields = PyList_New(0);
 762         if (self->fields == NULL)
 763                 return -1;
 764         self->field_len = 0;
 765         self->state = START_RECORD;
 766         self->numeric_field = 0;
 767         return 0;
 768 }
 769
 770 static PyObject *
 771 Reader_iternext(ReaderObj *self)
 772 {
 773         PyObject *lineobj;
 774         PyObject *fields = NULL;
 775         char *line, c;
 776         int linelen;
 777
 778         if (parse_reset(self) < 0)
 779                 return NULL;
 780         do {
 781                 lineobj = PyIter_Next(self->input_iter);
 782                 if (lineobj == NULL) {
 783                         /* End of input OR exception */
 784                         if (!PyErr_Occurred() && self->field_len != 0)
 785                                 PyErr_Format(error_obj,
 786                                              "newline inside string");
 787                         return NULL;
 788                 }
 789                 ++self->line_num;
 790
 791                 line = PyString_AsString(lineobj);
 792                 linelen = PyString_Size(lineobj);
 793
 794                 if (line == NULL || linelen < 0) {
 795                         Py_DECREF(lineobj);
 796                         return NULL;
 797                 }
 798                 while (linelen--) {
 799                         c = *line++;
 800                         if (c == '\0') {
 801                                 Py_DECREF(lineobj);
 802                                 PyErr_Format(error_obj,
 803                                              "line contains NULL byte");
 804                                 goto err;
 805                         }
 806                         if (parse_process_char(self, c) < 0) {
 807                                 Py_DECREF(lineobj);
 808                                 goto err;
 809                         }
 810                 }
 811                 Py_DECREF(lineobj);
 812                 if (parse_process_char(self, 0) < 0)
 813                         goto err;
 814         } while (self->state != START_RECORD);
 815
 816         fields = self->fields;
 817         self->fields = NULL;
 818 err:
 819         return fields;
 820 }
 821
 822 static void
 823 Reader_dealloc(ReaderObj *self)
 824 {
 825         PyObject_GC_UnTrack(self);
 826         Py_XDECREF(self->dialect);
 827         Py_XDECREF(self->input_iter);
 828         Py_XDECREF(self->fields);
 829         if (self->field != NULL)
 830                 PyMem_Free(self->field);
 831         PyObject_GC_Del(self);
 832 }
 833
/* tp_traverse slot of Reader_Type: hand every object reference held by
 * the reader (dialect, input iterator, pending field list) to the
 * garbage collector's visit callback.  Returns 0 once all members have
 * been visited.
 */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
        Py_VISIT(self->dialect);
        Py_VISIT(self->input_iter);
        Py_VISIT(self->fields);
        return 0;
}
 842
/* tp_clear slot of Reader_Type: drop the object references held by the
 * reader.  Py_CLEAR sets each member to NULL before releasing it.
 * Always returns 0.
 */
static int
Reader_clear(ReaderObj *self)
{
        Py_CLEAR(self->dialect);
        Py_CLEAR(self->input_iter);
        Py_CLEAR(self->fields);
        return 0;
}
 851
/* Docstring of the reader type; installed as tp_doc of Reader_Type. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
 858
/* Reader objects have no Python-level methods; the table holds only
 * the terminating sentinel entry.
 */
static struct PyMethodDef Reader_methods[] = {
        { NULL, NULL }
};
/* Byte offset of a member inside ReaderObj, for the member table. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Attributes exposed on reader objects; both are flagged RO. */
static struct PyMemberDef Reader_memberlist[] = {
        { "dialect", T_OBJECT, R_OFF(dialect), RO },
        { "line_num", T_ULONG, R_OFF(line_num), RO },
        { NULL }
};
 869
 870
 871 static PyTypeObject Reader_Type = {
 872         PyObject_HEAD_INIT(NULL)
 873         0,                                      /*ob_size*/
 874         "_csv.reader",                          /*tp_name*/
 875         sizeof(ReaderObj),                      /*tp_basicsize*/
 876         0,                                      /*tp_itemsize*/
 877         /* methods */
 878         (destructor)Reader_dealloc,             /*tp_dealloc*/
 879         (printfunc)0,                           /*tp_print*/
 880         (getattrfunc)0,                         /*tp_getattr*/
 881         (setattrfunc)0,                         /*tp_setattr*/
 882         (cmpfunc)0,                             /*tp_compare*/
 883         (reprfunc)0,                            /*tp_repr*/
 884         0,                                      /*tp_as_number*/
 885         0,                                      /*tp_as_sequence*/
 886         0,                                      /*tp_as_mapping*/
 887         (hashfunc)0,                            /*tp_hash*/
 888         (ternaryfunc)0,                         /*tp_call*/
 889         (reprfunc)0,                            /*tp_str*/
 890         0,                                      /*tp_getattro*/
 891         0,                                      /*tp_setattro*/
 892         0,                                      /*tp_as_buffer*/
 893         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
 894                 Py_TPFLAGS_HAVE_GC,             /*tp_flags*/
 895         Reader_Type_doc,                        /*tp_doc*/
 896         (traverseproc)Reader_traverse,          /*tp_traverse*/
 897         (inquiry)Reader_clear,                  /*tp_clear*/
 898         0,                                      /*tp_richcompare*/
 899         0,                                      /*tp_weaklistoffset*/
 900         PyObject_SelfIter,                      /*tp_iter*/
 901         (getiterfunc)Reader_iternext,           /*tp_iternext*/
 902         Reader_methods,                         /*tp_methods*/
 903         Reader_memberlist,                      /*tp_members*/
 904         0,                                      /*tp_getset*/
 905
 906 };
 907
 908 static PyObject *
 909 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
 910 {
 911         PyObject * iterator, * dialect = NULL;
 912         ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
 913
 914         if (!self)
 915                 return NULL;
 916
 917         self->dialect = NULL;
 918         self->fields = NULL;
 919         self->input_iter = NULL;
 920         self->field = NULL;
 921         self->field_size = 0;
 922         self->line_num = 0;
 923
 924         if (parse_reset(self) < 0) {
 925                 Py_DECREF(self);
 926                 return NULL;
 927         }
 928
 929         if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
 930                 Py_DECREF(self);
 931                 return NULL;
 932         }
 933         self->input_iter = PyObject_GetIter(iterator);
 934         if (self->input_iter == NULL) {
 935                 PyErr_SetString(PyExc_TypeError,
 936                                 "argument 1 must be an iterator");
 937                 Py_DECREF(self);
 938                 return NULL;
 939         }
 940         self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
 941         if (self->dialect == NULL) {
 942                 Py_DECREF(self);
 943                 return NULL;
 944         }
 945
 946         PyObject_GC_Track(self);
 947         return (PyObject *)self;
 948 }
 949
 950 /*
 951  * WRITER
 952  */
 953 /* ---------------------------------------------------------------- */
 954 static void
 955 join_reset(WriterObj *self)
 956 {
 957         self->rec_len = 0;
 958         self->num_fields = 0;
 959 }
 960
 961 #define MEM_INCR 32768
 962
 963 /* Calculate new record length or append field to record.  Return new
 964  * record length.
 965  */
 966 static int
 967 join_append_data(WriterObj *self, char *field, int quote_empty,
 968                  int *quoted, int copy_phase)
 969 {
 970         DialectObj *dialect = self->dialect;
 971         int i, rec_len;
 972         char *lineterm;
 973
 974 #define ADDCH(c) \
 975         do {\
 976                 if (copy_phase) \
 977                         self->rec[rec_len] = c;\
 978                 rec_len++;\
 979         } while(0)
 980
 981         lineterm = PyString_AsString(dialect->lineterminator);
 982         if (lineterm == NULL)
 983                 return -1;
 984
 985         rec_len = self->rec_len;
 986
 987         /* If this is not the first field we need a field separator */
 988         if (self->num_fields > 0)
 989                 ADDCH(dialect->delimiter);
 990
 991         /* Handle preceding quote */
 992         if (copy_phase && *quoted)
 993                 ADDCH(dialect->quotechar);
 994
 995         /* Copy/count field data */
 996         for (i = 0;; i++) {
 997                 char c = field[i];
 998                 int want_escape = 0;
 999
1000                 if (c == '\0')
1001                         break;
1002
1003                 if (c == dialect->delimiter ||
1004                     c == dialect->escapechar ||
1005                     c == dialect->quotechar ||
1006                     strchr(lineterm, c)) {
1007                         if (dialect->quoting == QUOTE_NONE)
1008                                 want_escape = 1;
1009                         else {
1010                                 if (c == dialect->quotechar) {
1011                                         if (dialect->doublequote)
1012                                                 ADDCH(dialect->quotechar);
1013                                         else
1014                                                 want_escape = 1;
1015                                 }
1016                                 if (!want_escape)
1017                                         *quoted = 1;
1018                         }
1019                         if (want_escape) {
1020                                 if (!dialect->escapechar) {
1021                                         PyErr_Format(error_obj,
1022                                                      "need to escape, but no escapechar set");
1023                                         return -1;
1024                                 }
1025                                 ADDCH(dialect->escapechar);
1026                         }
1027                 }
1028                 /* Copy field character into record buffer.
1029                  */
1030                 ADDCH(c);
1031         }
1032
1033         /* If field is empty check if it needs to be quoted.
1034          */
1035         if (i == 0 && quote_empty) {
1036                 if (dialect->quoting == QUOTE_NONE) {
1037                         PyErr_Format(error_obj,
1038                                      "single empty field record must be quoted");
1039                         return -1;
1040                 }
1041                 else
1042                         *quoted = 1;
1043         }
1044
1045         if (*quoted) {
1046                 if (copy_phase)
1047                         ADDCH(dialect->quotechar);
1048                 else
1049                         rec_len += 2;
1050         }
1051         return rec_len;
1052 #undef ADDCH
1053 }
1054
1055 static int
1056 join_check_rec_size(WriterObj *self, int rec_len)
1057 {
1058         if (rec_len > self->rec_size) {
1059                 if (self->rec_size == 0) {
1060                         self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1061                         if (self->rec != NULL)
1062                                 PyMem_Free(self->rec);
1063                         self->rec = PyMem_Malloc(self->rec_size);
1064                 }
1065                 else {
1066                         char *old_rec = self->rec;
1067
1068                         self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1069                         self->rec = PyMem_Realloc(self->rec, self->rec_size);
1070                         if (self->rec == NULL)
1071                                 PyMem_Free(old_rec);
1072                 }
1073                 if (self->rec == NULL) {
1074                         PyErr_NoMemory();
1075                         return 0;
1076                 }
1077         }
1078         return 1;
1079 }
1080
1081 static int
1082 join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1083 {
1084         int rec_len;
1085
1086         rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1087         if (rec_len < 0)
1088                 return 0;
1089
1090         /* grow record buffer if necessary */
1091         if (!join_check_rec_size(self, rec_len))
1092                 return 0;
1093
1094         self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1095         self->num_fields++;
1096
1097         return 1;
1098 }
1099
1100 static int
1101 join_append_lineterminator(WriterObj *self)
1102 {
1103         int terminator_len;
1104         char *terminator;
1105
1106         terminator_len = PyString_Size(self->dialect->lineterminator);
1107         if (terminator_len == -1)
1108                 return 0;
1109
1110         /* grow record buffer if necessary */
1111         if (!join_check_rec_size(self, self->rec_len + terminator_len))
1112                 return 0;
1113
1114         terminator = PyString_AsString(self->dialect->lineterminator);
1115         if (terminator == NULL)
1116                 return 0;
1117         memmove(self->rec + self->rec_len, terminator, terminator_len);
1118         self->rec_len += terminator_len;
1119
1120         return 1;
1121 }
1122
1123 PyDoc_STRVAR(csv_writerow_doc,
1124 "writerow(sequence)\n"
1125 "\n"
1126 "Construct and write a CSV record from a sequence of fields.  Non-string\n"
1127 "elements will be converted to string.");
1128
1129 static PyObject *
1130 csv_writerow(WriterObj *self, PyObject *seq)
1131 {
1132         DialectObj *dialect = self->dialect;
1133         int len, i;
1134
1135         if (!PySequence_Check(seq))
1136                 return PyErr_Format(error_obj, "sequence expected");
1137
1138         len = PySequence_Length(seq);
1139         if (len < 0)
1140                 return NULL;
1141
1142         /* Join all fields in internal buffer.
1143          */
1144         join_reset(self);
1145         for (i = 0; i < len; i++) {
1146                 PyObject *field;
1147                 int append_ok;
1148                 int quoted;
1149
1150                 field = PySequence_GetItem(seq, i);
1151                 if (field == NULL)
1152                         return NULL;
1153
1154                 switch (dialect->quoting) {
1155                 case QUOTE_NONNUMERIC:
1156                         quoted = !PyNumber_Check(field);
1157                         break;
1158                 case QUOTE_ALL:
1159                         quoted = 1;
1160                         break;
1161                 default:
1162                         quoted = 0;
1163                         break;
1164                 }
1165
1166                 if (PyString_Check(field)) {
1167                         append_ok = join_append(self,
1168                                                 PyString_AS_STRING(field),
1169                                                 &quoted, len == 1);
1170                         Py_DECREF(field);
1171                 }
1172                 else if (field == Py_None) {
1173                         append_ok = join_append(self, "", &quoted, len == 1);
1174                         Py_DECREF(field);
1175                 }
1176                 else {
1177                         PyObject *str;
1178
1179                         str = PyObject_Str(field);
1180                         Py_DECREF(field);
1181                         if (str == NULL)
1182                                 return NULL;
1183
1184                         append_ok = join_append(self, PyString_AS_STRING(str),
1185                                                 &quoted, len == 1);
1186                         Py_DECREF(str);
1187                 }
1188                 if (!append_ok)
1189                         return NULL;
1190         }
1191
1192         /* Add line terminator.
1193          */
1194         if (!join_append_lineterminator(self))
1195                 return 0;
1196
1197         return PyObject_CallFunction(self->writeline,
1198                                      "(s#)", self->rec, self->rec_len);
1199 }
1200
1201 PyDoc_STRVAR(csv_writerows_doc,
1202 "writerows(sequence of sequences)\n"
1203 "\n"
1204 "Construct and write a series of sequences to a csv file.  Non-string\n"
1205 "elements will be converted to string.");
1206
1207 static PyObject *
1208 csv_writerows(WriterObj *self, PyObject *seqseq)
1209 {
1210         PyObject *row_iter, *row_obj, *result;
1211
1212         row_iter = PyObject_GetIter(seqseq);
1213         if (row_iter == NULL) {
1214                 PyErr_SetString(PyExc_TypeError,
1215                                 "writerows() argument must be iterable");
1216                 return NULL;
1217         }
1218         while ((row_obj = PyIter_Next(row_iter))) {
1219                 result = csv_writerow(self, row_obj);
1220                 Py_DECREF(row_obj);
1221                 if (!result) {
1222                         Py_DECREF(row_iter);
1223                         return NULL;
1224                 }
1225                 else
1226                      Py_DECREF(result);
1227         }
1228         Py_DECREF(row_iter);
1229         if (PyErr_Occurred())
1230                 return NULL;
1231         Py_INCREF(Py_None);
1232         return Py_None;
1233 }
1234
/* Python-level methods of writer objects; each takes exactly one
 * argument (METH_O).
 */
static struct PyMethodDef Writer_methods[] = {
        { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
        { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
        { NULL, NULL }
};

/* Byte offset of a member inside WriterObj, for the member table. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Attributes exposed on writer objects; "dialect" is flagged RO. */
static struct PyMemberDef Writer_memberlist[] = {
        { "dialect", T_OBJECT, W_OFF(dialect), RO },
        { NULL }
};
1247
1248 static void
1249 Writer_dealloc(WriterObj *self)
1250 {
1251         PyObject_GC_UnTrack(self);
1252         Py_XDECREF(self->dialect);
1253         Py_XDECREF(self->writeline);
1254         if (self->rec != NULL)
1255                 PyMem_Free(self->rec);
1256         PyObject_GC_Del(self);
1257 }
1258
/* tp_traverse slot of Writer_Type: hand the object references held by
 * the writer (dialect and write callable) to the garbage collector's
 * visit callback.  Returns 0 once both have been visited.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
        Py_VISIT(self->dialect);
        Py_VISIT(self->writeline);
        return 0;
}
1266
/* tp_clear slot of Writer_Type: drop the object references held by the
 * writer.  Py_CLEAR sets each member to NULL before releasing it.
 * Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
        Py_CLEAR(self->dialect);
        Py_CLEAR(self->writeline);
        return 0;
}
1274
/* Docstring of the writer type; installed as tp_doc of Writer_Type. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1281
1282 static PyTypeObject Writer_Type = {
1283         PyObject_HEAD_INIT(NULL)
1284         0,                                      /*ob_size*/
1285         "_csv.writer",                          /*tp_name*/
1286         sizeof(WriterObj),                      /*tp_basicsize*/
1287         0,                                      /*tp_itemsize*/
1288         /* methods */
1289         (destructor)Writer_dealloc,             /*tp_dealloc*/
1290         (printfunc)0,                           /*tp_print*/
1291         (getattrfunc)0,                         /*tp_getattr*/
1292         (setattrfunc)0,                         /*tp_setattr*/
1293         (cmpfunc)0,                             /*tp_compare*/
1294         (reprfunc)0,                            /*tp_repr*/
1295         0,                                      /*tp_as_number*/
1296         0,                                      /*tp_as_sequence*/
1297         0,                                      /*tp_as_mapping*/
1298         (hashfunc)0,                            /*tp_hash*/
1299         (ternaryfunc)0,                         /*tp_call*/
1300         (reprfunc)0,                            /*tp_str*/
1301         0,                                      /*tp_getattro*/
1302         0,                                      /*tp_setattro*/
1303         0,                                      /*tp_as_buffer*/
1304         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1305                 Py_TPFLAGS_HAVE_GC,             /*tp_flags*/
1306         Writer_Type_doc,
1307         (traverseproc)Writer_traverse,          /*tp_traverse*/
1308         (inquiry)Writer_clear,                  /*tp_clear*/
1309         0,                                      /*tp_richcompare*/
1310         0,                                      /*tp_weaklistoffset*/
1311         (getiterfunc)0,                         /*tp_iter*/
1312         (getiterfunc)0,                         /*tp_iternext*/
1313         Writer_methods,                         /*tp_methods*/
1314         Writer_memberlist,                      /*tp_members*/
1315         0,                                      /*tp_getset*/
1316 };
1317
1318 static PyObject *
1319 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1320 {
1321         PyObject * output_file, * dialect = NULL;
1322         WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1323
1324         if (!self)
1325                 return NULL;
1326
1327         self->dialect = NULL;
1328         self->writeline = NULL;
1329
1330         self->rec = NULL;
1331         self->rec_size = 0;
1332         self->rec_len = 0;
1333         self->num_fields = 0;
1334
1335         if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1336                 Py_DECREF(self);
1337                 return NULL;
1338         }
1339         self->writeline = PyObject_GetAttrString(output_file, "write");
1340         if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1341                 PyErr_SetString(PyExc_TypeError,
1342                                 "argument 1 must have a \"write\" method");
1343                 Py_DECREF(self);
1344                 return NULL;
1345         }
1346         self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1347         if (self->dialect == NULL) {
1348                 Py_DECREF(self);
1349                 return NULL;
1350         }
1351         PyObject_GC_Track(self);
1352         return (PyObject *)self;
1353 }
1354
1355 /*
1356  * DIALECT REGISTRY
1357  */
/* Module-level list_dialects(): return the keys of the module's
 * "dialects" dictionary as produced by PyDict_Keys().  The function is
 * registered as METH_NOARGS, so args is not used.
 */
static PyObject *
csv_list_dialects(PyObject *module, PyObject *args)
{
        return PyDict_Keys(dialects);
}
1363
1364 static PyObject *
1365 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1366 {
1367         PyObject *name_obj, *dialect_obj = NULL;
1368         PyObject *dialect;
1369
1370         if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1371                 return NULL;
1372         if (!IS_BASESTRING(name_obj)) {
1373                 PyErr_SetString(PyExc_TypeError,
1374                                 "dialect name must be a string or unicode");
1375                 return NULL;
1376         }
1377         dialect = _call_dialect(dialect_obj, kwargs);
1378         if (dialect == NULL)
1379                 return NULL;
1380         if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1381                 Py_DECREF(dialect);
1382                 return NULL;
1383         }
1384         Py_DECREF(dialect);
1385         Py_INCREF(Py_None);
1386         return Py_None;
1387 }
1388
1389 static PyObject *
1390 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1391 {
1392         if (PyDict_DelItem(dialects, name_obj) < 0)
1393                 return PyErr_Format(error_obj, "unknown dialect");
1394         Py_INCREF(Py_None);
1395         return Py_None;
1396 }
1397
/* Module-level get_dialect(name): thin wrapper that forwards name to
 * get_dialect_from_registry() and returns its result unchanged.
 * NOTE(review): lookup and error behaviour are defined by that helper,
 * which is outside this part of the file.
 */
static PyObject *
csv_get_dialect(PyObject *module, PyObject *name_obj)
{
        return get_dialect_from_registry(name_obj);
}
1403
1404 static PyObject *
1405 csv_field_size_limit(PyObject *module, PyObject *args)
1406 {
1407         PyObject *new_limit = NULL;
1408         long old_limit = field_limit;
1409
1410         if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1411                 return NULL;
1412         if (new_limit != NULL) {
1413                 if (!PyInt_Check(new_limit)) {
1414                         PyErr_Format(PyExc_TypeError,
1415                                      "limit must be an integer");
1416                         return NULL;
1417                 }
1418                 field_limit = PyInt_AsLong(new_limit);
1419         }
1420         return PyInt_FromLong(old_limit);
1421 }
1422
1423 /*
1424  * MODULE
1425  */
1426
/* Docstring of the _csv module itself; passed to Py_InitModule3() in
 * init_csv().
 */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305.  Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail.  The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings.  When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
"    class excel:\n"
"        delimiter = ','\n"
"        quotechar = '\"'\n"
"        escapechar = None\n"
"        doublequote = True\n"
"        skipinitialspace = False\n"
"        lineterminator = '\\r\\n'\n"
"        quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
"    * quotechar - specifies a one-character string to use as the \n"
"        quoting character.  It defaults to '\"'.\n"
"    * delimiter - specifies a one-character string to use as the \n"
"        field separator.  It defaults to ','.\n"
"    * skipinitialspace - specifies how to interpret whitespace which\n"
"        immediately follows a delimiter.  It defaults to False, which\n"
"        means that whitespace immediately following a delimiter is part\n"
"        of the following field.\n"
"    * lineterminator -  specifies the character sequence which should \n"
"        terminate rows.\n"
"    * quoting - controls when quotes should be generated by the writer.\n"
"        It can take on any of the following module constants:\n"
"\n"
"        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
"            field contains either the quotechar or the delimiter\n"
"        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
"        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
"            fields which do not parse as integers or floating point\n"
"            numbers.\n"
"        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
"    * escapechar - specifies a one-character string used to escape \n"
"        the delimiter when quoting is set to QUOTE_NONE.\n"
"    * doublequote - controls the handling of quotes inside fields.  When\n"
"        True, two consecutive quotes are interpreted as one during read,\n"
"        and when writing, each quote character embedded in the data is\n"
"        written as two quotes\n");
1484
1485 PyDoc_STRVAR(csv_reader_doc,
1486 "    csv_reader = reader(iterable [, dialect='excel']\n"
1487 "                        [optional keyword args])\n"
1488 "    for row in csv_reader:\n"
1489 "        process(row)\n"
1490 "\n"
1491 "The \"iterable\" argument can be any object that returns a line\n"
1492 "of input for each iteration, such as a file object or a list.  The\n"
1493 "optional \"dialect\" parameter is discussed below.  The function\n"
1494 "also accepts optional keyword arguments which override settings\n"
1495 "provided by the dialect.\n"
1496 "\n"
1497 "The returned object is an iterator.  Each iteration returns a row\n"
1498 "of the CSV file (which can span multiple input lines):\n");
1499
1500 PyDoc_STRVAR(csv_writer_doc,
1501 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1502 "                            [optional keyword args])\n"
1503 "    for row in sequence:\n"
1504 "        csv_writer.writerow(row)\n"
1505 "\n"
1506 "    [or]\n"
1507 "\n"
1508 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1509 "                            [optional keyword args])\n"
1510 "    csv_writer.writerows(rows)\n"
1511 "\n"
1512 "The \"fileobj\" argument can be any object that supports the file API.\n");
1513
1514 PyDoc_STRVAR(csv_list_dialects_doc,
1515 "Return a list of all know dialect names.\n"
1516 "    names = csv.list_dialects()");
1517
1518 PyDoc_STRVAR(csv_get_dialect_doc,
1519 "Return the dialect instance associated with name.\n"
1520 "    dialect = csv.get_dialect(name)");
1521
1522 PyDoc_STRVAR(csv_register_dialect_doc,
1523 "Create a mapping from a string name to a dialect class.\n"
1524 "    dialect = csv.register_dialect(name, dialect)");
1525
1526 PyDoc_STRVAR(csv_unregister_dialect_doc,
1527 "Delete the name/dialect mapping associated with a string name.\n"
1528 "    csv.unregister_dialect(name)");
1529
1530 PyDoc_STRVAR(csv_field_size_limit_doc,
1531 "Sets an upper limit on parsed fields.\n"
1532 "    csv.field_size_limit([limit])\n"
1533 "\n"
1534 "Returns old limit. If limit is not given, no new limit is set and\n"
1535 "the old limit is returned");
1536
/* Function table of the _csv module, handed to Py_InitModule3() in
 * init_csv().  reader, writer and register_dialect accept positional
 * and keyword arguments; unregister_dialect and get_dialect take
 * exactly one argument; list_dialects takes none.
 */
static struct PyMethodDef csv_methods[] = {
        { "reader", (PyCFunction)csv_reader,
                METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
        { "writer", (PyCFunction)csv_writer,
                METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
        { "list_dialects", (PyCFunction)csv_list_dialects,
                METH_NOARGS, csv_list_dialects_doc},
        { "register_dialect", (PyCFunction)csv_register_dialect,
                METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
        { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
                METH_O, csv_unregister_dialect_doc},
        { "get_dialect", (PyCFunction)csv_get_dialect,
                METH_O, csv_get_dialect_doc},
        { "field_size_limit", (PyCFunction)csv_field_size_limit,
                METH_VARARGS, csv_field_size_limit_doc},
        { NULL, NULL }
};
1554
1555 PyMODINIT_FUNC
1556 init_csv(void)
1557 {
1558         PyObject *module;
1559         StyleDesc *style;
1560
1561         if (PyType_Ready(&Dialect_Type) < 0)
1562                 return;
1563
1564         if (PyType_Ready(&Reader_Type) < 0)
1565                 return;
1566
1567         if (PyType_Ready(&Writer_Type) < 0)
1568                 return;
1569
1570         /* Create the module and add the functions */
1571         module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1572         if (module == NULL)
1573                 return;
1574
1575         /* Add version to the module. */
1576         if (PyModule_AddStringConstant(module, "__version__",
1577                                        MODULE_VERSION) == -1)
1578                 return;
1579
1580         /* Add _dialects dictionary */
1581         dialects = PyDict_New();
1582         if (dialects == NULL)
1583                 return;
1584         if (PyModule_AddObject(module, "_dialects", dialects))
1585                 return;
1586
1587         /* Add quote styles into dictionary */
1588         for (style = quote_styles; style->name; style++) {
1589                 if (PyModule_AddIntConstant(module, style->name,
1590                                             style->style) == -1)
1591                         return;
1592         }
1593
1594         /* Add the Dialect type */
1595         Py_INCREF(&Dialect_Type);
1596         if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1597                 return;
1598
1599         /* Add the CSV exception object to the module. */
1600         error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1601         if (error_obj == NULL)
1602                 return;
1603         PyModule_AddObject(module, "Error", error_obj);
1604 }