Modules/_io/textio.c

   1 /*
   2     An implementation of Text I/O as defined by PEP 3116 - "New I/O"
   3
   4     Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
   5
   6     Written by Amaury Forgeot d'Arc and Antoine Pitrou
   7 */
   8
   9 #define PY_SSIZE_T_CLEAN
  10 #include "Python.h"
  11 #include "structmember.h"
  12 #include "_iomodule.h"
  13
  14 /* TextIOBase */
  15
  16 PyDoc_STRVAR(textiobase_doc,
  17     "Base class for text I/O.\n"
  18     "\n"
  19     "This class provides a character and line based interface to stream\n"
  20     "I/O. There is no readinto method because Python's character strings\n"
  21     "are immutable. There is no public constructor.\n"
  22     );
  23
  24 static PyObject *
  25 _unsupported(const char *message)
  26 {
  27     PyErr_SetString(_PyIO_unsupported_operation, message);
  28     return NULL;
  29 }
  30
  31 PyDoc_STRVAR(textiobase_detach_doc,
  32     "Separate the underlying buffer from the TextIOBase and return it.\n"
  33     "\n"
  34     "After the underlying buffer has been detached, the TextIO is in an\n"
  35     "unusable state.\n"
  36     );
  37
  38 static PyObject *
  39 textiobase_detach(PyObject *self)
  40 {
  41     return _unsupported("detach");
  42 }
  43
  44 PyDoc_STRVAR(textiobase_read_doc,
  45     "Read at most n characters from stream.\n"
  46     "\n"
  47     "Read from underlying buffer until we have n characters or we hit EOF.\n"
  48     "If n is negative or omitted, read until EOF.\n"
  49     );
  50
  51 static PyObject *
  52 textiobase_read(PyObject *self, PyObject *args)
  53 {
  54     return _unsupported("read");
  55 }
  56
  57 PyDoc_STRVAR(textiobase_readline_doc,
  58     "Read until newline or EOF.\n"
  59     "\n"
  60     "Returns an empty string if EOF is hit immediately.\n"
  61     );
  62
  63 static PyObject *
  64 textiobase_readline(PyObject *self, PyObject *args)
  65 {
  66     return _unsupported("readline");
  67 }
  68
  69 PyDoc_STRVAR(textiobase_write_doc,
  70     "Write string to stream.\n"
  71     "Returns the number of characters written (which is always equal to\n"
  72     "the length of the string).\n"
  73     );
  74
  75 static PyObject *
  76 textiobase_write(PyObject *self, PyObject *args)
  77 {
  78     return _unsupported("write");
  79 }
  80
  81 PyDoc_STRVAR(textiobase_encoding_doc,
  82     "Encoding of the text stream.\n"
  83     "\n"
  84     "Subclasses should override.\n"
  85     );
  86
  87 static PyObject *
  88 textiobase_encoding_get(PyObject *self, void *context)
  89 {
  90     Py_RETURN_NONE;
  91 }
  92
  93 PyDoc_STRVAR(textiobase_newlines_doc,
  94     "Line endings translated so far.\n"
  95     "\n"
  96     "Only line endings translated during reading are considered.\n"
  97     "\n"
  98     "Subclasses should override.\n"
  99     );
 100
 101 static PyObject *
 102 textiobase_newlines_get(PyObject *self, void *context)
 103 {
 104     Py_RETURN_NONE;
 105 }
 106
 107 PyDoc_STRVAR(textiobase_errors_doc,
 108     "The error setting of the decoder or encoder.\n"
 109     "\n"
 110     "Subclasses should override.\n"
 111     );
 112
 113 static PyObject *
 114 textiobase_errors_get(PyObject *self, void *context)
 115 {
 116     Py_RETURN_NONE;
 117 }
 118
 119
 120 static PyMethodDef textiobase_methods[] = {
 121     {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
 122     {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
 123     {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
 124     {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
 125     {NULL, NULL}
 126 };
 127
 128 static PyGetSetDef textiobase_getset[] = {
 129     {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
 130     {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
 131     {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
 132     {NULL}
 133 };
 134
/* Type object for "_io._TextIOBase".

   The initializer is positional, so the order of the entries below must
   match the slot order of PyTypeObject.  Only the flags, docstring, method
   table, getset table and base type are filled in here; every other slot
   is left as 0. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    /* Py_TPFLAGS_BASETYPE: the type may be subclassed */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
};
 175
 176
 177 /* IncrementalNewlineDecoder */
 178
 179 PyDoc_STRVAR(incrementalnewlinedecoder_doc,
 180     "Codec used when reading a file in universal newlines mode.  It wraps\n"
 181     "another incremental decoder, translating \\r\\n and \\r into \\n.  It also\n"
 182     "records the types of newlines encountered.  When used with\n"
 183     "translate=False, it ensures that the newline sequence is returned in\n"
 184     "one piece. When used with decoder=None, it expects unicode strings as\n"
 185     "decode input and translates newlines without first invoking an external\n"
 186     "decoder.\n"
 187     );
 188
 189 typedef struct {
 190     PyObject_HEAD
 191     PyObject *decoder;
 192     PyObject *errors;
 193     int pendingcr:1;
 194     int translate:1;
 195     unsigned int seennl:3;
 196 } nldecoder_object;
 197
 198 static int
 199 incrementalnewlinedecoder_init(nldecoder_object *self,
 200                                PyObject *args, PyObject *kwds)
 201 {
 202     PyObject *decoder;
 203     int translate;
 204     PyObject *errors = NULL;
 205     char *kwlist[] = {"decoder", "translate", "errors", NULL};
 206
 207     if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
 208                                      kwlist, &decoder, &translate, &errors))
 209         return -1;
 210
 211     self->decoder = decoder;
 212     Py_INCREF(decoder);
 213
 214     if (errors == NULL) {
 215         self->errors = PyUnicode_FromString("strict");
 216         if (self->errors == NULL)
 217             return -1;
 218     }
 219     else {
 220         Py_INCREF(errors);
 221         self->errors = errors;
 222     }
 223
 224     self->translate = translate;
 225     self->seennl = 0;
 226     self->pendingcr = 0;
 227
 228     return 0;
 229 }
 230
 231 static void
 232 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
 233 {
 234     Py_CLEAR(self->decoder);
 235     Py_CLEAR(self->errors);
 236     Py_TYPE(self)->tp_free((PyObject *)self);
 237 }
 238
 239 #define SEEN_CR   1
 240 #define SEEN_LF   2
 241 #define SEEN_CRLF 4
 242 #define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
 243
/* Decode `input` and post-process the line endings of the result.

   _self  -- an nldecoder_object (cast without a type check).
   input  -- passed to the wrapped decoder's decode method, or used as the
             result directly when the decoder is Py_None.
   final  -- non-zero when no more input will follow; a trailing '\r' is
             then kept in the output instead of being held back.

   Returns a new reference to a unicode object, or NULL with an exception
   set (ValueError if __init__ never ran, TypeError if the decoded result
   is not a unicode object, or whatever the decoder / allocation raised).

   Steps, in order:
     1. decode;
     2. prepend the '\r' held back by the previous call, if any;
     3. unless `final`, strip a trailing '\r' and remember it in pendingcr;
     4. scan the text once, recording the newline kinds in self->seennl
        and, when self->translate is set, rewriting \r\n and \r to \n.

   The scanning loops in step 4 do not bounds-check inside their "fast
   loop": they stop only at a character not greater than '\n' (resp. '\r')
   and read in_str[len] before the `> end` test fires.  They therefore
   depend on a small terminator value being stored right after the data --
   presumably the NUL that unicode buffers carry; NOTE(review): confirm. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *_self,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) _self;

    /* decoder stays NULL until __init__ has stored something in it */
    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* No wrapped decoder: the input itself is the text.  Taking an
           extra reference here means its refcount is at least 2, so the
           in-place paths below (guarded by Py_REFCNT == 1) never modify
           the caller's object. */
        output = input;
        Py_INCREF(output);
    }

    if (output == NULL)
        return NULL;

    if (!PyUnicode_Check(output)) {
        PyErr_SetString(PyExc_TypeError,
                        "decoder should return a string result");
        goto error;
    }

    output_len = PyUnicode_GET_SIZE(output);
    /* Re-insert the '\r' held back by the previous call.  When this call
       produced nothing and is not final, the '\r' simply stays pending. */
    if (self->pendingcr && (final || output_len > 0)) {
        Py_UNICODE *out;
        PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
        if (modified == NULL)
            goto error;
        out = PyUnicode_AS_UNICODE(modified);
        out[0] = '\r';
        memcpy(out + 1, PyUnicode_AS_UNICODE(output),
               output_len * sizeof(Py_UNICODE));
        Py_DECREF(output);
        output = modified;
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {

            /* Shrink in place when we hold the only reference, otherwise
               build a copy that is one character shorter. */
            if (Py_REFCNT(output) == 1) {
                if (PyUnicode_Resize(&output, output_len - 1) < 0)
                    goto error;
            }
            else {
                PyObject *modified = PyUnicode_FromUnicode(
                    PyUnicode_AS_UNICODE(output),
                    output_len - 1);
                if (modified == NULL)
                    goto error;
                Py_DECREF(output);
                output = modified;
            }
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        Py_UNICODE *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;   /* local copy, merged back at the end */
        int only_lf = 0;

        in_str = PyUnicode_AS_UNICODE(output);
        len = PyUnicode_GET_SIZE(output);

        /* Nothing to scan; seennl is left untouched. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           The search is byte-wise over the raw buffer, so a 0x0D byte that
           is merely part of a wider code unit also counts as a hit; that
           only makes the code take one of the full scans below.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
                /* The byte-wise hit may be a false positive, so confirm it
                   with a scan over whole characters. */
                Py_UNICODE *s, *end;
                s = in_str;
                end = in_str + len;
                for (;;) {
                    Py_UNICODE c;
                    /* Fast loop for non-control characters */
                    while (*s > '\n')
                        s++;
                    c = *s++;
                    if (c == '\n') {
                        seennl |= SEEN_LF;
                        break;
                    }
                    /* s has moved past in_str[len]: end of data */
                    if (s > end)
                        break;
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* Record-only scan: the text is returned unmodified. */
            Py_UNICODE *s, *end;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            s = in_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while (*s > '\r')
                    s++;
                c = *s++;
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* Look one character ahead to tell \r\n from a lone \r */
                    if (*s == '\n') {
                        seennl |= SEEN_CRLF;
                        s++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (s > end)
                    break;
                /* Every kind has been seen: nothing more to learn */
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            /* Translating scan: copy characters from `in` to `out`,
               collapsing \r\n and \r to \n.  `out` never gets ahead of
               `in`, which is what makes the in-place case possible. */
            PyObject *translated = NULL;
            Py_UNICODE *out_str;
            Py_UNICODE *in, *out, *end;
            if (Py_REFCNT(output) != 1) {
                /* We could try to optimize this so that we only do a copy
                   when there is something to translate. On the other hand,
                   most decoders should only output non-shared strings, i.e.
                   translation is done in place. */
                translated = PyUnicode_FromUnicode(NULL, len);
                if (translated == NULL)
                    goto error;
                assert(Py_REFCNT(translated) == 1);
                memcpy(PyUnicode_AS_UNICODE(translated),
                       PyUnicode_AS_UNICODE(output),
                       len * sizeof(Py_UNICODE));
            }
            else {
                translated = output;
            }
            out_str = PyUnicode_AS_UNICODE(translated);
            in = in_str;
            out = out_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while ((c = *in++) > '\r')
                    *out++ = c;
                if (c == '\n') {
                    *out++ = c;
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (*in == '\n') {
                        /* \r\n: consume the \n too, emit a single \n */
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    *out++ = '\n';
                    continue;
                }
                /* `in` has moved past in_str[len]: end of data */
                if (in > end)
                    break;
                /* Any other character not greater than '\r' is copied
                   through unchanged */
                *out++ = c;
            }
            if (translated != output) {
                Py_DECREF(output);
                output = translated;
            }
            /* Each \r\n pair collapsed above made the text one shorter */
            if (out - out_str != len) {
                if (PyUnicode_Resize(&output, out - out_str) < 0)
                    goto error;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
 461
 462 static PyObject *
 463 incrementalnewlinedecoder_decode(nldecoder_object *self,
 464                                  PyObject *args, PyObject *kwds)
 465 {
 466     char *kwlist[] = {"input", "final", NULL};
 467     PyObject *input;
 468     int final = 0;
 469
 470     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
 471                                      kwlist, &input, &final))
 472         return NULL;
 473     return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
 474 }
 475
 476 static PyObject *
 477 incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
 478 {
 479     PyObject *buffer;
 480     unsigned PY_LONG_LONG flag;
 481
 482     if (self->decoder != Py_None) {
 483         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
 484            _PyIO_str_getstate, NULL);
 485         if (state == NULL)
 486             return NULL;
 487         if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
 488             Py_DECREF(state);
 489             return NULL;
 490         }
 491         Py_INCREF(buffer);
 492         Py_DECREF(state);
 493     }
 494     else {
 495         buffer = PyBytes_FromString("");
 496         flag = 0;
 497     }
 498     flag <<= 1;
 499     if (self->pendingcr)
 500         flag |= 1;
 501     return Py_BuildValue("NK", buffer, flag);
 502 }
 503
 504 static PyObject *
 505 incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
 506 {
 507     PyObject *buffer;
 508     unsigned PY_LONG_LONG flag;
 509
 510     if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
 511         return NULL;
 512
 513     self->pendingcr = (int) flag & 1;
 514     flag >>= 1;
 515
 516     if (self->decoder != Py_None)
 517         return PyObject_CallMethod(self->decoder,
 518                                    "setstate", "((OK))", buffer, flag);
 519     else
 520         Py_RETURN_NONE;
 521 }
 522
 523 static PyObject *
 524 incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
 525 {
 526     self->seennl = 0;
 527     self->pendingcr = 0;
 528     if (self->decoder != Py_None)
 529         return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
 530     else
 531         Py_RETURN_NONE;
 532 }
 533
 534 static PyObject *
 535 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
 536 {
 537     switch (self->seennl) {
 538     case SEEN_CR:
 539         return PyUnicode_FromString("\r");
 540     case SEEN_LF:
 541         return PyUnicode_FromString("\n");
 542     case SEEN_CRLF:
 543         return PyUnicode_FromString("\r\n");
 544     case SEEN_CR | SEEN_LF:
 545         return Py_BuildValue("ss", "\r", "\n");
 546     case SEEN_CR | SEEN_CRLF:
 547         return Py_BuildValue("ss", "\r", "\r\n");
 548     case SEEN_LF | SEEN_CRLF:
 549         return Py_BuildValue("ss", "\n", "\r\n");
 550     case SEEN_CR | SEEN_LF | SEEN_CRLF:
 551         return Py_BuildValue("sss", "\r", "\n", "\r\n");
 552     default:
 553         Py_RETURN_NONE;
 554    }
 555
 556 }
 557
 558
 559 static PyMethodDef incrementalnewlinedecoder_methods[] = {
 560     {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
 561     {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
 562     {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
 563     {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
 564     {NULL}
 565 };
 566
 567 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
 568     {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
 569     {NULL}
 570 };
 571
/* Type object for "_io.IncrementalNewlineDecoder".

   Instances are nldecoder_object structs, created through
   PyType_GenericNew, set up by incrementalnewlinedecoder_init and torn
   down by incrementalnewlinedecoder_dealloc.  The initializer is
   positional, so the order of the entries below must match the slot order
   of PyTypeObject. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    /* Py_TPFLAGS_BASETYPE: the type may be subclassed */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    incrementalnewlinedecoder_doc,          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)incrementalnewlinedecoder_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
 612
 613
 614 /* TextIOWrapper */
 615
 616 PyDoc_STRVAR(textiowrapper_doc,
 617     "Character and line based layer over a BufferedIOBase object, buffer.\n"
 618     "\n"
 619     "encoding gives the name of the encoding that the stream will be\n"
 620     "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
 621     "\n"
 622     "errors determines the strictness of encoding and decoding (see the\n"
 623     "codecs.register) and defaults to \"strict\".\n"
 624     "\n"
 625     "newline can be None, '', '\\n', '\\r', or '\\r\\n'.  It controls the\n"
 626     "handling of line endings. If it is None, universal newlines is\n"
 627     "enabled.  With this enabled, on input, the lines endings '\\n', '\\r',\n"
 628     "or '\\r\\n' are translated to '\\n' before being returned to the\n"
 629     "caller. Conversely, on output, '\\n' is translated to the system\n"
 630     "default line seperator, os.linesep. If newline is any other of its\n"
 631     "legal values, that newline becomes the newline when the file is read\n"
 632     "and it is returned untranslated. On output, '\\n' is converted to the\n"
 633     "newline.\n"
 634     "\n"
 635     "If line_buffering is True, a call to flush is implied when a call to\n"
 636     "write contains a newline character."
 637     );
 638
 639 typedef PyObject *
 640         (*encodefunc_t)(PyObject *, PyObject *);
 641
/* Instance layout of TextIOWrapper objects.  Field order is part of the
   object layout and must not be changed casually. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;    /* bytes object; the specialized encoders read it
                            with PyBytes_AS_STRING */
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char telling;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;
    PyObject *snapshot;
    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
 693
 694
 695 /* A couple of specialized cases in order to bypass the slow incremental
 696    encoding methods for the most popular encodings. */
 697
 698 static PyObject *
 699 ascii_encode(textio *self, PyObject *text)
 700 {
 701     return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
 702                                  PyUnicode_GET_SIZE(text),
 703                                  PyBytes_AS_STRING(self->errors));
 704 }
 705
 706 static PyObject *
 707 utf16be_encode(textio *self, PyObject *text)
 708 {
 709     return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
 710                                  PyUnicode_GET_SIZE(text),
 711                                  PyBytes_AS_STRING(self->errors), 1);
 712 }
 713
 714 static PyObject *
 715 utf16le_encode(textio *self, PyObject *text)
 716 {
 717     return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
 718                                  PyUnicode_GET_SIZE(text),
 719                                  PyBytes_AS_STRING(self->errors), -1);
 720 }
 721
 722 static PyObject *
 723 utf16_encode(textio *self, PyObject *text)
 724 {
 725     if (!self->encoding_start_of_stream) {
 726         /* Skip the BOM and use native byte ordering */
 727 #if defined(WORDS_BIGENDIAN)
 728         return utf16be_encode(self, text);
 729 #else
 730         return utf16le_encode(self, text);
 731 #endif
 732     }
 733     return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
 734                                  PyUnicode_GET_SIZE(text),
 735                                  PyBytes_AS_STRING(self->errors), 0);
 736 }
 737
 738 static PyObject *
 739 utf32be_encode(textio *self, PyObject *text)
 740 {
 741     return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
 742                                  PyUnicode_GET_SIZE(text),
 743                                  PyBytes_AS_STRING(self->errors), 1);
 744 }
 745
 746 static PyObject *
 747 utf32le_encode(textio *self, PyObject *text)
 748 {
 749     return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
 750                                  PyUnicode_GET_SIZE(text),
 751                                  PyBytes_AS_STRING(self->errors), -1);
 752 }
 753
 754 static PyObject *
 755 utf32_encode(textio *self, PyObject *text)
 756 {
 757     if (!self->encoding_start_of_stream) {
 758         /* Skip the BOM and use native byte ordering */
 759 #if defined(WORDS_BIGENDIAN)
 760         return utf32be_encode(self, text);
 761 #else
 762         return utf32le_encode(self, text);
 763 #endif
 764     }
 765     return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
 766                                  PyUnicode_GET_SIZE(text),
 767                                  PyBytes_AS_STRING(self->errors), 0);
 768 }
 769
 770 static PyObject *
 771 utf8_encode(textio *self, PyObject *text)
 772 {
 773     return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
 774                                 PyUnicode_GET_SIZE(text),
 775                                 PyBytes_AS_STRING(self->errors));
 776 }
 777
 778 static PyObject *
 779 latin1_encode(textio *self, PyObject *text)
 780 {
 781     return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
 782                                   PyUnicode_GET_SIZE(text),
 783                                   PyBytes_AS_STRING(self->errors));
 784 }
 785
 786 /* Map normalized encoding names onto the specialized encoding funcs */
 787
 788 typedef struct {
 789     const char *name;
 790     encodefunc_t encodefunc;
 791 } encodefuncentry;
 792
 793 static encodefuncentry encodefuncs[] = {
 794     {"ascii",       (encodefunc_t) ascii_encode},
 795     {"iso8859-1",   (encodefunc_t) latin1_encode},
 796     {"utf-8",       (encodefunc_t) utf8_encode},
 797     {"utf-16-be",   (encodefunc_t) utf16be_encode},
 798     {"utf-16-le",   (encodefunc_t) utf16le_encode},
 799     {"utf-16",      (encodefunc_t) utf16_encode},
 800     {"utf-32-be",   (encodefunc_t) utf32be_encode},
 801     {"utf-32-le",   (encodefunc_t) utf32le_encode},
 802     {"utf-32",      (encodefunc_t) utf32_encode},
 803     {NULL, NULL}
 804 };
 805
 806
 807 static int
 808 textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
 809 {
 810     char *kwlist[] = {"buffer", "encoding", "errors",
 811                       "newline", "line_buffering",
 812                       NULL};
 813     PyObject *buffer, *raw;
 814     char *encoding = NULL;
 815     char *errors = NULL;
 816     char *newline = NULL;
 817     int line_buffering = 0;
 818
 819     PyObject *res;
 820     int r;
 821
 822     self->ok = 0;
 823     self->detached = 0;
 824     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
 825                                      kwlist, &buffer, &encoding, &errors,
 826                                      &newline, &line_buffering))
 827         return -1;
 828
 829     if (newline && newline[0] != '\0'
 830         && !(newline[0] == '\n' && newline[1] == '\0')
 831         && !(newline[0] == '\r' && newline[1] == '\0')
 832         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
 833         PyErr_Format(PyExc_ValueError,
 834                      "illegal newline value: %s", newline);
 835         return -1;
 836     }
 837
 838     Py_CLEAR(self->buffer);
 839     Py_CLEAR(self->encoding);
 840     Py_CLEAR(self->encoder);
 841     Py_CLEAR(self->decoder);
 842     Py_CLEAR(self->readnl);
 843     Py_CLEAR(self->decoded_chars);
 844     Py_CLEAR(self->pending_bytes);
 845     Py_CLEAR(self->snapshot);
 846     Py_CLEAR(self->errors);
 847     Py_CLEAR(self->raw);
 848     self->decoded_chars_used = 0;
 849     self->pending_bytes_count = 0;
 850     self->encodefunc = NULL;
 851     self->writenl = NULL;
 852
 853     if (encoding == NULL && self->encoding == NULL) {
 854         if (_PyIO_locale_module == NULL) {
 855             _PyIO_locale_module = PyImport_ImportModule("locale");
 856             if (_PyIO_locale_module == NULL)
 857                 goto catch_ImportError;
 858             else
 859                 goto use_locale;
 860         }
 861         else {
 862           use_locale:
 863             self->encoding = PyObject_CallMethod(
 864                 _PyIO_locale_module, "getpreferredencoding", NULL);
 865             if (self->encoding == NULL) {
 866               catch_ImportError:
 867                 /*
 868                  Importing locale can raise a ImportError because of
 869                  _functools, and locale.getpreferredencoding can raise a
 870                  ImportError if _locale is not available.  These will happen
 871                  during module building.
 872                 */
 873                 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
 874                     PyErr_Clear();
 875                     self->encoding = PyString_FromString("ascii");
 876                 }
 877                 else
 878                     goto error;
 879             }
 880             else if (!PyString_Check(self->encoding))
 881                 Py_CLEAR(self->encoding);
 882         }
 883     }
 884     if (self->encoding != NULL)
 885         encoding = PyString_AsString(self->encoding);
 886     else if (encoding != NULL) {
 887         self->encoding = PyString_FromString(encoding);
 888         if (self->encoding == NULL)
 889             goto error;
 890     }
 891     else {
 892         PyErr_SetString(PyExc_IOError,
 893                         "could not determine default encoding");
 894     }
 895
 896     if (errors == NULL)
 897         errors = "strict";
 898     self->errors = PyBytes_FromString(errors);
 899     if (self->errors == NULL)
 900         goto error;
 901
 902     self->chunk_size = 8192;
 903     self->readuniversal = (newline == NULL || newline[0] == '\0');
 904     self->line_buffering = line_buffering;
 905     self->readtranslate = (newline == NULL);
 906     if (newline) {
 907         self->readnl = PyString_FromString(newline);
 908         if (self->readnl == NULL)
 909             return -1;
 910     }
 911     self->writetranslate = (newline == NULL || newline[0] != '\0');
 912     if (!self->readuniversal && self->writetranslate) {
 913         self->writenl = PyString_AsString(self->readnl);
 914         if (!strcmp(self->writenl, "\n"))
 915             self->writenl = NULL;
 916     }
 917 #ifdef MS_WINDOWS
 918     else
 919         self->writenl = "\r\n";
 920 #endif
 921
 922     /* Build the decoder object */
 923     res = PyObject_CallMethod(buffer, "readable", NULL);
 924     if (res == NULL)
 925         goto error;
 926     r = PyObject_IsTrue(res);
 927     Py_DECREF(res);
 928     if (r == -1)
 929         goto error;
 930     if (r == 1) {
 931         self->decoder = PyCodec_IncrementalDecoder(
 932             encoding, errors);
 933         if (self->decoder == NULL)
 934             goto error;
 935
 936         if (self->readuniversal) {
 937             PyObject *incrementalDecoder = PyObject_CallFunction(
 938                 (PyObject *)&PyIncrementalNewlineDecoder_Type,
 939                 "Oi", self->decoder, (int)self->readtranslate);
 940             if (incrementalDecoder == NULL)
 941                 goto error;
 942             Py_CLEAR(self->decoder);
 943             self->decoder = incrementalDecoder;
 944         }
 945     }
 946
 947     /* Build the encoder object */
 948     res = PyObject_CallMethod(buffer, "writable", NULL);
 949     if (res == NULL)
 950         goto error;
 951     r = PyObject_IsTrue(res);
 952     Py_DECREF(res);
 953     if (r == -1)
 954         goto error;
 955     if (r == 1) {
 956         PyObject *ci;
 957         self->encoder = PyCodec_IncrementalEncoder(
 958             encoding, errors);
 959         if (self->encoder == NULL)
 960             goto error;
 961         /* Get the normalized named of the codec */
 962         ci = _PyCodec_Lookup(encoding);
 963         if (ci == NULL)
 964             goto error;
 965         res = PyObject_GetAttrString(ci, "name");
 966         Py_DECREF(ci);
 967         if (res == NULL) {
 968             if (PyErr_ExceptionMatches(PyExc_AttributeError))
 969                 PyErr_Clear();
 970             else
 971                 goto error;
 972         }
 973         else if (PyString_Check(res)) {
 974             encodefuncentry *e = encodefuncs;
 975             while (e->name != NULL) {
 976                 if (!strcmp(PyString_AS_STRING(res), e->name)) {
 977                     self->encodefunc = e->encodefunc;
 978                     break;
 979                 }
 980                 e++;
 981             }
 982         }
 983         Py_XDECREF(res);
 984     }
 985
 986     self->buffer = buffer;
 987     Py_INCREF(buffer);
 988
 989     if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
 990         Py_TYPE(buffer) == &PyBufferedWriter_Type ||
 991         Py_TYPE(buffer) == &PyBufferedRandom_Type) {
 992         raw = PyObject_GetAttrString(buffer, "raw");
 993         /* Cache the raw FileIO object to speed up 'closed' checks */
 994         if (raw == NULL) {
 995             if (PyErr_ExceptionMatches(PyExc_AttributeError))
 996                 PyErr_Clear();
 997             else
 998                 goto error;
 999         }
1000         else if (Py_TYPE(raw) == &PyFileIO_Type)
1001             self->raw = raw;
1002         else
1003             Py_DECREF(raw);
1004     }
1005
1006     res = PyObject_CallMethod(buffer, "seekable", NULL);
1007     if (res == NULL)
1008         goto error;
1009     self->seekable = self->telling = PyObject_IsTrue(res);
1010     Py_DECREF(res);
1011
1012     self->encoding_start_of_stream = 0;
1013     if (self->seekable && self->encoder) {
1014         PyObject *cookieObj;
1015         int cmp;
1016
1017         self->encoding_start_of_stream = 1;
1018
1019         cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1020         if (cookieObj == NULL)
1021             goto error;
1022
1023         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1024         Py_DECREF(cookieObj);
1025         if (cmp < 0) {
1026             goto error;
1027         }
1028
1029         if (cmp == 0) {
1030             self->encoding_start_of_stream = 0;
1031             res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1032                                              _PyIO_zero, NULL);
1033             if (res == NULL)
1034                 goto error;
1035             Py_DECREF(res);
1036         }
1037     }
1038
1039     self->ok = 1;
1040     return 0;
1041
1042   error:
1043     return -1;
1044 }
1045
1046 static int
1047 _textiowrapper_clear(textio *self)
1048 {
1049     if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1050         return -1;
1051     self->ok = 0;
1052     Py_CLEAR(self->buffer);
1053     Py_CLEAR(self->encoding);
1054     Py_CLEAR(self->encoder);
1055     Py_CLEAR(self->decoder);
1056     Py_CLEAR(self->readnl);
1057     Py_CLEAR(self->decoded_chars);
1058     Py_CLEAR(self->pending_bytes);
1059     Py_CLEAR(self->snapshot);
1060     Py_CLEAR(self->errors);
1061     Py_CLEAR(self->raw);
1062     return 0;
1063 }
1064
1065 static void
1066 textiowrapper_dealloc(textio *self)
1067 {
1068     if (_textiowrapper_clear(self) < 0)
1069         return;
1070     _PyObject_GC_UNTRACK(self);
1071     if (self->weakreflist != NULL)
1072         PyObject_ClearWeakRefs((PyObject *)self);
1073     Py_CLEAR(self->dict);
1074     Py_TYPE(self)->tp_free((PyObject *)self);
1075 }
1076
1077 static int
1078 textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1079 {
         /* GC traversal hook: hand each object reference held by the wrapper
            to `visit`.  Py_VISIT relies on the parameter names `visit` and
            `arg`.  The members below are the same ones, in the same order,
            that _textiowrapper_clear() releases. */
1080     Py_VISIT(self->buffer);
1081     Py_VISIT(self->encoding);
1082     Py_VISIT(self->encoder);
1083     Py_VISIT(self->decoder);
1084     Py_VISIT(self->readnl);
1085     Py_VISIT(self->decoded_chars);
1086     Py_VISIT(self->pending_bytes);
1087     Py_VISIT(self->snapshot);
1088     Py_VISIT(self->errors);
1089     Py_VISIT(self->raw);
1090
         /* The instance dict is released separately (textiowrapper_clear()
            and textiowrapper_dealloc()), but must be visited here as well. */
1091     Py_VISIT(self->dict);
1092     return 0;
1093 }
1094
1095 static int
1096 textiowrapper_clear(textio *self)
1097 {
1098     if (_textiowrapper_clear(self) < 0)
1099         return -1;
1100     Py_CLEAR(self->dict);
1101     return 0;
1102 }
1103
1104 static PyObject *
1105 textiowrapper_closed_get(textio *self, void *context);
1106
1107 /* Make the calling function return NULL, with an exception set, if the
        stream is closed or if the closed check itself fails.

        This macro takes some shortcuts to make the common case faster: for an
        object whose type is exactly PyTextIOWrapper_Type and that has a
        cached `raw` object, it asks _PyFileIO_closed() directly; without a
        cached `raw` it goes through textiowrapper_closed_get() and tests the
        truth value of the result.  An object of any other type is handed to
        _PyIOBase_check_closed() instead.

        Because of the embedded "return NULL" statements it can only be used
        in functions that return a pointer.  `self` is expanded several times
        and without parentheses, so pass a plain variable. */
1108 #define CHECK_CLOSED(self) \
1109     do { \
1110         int r; \
1111         PyObject *_res; \
1112         if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1113             if (self->raw != NULL) \
1114                 r = _PyFileIO_closed(self->raw); \
1115             else { \
1116                 _res = textiowrapper_closed_get(self, NULL); \
1117                 if (_res == NULL) \
1118                     return NULL; \
1119                 r = PyObject_IsTrue(_res); \
1120                 Py_DECREF(_res); \
1121                 if (r < 0) \
1122                     return NULL; \
1123             } \
1124             if (r > 0) { \
1125                 PyErr_SetString(PyExc_ValueError, \
1126                                 "I/O operation on closed file."); \
1127                 return NULL; \
1128             } \
1129         } \
1130         else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1131             return NULL; \
1132     } while (0)
1133
/* Make the calling function return NULL with a ValueError set when the
   wrapper is not usable: either its buffer has been detached or it was never
   (successfully) initialized.  Only usable in functions returning a pointer.

   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement: `CHECK_INITIALIZED(self);` is then safe inside an
   unbraced if/else.  The argument is parenthesized so that any pointer
   expression can be passed. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            if ((self)->detached) { \
                PyErr_SetString(PyExc_ValueError, \
                     "underlying buffer has been detached"); \
            } else { \
                PyErr_SetString(PyExc_ValueError, \
                    "I/O operation on uninitialized object"); \
            } \
            return NULL; \
        } \
    } while (0)
1145
/* Make the calling function return -1 with a ValueError set when the
   wrapper is not usable: either its buffer has been detached or it was never
   (successfully) initialized.  For functions that return an int status.

   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement: `CHECK_INITIALIZED_INT(self);` is then safe inside an
   unbraced if/else.  The argument is parenthesized so that any pointer
   expression can be passed. */
#define CHECK_INITIALIZED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            if ((self)->detached) { \
                PyErr_SetString(PyExc_ValueError, \
                     "underlying buffer has been detached"); \
            } else { \
                PyErr_SetString(PyExc_ValueError, \
                    "I/O operation on uninitialized object"); \
            } \
            return -1; \
        } \
    } while (0)
1157
1158
1159 static PyObject *
1160 textiowrapper_detach(textio *self)
1161 {
1162     PyObject *buffer, *res;
1163     CHECK_INITIALIZED(self);
1164     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1165     if (res == NULL)
1166         return NULL;
1167     Py_DECREF(res);
1168     buffer = self->buffer;
1169     self->buffer = NULL;
1170     self->detached = 1;
1171     self->ok = 0;
1172     return buffer;
1173 }
1174
1175 Py_LOCAL_INLINE(const Py_UNICODE *)
1176 findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1177 {
1178     /* like wcschr, but doesn't stop at NULL characters */
1179     while (size-- > 0) {
1180         if (*s == ch)
1181             return s;
1182         s++;
1183     }
1184     return NULL;
1185 }
1186
1187 /* Flush the internal write buffer. This doesn't explicitly flush the
1188    underlying buffered object, though. */
1189 static int
1190 _textiowrapper_writeflush(textio *self)
1191 {
1192     PyObject *b, *ret;
1193
1194     if (self->pending_bytes == NULL)
1195         return 0;
1196     b = _PyBytes_Join(_PyIO_empty_bytes, self->pending_bytes);
1197     if (b == NULL)
1198         return -1;
1199     ret = PyObject_CallMethodObjArgs(self->buffer,
1200                                      _PyIO_str_write, b, NULL);
1201     Py_DECREF(b);
1202     if (ret == NULL)
1203         return -1;
1204     Py_DECREF(ret);
1205     Py_CLEAR(self->pending_bytes);
1206     self->pending_bytes_count = 0;
1207     return 0;
1208 }
1209
1210 static PyObject *
1211 textiowrapper_write(textio *self, PyObject *args)
1212 {
1213     PyObject *ret;
1214     PyObject *text; /* owned reference */
1215     PyObject *b;
1216     Py_ssize_t textlen;
1217     int haslf = 0;
1218     int needflush = 0;
1219
1220     CHECK_INITIALIZED(self);
1221
1222     if (!PyArg_ParseTuple(args, "U:write", &text)) {
1223         return NULL;
1224     }
1225
1226     CHECK_CLOSED(self);
1227
1228     if (self->encoder == NULL) {
1229         PyErr_SetString(PyExc_IOError, "not writable");
1230         return NULL;
1231     }
1232
1233     Py_INCREF(text);
1234
1235     textlen = PyUnicode_GetSize(text);
1236
1237     if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1238         if (findchar(PyUnicode_AS_UNICODE(text),
1239                      PyUnicode_GET_SIZE(text), '\n'))
1240             haslf = 1;
1241
1242     if (haslf && self->writetranslate && self->writenl != NULL) {
1243         PyObject *newtext = PyObject_CallMethod(
1244             text, "replace", "ss", "\n", self->writenl);
1245         Py_DECREF(text);
1246         if (newtext == NULL)
1247             return NULL;
1248         text = newtext;
1249     }
1250
1251     if (self->line_buffering &&
1252         (haslf ||
1253          findchar(PyUnicode_AS_UNICODE(text),
1254                   PyUnicode_GET_SIZE(text), '\r')))
1255         needflush = 1;
1256
1257     /* XXX What if we were just reading? */
1258     if (self->encodefunc != NULL) {
1259         b = (*self->encodefunc)((PyObject *) self, text);
1260         self->encoding_start_of_stream = 0;
1261     }
1262     else
1263         b = PyObject_CallMethodObjArgs(self->encoder,
1264                                        _PyIO_str_encode, text, NULL);
1265     Py_DECREF(text);
1266     if (b == NULL)
1267         return NULL;
1268
1269     if (self->pending_bytes == NULL) {
1270         self->pending_bytes = PyList_New(0);
1271         if (self->pending_bytes == NULL) {
1272             Py_DECREF(b);
1273             return NULL;
1274         }
1275         self->pending_bytes_count = 0;
1276     }
1277     if (PyList_Append(self->pending_bytes, b) < 0) {
1278         Py_DECREF(b);
1279         return NULL;
1280     }
1281     self->pending_bytes_count += PyBytes_GET_SIZE(b);
1282     Py_DECREF(b);
1283     if (self->pending_bytes_count > self->chunk_size || needflush) {
1284         if (_textiowrapper_writeflush(self) < 0)
1285             return NULL;
1286     }
1287
1288     if (needflush) {
1289         ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1290         if (ret == NULL)
1291             return NULL;
1292         Py_DECREF(ret);
1293     }
1294
1295     Py_CLEAR(self->snapshot);
1296
1297     if (self->decoder) {
1298         ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1299         if (ret == NULL)
1300             return NULL;
1301         Py_DECREF(ret);
1302     }
1303
1304     return PyLong_FromSsize_t(textlen);
1305 }
1306
1307 /* Steal a reference to chars and store it in the decoded_char buffer;
1308  */
1309 static void
1310 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1311 {
1312     Py_CLEAR(self->decoded_chars);
1313     self->decoded_chars = chars;
1314     self->decoded_chars_used = 0;
1315 }
1316
1317 static PyObject *
1318 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1319 {
1320     PyObject *chars;
1321     Py_ssize_t avail;
1322
1323     if (self->decoded_chars == NULL)
1324         return PyUnicode_FromStringAndSize(NULL, 0);
1325
1326     avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1327              - self->decoded_chars_used);
1328
1329     assert(avail >= 0);
1330
1331     if (n < 0 || n > avail)
1332         n = avail;
1333
1334     if (self->decoded_chars_used > 0 || n < avail) {
1335         chars = PyUnicode_FromUnicode(
1336             PyUnicode_AS_UNICODE(self->decoded_chars)
1337             + self->decoded_chars_used, n);
1338         if (chars == NULL)
1339             return NULL;
1340     }
1341     else {
1342         chars = self->decoded_chars;
1343         Py_INCREF(chars);
1344     }
1345
1346     self->decoded_chars_used += n;
1347     return chars;
1348 }
1349
1350 /* Read and decode the next chunk of data from the BufferedReader.
1351  */
1352 static int
1353 textiowrapper_read_chunk(textio *self)
1354 {
1355     PyObject *dec_buffer = NULL;
1356     PyObject *dec_flags = NULL;
1357     PyObject *input_chunk = NULL;
1358     PyObject *decoded_chars, *chunk_size;
1359     int eof;
1360
1361     /* The return value is True unless EOF was reached.  The decoded string is
1362      * placed in self._decoded_chars (replacing its previous value).  The
1363      * entire input chunk is sent to the decoder, though some of it may remain
1364      * buffered in the decoder, yet to be converted.
1365      */
1366
1367     if (self->decoder == NULL) {
1368         PyErr_SetString(PyExc_IOError, "not readable");
1369         return -1;
1370     }
1371
1372     if (self->telling) {
1373         /* To prepare for tell(), we need to snapshot a point in the file
1374          * where the decoder's input buffer is empty.
1375          */
1376
1377         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1378                                                      _PyIO_str_getstate, NULL);
1379         if (state == NULL)
1380             return -1;
1381         /* Given this, we know there was a valid snapshot point
1382          * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1383          */
1384         if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1385             Py_DECREF(state);
1386             return -1;
1387         }
1388         Py_INCREF(dec_buffer);
1389         Py_INCREF(dec_flags);
1390         Py_DECREF(state);
1391     }
1392
1393     /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1394     chunk_size = PyLong_FromSsize_t(self->chunk_size);
1395     if (chunk_size == NULL)
1396         goto fail;
1397     input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1398         _PyIO_str_read1, chunk_size, NULL);
1399     Py_DECREF(chunk_size);
1400     if (input_chunk == NULL)
1401         goto fail;
1402     assert(PyBytes_Check(input_chunk));
1403
1404     eof = (PyBytes_Size(input_chunk) == 0);
1405
1406     if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1407         decoded_chars = _PyIncrementalNewlineDecoder_decode(
1408             self->decoder, input_chunk, eof);
1409     }
1410     else {
1411         decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1412             _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1413     }
1414
1415     /* TODO sanity check: isinstance(decoded_chars, unicode) */
1416     if (decoded_chars == NULL)
1417         goto fail;
1418     textiowrapper_set_decoded_chars(self, decoded_chars);
1419     if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1420         eof = 0;
1421
1422     if (self->telling) {
1423         /* At the snapshot point, len(dec_buffer) bytes before the read, the
1424          * next input to be decoded is dec_buffer + input_chunk.
1425          */
1426         PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1427         if (next_input == NULL)
1428             goto fail;
1429         assert (PyBytes_Check(next_input));
1430         Py_DECREF(dec_buffer);
1431         Py_CLEAR(self->snapshot);
1432         self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1433     }
1434     Py_DECREF(input_chunk);
1435
1436     return (eof == 0);
1437
1438   fail:
1439     Py_XDECREF(dec_buffer);
1440     Py_XDECREF(dec_flags);
1441     Py_XDECREF(input_chunk);
1442     return -1;
1443 }
1444
1445 static PyObject *
1446 textiowrapper_read(textio *self, PyObject *args)
1447 {
1448     Py_ssize_t n = -1;
1449     PyObject *result = NULL, *chunks = NULL;
1450
1451     CHECK_INITIALIZED(self);
1452
1453     if (!PyArg_ParseTuple(args, "|n:read", &n))
1454         return NULL;
1455
1456     CHECK_CLOSED(self);
1457
1458     if (self->decoder == NULL) {
1459         PyErr_SetString(PyExc_IOError, "not readable");
1460         return NULL;
1461     }
1462
1463     if (_textiowrapper_writeflush(self) < 0)
1464         return NULL;
1465
1466     if (n < 0) {
1467         /* Read everything */
1468         PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1469         PyObject *decoded, *final;
1470         if (bytes == NULL)
1471             goto fail;
1472         decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1473                                              bytes, Py_True, NULL);
1474         Py_DECREF(bytes);
1475         if (decoded == NULL)
1476             goto fail;
1477
1478         result = textiowrapper_get_decoded_chars(self, -1);
1479
1480         if (result == NULL) {
1481             Py_DECREF(decoded);
1482             return NULL;
1483         }
1484
1485         final = PyUnicode_Concat(result, decoded);
1486         Py_DECREF(result);
1487         Py_DECREF(decoded);
1488         if (final == NULL)
1489             goto fail;
1490
1491         Py_CLEAR(self->snapshot);
1492         return final;
1493     }
1494     else {
1495         int res = 1;
1496         Py_ssize_t remaining = n;
1497
1498         result = textiowrapper_get_decoded_chars(self, n);
1499         if (result == NULL)
1500             goto fail;
1501         remaining -= PyUnicode_GET_SIZE(result);
1502
1503         /* Keep reading chunks until we have n characters to return */
1504         while (remaining > 0) {
1505             res = textiowrapper_read_chunk(self);
1506             if (res < 0)
1507                 goto fail;
1508             if (res == 0)  /* EOF */
1509                 break;
1510             if (chunks == NULL) {
1511                 chunks = PyList_New(0);
1512                 if (chunks == NULL)
1513                     goto fail;
1514             }
1515             if (PyList_Append(chunks, result) < 0)
1516                 goto fail;
1517             Py_DECREF(result);
1518             result = textiowrapper_get_decoded_chars(self, remaining);
1519             if (result == NULL)
1520                 goto fail;
1521             remaining -= PyUnicode_GET_SIZE(result);
1522         }
1523         if (chunks != NULL) {
1524             if (result != NULL && PyList_Append(chunks, result) < 0)
1525                 goto fail;
1526             Py_CLEAR(result);
1527             result = PyUnicode_Join(_PyIO_empty_str, chunks);
1528             if (result == NULL)
1529                 goto fail;
1530             Py_CLEAR(chunks);
1531         }
1532         return result;
1533     }
1534   fail:
1535     Py_XDECREF(result);
1536     Py_XDECREF(chunks);
1537     return NULL;
1538 }
1539
1540
1541 /* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1542    that is to the NUL character. Otherwise the function will produce
1543    incorrect results. */
1544 static Py_UNICODE *
1545 find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1546 {
1547     Py_UNICODE *s = start;
1548     for (;;) {
1549         while (*s > ch)
1550             s++;
1551         if (*s == ch)
1552             return s;
1553         if (s == end)
1554             return NULL;
1555         s++;
1556     }
1557 }
1558
1559 Py_ssize_t
1560 _PyIO_find_line_ending(
1561     int translated, int universal, PyObject *readnl,
1562     Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1563 {
1564     Py_ssize_t len = end - start;
1565
1566     if (translated) {
1567         /* Newlines are already translated, only search for \n */
1568         Py_UNICODE *pos = find_control_char(start, end, '\n');
1569         if (pos != NULL)
1570             return pos - start + 1;
1571         else {
1572             *consumed = len;
1573             return -1;
1574         }
1575     }
1576     else if (universal) {
1577         /* Universal newline search. Find any of \r, \r\n, \n
1578          * The decoder ensures that \r\n are not split in two pieces
1579          */
1580         Py_UNICODE *s = start;
1581         for (;;) {
1582             Py_UNICODE ch;
1583             /* Fast path for non-control chars. The loop always ends
1584                since the Py_UNICODE storage is NUL-terminated. */
1585             while (*s > '\r')
1586                 s++;
1587             if (s >= end) {
1588                 *consumed = len;
1589                 return -1;
1590             }
1591             ch = *s++;
1592             if (ch == '\n')
1593                 return s - start;
1594             if (ch == '\r') {
1595                 if (*s == '\n')
1596                     return s - start + 1;
1597                 else
1598                     return s - start;
1599             }
1600         }
1601     }
1602     else {
1603         /* Non-universal mode. */
1604         Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
1605         unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
1606         if (readnl_len == 1) {
1607             Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1608             if (pos != NULL)
1609                 return pos - start + 1;
1610             *consumed = len;
1611             return -1;
1612         }
1613         else {
1614             Py_UNICODE *s = start;
1615             Py_UNICODE *e = end - readnl_len + 1;
1616             Py_UNICODE *pos;
1617             if (e < s)
1618                 e = s;
1619             while (s < e) {
1620                 Py_ssize_t i;
1621                 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1622                 if (pos == NULL || pos >= e)
1623                     break;
1624                 for (i = 1; i < readnl_len; i++) {
1625                     if (pos[i] != nl[i])
1626                         break;
1627                 }
1628                 if (i == readnl_len)
1629                     return pos - start + readnl_len;
1630                 s = pos + 1;
1631             }
1632             pos = find_control_char(e, end, nl[0]);
1633             if (pos == NULL)
1634                 *consumed = len;
1635             else
1636                 *consumed = pos - start;
1637             return -1;
1638         }
1639     }
1640 }
1641
1642 static PyObject *
1643 _textiowrapper_readline(textio *self, Py_ssize_t limit)
1644 {
         /* Read one line from the stream and return it as a new unicode
            object (empty at EOF); returns NULL with an exception set on
            error.  When `limit` is >= 0, at most `limit` characters are
            returned; a negative `limit` means no limit.  Pending written
            data is flushed first.

            line:             string currently being searched (the decoded
                              buffer, or `remaining` + the decoded buffer)
            chunks:           list of pieces already set aside that contain
                              no line ending
            remaining:        tail of the previous buffer that could not be
                              set aside yet; prepended to the next chunk
            start, endpos:    bounds, within `line`, of the data to return
            chunked:          number of characters stored in `chunks`
            offset_to_buffer: length of the `remaining` prefix inside `line` */
1645     PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1646     Py_ssize_t start, endpos, chunked, offset_to_buffer;
1647     int res;
1648
1649     CHECK_CLOSED(self);
1650
1651     if (_textiowrapper_writeflush(self) < 0)
1652         return NULL;
1653
1654     chunked = 0;
1655
1656     while (1) {
1657         Py_UNICODE *ptr;
1658         Py_ssize_t line_len;
1659         Py_ssize_t consumed = 0;
1660
1661         /* First, get some data if necessary */
1662         res = 1;
1663         while (!self->decoded_chars ||
1664                !PyUnicode_GET_SIZE(self->decoded_chars)) {
1665             res = textiowrapper_read_chunk(self);
1666             if (res < 0)
1667                 goto error;
1668             if (res == 0)
1669                 break;
1670         }
1671         if (res == 0) {
1672             /* end of file */
1673             textiowrapper_set_decoded_chars(self, NULL);
1674             Py_CLEAR(self->snapshot);
1675             start = endpos = offset_to_buffer = 0;
1676             break;
1677         }
1678
             /* Search the decoded buffer directly, unless a tail was carried
                over from the previous pass: then search tail + buffer. */
1679         if (remaining == NULL) {
1680             line = self->decoded_chars;
1681             start = self->decoded_chars_used;
1682             offset_to_buffer = 0;
1683             Py_INCREF(line);
1684         }
1685         else {
1686             assert(self->decoded_chars_used == 0);
1687             line = PyUnicode_Concat(remaining, self->decoded_chars);
1688             start = 0;
1689             offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1690             Py_CLEAR(remaining);
1691             if (line == NULL)
1692                 goto error;
1693         }
1694
1695         ptr = PyUnicode_AS_UNICODE(line);
1696         line_len = PyUnicode_GET_SIZE(line);
1697
1698         endpos = _PyIO_find_line_ending(
1699             self->readtranslate, self->readuniversal, self->readnl,
1700             ptr + start, ptr + line_len, &consumed);
1701         if (endpos >= 0) {
                 /* Line ending found: make endpos relative to `line` and cut
                    it short if the limit is hit first. */
1702             endpos += start;
1703             if (limit >= 0 && (endpos - start) + chunked >= limit)
1704                 endpos = start + limit - chunked;
1705             break;
1706         }
1707
1708         /* We can put aside up to `endpos` */
1709         endpos = consumed + start;
1710         if (limit >= 0 && (endpos - start) + chunked >= limit) {
1711             /* Didn't find line ending, but reached length limit */
1712             endpos = start + limit - chunked;
1713             break;
1714         }
1715
1716         if (endpos > start) {
1717             /* No line ending seen yet - put aside current data */
1718             PyObject *s;
1719             if (chunks == NULL) {
1720                 chunks = PyList_New(0);
1721                 if (chunks == NULL)
1722                     goto error;
1723             }
1724             s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1725             if (s == NULL)
1726                 goto error;
1727             if (PyList_Append(chunks, s) < 0) {
1728                 Py_DECREF(s);
1729                 goto error;
1730             }
1731             chunked += PyUnicode_GET_SIZE(s);
1732             Py_DECREF(s);
1733         }
1734         /* There may be some remaining bytes we'll have to prepend to the
1735            next chunk of data */
1736         if (endpos < line_len) {
1737             remaining = PyUnicode_FromUnicode(
1738                     ptr + endpos, line_len - endpos);
1739             if (remaining == NULL)
1740                 goto error;
1741         }
1742         Py_CLEAR(line);
1743         /* We have consumed the buffer */
1744         textiowrapper_set_decoded_chars(self, NULL);
1745     }
1746
1747     if (line != NULL) {
1748         /* Our line ends in the current buffer */
             /* endpos is relative to `line`; subtracting the carried-over
                prefix gives the position inside self->decoded_chars. */
1749         self->decoded_chars_used = endpos - offset_to_buffer;
1750         if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
                 /* Only part of `line` is returned: shrink it in place when
                    this function holds the sole reference, else copy the
                    slice. */
1751             if (start == 0 && Py_REFCNT(line) == 1) {
1752                 if (PyUnicode_Resize(&line, endpos) < 0)
1753                     goto error;
1754             }
1755             else {
1756                 PyObject *s = PyUnicode_FromUnicode(
1757                         PyUnicode_AS_UNICODE(line) + start, endpos - start);
1758                 Py_CLEAR(line);
1759                 if (s == NULL)
1760                     goto error;
1761                 line = s;
1762             }
1763         }
1764     }
         /* A tail still pending when the loop ended (EOF reached) is part of
            the result too. */
1765     if (remaining != NULL) {
1766         if (chunks == NULL) {
1767             chunks = PyList_New(0);
1768             if (chunks == NULL)
1769                 goto error;
1770         }
1771         if (PyList_Append(chunks, remaining) < 0)
1772             goto error;
1773         Py_CLEAR(remaining);
1774     }
         /* Join the set-aside pieces and the final piece into one string. */
1775     if (chunks != NULL) {
1776         if (line != NULL && PyList_Append(chunks, line) < 0)
1777             goto error;
1778         Py_CLEAR(line);
1779         line = PyUnicode_Join(_PyIO_empty_str, chunks);
1780         if (line == NULL)
1781             goto error;
1782         Py_DECREF(chunks);
1783     }
         /* Nothing was read at all: return an empty string. */
1784     if (line == NULL)
1785         line = PyUnicode_FromStringAndSize(NULL, 0);
1786
1787     return line;
1788
1789   error:
1790     Py_XDECREF(chunks);
1791     Py_XDECREF(remaining);
1792     Py_XDECREF(line);
1793     return NULL;
1794 }
1795
1796 static PyObject *
1797 textiowrapper_readline(textio *self, PyObject *args)
1798 {
1799     PyObject *limitobj = NULL;
1800     Py_ssize_t limit = -1;
1801
1802     CHECK_INITIALIZED(self);
1803     if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1804         return NULL;
1805     }
1806     if (limitobj) {
1807         if (!PyNumber_Check(limitobj)) {
1808             PyErr_Format(PyExc_TypeError,
1809                          "integer argument expected, got '%.200s'",
1810                          Py_TYPE(limitobj)->tp_name);
1811             return NULL;
1812         }
1813         limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1814         if (limit == -1 && PyErr_Occurred())
1815             return NULL;
1816     }
1817     return _textiowrapper_readline(self, limit);
1818 }
1819
1820 /* Seek and Tell */
1821
/* Unpacked form of the opaque integer cookie returned by tell() and
   accepted by seek().  See textiowrapper_parse_cookie() and
   textiowrapper_build_cookie() for the packed representation. */
typedef struct {
    /* Position handed to buffer.seek() ("safe start point"). */
    Py_off_t start_pos;
    /* Flags value given to decoder.setstate() at start_pos. */
    int dec_flags;
    /* Number of bytes seek() reads from the buffer after start_pos. */
    int bytes_to_feed;
    /* Number of decoded characters seek() marks as already consumed. */
    int chars_to_skip;
    /* Passed as the `final` argument of decoder.decode() in seek(). */
    char need_eof;
} cookie_type;
1829
/*
   To speed up cookie packing/unpacking, we store the cookie_type fields in
   a temporary byte buffer and call _PyLong_FromByteArray() (packing) or
   _PyLong_AsByteArray() (unpacking).
   The following macros define at which offsets in the intermediary byte
   buffer the various cookie_type fields are stored.
 */

/* Total size of the packed cookie: one Py_off_t, three ints and one char. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1867
1868 static int
1869 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1870 {
1871     unsigned char buffer[COOKIE_BUF_LEN];
1872     PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1873     if (cookieLong == NULL)
1874         return -1;
1875
1876     if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1877                             IS_LITTLE_ENDIAN, 0) < 0) {
1878         Py_DECREF(cookieLong);
1879         return -1;
1880     }
1881     Py_DECREF(cookieLong);
1882
1883     memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1884     memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1885     memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1886     memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1887     memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1888
1889     return 0;
1890 }
1891
1892 static PyObject *
1893 textiowrapper_build_cookie(cookie_type *cookie)
1894 {
1895     unsigned char buffer[COOKIE_BUF_LEN];
1896
1897     memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1898     memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1899     memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1900     memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1901     memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1902
1903     return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1904 }
1905 #undef IS_LITTLE_ENDIAN
1906
1907 static int
1908 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1909 {
1910     PyObject *res;
1911     /* When seeking to the start of the stream, we call decoder.reset()
1912        rather than decoder.getstate().
1913        This is for a few decoders such as utf-16 for which the state value
1914        at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1915        utf-16, that we are expecting a BOM).
1916     */
1917     if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1918         res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1919     else
1920         res = PyObject_CallMethod(self->decoder, "setstate",
1921                                   "((si))", "", cookie->dec_flags);
1922     if (res == NULL)
1923         return -1;
1924     Py_DECREF(res);
1925     return 0;
1926 }
1927
1928 static int
1929 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1930 {
1931     PyObject *res;
1932     /* Same as _textiowrapper_decoder_setstate() above. */
1933     if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1934         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1935         self->encoding_start_of_stream = 1;
1936     }
1937     else {
1938         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1939                                          _PyIO_zero, NULL);
1940         self->encoding_start_of_stream = 0;
1941     }
1942     if (res == NULL)
1943         return -1;
1944     Py_DECREF(res);
1945     return 0;
1946 }
1947
1948 static PyObject *
1949 textiowrapper_seek(textio *self, PyObject *args)
1950 {
1951     PyObject *cookieObj, *posobj;
1952     cookie_type cookie;
1953     int whence = 0;
1954     PyObject *res;
1955     int cmp;
1956
1957     CHECK_INITIALIZED(self);
1958
1959     if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1960         return NULL;
1961     CHECK_CLOSED(self);
1962
1963     Py_INCREF(cookieObj);
1964
1965     if (!self->seekable) {
1966         PyErr_SetString(PyExc_IOError,
1967                         "underlying stream is not seekable");
1968         goto fail;
1969     }
1970
1971     if (whence == 1) {
1972         /* seek relative to current position */
1973         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1974         if (cmp < 0)
1975             goto fail;
1976
1977         if (cmp == 0) {
1978             PyErr_SetString(PyExc_IOError,
1979                             "can't do nonzero cur-relative seeks");
1980             goto fail;
1981         }
1982
1983         /* Seeking to the current position should attempt to
1984          * sync the underlying buffer with the current position.
1985          */
1986         Py_DECREF(cookieObj);
1987         cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
1988         if (cookieObj == NULL)
1989             goto fail;
1990     }
1991     else if (whence == 2) {
1992         /* seek relative to end of file */
1993
1994         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1995         if (cmp < 0)
1996             goto fail;
1997
1998         if (cmp == 0) {
1999             PyErr_SetString(PyExc_IOError,
2000                             "can't do nonzero end-relative seeks");
2001             goto fail;
2002         }
2003
2004         res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2005         if (res == NULL)
2006             goto fail;
2007         Py_DECREF(res);
2008
2009         textiowrapper_set_decoded_chars(self, NULL);
2010         Py_CLEAR(self->snapshot);
2011         if (self->decoder) {
2012             res = PyObject_CallMethod(self->decoder, "reset", NULL);
2013             if (res == NULL)
2014                 goto fail;
2015             Py_DECREF(res);
2016         }
2017
2018         res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2019         Py_XDECREF(cookieObj);
2020         return res;
2021     }
2022     else if (whence != 0) {
2023         PyErr_Format(PyExc_ValueError,
2024                      "invalid whence (%d, should be 0, 1 or 2)", whence);
2025         goto fail;
2026     }
2027
2028     cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2029     if (cmp < 0)
2030         goto fail;
2031
2032     if (cmp == 1) {
2033         PyObject *repr = PyObject_Repr(cookieObj);
2034         if (repr != NULL) {
2035             PyErr_Format(PyExc_ValueError,
2036                          "negative seek position %s",
2037                          PyString_AS_STRING(repr));
2038             Py_DECREF(repr);
2039         }
2040         goto fail;
2041     }
2042
2043     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2044     if (res == NULL)
2045         goto fail;
2046     Py_DECREF(res);
2047
2048     /* The strategy of seek() is to go back to the safe start point
2049      * and replay the effect of read(chars_to_skip) from there.
2050      */
2051     if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2052         goto fail;
2053
2054     /* Seek back to the safe start point. */
2055     posobj = PyLong_FromOff_t(cookie.start_pos);
2056     if (posobj == NULL)
2057         goto fail;
2058     res = PyObject_CallMethodObjArgs(self->buffer,
2059                                      _PyIO_str_seek, posobj, NULL);
2060     Py_DECREF(posobj);
2061     if (res == NULL)
2062         goto fail;
2063     Py_DECREF(res);
2064
2065     textiowrapper_set_decoded_chars(self, NULL);
2066     Py_CLEAR(self->snapshot);
2067
2068     /* Restore the decoder to its state from the safe start point. */
2069     if (self->decoder) {
2070         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2071             goto fail;
2072     }
2073
2074     if (cookie.chars_to_skip) {
2075         /* Just like _read_chunk, feed the decoder and save a snapshot. */
2076         PyObject *input_chunk = PyObject_CallMethod(
2077             self->buffer, "read", "i", cookie.bytes_to_feed);
2078         PyObject *decoded;
2079
2080         if (input_chunk == NULL)
2081             goto fail;
2082
2083         assert (PyBytes_Check(input_chunk));
2084
2085         self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2086         if (self->snapshot == NULL) {
2087             Py_DECREF(input_chunk);
2088             goto fail;
2089         }
2090
2091         decoded = PyObject_CallMethod(self->decoder, "decode",
2092                                       "Oi", input_chunk, (int)cookie.need_eof);
2093
2094         if (decoded == NULL)
2095             goto fail;
2096
2097         textiowrapper_set_decoded_chars(self, decoded);
2098
2099         /* Skip chars_to_skip of the decoded characters. */
2100         if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2101             PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2102             goto fail;
2103         }
2104         self->decoded_chars_used = cookie.chars_to_skip;
2105     }
2106     else {
2107         self->snapshot = Py_BuildValue("is", cookie.dec_flags, "");
2108         if (self->snapshot == NULL)
2109             goto fail;
2110     }
2111
2112     /* Finally, reset the encoder (merely useful for proper BOM handling) */
2113     if (self->encoder) {
2114         if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2115             goto fail;
2116     }
2117     return cookieObj;
2118   fail:
2119     Py_XDECREF(cookieObj);
2120     return NULL;
2121
2122 }
2123
2124 static PyObject *
2125 textiowrapper_tell(textio *self, PyObject *args)
2126 {
2127     PyObject *res;
2128     PyObject *posobj = NULL;
2129     cookie_type cookie = {0,0,0,0,0};
2130     PyObject *next_input;
2131     Py_ssize_t chars_to_skip, chars_decoded;
2132     PyObject *saved_state = NULL;
2133     char *input, *input_end;
2134
2135     CHECK_INITIALIZED(self);
2136     CHECK_CLOSED(self);
2137
2138     if (!self->seekable) {
2139         PyErr_SetString(PyExc_IOError,
2140                         "underlying stream is not seekable");
2141         goto fail;
2142     }
2143     if (!self->telling) {
2144         PyErr_SetString(PyExc_IOError,
2145                         "telling position disabled by next() call");
2146         goto fail;
2147     }
2148
2149     if (_textiowrapper_writeflush(self) < 0)
2150         return NULL;
2151     res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2152     if (res == NULL)
2153         goto fail;
2154     Py_DECREF(res);
2155
2156     posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2157     if (posobj == NULL)
2158         goto fail;
2159
2160     if (self->decoder == NULL || self->snapshot == NULL) {
2161         assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2162         return posobj;
2163     }
2164
2165 #if defined(HAVE_LARGEFILE_SUPPORT)
2166     cookie.start_pos = PyLong_AsLongLong(posobj);
2167 #else
2168     cookie.start_pos = PyLong_AsLong(posobj);
2169 #endif
2170     if (PyErr_Occurred())
2171         goto fail;
2172
2173     /* Skip backward to the snapshot point (see _read_chunk). */
2174     if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2175         goto fail;
2176
2177     assert (PyBytes_Check(next_input));
2178
2179     cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2180
2181     /* How many decoded characters have been used up since the snapshot? */
2182     if (self->decoded_chars_used == 0)  {
2183         /* We haven't moved from the snapshot point. */
2184         Py_DECREF(posobj);
2185         return textiowrapper_build_cookie(&cookie);
2186     }
2187
2188     chars_to_skip = self->decoded_chars_used;
2189
2190     /* Starting from the snapshot position, we will walk the decoder
2191      * forward until it gives us enough decoded characters.
2192      */
2193     saved_state = PyObject_CallMethodObjArgs(self->decoder,
2194                                              _PyIO_str_getstate, NULL);
2195     if (saved_state == NULL)
2196         goto fail;
2197
2198     /* Note our initial start point. */
2199     if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2200         goto fail;
2201
2202     /* Feed the decoder one byte at a time.  As we go, note the
2203      * nearest "safe start point" before the current location
2204      * (a point where the decoder has nothing buffered, so seek()
2205      * can safely start from there and advance to this location).
2206      */
2207     chars_decoded = 0;
2208     input = PyBytes_AS_STRING(next_input);
2209     input_end = input + PyBytes_GET_SIZE(next_input);
2210     while (input < input_end) {
2211         PyObject *state;
2212         char *dec_buffer;
2213         Py_ssize_t dec_buffer_len;
2214         int dec_flags;
2215
2216         PyObject *decoded = PyObject_CallMethod(
2217             self->decoder, "decode", "s#", input, 1);
2218         if (decoded == NULL)
2219             goto fail;
2220         assert (PyUnicode_Check(decoded));
2221         chars_decoded += PyUnicode_GET_SIZE(decoded);
2222         Py_DECREF(decoded);
2223
2224         cookie.bytes_to_feed += 1;
2225
2226         state = PyObject_CallMethodObjArgs(self->decoder,
2227                                            _PyIO_str_getstate, NULL);
2228         if (state == NULL)
2229             goto fail;
2230         if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2231             Py_DECREF(state);
2232             goto fail;
2233         }
2234         Py_DECREF(state);
2235
2236         if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2237             /* Decoder buffer is empty, so this is a safe start point. */
2238             cookie.start_pos += cookie.bytes_to_feed;
2239             chars_to_skip -= chars_decoded;
2240             cookie.dec_flags = dec_flags;
2241             cookie.bytes_to_feed = 0;
2242             chars_decoded = 0;
2243         }
2244         if (chars_decoded >= chars_to_skip)
2245             break;
2246         input++;
2247     }
2248     if (input == input_end) {
2249         /* We didn't get enough decoded data; signal EOF to get more. */
2250         PyObject *decoded = PyObject_CallMethod(
2251             self->decoder, "decode", "si", "", /* final = */ 1);
2252         if (decoded == NULL)
2253             goto fail;
2254         assert (PyUnicode_Check(decoded));
2255         chars_decoded += PyUnicode_GET_SIZE(decoded);
2256         Py_DECREF(decoded);
2257         cookie.need_eof = 1;
2258
2259         if (chars_decoded < chars_to_skip) {
2260             PyErr_SetString(PyExc_IOError,
2261                             "can't reconstruct logical file position");
2262             goto fail;
2263         }
2264     }
2265
2266     /* finally */
2267     Py_XDECREF(posobj);
2268     res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2269     Py_DECREF(saved_state);
2270     if (res == NULL)
2271         return NULL;
2272     Py_DECREF(res);
2273
2274     /* The returned cookie corresponds to the last safe start point. */
2275     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2276     return textiowrapper_build_cookie(&cookie);
2277
2278   fail:
2279     Py_XDECREF(posobj);
2280     if (saved_state) {
2281         PyObject *type, *value, *traceback;
2282         PyErr_Fetch(&type, &value, &traceback);
2283
2284         res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2285         Py_DECREF(saved_state);
2286         if (res == NULL)
2287             return NULL;
2288         Py_DECREF(res);
2289
2290         PyErr_Restore(type, value, traceback);
2291     }
2292     return NULL;
2293 }
2294
2295 static PyObject *
2296 textiowrapper_truncate(textio *self, PyObject *args)
2297 {
2298     PyObject *pos = Py_None;
2299     PyObject *res;
2300
2301     CHECK_INITIALIZED(self)
2302     if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2303         return NULL;
2304     }
2305
2306     res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2307     if (res == NULL)
2308         return NULL;
2309     Py_DECREF(res);
2310
2311     if (pos != Py_None) {
2312         res = PyObject_CallMethodObjArgs((PyObject *) self,
2313                                           _PyIO_str_seek, pos, NULL);
2314         if (res == NULL)
2315             return NULL;
2316         Py_DECREF(res);
2317     }
2318
2319     return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL);
2320 }
2321
2322 static PyObject *
2323 textiowrapper_repr(textio *self)
2324 {
2325     PyObject *nameobj, *res;
2326     PyObject *namerepr = NULL, *encrepr = NULL;
2327
2328     CHECK_INITIALIZED(self);
2329
2330     nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2331     if (nameobj == NULL) {
2332         if (PyErr_ExceptionMatches(PyExc_AttributeError))
2333             PyErr_Clear();
2334         else
2335             goto error;
2336         encrepr = PyObject_Repr(self->encoding);
2337         res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2338                                    PyString_AS_STRING(encrepr));
2339     }
2340     else {
2341         encrepr = PyObject_Repr(self->encoding);
2342         namerepr = PyObject_Repr(nameobj);
2343         res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2344                                    PyString_AS_STRING(namerepr),
2345                                    PyString_AS_STRING(encrepr));
2346         Py_DECREF(nameobj);
2347     }
2348     Py_XDECREF(namerepr);
2349     Py_XDECREF(encrepr);
2350     return res;
2351
2352 error:
2353     Py_XDECREF(namerepr);
2354     Py_XDECREF(encrepr);
2355     return NULL;
2356 }
2357
2358
2359 /* Inquiries */
2360
2361 static PyObject *
2362 textiowrapper_fileno(textio *self, PyObject *args)
2363 {
2364     CHECK_INITIALIZED(self);
2365     return PyObject_CallMethod(self->buffer, "fileno", NULL);
2366 }
2367
2368 static PyObject *
2369 textiowrapper_seekable(textio *self, PyObject *args)
2370 {
2371     CHECK_INITIALIZED(self);
2372     return PyObject_CallMethod(self->buffer, "seekable", NULL);
2373 }
2374
2375 static PyObject *
2376 textiowrapper_readable(textio *self, PyObject *args)
2377 {
2378     CHECK_INITIALIZED(self);
2379     return PyObject_CallMethod(self->buffer, "readable", NULL);
2380 }
2381
2382 static PyObject *
2383 textiowrapper_writable(textio *self, PyObject *args)
2384 {
2385     CHECK_INITIALIZED(self);
2386     return PyObject_CallMethod(self->buffer, "writable", NULL);
2387 }
2388
2389 static PyObject *
2390 textiowrapper_isatty(textio *self, PyObject *args)
2391 {
2392     CHECK_INITIALIZED(self);
2393     return PyObject_CallMethod(self->buffer, "isatty", NULL);
2394 }
2395
2396 static PyObject *
2397 textiowrapper_flush(textio *self, PyObject *args)
2398 {
2399     CHECK_INITIALIZED(self);
2400     CHECK_CLOSED(self);
2401     self->telling = self->seekable;
2402     if (_textiowrapper_writeflush(self) < 0)
2403         return NULL;
2404     return PyObject_CallMethod(self->buffer, "flush", NULL);
2405 }
2406
2407 static PyObject *
2408 textiowrapper_close(textio *self, PyObject *args)
2409 {
2410     PyObject *res;
2411     CHECK_INITIALIZED(self);
2412     res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2413     if (res == NULL) {
2414         /* If flush() fails, just give up */
2415         PyErr_Clear();
2416     }
2417     else
2418         Py_DECREF(res);
2419
2420     return PyObject_CallMethod(self->buffer, "close", NULL);
2421 }
2422
2423 static PyObject *
2424 textiowrapper_iternext(textio *self)
2425 {
2426     PyObject *line;
2427
2428     CHECK_INITIALIZED(self);
2429
2430     self->telling = 0;
2431     if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2432         /* Skip method call overhead for speed */
2433         line = _textiowrapper_readline(self, -1);
2434     }
2435     else {
2436         line = PyObject_CallMethodObjArgs((PyObject *)self,
2437                                            _PyIO_str_readline, NULL);
2438         if (line && !PyUnicode_Check(line)) {
2439             PyErr_Format(PyExc_IOError,
2440                          "readline() should have returned an str object, "
2441                          "not '%.200s'", Py_TYPE(line)->tp_name);
2442             Py_DECREF(line);
2443             return NULL;
2444         }
2445     }
2446
2447     if (line == NULL)
2448         return NULL;
2449
2450     if (PyUnicode_GET_SIZE(line) == 0) {
2451         /* Reached EOF or would have blocked */
2452         Py_DECREF(line);
2453         Py_CLEAR(self->snapshot);
2454         self->telling = self->seekable;
2455         return NULL;
2456     }
2457
2458     return line;
2459 }
2460
2461 static PyObject *
2462 textiowrapper_name_get(textio *self, void *context)
2463 {
2464     CHECK_INITIALIZED(self);
2465     return PyObject_GetAttrString(self->buffer, "name");
2466 }
2467
2468 static PyObject *
2469 textiowrapper_closed_get(textio *self, void *context)
2470 {
2471     CHECK_INITIALIZED(self);
2472     return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2473 }
2474
2475 static PyObject *
2476 textiowrapper_newlines_get(textio *self, void *context)
2477 {
2478     PyObject *res;
2479     CHECK_INITIALIZED(self);
2480     if (self->decoder == NULL)
2481         Py_RETURN_NONE;
2482     res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2483     if (res == NULL) {
2484         if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2485             PyErr_Clear();
2486             Py_RETURN_NONE;
2487         }
2488         else {
2489             return NULL;
2490         }
2491     }
2492     return res;
2493 }
2494
2495 static PyObject *
2496 textiowrapper_errors_get(textio *self, void *context)
2497 {
2498     CHECK_INITIALIZED(self);
2499     Py_INCREF(self->errors);
2500     return self->errors;
2501 }
2502
2503 static PyObject *
2504 textiowrapper_chunk_size_get(textio *self, void *context)
2505 {
2506     CHECK_INITIALIZED(self);
2507     return PyLong_FromSsize_t(self->chunk_size);
2508 }
2509
2510 static int
2511 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2512 {
2513     Py_ssize_t n;
2514     CHECK_INITIALIZED_INT(self);
2515     n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2516     if (n == -1 && PyErr_Occurred())
2517         return -1;
2518     if (n <= 0) {
2519         PyErr_SetString(PyExc_ValueError,
2520                         "a strictly positive integer is required");
2521         return -1;
2522     }
2523     self->chunk_size = n;
2524     return 0;
2525 }
2526
/* Method table for _io.TextIOWrapper.  Each entry maps a Python-level
   method name to its C implementation and calling convention; the
   {NULL, NULL} entry terminates the table. */
static PyMethodDef textiowrapper_methods[] = {
    /* Core stream operations. */
    {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
    {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
    {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
    {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
    {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
    {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},

    /* Inquiries forwarded to the underlying buffer. */
    {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
    {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
    {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
    {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
    {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},

    /* Positioning. */
    {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
    {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
    {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
    {NULL, NULL}
};
2546
/* Read-only attributes exposed directly from fields of the textio
   struct; the {NULL} entry terminates the table. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {NULL}
};
2553
/* Computed attributes of _io.TextIOWrapper.  All are read-only (no
   setter) except _CHUNK_SIZE; the {NULL} entry terminates the table. */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}
};
2565
/* Type object for _io.TextIOWrapper.  The type is subclassable and takes
   part in cyclic garbage collection (see tp_flags); it supports weak
   references and a per-instance __dict__ through the offsets below.
   tp_base is left as 0 in this static initializer. */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
            | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    textiowrapper_doc,          /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    (initproc)textiowrapper_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};