Modules/_io/textio.c

   1 /*
   2     An implementation of Text I/O as defined by PEP 3116 - "New I/O"
   3
   4     Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
   5
   6     Written by Amaury Forgeot d'Arc and Antoine Pitrou
   7 */
   8
   9 #define PY_SSIZE_T_CLEAN
  10 #include "Python.h"
  11 #include "structmember.h"
  12 #include "_iomodule.h"
  13
  14 /* TextIOBase */
  15
/* Class-level docstring; installed as tp_doc of PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
  23
  24 static PyObject *
  25 _unsupported(const char *message)
  26 {
  27     PyErr_SetString(_PyIO_unsupported_operation, message);
  28     return NULL;
  29 }
  30
  31 PyDoc_STRVAR(textiobase_detach_doc,
  32     "Separate the underlying buffer from the TextIOBase and return it.\n"
  33     "\n"
  34     "After the underlying buffer has been detached, the TextIO is in an\n"
  35     "unusable state.\n"
  36     );
  37
  38 static PyObject *
  39 textiobase_detach(PyObject *self)
  40 {
  41     return _unsupported("detach");
  42 }
  43
  44 PyDoc_STRVAR(textiobase_read_doc,
  45     "Read at most n characters from stream.\n"
  46     "\n"
  47     "Read from underlying buffer until we have n characters or we hit EOF.\n"
  48     "If n is negative or omitted, read until EOF.\n"
  49     );
  50
  51 static PyObject *
  52 textiobase_read(PyObject *self, PyObject *args)
  53 {
  54     return _unsupported("read");
  55 }
  56
  57 PyDoc_STRVAR(textiobase_readline_doc,
  58     "Read until newline or EOF.\n"
  59     "\n"
  60     "Returns an empty string if EOF is hit immediately.\n"
  61     );
  62
  63 static PyObject *
  64 textiobase_readline(PyObject *self, PyObject *args)
  65 {
  66     return _unsupported("readline");
  67 }
  68
  69 PyDoc_STRVAR(textiobase_write_doc,
  70     "Write string to stream.\n"
  71     "Returns the number of characters written (which is always equal to\n"
  72     "the length of the string).\n"
  73     );
  74
  75 static PyObject *
  76 textiobase_write(PyObject *self, PyObject *args)
  77 {
  78     return _unsupported("write");
  79 }
  80
  81 PyDoc_STRVAR(textiobase_encoding_doc,
  82     "Encoding of the text stream.\n"
  83     "\n"
  84     "Subclasses should override.\n"
  85     );
  86
  87 static PyObject *
  88 textiobase_encoding_get(PyObject *self, void *context)
  89 {
  90     Py_RETURN_NONE;
  91 }
  92
  93 PyDoc_STRVAR(textiobase_newlines_doc,
  94     "Line endings translated so far.\n"
  95     "\n"
  96     "Only line endings translated during reading are considered.\n"
  97     "\n"
  98     "Subclasses should override.\n"
  99     );
 100
 101 static PyObject *
 102 textiobase_newlines_get(PyObject *self, void *context)
 103 {
 104     Py_RETURN_NONE;
 105 }
 106
 107 PyDoc_STRVAR(textiobase_errors_doc,
 108     "The error setting of the decoder or encoder.\n"
 109     "\n"
 110     "Subclasses should override.\n"
 111     );
 112
 113 static PyObject *
 114 textiobase_errors_get(PyObject *self, void *context)
 115 {
 116     Py_RETURN_NONE;
 117 }
 118
 119
 120 static PyMethodDef textiobase_methods[] = {
 121     {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
 122     {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
 123     {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
 124     {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
 125     {NULL, NULL}
 126 };
 127
 128 static PyGetSetDef textiobase_getset[] = {
 129     {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
 130     {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
 131     {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
 132     {NULL}
 133 };
 134
/* Type object for "_io._TextIOBase".
 *
 * Only a handful of slots are filled in: the name, the flags (default,
 * subclassable), the class docstring, the method and getset tables defined
 * above, and PyIOBase_Type as the base type.  Every other slot, including
 * tp_basicsize and tp_new, is left as 0 in this initializer.
 */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
};
 175
 176
 177 /* IncrementalNewlineDecoder */
 178
/* Class-level docstring; installed as tp_doc of
   PyIncrementalNewlineDecoder_Type. */
PyDoc_STRVAR(incrementalnewlinedecoder_doc,
    "Codec used when reading a file in universal newlines mode.  It wraps\n"
    "another incremental decoder, translating \\r\\n and \\r into \\n.  It also\n"
    "records the types of newlines encountered.  When used with\n"
    "translate=False, it ensures that the newline sequence is returned in\n"
    "one piece. When used with decoder=None, it expects unicode strings as\n"
    "decode input and translates newlines without first invoking an external\n"
    "decoder.\n"
    );
 188
 189 typedef struct {
 190     PyObject_HEAD
 191     PyObject *decoder;
 192     PyObject *errors;
 193     signed int pendingcr: 1;
 194     signed int translate: 1;
 195     unsigned int seennl: 3;
 196 } nldecoder_object;
 197
 198 static int
 199 incrementalnewlinedecoder_init(nldecoder_object *self,
 200                                PyObject *args, PyObject *kwds)
 201 {
 202     PyObject *decoder;
 203     int translate;
 204     PyObject *errors = NULL;
 205     char *kwlist[] = {"decoder", "translate", "errors", NULL};
 206
 207     if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
 208                                      kwlist, &decoder, &translate, &errors))
 209         return -1;
 210
 211     self->decoder = decoder;
 212     Py_INCREF(decoder);
 213
 214     if (errors == NULL) {
 215         self->errors = PyUnicode_FromString("strict");
 216         if (self->errors == NULL)
 217             return -1;
 218     }
 219     else {
 220         Py_INCREF(errors);
 221         self->errors = errors;
 222     }
 223
 224     self->translate = translate;
 225     self->seennl = 0;
 226     self->pendingcr = 0;
 227
 228     return 0;
 229 }
 230
 231 static void
 232 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
 233 {
 234     Py_CLEAR(self->decoder);
 235     Py_CLEAR(self->errors);
 236     Py_TYPE(self)->tp_free((PyObject *)self);
 237 }
 238
 239 #define SEEN_CR   1
 240 #define SEEN_LF   2
 241 #define SEEN_CRLF 4
 242 #define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
 243
/* Decode `input` and apply universal-newlines processing.
 *
 * `_self` must be an nldecoder_object.  When its wrapped decoder is not
 * None, `input` is passed to the decoder's decode method together with
 * `final`; otherwise `input` itself is used as the decoded text.
 *
 * Processing steps, in order:
 *   1. a '\r' held back by an earlier call (pendingcr) is prepended;
 *   2. unless `final` is true, a trailing '\r' is removed from the result
 *      and remembered in pendingcr;
 *   3. the text is scanned for newlines: the kinds found are OR-ed into
 *      self->seennl and, if self->translate is set, "\r\n" and "\r" are
 *      rewritten to "\n".
 *
 * Returns a new reference to the resulting unicode object, or NULL with an
 * exception set: ValueError if __init__ has not been called, TypeError if
 * the decoded result is not a unicode object, or whatever the wrapped
 * decoder / allocation raised.
 */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *_self,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) _self;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* No wrapped decoder: the input is used as the decoded text. */
        output = input;
        Py_INCREF(output);
    }

    if (output == NULL)
        return NULL;

    if (!PyUnicode_Check(output)) {
        PyErr_SetString(PyExc_TypeError,
                        "decoder should return a string result");
        goto error;
    }

    /* Re-insert the '\r' held back by a previous call.  Nothing is done
       for an empty, non-final chunk: the '\r' simply stays pending. */
    output_len = PyUnicode_GET_SIZE(output);
    if (self->pendingcr && (final || output_len > 0)) {
        Py_UNICODE *out;
        PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
        if (modified == NULL)
            goto error;
        out = PyUnicode_AS_UNICODE(modified);
        out[0] = '\r';
        memcpy(out + 1, PyUnicode_AS_UNICODE(output),
               output_len * sizeof(Py_UNICODE));
        Py_DECREF(output);
        output = modified;
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {

            /* Shrink in place only when we hold the sole reference;
               otherwise build a shortened copy. */
            if (Py_REFCNT(output) == 1) {
                if (PyUnicode_Resize(&output, output_len - 1) < 0)
                    goto error;
            }
            else {
                PyObject *modified = PyUnicode_FromUnicode(
                    PyUnicode_AS_UNICODE(output),
                    output_len - 1);
                if (modified == NULL)
                    goto error;
                Py_DECREF(output);
                output = modified;
            }
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        Py_UNICODE *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;

        in_str = PyUnicode_AS_UNICODE(output);
        len = PyUnicode_GET_SIZE(output);

        /* Nothing to scan; self->seennl is left as it was. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
                Py_UNICODE *s, *end;
                s = in_str;
                end = in_str + len;
                /* NOTE(review): the inner loop has no bounds check of its
                   own; it appears to rely on in_str[len] holding a
                   terminating NUL to stop the scan -- confirm. */
                for (;;) {
                    Py_UNICODE c;
                    /* Fast loop for non-control characters */
                    while (*s > '\n')
                        s++;
                    c = *s++;
                    if (c == '\n') {
                        seennl |= SEEN_LF;
                        break;
                    }
                    if (s > end)
                        break;
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_UNICODE *s, *end;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            s = in_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while (*s > '\r')
                    s++;
                c = *s++;
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* "\r\n" counts as a single CRLF, not CR plus LF. */
                    if (*s == '\n') {
                        seennl |= SEEN_CRLF;
                        s++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (s > end)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            PyObject *translated = NULL;
            Py_UNICODE *out_str;
            Py_UNICODE *in, *out, *end;
            if (Py_REFCNT(output) != 1) {
                /* We could try to optimize this so that we only do a copy
                   when there is something to translate. On the other hand,
                   most decoders should only output non-shared strings, i.e.
                   translation is done in place. */
                translated = PyUnicode_FromUnicode(NULL, len);
                if (translated == NULL)
                    goto error;
                assert(Py_REFCNT(translated) == 1);
                memcpy(PyUnicode_AS_UNICODE(translated),
                       PyUnicode_AS_UNICODE(output),
                       len * sizeof(Py_UNICODE));
            }
            else {
                translated = output;
            }
            /* Read from in_str and write the translated text to out_str
               (the same buffer when translating in place).  The write
               position never gets ahead of the read position because the
               translation only ever shortens the text. */
            out_str = PyUnicode_AS_UNICODE(translated);
            in = in_str;
            out = out_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while ((c = *in++) > '\r')
                    *out++ = c;
                if (c == '\n') {
                    *out++ = c;
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (*in == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    *out++ = '\n';
                    continue;
                }
                /* Any other character <= '\r': stop once we have read past
                   the end, otherwise copy it through unchanged. */
                if (in > end)
                    break;
                *out++ = c;
            }
            if (translated != output) {
                Py_DECREF(output);
                output = translated;
            }
            /* "\r\n" pairs collapsed into one character: trim the tail. */
            if (out - out_str != len) {
                if (PyUnicode_Resize(&output, out - out_str) < 0)
                    goto error;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
 461
 462 static PyObject *
 463 incrementalnewlinedecoder_decode(nldecoder_object *self,
 464                                  PyObject *args, PyObject *kwds)
 465 {
 466     char *kwlist[] = {"input", "final", NULL};
 467     PyObject *input;
 468     int final = 0;
 469
 470     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
 471                                      kwlist, &input, &final))
 472         return NULL;
 473     return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
 474 }
 475
 476 static PyObject *
 477 incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
 478 {
 479     PyObject *buffer;
 480     unsigned PY_LONG_LONG flag;
 481
 482     if (self->decoder != Py_None) {
 483         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
 484            _PyIO_str_getstate, NULL);
 485         if (state == NULL)
 486             return NULL;
 487         if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
 488             Py_DECREF(state);
 489             return NULL;
 490         }
 491         Py_INCREF(buffer);
 492         Py_DECREF(state);
 493     }
 494     else {
 495         buffer = PyBytes_FromString("");
 496         flag = 0;
 497     }
 498     flag <<= 1;
 499     if (self->pendingcr)
 500         flag |= 1;
 501     return Py_BuildValue("NK", buffer, flag);
 502 }
 503
 504 static PyObject *
 505 incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
 506 {
 507     PyObject *buffer;
 508     unsigned PY_LONG_LONG flag;
 509
 510     if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
 511         return NULL;
 512
 513     self->pendingcr = (int) flag & 1;
 514     flag >>= 1;
 515
 516     if (self->decoder != Py_None)
 517         return PyObject_CallMethod(self->decoder,
 518                                    "setstate", "((OK))", buffer, flag);
 519     else
 520         Py_RETURN_NONE;
 521 }
 522
 523 static PyObject *
 524 incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
 525 {
 526     self->seennl = 0;
 527     self->pendingcr = 0;
 528     if (self->decoder != Py_None)
 529         return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
 530     else
 531         Py_RETURN_NONE;
 532 }
 533
 534 static PyObject *
 535 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
 536 {
 537     switch (self->seennl) {
 538     case SEEN_CR:
 539         return PyUnicode_FromString("\r");
 540     case SEEN_LF:
 541         return PyUnicode_FromString("\n");
 542     case SEEN_CRLF:
 543         return PyUnicode_FromString("\r\n");
 544     case SEEN_CR | SEEN_LF:
 545         return Py_BuildValue("ss", "\r", "\n");
 546     case SEEN_CR | SEEN_CRLF:
 547         return Py_BuildValue("ss", "\r", "\r\n");
 548     case SEEN_LF | SEEN_CRLF:
 549         return Py_BuildValue("ss", "\n", "\r\n");
 550     case SEEN_CR | SEEN_LF | SEEN_CRLF:
 551         return Py_BuildValue("sss", "\r", "\n", "\r\n");
 552     default:
 553         Py_RETURN_NONE;
 554    }
 555
 556 }
 557
 558
 559 static PyMethodDef incrementalnewlinedecoder_methods[] = {
 560     {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
 561     {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
 562     {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
 563     {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
 564     {NULL}
 565 };
 566
 567 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
 568     {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
 569     {NULL}
 570 };
 571
/* Type object for "_io.IncrementalNewlineDecoder".
 *
 * Instances are nldecoder_object structures.  The slots filled in are the
 * destructor, the flags (default, subclassable), the class docstring, the
 * method and getset tables, tp_init (incrementalnewlinedecoder_init) and
 * tp_new (PyType_GenericNew); no explicit base type is given.
 */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    incrementalnewlinedecoder_doc,          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)incrementalnewlinedecoder_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
 612
 613
 614 /* TextIOWrapper */
 615
 616 PyDoc_STRVAR(textiowrapper_doc,
 617     "Character and line based layer over a BufferedIOBase object, buffer.\n"
 618     "\n"
 619     "encoding gives the name of the encoding that the stream will be\n"
 620     "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
 621     "\n"
 622     "errors determines the strictness of encoding and decoding (see the\n"
 623     "codecs.register) and defaults to \"strict\".\n"
 624     "\n"
 625     "newline can be None, '', '\\n', '\\r', or '\\r\\n'.  It controls the\n"
 626     "handling of line endings. If it is None, universal newlines is\n"
 627     "enabled.  With this enabled, on input, the lines endings '\\n', '\\r',\n"
 628     "or '\\r\\n' are translated to '\\n' before being returned to the\n"
 629     "caller. Conversely, on output, '\\n' is translated to the system\n"
 630     "default line seperator, os.linesep. If newline is any other of its\n"
 631     "legal values, that newline becomes the newline when the file is read\n"
 632     "and it is returned untranslated. On output, '\\n' is converted to the\n"
 633     "newline.\n"
 634     "\n"
 635     "If line_buffering is True, a call to flush is implied when a call to\n"
 636     "write contains a newline character."
 637     );
 638
 639 typedef PyObject *
 640         (*encodefunc_t)(PyObject *, PyObject *);
 641
/* Instance layout used by the TextIOWrapper implementation (the `self`
   argument of textiowrapper_init and of the *_encode helpers below). */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;   /* bytes object; read via PyBytes_AS_STRING by the
                           specialized encoders */
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char telling;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;
    PyObject *snapshot;
    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
 693
 694
 695 /* A couple of specialized cases in order to bypass the slow incremental
 696    encoding methods for the most popular encodings. */
 697
 698 static PyObject *
 699 ascii_encode(textio *self, PyObject *text)
 700 {
 701     return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
 702                                  PyUnicode_GET_SIZE(text),
 703                                  PyBytes_AS_STRING(self->errors));
 704 }
 705
 706 static PyObject *
 707 utf16be_encode(textio *self, PyObject *text)
 708 {
 709     return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
 710                                  PyUnicode_GET_SIZE(text),
 711                                  PyBytes_AS_STRING(self->errors), 1);
 712 }
 713
 714 static PyObject *
 715 utf16le_encode(textio *self, PyObject *text)
 716 {
 717     return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
 718                                  PyUnicode_GET_SIZE(text),
 719                                  PyBytes_AS_STRING(self->errors), -1);
 720 }
 721
 722 static PyObject *
 723 utf16_encode(textio *self, PyObject *text)
 724 {
 725     if (!self->encoding_start_of_stream) {
 726         /* Skip the BOM and use native byte ordering */
 727 #if defined(WORDS_BIGENDIAN)
 728         return utf16be_encode(self, text);
 729 #else
 730         return utf16le_encode(self, text);
 731 #endif
 732     }
 733     return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
 734                                  PyUnicode_GET_SIZE(text),
 735                                  PyBytes_AS_STRING(self->errors), 0);
 736 }
 737
 738 static PyObject *
 739 utf32be_encode(textio *self, PyObject *text)
 740 {
 741     return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
 742                                  PyUnicode_GET_SIZE(text),
 743                                  PyBytes_AS_STRING(self->errors), 1);
 744 }
 745
 746 static PyObject *
 747 utf32le_encode(textio *self, PyObject *text)
 748 {
 749     return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
 750                                  PyUnicode_GET_SIZE(text),
 751                                  PyBytes_AS_STRING(self->errors), -1);
 752 }
 753
 754 static PyObject *
 755 utf32_encode(textio *self, PyObject *text)
 756 {
 757     if (!self->encoding_start_of_stream) {
 758         /* Skip the BOM and use native byte ordering */
 759 #if defined(WORDS_BIGENDIAN)
 760         return utf32be_encode(self, text);
 761 #else
 762         return utf32le_encode(self, text);
 763 #endif
 764     }
 765     return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
 766                                  PyUnicode_GET_SIZE(text),
 767                                  PyBytes_AS_STRING(self->errors), 0);
 768 }
 769
 770 static PyObject *
 771 utf8_encode(textio *self, PyObject *text)
 772 {
 773     return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
 774                                 PyUnicode_GET_SIZE(text),
 775                                 PyBytes_AS_STRING(self->errors));
 776 }
 777
 778 static PyObject *
 779 latin1_encode(textio *self, PyObject *text)
 780 {
 781     return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
 782                                   PyUnicode_GET_SIZE(text),
 783                                   PyBytes_AS_STRING(self->errors));
 784 }
 785
 786 /* Map normalized encoding names onto the specialized encoding funcs */
 787
/* One row of the encodefuncs lookup table. */
typedef struct {
    const char *name;           /* normalized encoding name */
    encodefunc_t encodefunc;    /* specialized encoder for that name */
} encodefuncentry;
 792
/* Lookup table from normalized encoding name to specialized encoder.
   The helpers take a `textio *` first argument, hence the casts to
   encodefunc_t.  The table is terminated by a {NULL, NULL} entry. */
static encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
 805
 806
 807 static int
 808 textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
 809 {
 810     char *kwlist[] = {"buffer", "encoding", "errors",
 811                       "newline", "line_buffering",
 812                       NULL};
 813     PyObject *buffer, *raw;
 814     char *encoding = NULL;
 815     char *errors = NULL;
 816     char *newline = NULL;
 817     int line_buffering = 0;
 818
 819     PyObject *res;
 820     int r;
 821
 822     self->ok = 0;
 823     self->detached = 0;
 824     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
 825                                      kwlist, &buffer, &encoding, &errors,
 826                                      &newline, &line_buffering))
 827         return -1;
 828
 829     if (newline && newline[0] != '\0'
 830         && !(newline[0] == '\n' && newline[1] == '\0')
 831         && !(newline[0] == '\r' && newline[1] == '\0')
 832         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
 833         PyErr_Format(PyExc_ValueError,
 834                      "illegal newline value: %s", newline);
 835         return -1;
 836     }
 837
 838     Py_CLEAR(self->buffer);
 839     Py_CLEAR(self->encoding);
 840     Py_CLEAR(self->encoder);
 841     Py_CLEAR(self->decoder);
 842     Py_CLEAR(self->readnl);
 843     Py_CLEAR(self->decoded_chars);
 844     Py_CLEAR(self->pending_bytes);
 845     Py_CLEAR(self->snapshot);
 846     Py_CLEAR(self->errors);
 847     Py_CLEAR(self->raw);
 848     self->decoded_chars_used = 0;
 849     self->pending_bytes_count = 0;
 850     self->encodefunc = NULL;
 851     self->writenl = NULL;
 852
 853     if (encoding == NULL && self->encoding == NULL) {
 854         if (_PyIO_locale_module == NULL) {
 855             _PyIO_locale_module = PyImport_ImportModule("locale");
 856             if (_PyIO_locale_module == NULL)
 857                 goto catch_ImportError;
 858             else
 859                 goto use_locale;
 860         }
 861         else {
 862           use_locale:
 863             self->encoding = PyObject_CallMethod(
 864                 _PyIO_locale_module, "getpreferredencoding", NULL);
 865             if (self->encoding == NULL) {
 866               catch_ImportError:
 867                 /*
 868                  Importing locale can raise a ImportError because of
 869                  _functools, and locale.getpreferredencoding can raise a
 870                  ImportError if _locale is not available.  These will happen
 871                  during module building.
 872                 */
 873                 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
 874                     PyErr_Clear();
 875                     self->encoding = PyString_FromString("ascii");
 876                 }
 877                 else
 878                     goto error;
 879             }
 880             else if (!PyString_Check(self->encoding))
 881                 Py_CLEAR(self->encoding);
 882         }
 883     }
 884     if (self->encoding != NULL)
 885         encoding = PyString_AsString(self->encoding);
 886     else if (encoding != NULL) {
 887         self->encoding = PyString_FromString(encoding);
 888         if (self->encoding == NULL)
 889             goto error;
 890     }
 891     else {
 892         PyErr_SetString(PyExc_IOError,
 893                         "could not determine default encoding");
 894     }
 895
 896     if (errors == NULL)
 897         errors = "strict";
 898     self->errors = PyBytes_FromString(errors);
 899     if (self->errors == NULL)
 900         goto error;
 901
 902     self->chunk_size = 8192;
 903     self->readuniversal = (newline == NULL || newline[0] == '\0');
 904     self->line_buffering = line_buffering;
 905     self->readtranslate = (newline == NULL);
 906     if (newline) {
 907         self->readnl = PyString_FromString(newline);
 908         if (self->readnl == NULL)
 909             return -1;
 910     }
 911     self->writetranslate = (newline == NULL || newline[0] != '\0');
 912     if (!self->readuniversal && self->writetranslate) {
 913         self->writenl = PyString_AsString(self->readnl);
 914         if (!strcmp(self->writenl, "\n"))
 915             self->writenl = NULL;
 916     }
 917 #ifdef MS_WINDOWS
 918     else
 919         self->writenl = "\r\n";
 920 #endif
 921
 922     /* Build the decoder object */
 923     res = PyObject_CallMethod(buffer, "readable", NULL);
 924     if (res == NULL)
 925         goto error;
 926     r = PyObject_IsTrue(res);
 927     Py_DECREF(res);
 928     if (r == -1)
 929         goto error;
 930     if (r == 1) {
 931         self->decoder = PyCodec_IncrementalDecoder(
 932             encoding, errors);
 933         if (self->decoder == NULL)
 934             goto error;
 935
 936         if (self->readuniversal) {
 937             PyObject *incrementalDecoder = PyObject_CallFunction(
 938                 (PyObject *)&PyIncrementalNewlineDecoder_Type,
 939                 "Oi", self->decoder, (int)self->readtranslate);
 940             if (incrementalDecoder == NULL)
 941                 goto error;
 942             Py_CLEAR(self->decoder);
 943             self->decoder = incrementalDecoder;
 944         }
 945     }
 946
 947     /* Build the encoder object */
 948     res = PyObject_CallMethod(buffer, "writable", NULL);
 949     if (res == NULL)
 950         goto error;
 951     r = PyObject_IsTrue(res);
 952     Py_DECREF(res);
 953     if (r == -1)
 954         goto error;
 955     if (r == 1) {
 956         PyObject *ci;
 957         self->encoder = PyCodec_IncrementalEncoder(
 958             encoding, errors);
 959         if (self->encoder == NULL)
 960             goto error;
 961         /* Get the normalized named of the codec */
 962         ci = _PyCodec_Lookup(encoding);
 963         if (ci == NULL)
 964             goto error;
 965         res = PyObject_GetAttrString(ci, "name");
 966         Py_DECREF(ci);
 967         if (res == NULL) {
 968             if (PyErr_ExceptionMatches(PyExc_AttributeError))
 969                 PyErr_Clear();
 970             else
 971                 goto error;
 972         }
 973         else if (PyString_Check(res)) {
 974             encodefuncentry *e = encodefuncs;
 975             while (e->name != NULL) {
 976                 if (!strcmp(PyString_AS_STRING(res), e->name)) {
 977                     self->encodefunc = e->encodefunc;
 978                     break;
 979                 }
 980                 e++;
 981             }
 982         }
 983         Py_XDECREF(res);
 984     }
 985
 986     self->buffer = buffer;
 987     Py_INCREF(buffer);
 988
 989     if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
 990         Py_TYPE(buffer) == &PyBufferedWriter_Type ||
 991         Py_TYPE(buffer) == &PyBufferedRandom_Type) {
 992         raw = PyObject_GetAttrString(buffer, "raw");
 993         /* Cache the raw FileIO object to speed up 'closed' checks */
 994         if (raw == NULL) {
 995             if (PyErr_ExceptionMatches(PyExc_AttributeError))
 996                 PyErr_Clear();
 997             else
 998                 goto error;
 999         }
1000         else if (Py_TYPE(raw) == &PyFileIO_Type)
1001             self->raw = raw;
1002         else
1003             Py_DECREF(raw);
1004     }
1005
1006     res = PyObject_CallMethod(buffer, "seekable", NULL);
1007     if (res == NULL)
1008         goto error;
1009     self->seekable = self->telling = PyObject_IsTrue(res);
1010     Py_DECREF(res);
1011
1012     self->encoding_start_of_stream = 0;
1013     if (self->seekable && self->encoder) {
1014         PyObject *cookieObj;
1015         int cmp;
1016
1017         self->encoding_start_of_stream = 1;
1018
1019         cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1020         if (cookieObj == NULL)
1021             goto error;
1022
1023         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1024         Py_DECREF(cookieObj);
1025         if (cmp < 0) {
1026             goto error;
1027         }
1028
1029         if (cmp == 0) {
1030             self->encoding_start_of_stream = 0;
1031             res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1032                                              _PyIO_zero, NULL);
1033             if (res == NULL)
1034                 goto error;
1035             Py_DECREF(res);
1036         }
1037     }
1038
1039     self->ok = 1;
1040     return 0;
1041
1042   error:
1043     return -1;
1044 }
1045
1046 static int
1047 _textiowrapper_clear(textio *self)
1048 {
1049     if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1050         return -1;
1051     self->ok = 0;
1052     Py_CLEAR(self->buffer);
1053     Py_CLEAR(self->encoding);
1054     Py_CLEAR(self->encoder);
1055     Py_CLEAR(self->decoder);
1056     Py_CLEAR(self->readnl);
1057     Py_CLEAR(self->decoded_chars);
1058     Py_CLEAR(self->pending_bytes);
1059     Py_CLEAR(self->snapshot);
1060     Py_CLEAR(self->errors);
1061     Py_CLEAR(self->raw);
1062     return 0;
1063 }
1064
1065 static void
1066 textiowrapper_dealloc(textio *self)
1067 {
1068     if (_textiowrapper_clear(self) < 0)
1069         return;
1070     _PyObject_GC_UNTRACK(self);
1071     if (self->weakreflist != NULL)
1072         PyObject_ClearWeakRefs((PyObject *)self);
1073     Py_CLEAR(self->dict);
1074     Py_TYPE(self)->tp_free((PyObject *)self);
1075 }
1076
1077 static int
1078 textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1079 {
1080     Py_VISIT(self->buffer);
1081     Py_VISIT(self->encoding);
1082     Py_VISIT(self->encoder);
1083     Py_VISIT(self->decoder);
1084     Py_VISIT(self->readnl);
1085     Py_VISIT(self->decoded_chars);
1086     Py_VISIT(self->pending_bytes);
1087     Py_VISIT(self->snapshot);
1088     Py_VISIT(self->errors);
1089     Py_VISIT(self->raw);
1090
1091     Py_VISIT(self->dict);
1092     return 0;
1093 }
1094
1095 static int
1096 textiowrapper_clear(textio *self)
1097 {
1098     if (_textiowrapper_clear(self) < 0)
1099         return -1;
1100     Py_CLEAR(self->dict);
1101     return 0;
1102 }
1103
1104 static PyObject *
1105 textiowrapper_closed_get(textio *self, void *context);
1106
1107 /* This macro takes some shortcuts to make the common case faster. */
1108 #define CHECK_CLOSED(self) \
1109     do { \
1110         int r; \
1111         PyObject *_res; \
1112         if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1113             if (self->raw != NULL) \
1114                 r = _PyFileIO_closed(self->raw); \
1115             else { \
1116                 _res = textiowrapper_closed_get(self, NULL); \
1117                 if (_res == NULL) \
1118                     return NULL; \
1119                 r = PyObject_IsTrue(_res); \
1120                 Py_DECREF(_res); \
1121                 if (r < 0) \
1122                     return NULL; \
1123             } \
1124             if (r > 0) { \
1125                 PyErr_SetString(PyExc_ValueError, \
1126                                 "I/O operation on closed file."); \
1127                 return NULL; \
1128             } \
1129         } \
1130         else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1131             return NULL; \
1132     } while (0)
1133
/* Return NULL from the enclosing function with a ValueError set when the
   wrapper is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized.

   Note: this expands to a bare `if` statement, so it must not be used as
   the unbraced body of another if/else. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1145
/* Same check as CHECK_INITIALIZED, for functions that report failure by
   returning -1: sets a ValueError and returns -1 when self->ok <= 0.

   Note: this expands to a bare `if` statement, so it must not be used as
   the unbraced body of another if/else. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1157
1158
1159 static PyObject *
1160 textiowrapper_detach(textio *self)
1161 {
1162     PyObject *buffer, *res;
1163     CHECK_INITIALIZED(self);
1164     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1165     if (res == NULL)
1166         return NULL;
1167     Py_DECREF(res);
1168     buffer = self->buffer;
1169     self->buffer = NULL;
1170     self->detached = 1;
1171     self->ok = 0;
1172     return buffer;
1173 }
1174
1175 Py_LOCAL_INLINE(const Py_UNICODE *)
1176 findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1177 {
1178     /* like wcschr, but doesn't stop at NULL characters */
1179     while (size-- > 0) {
1180         if (*s == ch)
1181             return s;
1182         s++;
1183     }
1184     return NULL;
1185 }
1186
1187 /* Flush the internal write buffer. This doesn't explicitly flush the
1188    underlying buffered object, though. */
1189 static int
1190 _textiowrapper_writeflush(textio *self)
1191 {
1192     PyObject *pending, *b, *ret;
1193
1194     if (self->pending_bytes == NULL)
1195         return 0;
1196
1197     pending = self->pending_bytes;
1198     Py_INCREF(pending);
1199     self->pending_bytes_count = 0;
1200     Py_CLEAR(self->pending_bytes);
1201
1202     b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1203     Py_DECREF(pending);
1204     if (b == NULL)
1205         return -1;
1206     ret = PyObject_CallMethodObjArgs(self->buffer,
1207                                      _PyIO_str_write, b, NULL);
1208     Py_DECREF(b);
1209     if (ret == NULL)
1210         return -1;
1211     Py_DECREF(ret);
1212     return 0;
1213 }
1214
1215 static PyObject *
1216 textiowrapper_write(textio *self, PyObject *args)
1217 {
1218     PyObject *ret;
1219     PyObject *text; /* owned reference */
1220     PyObject *b;
1221     Py_ssize_t textlen;
1222     int haslf = 0;
1223     int needflush = 0;
1224
1225     CHECK_INITIALIZED(self);
1226
1227     if (!PyArg_ParseTuple(args, "U:write", &text)) {
1228         return NULL;
1229     }
1230
1231     CHECK_CLOSED(self);
1232
1233     if (self->encoder == NULL) {
1234         PyErr_SetString(PyExc_IOError, "not writable");
1235         return NULL;
1236     }
1237
1238     Py_INCREF(text);
1239
1240     textlen = PyUnicode_GetSize(text);
1241
1242     if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1243         if (findchar(PyUnicode_AS_UNICODE(text),
1244                      PyUnicode_GET_SIZE(text), '\n'))
1245             haslf = 1;
1246
1247     if (haslf && self->writetranslate && self->writenl != NULL) {
1248         PyObject *newtext = PyObject_CallMethod(
1249             text, "replace", "ss", "\n", self->writenl);
1250         Py_DECREF(text);
1251         if (newtext == NULL)
1252             return NULL;
1253         text = newtext;
1254     }
1255
1256     if (self->line_buffering &&
1257         (haslf ||
1258          findchar(PyUnicode_AS_UNICODE(text),
1259                   PyUnicode_GET_SIZE(text), '\r')))
1260         needflush = 1;
1261
1262     /* XXX What if we were just reading? */
1263     if (self->encodefunc != NULL) {
1264         b = (*self->encodefunc)((PyObject *) self, text);
1265         self->encoding_start_of_stream = 0;
1266     }
1267     else
1268         b = PyObject_CallMethodObjArgs(self->encoder,
1269                                        _PyIO_str_encode, text, NULL);
1270     Py_DECREF(text);
1271     if (b == NULL)
1272         return NULL;
1273
1274     if (self->pending_bytes == NULL) {
1275         self->pending_bytes = PyList_New(0);
1276         if (self->pending_bytes == NULL) {
1277             Py_DECREF(b);
1278             return NULL;
1279         }
1280         self->pending_bytes_count = 0;
1281     }
1282     if (PyList_Append(self->pending_bytes, b) < 0) {
1283         Py_DECREF(b);
1284         return NULL;
1285     }
1286     self->pending_bytes_count += PyBytes_GET_SIZE(b);
1287     Py_DECREF(b);
1288     if (self->pending_bytes_count > self->chunk_size || needflush) {
1289         if (_textiowrapper_writeflush(self) < 0)
1290             return NULL;
1291     }
1292
1293     if (needflush) {
1294         ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1295         if (ret == NULL)
1296             return NULL;
1297         Py_DECREF(ret);
1298     }
1299
1300     Py_CLEAR(self->snapshot);
1301
1302     if (self->decoder) {
1303         ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1304         if (ret == NULL)
1305             return NULL;
1306         Py_DECREF(ret);
1307     }
1308
1309     return PyLong_FromSsize_t(textlen);
1310 }
1311
1312 /* Steal a reference to chars and store it in the decoded_char buffer;
1313  */
1314 static void
1315 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1316 {
1317     Py_CLEAR(self->decoded_chars);
1318     self->decoded_chars = chars;
1319     self->decoded_chars_used = 0;
1320 }
1321
1322 static PyObject *
1323 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1324 {
1325     PyObject *chars;
1326     Py_ssize_t avail;
1327
1328     if (self->decoded_chars == NULL)
1329         return PyUnicode_FromStringAndSize(NULL, 0);
1330
1331     avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1332              - self->decoded_chars_used);
1333
1334     assert(avail >= 0);
1335
1336     if (n < 0 || n > avail)
1337         n = avail;
1338
1339     if (self->decoded_chars_used > 0 || n < avail) {
1340         chars = PyUnicode_FromUnicode(
1341             PyUnicode_AS_UNICODE(self->decoded_chars)
1342             + self->decoded_chars_used, n);
1343         if (chars == NULL)
1344             return NULL;
1345     }
1346     else {
1347         chars = self->decoded_chars;
1348         Py_INCREF(chars);
1349     }
1350
1351     self->decoded_chars_used += n;
1352     return chars;
1353 }
1354
1355 /* Read and decode the next chunk of data from the BufferedReader.
1356  */
1357 static int
1358 textiowrapper_read_chunk(textio *self)
1359 {
1360     PyObject *dec_buffer = NULL;
1361     PyObject *dec_flags = NULL;
1362     PyObject *input_chunk = NULL;
1363     PyObject *decoded_chars, *chunk_size;
1364     int eof;
1365
1366     /* The return value is True unless EOF was reached.  The decoded string is
1367      * placed in self._decoded_chars (replacing its previous value).  The
1368      * entire input chunk is sent to the decoder, though some of it may remain
1369      * buffered in the decoder, yet to be converted.
1370      */
1371
1372     if (self->decoder == NULL) {
1373         PyErr_SetString(PyExc_IOError, "not readable");
1374         return -1;
1375     }
1376
1377     if (self->telling) {
1378         /* To prepare for tell(), we need to snapshot a point in the file
1379          * where the decoder's input buffer is empty.
1380          */
1381
1382         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1383                                                      _PyIO_str_getstate, NULL);
1384         if (state == NULL)
1385             return -1;
1386         /* Given this, we know there was a valid snapshot point
1387          * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1388          */
1389         if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1390             Py_DECREF(state);
1391             return -1;
1392         }
1393         Py_INCREF(dec_buffer);
1394         Py_INCREF(dec_flags);
1395         Py_DECREF(state);
1396     }
1397
1398     /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1399     chunk_size = PyLong_FromSsize_t(self->chunk_size);
1400     if (chunk_size == NULL)
1401         goto fail;
1402     input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1403         _PyIO_str_read1, chunk_size, NULL);
1404     Py_DECREF(chunk_size);
1405     if (input_chunk == NULL)
1406         goto fail;
1407     assert(PyBytes_Check(input_chunk));
1408
1409     eof = (PyBytes_Size(input_chunk) == 0);
1410
1411     if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1412         decoded_chars = _PyIncrementalNewlineDecoder_decode(
1413             self->decoder, input_chunk, eof);
1414     }
1415     else {
1416         decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1417             _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1418     }
1419
1420     /* TODO sanity check: isinstance(decoded_chars, unicode) */
1421     if (decoded_chars == NULL)
1422         goto fail;
1423     textiowrapper_set_decoded_chars(self, decoded_chars);
1424     if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1425         eof = 0;
1426
1427     if (self->telling) {
1428         /* At the snapshot point, len(dec_buffer) bytes before the read, the
1429          * next input to be decoded is dec_buffer + input_chunk.
1430          */
1431         PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1432         if (next_input == NULL)
1433             goto fail;
1434         assert (PyBytes_Check(next_input));
1435         Py_DECREF(dec_buffer);
1436         Py_CLEAR(self->snapshot);
1437         self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1438     }
1439     Py_DECREF(input_chunk);
1440
1441     return (eof == 0);
1442
1443   fail:
1444     Py_XDECREF(dec_buffer);
1445     Py_XDECREF(dec_flags);
1446     Py_XDECREF(input_chunk);
1447     return -1;
1448 }
1449
1450 static PyObject *
1451 textiowrapper_read(textio *self, PyObject *args)
1452 {
1453     Py_ssize_t n = -1;
1454     PyObject *result = NULL, *chunks = NULL;
1455
1456     CHECK_INITIALIZED(self);
1457
1458     if (!PyArg_ParseTuple(args, "|n:read", &n))
1459         return NULL;
1460
1461     CHECK_CLOSED(self);
1462
1463     if (self->decoder == NULL) {
1464         PyErr_SetString(PyExc_IOError, "not readable");
1465         return NULL;
1466     }
1467
1468     if (_textiowrapper_writeflush(self) < 0)
1469         return NULL;
1470
1471     if (n < 0) {
1472         /* Read everything */
1473         PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1474         PyObject *decoded, *final;
1475         if (bytes == NULL)
1476             goto fail;
1477         decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1478                                              bytes, Py_True, NULL);
1479         Py_DECREF(bytes);
1480         if (decoded == NULL)
1481             goto fail;
1482
1483         result = textiowrapper_get_decoded_chars(self, -1);
1484
1485         if (result == NULL) {
1486             Py_DECREF(decoded);
1487             return NULL;
1488         }
1489
1490         final = PyUnicode_Concat(result, decoded);
1491         Py_DECREF(result);
1492         Py_DECREF(decoded);
1493         if (final == NULL)
1494             goto fail;
1495
1496         Py_CLEAR(self->snapshot);
1497         return final;
1498     }
1499     else {
1500         int res = 1;
1501         Py_ssize_t remaining = n;
1502
1503         result = textiowrapper_get_decoded_chars(self, n);
1504         if (result == NULL)
1505             goto fail;
1506         remaining -= PyUnicode_GET_SIZE(result);
1507
1508         /* Keep reading chunks until we have n characters to return */
1509         while (remaining > 0) {
1510             res = textiowrapper_read_chunk(self);
1511             if (res < 0)
1512                 goto fail;
1513             if (res == 0)  /* EOF */
1514                 break;
1515             if (chunks == NULL) {
1516                 chunks = PyList_New(0);
1517                 if (chunks == NULL)
1518                     goto fail;
1519             }
1520             if (PyList_Append(chunks, result) < 0)
1521                 goto fail;
1522             Py_DECREF(result);
1523             result = textiowrapper_get_decoded_chars(self, remaining);
1524             if (result == NULL)
1525                 goto fail;
1526             remaining -= PyUnicode_GET_SIZE(result);
1527         }
1528         if (chunks != NULL) {
1529             if (result != NULL && PyList_Append(chunks, result) < 0)
1530                 goto fail;
1531             Py_CLEAR(result);
1532             result = PyUnicode_Join(_PyIO_empty_str, chunks);
1533             if (result == NULL)
1534                 goto fail;
1535             Py_CLEAR(chunks);
1536         }
1537         return result;
1538     }
1539   fail:
1540     Py_XDECREF(result);
1541     Py_XDECREF(chunks);
1542     return NULL;
1543 }
1544
1545
1546 /* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1547    that is to the NUL character. Otherwise the function will produce
1548    incorrect results. */
1549 static Py_UNICODE *
1550 find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1551 {
1552     Py_UNICODE *s = start;
1553     for (;;) {
1554         while (*s > ch)
1555             s++;
1556         if (*s == ch)
1557             return s;
1558         if (s == end)
1559             return NULL;
1560         s++;
1561     }
1562 }
1563
1564 Py_ssize_t
1565 _PyIO_find_line_ending(
1566     int translated, int universal, PyObject *readnl,
1567     Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1568 {
1569     Py_ssize_t len = end - start;
1570
1571     if (translated) {
1572         /* Newlines are already translated, only search for \n */
1573         Py_UNICODE *pos = find_control_char(start, end, '\n');
1574         if (pos != NULL)
1575             return pos - start + 1;
1576         else {
1577             *consumed = len;
1578             return -1;
1579         }
1580     }
1581     else if (universal) {
1582         /* Universal newline search. Find any of \r, \r\n, \n
1583          * The decoder ensures that \r\n are not split in two pieces
1584          */
1585         Py_UNICODE *s = start;
1586         for (;;) {
1587             Py_UNICODE ch;
1588             /* Fast path for non-control chars. The loop always ends
1589                since the Py_UNICODE storage is NUL-terminated. */
1590             while (*s > '\r')
1591                 s++;
1592             if (s >= end) {
1593                 *consumed = len;
1594                 return -1;
1595             }
1596             ch = *s++;
1597             if (ch == '\n')
1598                 return s - start;
1599             if (ch == '\r') {
1600                 if (*s == '\n')
1601                     return s - start + 1;
1602                 else
1603                     return s - start;
1604             }
1605         }
1606     }
1607     else {
1608         /* Non-universal mode. */
1609         Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
1610         unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
1611         if (readnl_len == 1) {
1612             Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1613             if (pos != NULL)
1614                 return pos - start + 1;
1615             *consumed = len;
1616             return -1;
1617         }
1618         else {
1619             Py_UNICODE *s = start;
1620             Py_UNICODE *e = end - readnl_len + 1;
1621             Py_UNICODE *pos;
1622             if (e < s)
1623                 e = s;
1624             while (s < e) {
1625                 Py_ssize_t i;
1626                 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1627                 if (pos == NULL || pos >= e)
1628                     break;
1629                 for (i = 1; i < readnl_len; i++) {
1630                     if (pos[i] != nl[i])
1631                         break;
1632                 }
1633                 if (i == readnl_len)
1634                     return pos - start + readnl_len;
1635                 s = pos + 1;
1636             }
1637             pos = find_control_char(e, end, nl[0]);
1638             if (pos == NULL)
1639                 *consumed = len;
1640             else
1641                 *consumed = pos - start;
1642             return -1;
1643         }
1644     }
1645 }
1646
1647 static PyObject *
1648 _textiowrapper_readline(textio *self, Py_ssize_t limit)
1649 {
1650     PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1651     Py_ssize_t start, endpos, chunked, offset_to_buffer;
1652     int res;
1653
1654     CHECK_CLOSED(self);
1655
1656     if (_textiowrapper_writeflush(self) < 0)
1657         return NULL;
1658
1659     chunked = 0;
1660
1661     while (1) {
1662         Py_UNICODE *ptr;
1663         Py_ssize_t line_len;
1664         Py_ssize_t consumed = 0;
1665
1666         /* First, get some data if necessary */
1667         res = 1;
1668         while (!self->decoded_chars ||
1669                !PyUnicode_GET_SIZE(self->decoded_chars)) {
1670             res = textiowrapper_read_chunk(self);
1671             if (res < 0)
1672                 goto error;
1673             if (res == 0)
1674                 break;
1675         }
1676         if (res == 0) {
1677             /* end of file */
1678             textiowrapper_set_decoded_chars(self, NULL);
1679             Py_CLEAR(self->snapshot);
1680             start = endpos = offset_to_buffer = 0;
1681             break;
1682         }
1683
1684         if (remaining == NULL) {
1685             line = self->decoded_chars;
1686             start = self->decoded_chars_used;
1687             offset_to_buffer = 0;
1688             Py_INCREF(line);
1689         }
1690         else {
1691             assert(self->decoded_chars_used == 0);
1692             line = PyUnicode_Concat(remaining, self->decoded_chars);
1693             start = 0;
1694             offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1695             Py_CLEAR(remaining);
1696             if (line == NULL)
1697                 goto error;
1698         }
1699
1700         ptr = PyUnicode_AS_UNICODE(line);
1701         line_len = PyUnicode_GET_SIZE(line);
1702
1703         endpos = _PyIO_find_line_ending(
1704             self->readtranslate, self->readuniversal, self->readnl,
1705             ptr + start, ptr + line_len, &consumed);
1706         if (endpos >= 0) {
1707             endpos += start;
1708             if (limit >= 0 && (endpos - start) + chunked >= limit)
1709                 endpos = start + limit - chunked;
1710             break;
1711         }
1712
1713         /* We can put aside up to `endpos` */
1714         endpos = consumed + start;
1715         if (limit >= 0 && (endpos - start) + chunked >= limit) {
1716             /* Didn't find line ending, but reached length limit */
1717             endpos = start + limit - chunked;
1718             break;
1719         }
1720
1721         if (endpos > start) {
1722             /* No line ending seen yet - put aside current data */
1723             PyObject *s;
1724             if (chunks == NULL) {
1725                 chunks = PyList_New(0);
1726                 if (chunks == NULL)
1727                     goto error;
1728             }
1729             s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1730             if (s == NULL)
1731                 goto error;
1732             if (PyList_Append(chunks, s) < 0) {
1733                 Py_DECREF(s);
1734                 goto error;
1735             }
1736             chunked += PyUnicode_GET_SIZE(s);
1737             Py_DECREF(s);
1738         }
1739         /* There may be some remaining bytes we'll have to prepend to the
1740            next chunk of data */
1741         if (endpos < line_len) {
1742             remaining = PyUnicode_FromUnicode(
1743                     ptr + endpos, line_len - endpos);
1744             if (remaining == NULL)
1745                 goto error;
1746         }
1747         Py_CLEAR(line);
1748         /* We have consumed the buffer */
1749         textiowrapper_set_decoded_chars(self, NULL);
1750     }
1751
1752     if (line != NULL) {
1753         /* Our line ends in the current buffer */
1754         self->decoded_chars_used = endpos - offset_to_buffer;
1755         if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1756             if (start == 0 && Py_REFCNT(line) == 1) {
1757                 if (PyUnicode_Resize(&line, endpos) < 0)
1758                     goto error;
1759             }
1760             else {
1761                 PyObject *s = PyUnicode_FromUnicode(
1762                         PyUnicode_AS_UNICODE(line) + start, endpos - start);
1763                 Py_CLEAR(line);
1764                 if (s == NULL)
1765                     goto error;
1766                 line = s;
1767             }
1768         }
1769     }
1770     if (remaining != NULL) {
1771         if (chunks == NULL) {
1772             chunks = PyList_New(0);
1773             if (chunks == NULL)
1774                 goto error;
1775         }
1776         if (PyList_Append(chunks, remaining) < 0)
1777             goto error;
1778         Py_CLEAR(remaining);
1779     }
1780     if (chunks != NULL) {
1781         if (line != NULL && PyList_Append(chunks, line) < 0)
1782             goto error;
1783         Py_CLEAR(line);
1784         line = PyUnicode_Join(_PyIO_empty_str, chunks);
1785         if (line == NULL)
1786             goto error;
1787         Py_DECREF(chunks);
1788     }
1789     if (line == NULL)
1790         line = PyUnicode_FromStringAndSize(NULL, 0);
1791
1792     return line;
1793
1794   error:
1795     Py_XDECREF(chunks);
1796     Py_XDECREF(remaining);
1797     Py_XDECREF(line);
1798     return NULL;
1799 }
1800
1801 static PyObject *
1802 textiowrapper_readline(textio *self, PyObject *args)
1803 {
1804     PyObject *limitobj = NULL;
1805     Py_ssize_t limit = -1;
1806
1807     CHECK_INITIALIZED(self);
1808     if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1809         return NULL;
1810     }
1811     if (limitobj) {
1812         if (!PyNumber_Check(limitobj)) {
1813             PyErr_Format(PyExc_TypeError,
1814                          "integer argument expected, got '%.200s'",
1815                          Py_TYPE(limitobj)->tp_name);
1816             return NULL;
1817         }
1818         limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1819         if (limit == -1 && PyErr_Occurred())
1820             return NULL;
1821     }
1822     return _textiowrapper_readline(self, limit);
1823 }
1824
1825 /* Seek and Tell */
1826
/* Unpacked form of the integer position ("cookie") exchanged by seek()
   and tell().  textiowrapper_build_cookie() packs it into a Python long
   and textiowrapper_parse_cookie() unpacks it again.

   NOTE: tell() initializes this struct positionally ({0,0,0,0,0}), so
   the order of the fields must not be changed casually. */
typedef struct {
    Py_off_t start_pos;     /* position handed to buffer.seek() by seek() */
    int dec_flags;          /* flags handed to decoder.setstate() */
    int bytes_to_feed;      /* byte count handed to buffer.read() by seek() */
    int chars_to_skip;      /* decoded characters marked as already used */
    char need_eof;          /* passed as second argument to decoder.decode() */
} cookie_type;
1834
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (respectively).  The following macros define at which offsets in that
   intermediary buffer the various cookie_type fields are stored.
 */

/* Total size in bytes of the packed cookie: one Py_off_t, three ints and
   one char, with no padding between them. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1872
1873 static int
1874 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1875 {
1876     unsigned char buffer[COOKIE_BUF_LEN];
1877     PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1878     if (cookieLong == NULL)
1879         return -1;
1880
1881     if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1882                             IS_LITTLE_ENDIAN, 0) < 0) {
1883         Py_DECREF(cookieLong);
1884         return -1;
1885     }
1886     Py_DECREF(cookieLong);
1887
1888     memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1889     memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1890     memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1891     memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1892     memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1893
1894     return 0;
1895 }
1896
1897 static PyObject *
1898 textiowrapper_build_cookie(cookie_type *cookie)
1899 {
1900     unsigned char buffer[COOKIE_BUF_LEN];
1901
1902     memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1903     memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1904     memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1905     memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1906     memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1907
1908     return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1909 }
1910 #undef IS_LITTLE_ENDIAN
1911
1912 static int
1913 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1914 {
1915     PyObject *res;
1916     /* When seeking to the start of the stream, we call decoder.reset()
1917        rather than decoder.getstate().
1918        This is for a few decoders such as utf-16 for which the state value
1919        at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1920        utf-16, that we are expecting a BOM).
1921     */
1922     if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1923         res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1924     else
1925         res = PyObject_CallMethod(self->decoder, "setstate",
1926                                   "((si))", "", cookie->dec_flags);
1927     if (res == NULL)
1928         return -1;
1929     Py_DECREF(res);
1930     return 0;
1931 }
1932
1933 static int
1934 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1935 {
1936     PyObject *res;
1937     /* Same as _textiowrapper_decoder_setstate() above. */
1938     if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1939         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1940         self->encoding_start_of_stream = 1;
1941     }
1942     else {
1943         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1944                                          _PyIO_zero, NULL);
1945         self->encoding_start_of_stream = 0;
1946     }
1947     if (res == NULL)
1948         return -1;
1949     Py_DECREF(res);
1950     return 0;
1951 }
1952
1953 static PyObject *
1954 textiowrapper_seek(textio *self, PyObject *args)
1955 {
1956     PyObject *cookieObj, *posobj;
1957     cookie_type cookie;
1958     int whence = 0;
1959     PyObject *res;
1960     int cmp;
1961
1962     CHECK_INITIALIZED(self);
1963
1964     if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1965         return NULL;
1966     CHECK_CLOSED(self);
1967
1968     Py_INCREF(cookieObj);
1969
1970     if (!self->seekable) {
1971         PyErr_SetString(PyExc_IOError,
1972                         "underlying stream is not seekable");
1973         goto fail;
1974     }
1975
1976     if (whence == 1) {
1977         /* seek relative to current position */
1978         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1979         if (cmp < 0)
1980             goto fail;
1981
1982         if (cmp == 0) {
1983             PyErr_SetString(PyExc_IOError,
1984                             "can't do nonzero cur-relative seeks");
1985             goto fail;
1986         }
1987
1988         /* Seeking to the current position should attempt to
1989          * sync the underlying buffer with the current position.
1990          */
1991         Py_DECREF(cookieObj);
1992         cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
1993         if (cookieObj == NULL)
1994             goto fail;
1995     }
1996     else if (whence == 2) {
1997         /* seek relative to end of file */
1998
1999         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2000         if (cmp < 0)
2001             goto fail;
2002
2003         if (cmp == 0) {
2004             PyErr_SetString(PyExc_IOError,
2005                             "can't do nonzero end-relative seeks");
2006             goto fail;
2007         }
2008
2009         res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2010         if (res == NULL)
2011             goto fail;
2012         Py_DECREF(res);
2013
2014         textiowrapper_set_decoded_chars(self, NULL);
2015         Py_CLEAR(self->snapshot);
2016         if (self->decoder) {
2017             res = PyObject_CallMethod(self->decoder, "reset", NULL);
2018             if (res == NULL)
2019                 goto fail;
2020             Py_DECREF(res);
2021         }
2022
2023         res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2024         Py_XDECREF(cookieObj);
2025         return res;
2026     }
2027     else if (whence != 0) {
2028         PyErr_Format(PyExc_ValueError,
2029                      "invalid whence (%d, should be 0, 1 or 2)", whence);
2030         goto fail;
2031     }
2032
2033     cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2034     if (cmp < 0)
2035         goto fail;
2036
2037     if (cmp == 1) {
2038         PyObject *repr = PyObject_Repr(cookieObj);
2039         if (repr != NULL) {
2040             PyErr_Format(PyExc_ValueError,
2041                          "negative seek position %s",
2042                          PyString_AS_STRING(repr));
2043             Py_DECREF(repr);
2044         }
2045         goto fail;
2046     }
2047
2048     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2049     if (res == NULL)
2050         goto fail;
2051     Py_DECREF(res);
2052
2053     /* The strategy of seek() is to go back to the safe start point
2054      * and replay the effect of read(chars_to_skip) from there.
2055      */
2056     if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2057         goto fail;
2058
2059     /* Seek back to the safe start point. */
2060     posobj = PyLong_FromOff_t(cookie.start_pos);
2061     if (posobj == NULL)
2062         goto fail;
2063     res = PyObject_CallMethodObjArgs(self->buffer,
2064                                      _PyIO_str_seek, posobj, NULL);
2065     Py_DECREF(posobj);
2066     if (res == NULL)
2067         goto fail;
2068     Py_DECREF(res);
2069
2070     textiowrapper_set_decoded_chars(self, NULL);
2071     Py_CLEAR(self->snapshot);
2072
2073     /* Restore the decoder to its state from the safe start point. */
2074     if (self->decoder) {
2075         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2076             goto fail;
2077     }
2078
2079     if (cookie.chars_to_skip) {
2080         /* Just like _read_chunk, feed the decoder and save a snapshot. */
2081         PyObject *input_chunk = PyObject_CallMethod(
2082             self->buffer, "read", "i", cookie.bytes_to_feed);
2083         PyObject *decoded;
2084
2085         if (input_chunk == NULL)
2086             goto fail;
2087
2088         assert (PyBytes_Check(input_chunk));
2089
2090         self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2091         if (self->snapshot == NULL) {
2092             Py_DECREF(input_chunk);
2093             goto fail;
2094         }
2095
2096         decoded = PyObject_CallMethod(self->decoder, "decode",
2097                                       "Oi", input_chunk, (int)cookie.need_eof);
2098
2099         if (decoded == NULL)
2100             goto fail;
2101
2102         textiowrapper_set_decoded_chars(self, decoded);
2103
2104         /* Skip chars_to_skip of the decoded characters. */
2105         if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2106             PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2107             goto fail;
2108         }
2109         self->decoded_chars_used = cookie.chars_to_skip;
2110     }
2111     else {
2112         self->snapshot = Py_BuildValue("is", cookie.dec_flags, "");
2113         if (self->snapshot == NULL)
2114             goto fail;
2115     }
2116
2117     /* Finally, reset the encoder (merely useful for proper BOM handling) */
2118     if (self->encoder) {
2119         if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2120             goto fail;
2121     }
2122     return cookieObj;
2123   fail:
2124     Py_XDECREF(cookieObj);
2125     return NULL;
2126
2127 }
2128
2129 static PyObject *
2130 textiowrapper_tell(textio *self, PyObject *args)
2131 {
2132     PyObject *res;
2133     PyObject *posobj = NULL;
2134     cookie_type cookie = {0,0,0,0,0};
2135     PyObject *next_input;
2136     Py_ssize_t chars_to_skip, chars_decoded;
2137     PyObject *saved_state = NULL;
2138     char *input, *input_end;
2139
2140     CHECK_INITIALIZED(self);
2141     CHECK_CLOSED(self);
2142
2143     if (!self->seekable) {
2144         PyErr_SetString(PyExc_IOError,
2145                         "underlying stream is not seekable");
2146         goto fail;
2147     }
2148     if (!self->telling) {
2149         PyErr_SetString(PyExc_IOError,
2150                         "telling position disabled by next() call");
2151         goto fail;
2152     }
2153
2154     if (_textiowrapper_writeflush(self) < 0)
2155         return NULL;
2156     res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2157     if (res == NULL)
2158         goto fail;
2159     Py_DECREF(res);
2160
2161     posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2162     if (posobj == NULL)
2163         goto fail;
2164
2165     if (self->decoder == NULL || self->snapshot == NULL) {
2166         assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2167         return posobj;
2168     }
2169
2170 #if defined(HAVE_LARGEFILE_SUPPORT)
2171     cookie.start_pos = PyLong_AsLongLong(posobj);
2172 #else
2173     cookie.start_pos = PyLong_AsLong(posobj);
2174 #endif
2175     if (PyErr_Occurred())
2176         goto fail;
2177
2178     /* Skip backward to the snapshot point (see _read_chunk). */
2179     if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2180         goto fail;
2181
2182     assert (PyBytes_Check(next_input));
2183
2184     cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2185
2186     /* How many decoded characters have been used up since the snapshot? */
2187     if (self->decoded_chars_used == 0)  {
2188         /* We haven't moved from the snapshot point. */
2189         Py_DECREF(posobj);
2190         return textiowrapper_build_cookie(&cookie);
2191     }
2192
2193     chars_to_skip = self->decoded_chars_used;
2194
2195     /* Starting from the snapshot position, we will walk the decoder
2196      * forward until it gives us enough decoded characters.
2197      */
2198     saved_state = PyObject_CallMethodObjArgs(self->decoder,
2199                                              _PyIO_str_getstate, NULL);
2200     if (saved_state == NULL)
2201         goto fail;
2202
2203     /* Note our initial start point. */
2204     if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2205         goto fail;
2206
2207     /* Feed the decoder one byte at a time.  As we go, note the
2208      * nearest "safe start point" before the current location
2209      * (a point where the decoder has nothing buffered, so seek()
2210      * can safely start from there and advance to this location).
2211      */
2212     chars_decoded = 0;
2213     input = PyBytes_AS_STRING(next_input);
2214     input_end = input + PyBytes_GET_SIZE(next_input);
2215     while (input < input_end) {
2216         PyObject *state;
2217         char *dec_buffer;
2218         Py_ssize_t dec_buffer_len;
2219         int dec_flags;
2220
2221         PyObject *decoded = PyObject_CallMethod(
2222             self->decoder, "decode", "s#", input, 1);
2223         if (decoded == NULL)
2224             goto fail;
2225         assert (PyUnicode_Check(decoded));
2226         chars_decoded += PyUnicode_GET_SIZE(decoded);
2227         Py_DECREF(decoded);
2228
2229         cookie.bytes_to_feed += 1;
2230
2231         state = PyObject_CallMethodObjArgs(self->decoder,
2232                                            _PyIO_str_getstate, NULL);
2233         if (state == NULL)
2234             goto fail;
2235         if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2236             Py_DECREF(state);
2237             goto fail;
2238         }
2239         Py_DECREF(state);
2240
2241         if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2242             /* Decoder buffer is empty, so this is a safe start point. */
2243             cookie.start_pos += cookie.bytes_to_feed;
2244             chars_to_skip -= chars_decoded;
2245             cookie.dec_flags = dec_flags;
2246             cookie.bytes_to_feed = 0;
2247             chars_decoded = 0;
2248         }
2249         if (chars_decoded >= chars_to_skip)
2250             break;
2251         input++;
2252     }
2253     if (input == input_end) {
2254         /* We didn't get enough decoded data; signal EOF to get more. */
2255         PyObject *decoded = PyObject_CallMethod(
2256             self->decoder, "decode", "si", "", /* final = */ 1);
2257         if (decoded == NULL)
2258             goto fail;
2259         assert (PyUnicode_Check(decoded));
2260         chars_decoded += PyUnicode_GET_SIZE(decoded);
2261         Py_DECREF(decoded);
2262         cookie.need_eof = 1;
2263
2264         if (chars_decoded < chars_to_skip) {
2265             PyErr_SetString(PyExc_IOError,
2266                             "can't reconstruct logical file position");
2267             goto fail;
2268         }
2269     }
2270
2271     /* finally */
2272     Py_XDECREF(posobj);
2273     res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2274     Py_DECREF(saved_state);
2275     if (res == NULL)
2276         return NULL;
2277     Py_DECREF(res);
2278
2279     /* The returned cookie corresponds to the last safe start point. */
2280     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2281     return textiowrapper_build_cookie(&cookie);
2282
2283   fail:
2284     Py_XDECREF(posobj);
2285     if (saved_state) {
2286         PyObject *type, *value, *traceback;
2287         PyErr_Fetch(&type, &value, &traceback);
2288
2289         res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2290         Py_DECREF(saved_state);
2291         if (res == NULL)
2292             return NULL;
2293         Py_DECREF(res);
2294
2295         PyErr_Restore(type, value, traceback);
2296     }
2297     return NULL;
2298 }
2299
2300 static PyObject *
2301 textiowrapper_truncate(textio *self, PyObject *args)
2302 {
2303     PyObject *pos = Py_None;
2304     PyObject *res;
2305
2306     CHECK_INITIALIZED(self)
2307     if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2308         return NULL;
2309     }
2310
2311     res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2312     if (res == NULL)
2313         return NULL;
2314     Py_DECREF(res);
2315
2316     if (pos != Py_None) {
2317         res = PyObject_CallMethodObjArgs((PyObject *) self,
2318                                           _PyIO_str_seek, pos, NULL);
2319         if (res == NULL)
2320             return NULL;
2321         Py_DECREF(res);
2322     }
2323
2324     return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL);
2325 }
2326
2327 static PyObject *
2328 textiowrapper_repr(textio *self)
2329 {
2330     PyObject *nameobj, *res;
2331     PyObject *namerepr = NULL, *encrepr = NULL;
2332
2333     CHECK_INITIALIZED(self);
2334
2335     nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2336     if (nameobj == NULL) {
2337         if (PyErr_ExceptionMatches(PyExc_AttributeError))
2338             PyErr_Clear();
2339         else
2340             goto error;
2341         encrepr = PyObject_Repr(self->encoding);
2342         res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2343                                    PyString_AS_STRING(encrepr));
2344     }
2345     else {
2346         encrepr = PyObject_Repr(self->encoding);
2347         namerepr = PyObject_Repr(nameobj);
2348         res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2349                                    PyString_AS_STRING(namerepr),
2350                                    PyString_AS_STRING(encrepr));
2351         Py_DECREF(nameobj);
2352     }
2353     Py_XDECREF(namerepr);
2354     Py_XDECREF(encrepr);
2355     return res;
2356
2357 error:
2358     Py_XDECREF(namerepr);
2359     Py_XDECREF(encrepr);
2360     return NULL;
2361 }
2362
2363
2364 /* Inquiries */
2365
2366 static PyObject *
2367 textiowrapper_fileno(textio *self, PyObject *args)
2368 {
2369     CHECK_INITIALIZED(self);
2370     return PyObject_CallMethod(self->buffer, "fileno", NULL);
2371 }
2372
2373 static PyObject *
2374 textiowrapper_seekable(textio *self, PyObject *args)
2375 {
2376     CHECK_INITIALIZED(self);
2377     return PyObject_CallMethod(self->buffer, "seekable", NULL);
2378 }
2379
2380 static PyObject *
2381 textiowrapper_readable(textio *self, PyObject *args)
2382 {
2383     CHECK_INITIALIZED(self);
2384     return PyObject_CallMethod(self->buffer, "readable", NULL);
2385 }
2386
2387 static PyObject *
2388 textiowrapper_writable(textio *self, PyObject *args)
2389 {
2390     CHECK_INITIALIZED(self);
2391     return PyObject_CallMethod(self->buffer, "writable", NULL);
2392 }
2393
2394 static PyObject *
2395 textiowrapper_isatty(textio *self, PyObject *args)
2396 {
2397     CHECK_INITIALIZED(self);
2398     return PyObject_CallMethod(self->buffer, "isatty", NULL);
2399 }
2400
2401 static PyObject *
2402 textiowrapper_flush(textio *self, PyObject *args)
2403 {
2404     CHECK_INITIALIZED(self);
2405     CHECK_CLOSED(self);
2406     self->telling = self->seekable;
2407     if (_textiowrapper_writeflush(self) < 0)
2408         return NULL;
2409     return PyObject_CallMethod(self->buffer, "flush", NULL);
2410 }
2411
2412 static PyObject *
2413 textiowrapper_close(textio *self, PyObject *args)
2414 {
2415     PyObject *res;
2416     CHECK_INITIALIZED(self);
2417     res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2418     if (res == NULL) {
2419         /* If flush() fails, just give up */
2420         PyErr_Clear();
2421     }
2422     else
2423         Py_DECREF(res);
2424
2425     return PyObject_CallMethod(self->buffer, "close", NULL);
2426 }
2427
2428 static PyObject *
2429 textiowrapper_iternext(textio *self)
2430 {
2431     PyObject *line;
2432
2433     CHECK_INITIALIZED(self);
2434
2435     self->telling = 0;
2436     if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2437         /* Skip method call overhead for speed */
2438         line = _textiowrapper_readline(self, -1);
2439     }
2440     else {
2441         line = PyObject_CallMethodObjArgs((PyObject *)self,
2442                                            _PyIO_str_readline, NULL);
2443         if (line && !PyUnicode_Check(line)) {
2444             PyErr_Format(PyExc_IOError,
2445                          "readline() should have returned an str object, "
2446                          "not '%.200s'", Py_TYPE(line)->tp_name);
2447             Py_DECREF(line);
2448             return NULL;
2449         }
2450     }
2451
2452     if (line == NULL)
2453         return NULL;
2454
2455     if (PyUnicode_GET_SIZE(line) == 0) {
2456         /* Reached EOF or would have blocked */
2457         Py_DECREF(line);
2458         Py_CLEAR(self->snapshot);
2459         self->telling = self->seekable;
2460         return NULL;
2461     }
2462
2463     return line;
2464 }
2465
2466 static PyObject *
2467 textiowrapper_name_get(textio *self, void *context)
2468 {
2469     CHECK_INITIALIZED(self);
2470     return PyObject_GetAttrString(self->buffer, "name");
2471 }
2472
2473 static PyObject *
2474 textiowrapper_closed_get(textio *self, void *context)
2475 {
2476     CHECK_INITIALIZED(self);
2477     return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2478 }
2479
2480 static PyObject *
2481 textiowrapper_newlines_get(textio *self, void *context)
2482 {
2483     PyObject *res;
2484     CHECK_INITIALIZED(self);
2485     if (self->decoder == NULL)
2486         Py_RETURN_NONE;
2487     res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2488     if (res == NULL) {
2489         if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2490             PyErr_Clear();
2491             Py_RETURN_NONE;
2492         }
2493         else {
2494             return NULL;
2495         }
2496     }
2497     return res;
2498 }
2499
2500 static PyObject *
2501 textiowrapper_errors_get(textio *self, void *context)
2502 {
2503     CHECK_INITIALIZED(self);
2504     Py_INCREF(self->errors);
2505     return self->errors;
2506 }
2507
2508 static PyObject *
2509 textiowrapper_chunk_size_get(textio *self, void *context)
2510 {
2511     CHECK_INITIALIZED(self);
2512     return PyLong_FromSsize_t(self->chunk_size);
2513 }
2514
2515 static int
2516 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2517 {
2518     Py_ssize_t n;
2519     CHECK_INITIALIZED_INT(self);
2520     n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2521     if (n == -1 && PyErr_Occurred())
2522         return -1;
2523     if (n <= 0) {
2524         PyErr_SetString(PyExc_ValueError,
2525                         "a strictly positive integer is required");
2526         return -1;
2527     }
2528     self->chunk_size = n;
2529     return 0;
2530 }
2531
2532 static PyMethodDef textiowrapper_methods[] = {
2533     {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2534     {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2535     {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2536     {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2537     {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2538     {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
2539
2540     {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2541     {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2542     {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2543     {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2544     {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
2545
2546     {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2547     {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2548     {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
2549     {NULL, NULL}
2550 };
2551
2552 static PyMemberDef textiowrapper_members[] = {
2553     {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2554     {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2555     {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
2556     {NULL}
2557 };
2558
2559 static PyGetSetDef textiowrapper_getset[] = {
2560     {"name", (getter)textiowrapper_name_get, NULL, NULL},
2561     {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
2562 /*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2563 */
2564     {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2565     {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2566     {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2567                     (setter)textiowrapper_chunk_size_set, NULL},
2568     {NULL}
2569 };
2570
/* Type object for _io.TextIOWrapper.

   The initializer is positional: every value must stay in its slot, as
   named by the trailing comment on each line.  The type can be
   subclassed and takes part in cyclic garbage collection (see tp_flags);
   instances support weak references and carry an instance dictionary
   (tp_weaklistoffset / tp_dictoffset). */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
            | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    textiowrapper_doc,          /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    (initproc)textiowrapper_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};