Modules/_textio.c

   1 /*
   2     An implementation of Text I/O as defined by PEP 3116 - "New I/O"
   3
   4     Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
   5
   6     Written by Amaury Forgeot d'Arc and Antoine Pitrou
   7 */
   8
   9 #define PY_SSIZE_T_CLEAN
  10 #include "Python.h"
  11 #include "structmember.h"
  12 #include "_iomodule.h"
  13
  14 /* TextIOBase */
  15
  16 PyDoc_STRVAR(TextIOBase_doc,
  17     "Base class for text I/O.\n"
  18     "\n"
  19     "This class provides a character and line based interface to stream\n"
  20     "I/O. There is no readinto method because Python's character strings\n"
  21     "are immutable. There is no public constructor.\n"
  22     );
  23
  24 static PyObject *
  25 _unsupported(const char *message)
  26 {
  27     PyErr_SetString(IO_STATE->unsupported_operation, message);
  28     return NULL;
  29 }
  30
  31 PyDoc_STRVAR(TextIOBase_read_doc,
  32     "Read at most n characters from stream.\n"
  33     "\n"
  34     "Read from underlying buffer until we have n characters or we hit EOF.\n"
  35     "If n is negative or omitted, read until EOF.\n"
  36     );
  37
  38 static PyObject *
  39 TextIOBase_read(PyObject *self, PyObject *args)
  40 {
  41     return _unsupported("read");
  42 }
  43
  44 PyDoc_STRVAR(TextIOBase_readline_doc,
  45     "Read until newline or EOF.\n"
  46     "\n"
  47     "Returns an empty string if EOF is hit immediately.\n"
  48     );
  49
  50 static PyObject *
  51 TextIOBase_readline(PyObject *self, PyObject *args)
  52 {
  53     return _unsupported("readline");
  54 }
  55
  56 PyDoc_STRVAR(TextIOBase_write_doc,
  57     "Write string to stream.\n"
  58     "Returns the number of characters written (which is always equal to\n"
  59     "the length of the string).\n"
  60     );
  61
  62 static PyObject *
  63 TextIOBase_write(PyObject *self, PyObject *args)
  64 {
  65     return _unsupported("write");
  66 }
  67
  68 PyDoc_STRVAR(TextIOBase_encoding_doc,
  69     "Encoding of the text stream.\n"
  70     "\n"
  71     "Subclasses should override.\n"
  72     );
  73
  74 static PyObject *
  75 TextIOBase_encoding_get(PyObject *self, void *context)
  76 {
  77     Py_RETURN_NONE;
  78 }
  79
  80 PyDoc_STRVAR(TextIOBase_newlines_doc,
  81     "Line endings translated so far.\n"
  82     "\n"
  83     "Only line endings translated during reading are considered.\n"
  84     "\n"
  85     "Subclasses should override.\n"
  86     );
  87
  88 static PyObject *
  89 TextIOBase_newlines_get(PyObject *self, void *context)
  90 {
  91     Py_RETURN_NONE;
  92 }
  93
  94
  95 static PyMethodDef TextIOBase_methods[] = {
  96     {"read", TextIOBase_read, METH_VARARGS, TextIOBase_read_doc},
  97     {"readline", TextIOBase_readline, METH_VARARGS, TextIOBase_readline_doc},
  98     {"write", TextIOBase_write, METH_VARARGS, TextIOBase_write_doc},
  99     {NULL, NULL}
 100 };
 101
 102 static PyGetSetDef TextIOBase_getset[] = {
 103     {"encoding", (getter)TextIOBase_encoding_get, NULL, TextIOBase_encoding_doc},
 104     {"newlines", (getter)TextIOBase_newlines_get, NULL, TextIOBase_newlines_doc},
 105     {0}
 106 };
 107
  /* Static type object for "_io._TextIOBase".
   *
   * Only the name, flags, docstring, method table, getset table and base
   * type (PyIOBase_Type) are filled in; every other slot is left as 0.
   * Py_TPFLAGS_BASETYPE is set, so the type can be subclassed.
   * NOTE(review): the 0 slots (including tp_basicsize and tp_new) are
   * presumably inherited from the base when the type is readied - confirm.
   */
  108 PyTypeObject PyTextIOBase_Type = {
  109     PyVarObject_HEAD_INIT(NULL, 0)
  110     "_io._TextIOBase",          /* tp_name */
  111     0,                          /* tp_basicsize */
  112     0,                          /* tp_itemsize */
  113     0,                          /* tp_dealloc */
  114     0,                          /* tp_print */
  115     0,                          /* tp_getattr */
  116     0,                          /* tp_setattr */
  117     0,                          /* tp_compare */
  118     0,                          /* tp_repr */
  119     0,                          /* tp_as_number */
  120     0,                          /* tp_as_sequence */
  121     0,                          /* tp_as_mapping */
  122     0,                          /* tp_hash */
  123     0,                          /* tp_call */
  124     0,                          /* tp_str */
  125     0,                          /* tp_getattro */
  126     0,                          /* tp_setattro */
  127     0,                          /* tp_as_buffer */
  128     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /* tp_flags */
  129     TextIOBase_doc,             /* tp_doc */
  130     0,                          /* tp_traverse */
  131     0,                          /* tp_clear */
  132     0,                          /* tp_richcompare */
  133     0,                          /* tp_weaklistoffset */
  134     0,                          /* tp_iter */
  135     0,                          /* tp_iternext */
  136     TextIOBase_methods,         /* tp_methods */
  137     0,                          /* tp_members */
  138     TextIOBase_getset,          /* tp_getset */
  139     &PyIOBase_Type,             /* tp_base */
  140     0,                          /* tp_dict */
  141     0,                          /* tp_descr_get */
  142     0,                          /* tp_descr_set */
  143     0,                          /* tp_dictoffset */
  144     0,                          /* tp_init */
  145     0,                          /* tp_alloc */
  146     0,                          /* tp_new */
  147 };
 148
 149
 150 /* IncrementalNewlineDecoder */
 151
 152 PyDoc_STRVAR(IncrementalNewlineDecoder_doc,
 153     "Codec used when reading a file in universal newlines mode.  It wraps\n"
 154     "another incremental decoder, translating \\r\\n and \\r into \\n.  It also\n"
 155     "records the types of newlines encountered.  When used with\n"
 156     "translate=False, it ensures that the newline sequence is returned in\n"
 157     "one piece. When used with decoder=None, it expects unicode strings as\n"
 158     "decode input and translates newlines without first invoking an external\n"
 159     "decoder.\n"
 160     );
 161
 162 typedef struct {
 163     PyObject_HEAD
 164     PyObject *decoder;
 165     PyObject *errors;
 166     int pendingcr:1;
 167     int translate:1;
 168     unsigned int seennl:3;
 169 } PyNewLineDecoderObject;
 170
 171 static int
 172 IncrementalNewlineDecoder_init(PyNewLineDecoderObject *self,
 173                                PyObject *args, PyObject *kwds)
 174 {
 175     PyObject *decoder;
 176     int translate;
 177     PyObject *errors = NULL;
 178     char *kwlist[] = {"decoder", "translate", "errors", NULL};
 179
 180     if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
 181                                      kwlist, &decoder, &translate, &errors))
 182         return -1;
 183
 184     self->decoder = decoder;
 185     Py_INCREF(decoder);
 186
 187     if (errors == NULL) {
 188         self->errors = PyUnicode_FromString("strict");
 189         if (self->errors == NULL)
 190             return -1;
 191     }
 192     else {
 193         Py_INCREF(errors);
 194         self->errors = errors;
 195     }
 196
 197     self->translate = translate;
 198     self->seennl = 0;
 199     self->pendingcr = 0;
 200
 201     return 0;
 202 }
 203
 204 static void
 205 IncrementalNewlineDecoder_dealloc(PyNewLineDecoderObject *self)
 206 {
 207     Py_CLEAR(self->decoder);
 208     Py_CLEAR(self->errors);
 209     Py_TYPE(self)->tp_free((PyObject *)self);
 210 }
 211
 212 #define SEEN_CR   1
 213 #define SEEN_LF   2
 214 #define SEEN_CRLF 4
 215 #define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
 216
  /* Decode one chunk of input and apply newline bookkeeping/translation.
   *
   * _self  - a PyNewLineDecoderObject (cast without a type check).
   * input  - passed to the wrapped decoder's decode method; when the
   *          wrapped decoder is None it is used directly as the text.
   * final  - non-zero if no more input will follow.
   *
   * Steps:
   *   1. Obtain the text (wrapped decoder, or `input` itself) and require
   *      it to be a unicode object, else TypeError.
   *   2. If a '\r' was held back by the previous call and this call is
   *      final or produced text, put it back at the front.
   *   3. If not final and the text ends with '\r', cut it off and remember
   *      it in pendingcr, so that a following '\n' is seen together with it.
   *   4. Scan the text, OR-ing SEEN_CR / SEEN_LF / SEEN_CRLF into seennl
   *      and, when self->translate is set, rewriting \r\n and \r to \n.
   *
   * Returns a new reference to the resulting unicode object, or NULL with
   * an exception set (ValueError if __init__ was never called).
   *
   * NOTE(review): the three scan loops test `s > end` / `in > end` only
   * after reading the element at that position, i.e. they read index
   * `len`.  This presumably relies on the unicode buffer carrying a
   * terminating NUL element - confirm before porting.
   */
  217 PyObject *
  218 _PyIncrementalNewlineDecoder_decode(PyObject *_self,
  219                                     PyObject *input, int final)
  220 {
  221     PyObject *output;
  222     Py_ssize_t output_len;
  223     PyNewLineDecoderObject *self = (PyNewLineDecoderObject *) _self;
  224
  225     if (self->decoder == NULL) {
  226         PyErr_SetString(PyExc_ValueError,
  227                         "IncrementalNewlineDecoder.__init__ not called");
  228         return NULL;
  229     }
  230
  231     /* decode input (a '\r' held back by a previous pass is added below) */
  232     if (self->decoder != Py_None) {
  233         output = PyObject_CallMethodObjArgs(self->decoder,
  234             _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
  235     }
  236     else {
          /* No wrapped decoder: the input itself is the text.  Take our own
             reference so that `output` is owned on both paths. */
  237         output = input;
  238         Py_INCREF(output);
  239     }
  240
  241     if (output == NULL)
  242         return NULL;
  243
  244     if (!PyUnicode_Check(output)) {
  245         PyErr_SetString(PyExc_TypeError,
  246                         "decoder should return a string result");
  247         goto error;
  248     }
  249
  250     output_len = PyUnicode_GET_SIZE(output);
      /* Re-insert the '\r' held back by the previous call.  On a non-final
         call with empty output it stays pending. */
  251     if (self->pendingcr && (final || output_len > 0)) {
  252         Py_UNICODE *out;
  253         PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
  254         if (modified == NULL)
  255             goto error;
  256         out = PyUnicode_AS_UNICODE(modified);
  257         out[0] = '\r';
  258         memcpy(out + 1, PyUnicode_AS_UNICODE(output),
  259                output_len * sizeof(Py_UNICODE));
  260         Py_DECREF(output);
  261         output = modified;
  262         self->pendingcr = 0;
  263         output_len++;
  264     }
  265
  266     /* retain last \r even when not translating data:
  267      * then readline() is sure to get \r\n in one pass
  268      */
  269     if (!final) {
  270         if (output_len > 0
  271             && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
  272
              /* Shrink in place only when we hold the sole reference;
                 otherwise build a shortened copy. */
  273             if (Py_REFCNT(output) == 1) {
  274                 if (PyUnicode_Resize(&output, output_len - 1) < 0)
  275                     goto error;
  276             }
  277             else {
  278                 PyObject *modified = PyUnicode_FromUnicode(
  279                     PyUnicode_AS_UNICODE(output),
  280                     output_len - 1);
  281                 if (modified == NULL)
  282                     goto error;
  283                 Py_DECREF(output);
  284                 output = modified;
  285             }
  286             self->pendingcr = 1;
  287         }
  288     }
  289
  290     /* Record which newlines are read and do newline translation if desired,
  291        all in one pass. */
  292     {
  293         Py_UNICODE *in_str;
  294         Py_ssize_t len;
  295         int seennl = self->seennl;
  296         int only_lf = 0;
  297
  298         in_str = PyUnicode_AS_UNICODE(output);
  299         len = PyUnicode_GET_SIZE(output);
  300
  301         if (len == 0)
  302             return output;
  303
  304         /* If, up to now, newlines are consistently \n, do a quick check
  305            for the \r *byte* with the libc's optimized memchr.
             The search is byte-wise over the whole buffer, so a miss proves
             there is no '\r' character; a hit only means there might be one.
  306            */
  307         if (seennl == SEEN_LF || seennl == 0) {
  308             only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
  309         }
  310
  311         if (only_lf) {
  312             /* If not already seen, quick scan for a possible "\n" character.
  313                (there's nothing else to be done, even when in translation mode)
  314             */
  315             if (seennl == 0 &&
  316                 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
  317                 Py_UNICODE *s, *end;
  318                 s = in_str;
  319                 end = in_str + len;
  320                 for (;;) {
  321                     Py_UNICODE c;
  322                     /* Fast loop for non-control characters */
  323                     while (*s > '\n')
  324                         s++;
  325                     c = *s++;
  326                     if (c == '\n') {
  327                         seennl |= SEEN_LF;
  328                         break;
  329                     }
  330                     if (s > end)
  331                         break;
  332                 }
  333             }
  334             /* Finished: we have scanned for newlines, and none of them
  335                need translating */
  336         }
  337         else if (!self->translate) {
              /* Not translating: only record which newline kinds occur. */
  338             Py_UNICODE *s, *end;
  339             /* We have already seen all newline types, no need to scan again */
  340             if (seennl == SEEN_ALL)
  341                 goto endscan;
  342             s = in_str;
  343             end = in_str + len;
  344             for (;;) {
  345                 Py_UNICODE c;
  346                 /* Fast loop for non-control characters */
  347                 while (*s > '\r')
  348                     s++;
  349                 c = *s++;
  350                 if (c == '\n')
  351                     seennl |= SEEN_LF;
  352                 else if (c == '\r') {
  353                     if (*s == '\n') {
  354                         seennl |= SEEN_CRLF;
  355                         s++;
  356                     }
  357                     else
  358                         seennl |= SEEN_CR;
  359                 }
  360                 if (s > end)
  361                     break;
  362                 if (seennl == SEEN_ALL)
  363                     break;
  364             }
  365         endscan:
  366             ;
  367         }
  368         else {
              /* Translating: copy from `in` to `out`, collapsing \r\n and \r
                 to \n.  `out` never runs ahead of `in`, so when the string
                 is unshared both pointers walk the same buffer. */
  369             PyObject *translated = NULL;
  370             Py_UNICODE *out_str;
  371             Py_UNICODE *in, *out, *end;
  372             if (Py_REFCNT(output) != 1) {
  373                 /* We could try to optimize this so that we only do a copy
  374                    when there is something to translate. On the other hand,
  375                    most decoders should only output non-shared strings, i.e.
  376                    translation is done in place. */
  377                 translated = PyUnicode_FromUnicode(NULL, len);
  378                 if (translated == NULL)
  379                     goto error;
  380                 assert(Py_REFCNT(translated) == 1);
  381                 memcpy(PyUnicode_AS_UNICODE(translated),
  382                        PyUnicode_AS_UNICODE(output),
  383                        len * sizeof(Py_UNICODE));
  384             }
  385             else {
  386                 translated = output;
  387             }
  388             out_str = PyUnicode_AS_UNICODE(translated);
  389             in = in_str;
  390             out = out_str;
  391             end = in_str + len;
  392             for (;;) {
  393                 Py_UNICODE c;
  394                 /* Fast loop for non-control characters */
  395                 while ((c = *in++) > '\r')
  396                     *out++ = c;
  397                 if (c == '\n') {
  398                     *out++ = c;
  399                     seennl |= SEEN_LF;
  400                     continue;
  401                 }
  402                 if (c == '\r') {
  403                     if (*in == '\n') {
  404                         in++;
  405                         seennl |= SEEN_CRLF;
  406                     }
  407                     else
  408                         seennl |= SEEN_CR;
  409                     *out++ = '\n';
  410                     continue;
  411                 }
                  /* Any other character <= '\r': stop once past the end,
                     otherwise copy it through unchanged. */
  412                 if (in > end)
  413                     break;
  414                 *out++ = c;
  415             }
  416             if (translated != output) {
  417                 Py_DECREF(output);
  418                 output = translated;
  419             }
              /* Each \r\n pair shrank by one character; trim the result. */
  420             if (out - out_str != len) {
  421                 if (PyUnicode_Resize(&output, out - out_str) < 0)
  422                     goto error;
  423             }
  424         }
  425         self->seennl |= seennl;
  426     }
  427
  428     return output;
  429
  430   error:
  431     Py_DECREF(output);
  432     return NULL;
  433 }
 434
 435 static PyObject *
 436 IncrementalNewlineDecoder_decode(PyNewLineDecoderObject *self,
 437                                  PyObject *args, PyObject *kwds)
 438 {
 439     char *kwlist[] = {"input", "final", NULL};
 440     PyObject *input;
 441     int final = 0;
 442
 443     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
 444                                      kwlist, &input, &final))
 445         return NULL;
 446     return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
 447 }
 448
 449 static PyObject *
 450 IncrementalNewlineDecoder_getstate(PyNewLineDecoderObject *self, PyObject *args)
 451 {
 452     PyObject *buffer;
 453     unsigned PY_LONG_LONG flag;
 454
 455     if (self->decoder != Py_None) {
 456         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
 457            _PyIO_str_getstate, NULL);
 458         if (state == NULL)
 459             return NULL;
 460         if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
 461             Py_DECREF(state);
 462             return NULL;
 463         }
 464         Py_INCREF(buffer);
 465         Py_DECREF(state);
 466     }
 467     else {
 468         buffer = PyBytes_FromString("");
 469         flag = 0;
 470     }
 471     flag <<= 1;
 472     if (self->pendingcr)
 473         flag |= 1;
 474     return Py_BuildValue("NK", buffer, flag);
 475 }
 476
 477 static PyObject *
 478 IncrementalNewlineDecoder_setstate(PyNewLineDecoderObject *self, PyObject *state)
 479 {
 480     PyObject *buffer;
 481     unsigned PY_LONG_LONG flag;
 482
 483     if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
 484         return NULL;
 485
 486     self->pendingcr = (int) flag & 1;
 487     flag >>= 1;
 488
 489     if (self->decoder != Py_None)
 490         return PyObject_CallMethod(self->decoder,
 491                                    "setstate", "((OK))", buffer, flag);
 492     else
 493         Py_RETURN_NONE;
 494 }
 495
 496 static PyObject *
 497 IncrementalNewlineDecoder_reset(PyNewLineDecoderObject *self, PyObject *args)
 498 {
 499     self->seennl = 0;
 500     self->pendingcr = 0;
 501     if (self->decoder != Py_None)
 502         return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
 503     else
 504         Py_RETURN_NONE;
 505 }
 506
 507 static PyObject *
 508 IncrementalNewlineDecoder_newlines_get(PyNewLineDecoderObject *self, void *context)
 509 {
 510     switch (self->seennl) {
 511     case SEEN_CR:
 512         return PyUnicode_FromString("\r");
 513     case SEEN_LF:
 514         return PyUnicode_FromString("\n");
 515     case SEEN_CRLF:
 516         return PyUnicode_FromString("\r\n");
 517     case SEEN_CR | SEEN_LF:
 518         return Py_BuildValue("ss", "\r", "\n");
 519     case SEEN_CR | SEEN_CRLF:
 520         return Py_BuildValue("ss", "\r", "\r\n");
 521     case SEEN_LF | SEEN_CRLF:
 522         return Py_BuildValue("ss", "\n", "\r\n");
 523     case SEEN_CR | SEEN_LF | SEEN_CRLF:
 524         return Py_BuildValue("sss", "\r", "\n", "\r\n");
 525     default:
 526         Py_RETURN_NONE;
 527    }
 528
 529 }
 530
 531
 532 static PyMethodDef IncrementalNewlineDecoder_methods[] = {
 533     {"decode", (PyCFunction)IncrementalNewlineDecoder_decode, METH_VARARGS|METH_KEYWORDS},
 534     {"getstate", (PyCFunction)IncrementalNewlineDecoder_getstate, METH_NOARGS},
 535     {"setstate", (PyCFunction)IncrementalNewlineDecoder_setstate, METH_O},
 536     {"reset", (PyCFunction)IncrementalNewlineDecoder_reset, METH_NOARGS},
 537     {0}
 538 };
 539
 540 static PyGetSetDef IncrementalNewlineDecoder_getset[] = {
 541     {"newlines", (getter)IncrementalNewlineDecoder_newlines_get, NULL, NULL},
 542     {0}
 543 };
 544
  /* Static type object for "_io.IncrementalNewlineDecoder".
   *
   * Instances are PyNewLineDecoderObject structs, created with
   * PyType_GenericNew, set up by IncrementalNewlineDecoder_init and
   * released by IncrementalNewlineDecoder_dealloc.  No base type is named
   * here and no traverse/clear slots are provided.  Py_TPFLAGS_BASETYPE
   * is set, so the type can be subclassed.
   */
  545 PyTypeObject PyIncrementalNewlineDecoder_Type = {
  546     PyVarObject_HEAD_INIT(NULL, 0)
  547     "_io.IncrementalNewlineDecoder", /* tp_name */
  548     sizeof(PyNewLineDecoderObject), /* tp_basicsize */
  549     0,                          /* tp_itemsize */
  550     (destructor)IncrementalNewlineDecoder_dealloc, /* tp_dealloc */
  551     0,                          /* tp_print */
  552     0,                          /* tp_getattr */
  553     0,                          /* tp_setattr */
  554     0,                          /* tp_compare */
  555     0,                          /* tp_repr */
  556     0,                          /* tp_as_number */
  557     0,                          /* tp_as_sequence */
  558     0,                          /* tp_as_mapping */
  559     0,                          /* tp_hash */
  560     0,                          /* tp_call */
  561     0,                          /* tp_str */
  562     0,                          /* tp_getattro */
  563     0,                          /* tp_setattro */
  564     0,                          /* tp_as_buffer */
  565     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /* tp_flags */
  566     IncrementalNewlineDecoder_doc,          /* tp_doc */
  567     0,                          /* tp_traverse */
  568     0,                          /* tp_clear */
  569     0,                          /* tp_richcompare */
  570     0,                          /* tp_weaklistoffset */
  571     0,                          /* tp_iter */
  572     0,                          /* tp_iternext */
  573     IncrementalNewlineDecoder_methods, /* tp_methods */
  574     0,                          /* tp_members */
  575     IncrementalNewlineDecoder_getset, /* tp_getset */
  576     0,                          /* tp_base */
  577     0,                          /* tp_dict */
  578     0,                          /* tp_descr_get */
  579     0,                          /* tp_descr_set */
  580     0,                          /* tp_dictoffset */
  581     (initproc)IncrementalNewlineDecoder_init, /* tp_init */
  582     0,                          /* tp_alloc */
  583     PyType_GenericNew,          /* tp_new */
  584 };
 585
 586
 587 /* TextIOWrapper */
 588
 589 PyDoc_STRVAR(TextIOWrapper_doc,
 590     "Character and line based layer over a BufferedIOBase object, buffer.\n"
 591     "\n"
 592     "encoding gives the name of the encoding that the stream will be\n"
 593     "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
 594     "\n"
 595     "errors determines the strictness of encoding and decoding (see the\n"
 596     "codecs.register) and defaults to \"strict\".\n"
 597     "\n"
 598     "newline can be None, '', '\\n', '\\r', or '\\r\\n'.  It controls the\n"
 599     "handling of line endings. If it is None, universal newlines is\n"
 600     "enabled.  With this enabled, on input, the lines endings '\\n', '\\r',\n"
 601     "or '\\r\\n' are translated to '\\n' before being returned to the\n"
 602     "caller. Conversely, on output, '\\n' is translated to the system\n"
 603     "default line seperator, os.linesep. If newline is any other of its\n"
 604     "legal values, that newline becomes the newline when the file is read\n"
 605     "and it is returned untranslated. On output, '\\n' is converted to the\n"
 606     "newline.\n"
 607     "\n"
 608     "If line_buffering is True, a call to flush is implied when a call to\n"
 609     "write contains a newline character."
 610     );
 611
 612 typedef PyObject *
 613         (*encodefunc_t)(PyObject *, PyObject *);
 614
  /* Instance layout of TextIOWrapper.  All PyObject * members are owned
     references and may be NULL; __init__ clears most of them before it
     refills the object. */
  615 typedef struct
  616 {
  617     PyObject_HEAD
  618     int ok; /* initialized? (set to 0 at the start of __init__) */
  619     Py_ssize_t chunk_size;  /* __init__ sets this to 8192 */
  620     PyObject *buffer;
  621     PyObject *encoding;     /* unicode object holding the encoding name */
  622     PyObject *encoder;
  623     PyObject *decoder;
  624     PyObject *readnl;       /* unicode copy of the `newline` argument,
                                 NULL when newline was None */
  625     PyObject *errors;       /* bytes object holding the error-handler name */
  626     const char *writenl; /* utf-8 encoded, NULL stands for \n */
  627     char line_buffering;
  628     char readuniversal;     /* newline was None or '' */
  629     char readtranslate;     /* newline was None */
  630     char writetranslate;    /* newline was None or non-empty */
  631     char seekable;
  632     char telling;
  633     /* Specialized encoding func (see below) */
  634     encodefunc_t encodefunc;
  635
  636     /* Reads and writes are internally buffered in order to speed things up.
  637        However, any read will first flush the write buffer if it isn't empty.
  638
  639        Please also note that text to be written is first encoded before being
  640        buffered. This is necessary so that encoding errors are immediately
  641        reported to the caller, but it unfortunately means that the
  642        IncrementalEncoder (whose encode() method is always written in Python)
  643        becomes a bottleneck for small writes.
  644     */
  645     PyObject *decoded_chars;       /* buffer for text returned from decoder */
  646     Py_ssize_t decoded_chars_used; /* offset into decoded_chars for read() */
  647     PyObject *pending_bytes;       /* list of bytes objects waiting to be
  648                                       written, or NULL */
  649     Py_ssize_t pending_bytes_count;
  650     PyObject *snapshot;
  651     /* snapshot is either None, or a tuple (dec_flags, next_input) where
  652      * dec_flags is the second (integer) item of the decoder state and
  653      * next_input is the chunk of input bytes that comes next after the
  654      * snapshot point.  We use this to reconstruct decoder states in tell().
  655      */
  656
  657     /* Cache raw object if it's a FileIO object */
  658     PyObject *raw;
  659
  660     PyObject *weakreflist;
  661     PyObject *dict;
  662 } PyTextIOWrapperObject;
 663
 664
 665 /* A couple of specialized cases in order to bypass the slow incremental
 666    encoding methods for the most popular encodings. */
 667
 668 static PyObject *
 669 ascii_encode(PyTextIOWrapperObject *self, PyObject *text)
 670 {
 671     return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
 672                                  PyUnicode_GET_SIZE(text),
 673                                  PyBytes_AS_STRING(self->errors));
 674 }
 675
 676 static PyObject *
 677 utf16be_encode(PyTextIOWrapperObject *self, PyObject *text)
 678 {
 679     return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
 680                                  PyUnicode_GET_SIZE(text),
 681                                  PyBytes_AS_STRING(self->errors), 1);
 682 }
 683
 684 static PyObject *
 685 utf16le_encode(PyTextIOWrapperObject *self, PyObject *text)
 686 {
 687     return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
 688                                  PyUnicode_GET_SIZE(text),
 689                                  PyBytes_AS_STRING(self->errors), -1);
 690 }
 691
 692 static PyObject *
 693 utf16_encode(PyTextIOWrapperObject *self, PyObject *text)
 694 {
 695     PyObject *res;
 696     res = PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
 697                                 PyUnicode_GET_SIZE(text),
 698                                 PyBytes_AS_STRING(self->errors), 0);
 699     if (res == NULL)
 700         return NULL;
 701     /* Next writes will skip the BOM and use native byte ordering */
 702 #if defined(WORDS_BIGENDIAN)
 703     self->encodefunc = (encodefunc_t) utf16be_encode;
 704 #else
 705     self->encodefunc = (encodefunc_t) utf16le_encode;
 706 #endif
 707     return res;
 708 }
 709
 710
 711 static PyObject *
 712 utf8_encode(PyTextIOWrapperObject *self, PyObject *text)
 713 {
 714     return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
 715                                 PyUnicode_GET_SIZE(text),
 716                                 PyBytes_AS_STRING(self->errors));
 717 }
 718
 719 static PyObject *
 720 latin1_encode(PyTextIOWrapperObject *self, PyObject *text)
 721 {
 722     return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
 723                                   PyUnicode_GET_SIZE(text),
 724                                   PyBytes_AS_STRING(self->errors));
 725 }
 726
 727 /* Map normalized encoding names onto the specialized encoding funcs */
 728
 729 typedef struct {
 730     const char *name;
 731     encodefunc_t encodefunc;
 732 } encodefuncentry;
 733
 734 static encodefuncentry encodefuncs[] = {
 735     {"ascii",       (encodefunc_t) ascii_encode},
 736     {"iso8859-1",   (encodefunc_t) latin1_encode},
 737     {"utf-16-be",   (encodefunc_t) utf16be_encode},
 738     {"utf-16-le",   (encodefunc_t) utf16le_encode},
 739     {"utf-16",      (encodefunc_t) utf16_encode},
 740     {"utf-8",       (encodefunc_t) utf8_encode},
 741     {NULL, NULL}
 742 };
 743
 744
 745 static int
 746 TextIOWrapper_init(PyTextIOWrapperObject *self, PyObject *args, PyObject *kwds)
 747 {
 748     char *kwlist[] = {"buffer", "encoding", "errors",
 749                       "newline", "line_buffering",
 750                       NULL};
 751     PyObject *buffer, *raw;
 752     char *encoding = NULL;
 753     char *errors = NULL;
 754     char *newline = NULL;
 755     int line_buffering = 0;
 756     _PyIO_State *state = IO_STATE;
 757
 758     PyObject *res;
 759     int r;
 760
 761     self->ok = 0;
 762     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
 763                                      kwlist, &buffer, &encoding, &errors,
 764                                      &newline, &line_buffering))
 765         return -1;
 766
 767     if (newline && newline[0] != '\0'
 768         && !(newline[0] == '\n' && newline[1] == '\0')
 769         && !(newline[0] == '\r' && newline[1] == '\0')
 770         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
 771         PyErr_Format(PyExc_ValueError,
 772                      "illegal newline value: %s", newline);
 773         return -1;
 774     }
 775
 776     Py_CLEAR(self->buffer);
 777     Py_CLEAR(self->encoding);
 778     Py_CLEAR(self->encoder);
 779     Py_CLEAR(self->decoder);
 780     Py_CLEAR(self->readnl);
 781     Py_CLEAR(self->decoded_chars);
 782     Py_CLEAR(self->pending_bytes);
 783     Py_CLEAR(self->snapshot);
 784     Py_CLEAR(self->errors);
 785     Py_CLEAR(self->raw);
 786     self->decoded_chars_used = 0;
 787     self->pending_bytes_count = 0;
 788     self->encodefunc = NULL;
 789
 790     if (encoding == NULL) {
 791         /* Try os.device_encoding(fileno) */
 792         PyObject *fileno;
 793         fileno = PyObject_CallMethod(buffer, "fileno", NULL);
 794         /* Ignore only AttributeError and UnsupportedOperation */
 795         if (fileno == NULL) {
 796             if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
 797                 PyErr_ExceptionMatches(state->unsupported_operation)) {
 798                 PyErr_Clear();
 799             }
 800             else {
 801                 goto error;
 802             }
 803         }
 804         else {
 805             self->encoding = PyObject_CallMethod(state->os_module,
 806                                                  "device_encoding",
 807                                                  "N", fileno);
 808             if (self->encoding == NULL)
 809                 goto error;
 810             else if (!PyUnicode_Check(self->encoding))
 811                 Py_CLEAR(self->encoding);
 812         }
 813     }
 814     if (encoding == NULL && self->encoding == NULL) {
 815         if (state->locale_module == NULL) {
 816             state->locale_module = PyImport_ImportModule("locale");
 817             if (state->locale_module == NULL)
 818                 goto catch_ImportError;
 819             else
 820                 goto use_locale;
 821         }
 822         else {
 823           use_locale:
 824             self->encoding = PyObject_CallMethod(
 825                 state->locale_module, "getpreferredencoding", NULL);
 826             if (self->encoding == NULL) {
 827               catch_ImportError:
 828                 /*
 829                  Importing locale can raise a ImportError because of
 830                  _functools, and locale.getpreferredencoding can raise a
 831                  ImportError if _locale is not available.  These will happen
 832                  during module building.
 833                 */
 834                 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
 835                     PyErr_Clear();
 836                     self->encoding = PyUnicode_FromString("ascii");
 837                 }
 838                 else
 839                     goto error;
 840             }
 841             else if (!PyUnicode_Check(self->encoding))
 842                 Py_CLEAR(self->encoding);
 843         }
 844     }
 845     if (self->encoding != NULL)
 846         encoding = _PyUnicode_AsString(self->encoding);
 847     else if (encoding != NULL) {
 848         self->encoding = PyUnicode_FromString(encoding);
 849         if (self->encoding == NULL)
 850             goto error;
 851     }
 852     else {
 853         PyErr_SetString(PyExc_IOError,
 854                         "could not determine default encoding");
 855     }
 856
 857     if (errors == NULL)
 858         errors = "strict";
 859     self->errors = PyBytes_FromString(errors);
 860     if (self->errors == NULL)
 861         goto error;
 862
 863     self->chunk_size = 8192;
 864     self->readuniversal = (newline == NULL || newline[0] == '\0');
 865     self->line_buffering = line_buffering;
 866     self->readtranslate = (newline == NULL);
 867     if (newline) {
 868         self->readnl = PyUnicode_FromString(newline);
 869         if (self->readnl == NULL)
 870             return -1;
 871     }
 872     self->writetranslate = (newline == NULL || newline[0] != '\0');
 873     if (!self->readuniversal && self->readnl) {
 874         self->writenl = _PyUnicode_AsString(self->readnl);
 875         if (!strcmp(self->writenl, "\n"))
 876             self->writenl = NULL;
 877     }
 878 #ifdef MS_WINDOWS
 879     else
 880         self->writenl = "\r\n";
 881 #endif
 882
 883     /* Build the decoder object */
 884     res = PyObject_CallMethod(buffer, "readable", NULL);
 885     if (res == NULL)
 886         goto error;
 887     r = PyObject_IsTrue(res);
 888     Py_DECREF(res);
 889     if (r == -1)
 890         goto error;
 891     if (r == 1) {
 892         self->decoder = PyCodec_IncrementalDecoder(
 893             encoding, errors);
 894         if (self->decoder == NULL)
 895             goto error;
 896
 897         if (self->readuniversal) {
 898             PyObject *incrementalDecoder = PyObject_CallFunction(
 899                 (PyObject *)&PyIncrementalNewlineDecoder_Type,
 900                 "Oi", self->decoder, (int)self->readtranslate);
 901             if (incrementalDecoder == NULL)
 902                 goto error;
 903             Py_CLEAR(self->decoder);
 904             self->decoder = incrementalDecoder;
 905         }
 906     }
 907
 908     /* Build the encoder object */
 909     res = PyObject_CallMethod(buffer, "writable", NULL);
 910     if (res == NULL)
 911         goto error;
 912     r = PyObject_IsTrue(res);
 913     Py_DECREF(res);
 914     if (r == -1)
 915         goto error;
 916     if (r == 1) {
 917         PyObject *ci;
 918         self->encoder = PyCodec_IncrementalEncoder(
 919             encoding, errors);
 920         if (self->encoder == NULL)
 921             goto error;
 922         /* Get the normalized named of the codec */
 923         ci = _PyCodec_Lookup(encoding);
 924         if (ci == NULL)
 925             goto error;
 926         res = PyObject_GetAttrString(ci, "name");
 927         Py_DECREF(ci);
 928         if (res == NULL)
 929             PyErr_Clear();
 930         else if (PyUnicode_Check(res)) {
 931             encodefuncentry *e = encodefuncs;
 932             while (e->name != NULL) {
 933                 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
 934                     self->encodefunc = e->encodefunc;
 935                     break;
 936                 }
 937                 e++;
 938             }
 939         }
 940         Py_XDECREF(res);
 941     }
 942
 943     self->buffer = buffer;
 944     Py_INCREF(buffer);
 945
 946     if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
 947         Py_TYPE(buffer) == &PyBufferedWriter_Type ||
 948         Py_TYPE(buffer) == &PyBufferedRandom_Type) {
 949         raw = PyObject_GetAttrString(buffer, "raw");
 950         /* Cache the raw FileIO object to speed up 'closed' checks */
 951         if (raw == NULL)
 952             PyErr_Clear();
 953         else if (Py_TYPE(raw) == &PyFileIO_Type)
 954             self->raw = raw;
 955         else
 956             Py_DECREF(raw);
 957     }
 958
 959     res = PyObject_CallMethod(buffer, "seekable", NULL);
 960     if (res == NULL)
 961         goto error;
 962     self->seekable = self->telling = PyObject_IsTrue(res);
 963     Py_DECREF(res);
 964
 965     self->ok = 1;
 966     return 0;
 967
 968   error:
 969     return -1;
 970 }
 971
 972 static int
 973 _TextIOWrapper_clear(PyTextIOWrapperObject *self)
 974 {
 975     if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
 976         return -1;
 977     self->ok = 0;
 978     Py_CLEAR(self->buffer);
 979     Py_CLEAR(self->encoding);
 980     Py_CLEAR(self->encoder);
 981     Py_CLEAR(self->decoder);
 982     Py_CLEAR(self->readnl);
 983     Py_CLEAR(self->decoded_chars);
 984     Py_CLEAR(self->pending_bytes);
 985     Py_CLEAR(self->snapshot);
 986     Py_CLEAR(self->errors);
 987     Py_CLEAR(self->raw);
 988     return 0;
 989 }
 990
 991 static void
 992 TextIOWrapper_dealloc(PyTextIOWrapperObject *self)
 993 {
 994     if (_TextIOWrapper_clear(self) < 0)
 995         return;
 996     _PyObject_GC_UNTRACK(self);
 997     if (self->weakreflist != NULL)
 998         PyObject_ClearWeakRefs((PyObject *)self);
 999     Py_CLEAR(self->dict);
1000     Py_TYPE(self)->tp_free((PyObject *)self);
1001 }
1002
/* Visit every object reference held by the wrapper (presumably installed as
 * the type's GC traversal slot -- the type definition is not visible here).
 *
 * The visited fields are the same ones released by _TextIOWrapper_clear(),
 * plus self->dict.  Py_VISIT returns from this function as soon as `visit`
 * reports a non-zero result; otherwise 0 is returned.
 */
static int
TextIOWrapper_traverse(PyTextIOWrapperObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1020
1021 static int
1022 TextIOWrapper_clear(PyTextIOWrapperObject *self)
1023 {
1024     if (_TextIOWrapper_clear(self) < 0)
1025         return -1;
1026     Py_CLEAR(self->dict);
1027     return 0;
1028 }
1029
/* Forward declaration: CHECK_CLOSED below calls this getter. */
static PyObject *
TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context);

/* This macro takes some shortcuts to make the common case faster.
 *
 * CHECK_CLOSED(self) makes the *enclosing function* return NULL when the
 * stream is closed or when the closed state cannot be determined:
 *
 *  - For exact PyTextIOWrapper_Type instances, the cached self->raw object
 *    (when set) is queried directly with _PyFileIO_closed(); otherwise the
 *    result of TextIOWrapper_closed_get() is tested for truth.  A closed
 *    stream raises ValueError("I/O operation on closed file.").
 *  - For any other type (subclasses), the check is delegated to
 *    _PyIOBase_checkClosed().
 *
 * The macro can only be used in functions returning a pointer, since it
 * expands to `return NULL` statements.  `self` is expanded several times
 * and is not parenthesized, so pass a plain variable.
 */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = TextIOWrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_checkClosed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1059
/* Make the enclosing function fail with ValueError when the wrapper has not
 * been (successfully) initialized, i.e. when self->ok <= 0.
 *
 * CHECK_INITIALIZED is for functions returning a pointer (returns NULL);
 * CHECK_INITIALIZED_INT is for functions returning an int (returns -1).
 *
 * NOTE: unlike CHECK_CLOSED, these macros expand to a bare `if` block rather
 * than a `do { ... } while (0)` statement.  Use them as a full statement of
 * their own and never as the unbraced body of an if/else.
 */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }

#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    }
1073
1074
1075 Py_LOCAL_INLINE(const Py_UNICODE *)
1076 findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1077 {
1078     /* like wcschr, but doesn't stop at NULL characters */
1079     while (size-- > 0) {
1080         if (*s == ch)
1081             return s;
1082         s++;
1083     }
1084     return NULL;
1085 }
1086
1087 /* Flush the internal write buffer. This doesn't explicitly flush the
1088    underlying buffered object, though. */
1089 static int
1090 _TextIOWrapper_writeflush(PyTextIOWrapperObject *self)
1091 {
1092     PyObject *b, *ret;
1093
1094     if (self->pending_bytes == NULL)
1095         return 0;
1096     b = _PyBytes_Join(_PyIO_empty_bytes, self->pending_bytes);
1097     if (b == NULL)
1098         return -1;
1099     ret = PyObject_CallMethodObjArgs(self->buffer,
1100                                      _PyIO_str_write, b, NULL);
1101     Py_DECREF(b);
1102     if (ret == NULL)
1103         return -1;
1104     Py_DECREF(ret);
1105     Py_CLEAR(self->pending_bytes);
1106     self->pending_bytes_count = 0;
1107     return 0;
1108 }
1109
1110 static PyObject *
1111 TextIOWrapper_write(PyTextIOWrapperObject *self, PyObject *args)
1112 {
1113     PyObject *ret;
1114     PyObject *text; /* owned reference */
1115     PyObject *b;
1116     Py_ssize_t textlen;
1117     int haslf = 0;
1118     int needflush = 0;
1119
1120     CHECK_INITIALIZED(self);
1121
1122     if (!PyArg_ParseTuple(args, "U:write", &text)) {
1123         return NULL;
1124     }
1125
1126     CHECK_CLOSED(self);
1127
1128     Py_INCREF(text);
1129
1130     textlen = PyUnicode_GetSize(text);
1131
1132     if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1133         if (findchar(PyUnicode_AS_UNICODE(text),
1134                      PyUnicode_GET_SIZE(text), '\n'))
1135             haslf = 1;
1136
1137     if (haslf && self->writetranslate && self->writenl != NULL) {
1138         PyObject *newtext = PyObject_CallMethod(
1139             text, "replace", "ss", "\n", self->writenl);
1140         Py_DECREF(text);
1141         if (newtext == NULL)
1142             return NULL;
1143         text = newtext;
1144     }
1145
1146     if (self->line_buffering &&
1147         (haslf ||
1148          findchar(PyUnicode_AS_UNICODE(text),
1149                   PyUnicode_GET_SIZE(text), '\r')))
1150         needflush = 1;
1151
1152     /* XXX What if we were just reading? */
1153     if (self->encodefunc != NULL)
1154         b = (*self->encodefunc)((PyObject *) self, text);
1155     else
1156         b = PyObject_CallMethodObjArgs(self->encoder,
1157                                        _PyIO_str_encode, text, NULL);
1158     Py_DECREF(text);
1159     if (b == NULL)
1160         return NULL;
1161
1162     if (self->pending_bytes == NULL) {
1163         self->pending_bytes = PyList_New(0);
1164         if (self->pending_bytes == NULL) {
1165             Py_DECREF(b);
1166             return NULL;
1167         }
1168         self->pending_bytes_count = 0;
1169     }
1170     if (PyList_Append(self->pending_bytes, b) < 0) {
1171         Py_DECREF(b);
1172         return NULL;
1173     }
1174     self->pending_bytes_count += PyBytes_GET_SIZE(b);
1175     Py_DECREF(b);
1176     if (self->pending_bytes_count > self->chunk_size || needflush) {
1177         if (_TextIOWrapper_writeflush(self) < 0)
1178             return NULL;
1179     }
1180
1181     if (needflush) {
1182         ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1183         if (ret == NULL)
1184             return NULL;
1185         Py_DECREF(ret);
1186     }
1187
1188     Py_CLEAR(self->snapshot);
1189
1190     if (self->decoder) {
1191         ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1192         if (ret == NULL)
1193             return NULL;
1194         Py_DECREF(ret);
1195     }
1196
1197     return PyLong_FromSsize_t(textlen);
1198 }
1199
1200 /* Steal a reference to chars and store it in the decoded_char buffer;
1201  */
1202 static void
1203 TextIOWrapper_set_decoded_chars(PyTextIOWrapperObject *self, PyObject *chars)
1204 {
1205     Py_CLEAR(self->decoded_chars);
1206     self->decoded_chars = chars;
1207     self->decoded_chars_used = 0;
1208 }
1209
1210 static PyObject *
1211 TextIOWrapper_get_decoded_chars(PyTextIOWrapperObject *self, Py_ssize_t n)
1212 {
1213     PyObject *chars;
1214     Py_ssize_t avail;
1215
1216     if (self->decoded_chars == NULL)
1217         return PyUnicode_FromStringAndSize(NULL, 0);
1218
1219     avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1220              - self->decoded_chars_used);
1221
1222     assert(avail >= 0);
1223
1224     if (n < 0 || n > avail)
1225         n = avail;
1226
1227     if (self->decoded_chars_used > 0 || n < avail) {
1228         chars = PyUnicode_FromUnicode(
1229             PyUnicode_AS_UNICODE(self->decoded_chars)
1230             + self->decoded_chars_used, n);
1231         if (chars == NULL)
1232             return NULL;
1233     }
1234     else {
1235         chars = self->decoded_chars;
1236         Py_INCREF(chars);
1237     }
1238
1239     self->decoded_chars_used += n;
1240     return chars;
1241 }
1242
1243 /* Read and decode the next chunk of data from the BufferedReader.
1244  */
1245 static int
1246 TextIOWrapper_read_chunk(PyTextIOWrapperObject *self)
1247 {
1248     PyObject *dec_buffer = NULL;
1249     PyObject *dec_flags = NULL;
1250     PyObject *input_chunk = NULL;
1251     PyObject *decoded_chars, *chunk_size;
1252     int eof;
1253
1254     /* The return value is True unless EOF was reached.  The decoded string is
1255      * placed in self._decoded_chars (replacing its previous value).  The
1256      * entire input chunk is sent to the decoder, though some of it may remain
1257      * buffered in the decoder, yet to be converted.
1258      */
1259
1260     if (self->decoder == NULL) {
1261         PyErr_SetString(PyExc_ValueError, "no decoder");
1262         return -1;
1263     }
1264
1265     if (self->telling) {
1266         /* To prepare for tell(), we need to snapshot a point in the file
1267          * where the decoder's input buffer is empty.
1268          */
1269
1270         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1271                                                      _PyIO_str_getstate, NULL);
1272         if (state == NULL)
1273             return -1;
1274         /* Given this, we know there was a valid snapshot point
1275          * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1276          */
1277         if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1278             Py_DECREF(state);
1279             return -1;
1280         }
1281         Py_INCREF(dec_buffer);
1282         Py_INCREF(dec_flags);
1283         Py_DECREF(state);
1284     }
1285
1286     /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1287     chunk_size = PyLong_FromSsize_t(self->chunk_size);
1288     if (chunk_size == NULL)
1289         goto fail;
1290     input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1291         _PyIO_str_read1, chunk_size, NULL);
1292     Py_DECREF(chunk_size);
1293     if (input_chunk == NULL)
1294         goto fail;
1295     assert(PyBytes_Check(input_chunk));
1296
1297     eof = (PyBytes_Size(input_chunk) == 0);
1298
1299     if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1300         decoded_chars = _PyIncrementalNewlineDecoder_decode(
1301             self->decoder, input_chunk, eof);
1302     }
1303     else {
1304         decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1305             _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1306     }
1307
1308     /* TODO sanity check: isinstance(decoded_chars, unicode) */
1309     if (decoded_chars == NULL)
1310         goto fail;
1311     TextIOWrapper_set_decoded_chars(self, decoded_chars);
1312     if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1313         eof = 0;
1314
1315     if (self->telling) {
1316         /* At the snapshot point, len(dec_buffer) bytes before the read, the
1317          * next input to be decoded is dec_buffer + input_chunk.
1318          */
1319         PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1320         if (next_input == NULL)
1321             goto fail;
1322         assert (PyBytes_Check(next_input));
1323         Py_DECREF(dec_buffer);
1324         Py_CLEAR(self->snapshot);
1325         self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1326     }
1327     Py_DECREF(input_chunk);
1328
1329     return (eof == 0);
1330
1331   fail:
1332     Py_XDECREF(dec_buffer);
1333     Py_XDECREF(dec_flags);
1334     Py_XDECREF(input_chunk);
1335     return -1;
1336 }
1337
1338 static PyObject *
1339 TextIOWrapper_read(PyTextIOWrapperObject *self, PyObject *args)
1340 {
1341     Py_ssize_t n = -1;
1342     PyObject *result = NULL, *chunks = NULL;
1343
1344     CHECK_INITIALIZED(self);
1345
1346     if (!PyArg_ParseTuple(args, "|n:read", &n))
1347         return NULL;
1348
1349     CHECK_CLOSED(self);
1350
1351     if (self->decoder == NULL) {
1352         PyErr_SetString(PyExc_IOError, "not readable");
1353         return NULL;
1354     }
1355
1356     if (_TextIOWrapper_writeflush(self) < 0)
1357         return NULL;
1358
1359     if (n < 0) {
1360         /* Read everything */
1361         PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1362         PyObject *decoded;
1363         if (bytes == NULL)
1364             goto fail;
1365         decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1366                                              bytes, Py_True, NULL);
1367         Py_DECREF(bytes);
1368         if (decoded == NULL)
1369             goto fail;
1370
1371         result = TextIOWrapper_get_decoded_chars(self, -1);
1372
1373         if (result == NULL) {
1374             Py_DECREF(decoded);
1375             return NULL;
1376         }
1377
1378         PyUnicode_AppendAndDel(&result, decoded);
1379         if (result == NULL)
1380             goto fail;
1381
1382         Py_CLEAR(self->snapshot);
1383         return result;
1384     }
1385     else {
1386         int res = 1;
1387         Py_ssize_t remaining = n;
1388
1389         result = TextIOWrapper_get_decoded_chars(self, n);
1390         if (result == NULL)
1391             goto fail;
1392         remaining -= PyUnicode_GET_SIZE(result);
1393
1394         /* Keep reading chunks until we have n characters to return */
1395         while (remaining > 0) {
1396             res = TextIOWrapper_read_chunk(self);
1397             if (res < 0)
1398                 goto fail;
1399             if (res == 0)  /* EOF */
1400                 break;
1401             if (chunks == NULL) {
1402                 chunks = PyList_New(0);
1403                 if (chunks == NULL)
1404                     goto fail;
1405             }
1406             if (PyList_Append(chunks, result) < 0)
1407                 goto fail;
1408             Py_DECREF(result);
1409             result = TextIOWrapper_get_decoded_chars(self, remaining);
1410             if (result == NULL)
1411                 goto fail;
1412             remaining -= PyUnicode_GET_SIZE(result);
1413         }
1414         if (chunks != NULL) {
1415             if (result != NULL && PyList_Append(chunks, result) < 0)
1416                 goto fail;
1417             Py_CLEAR(result);
1418             result = PyUnicode_Join(_PyIO_empty_str, chunks);
1419             if (result == NULL)
1420                 goto fail;
1421             Py_CLEAR(chunks);
1422         }
1423         return result;
1424     }
1425   fail:
1426     Py_XDECREF(result);
1427     Py_XDECREF(chunks);
1428     return NULL;
1429 }
1430
1431
1432 /* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1433    that is to the NUL character. Otherwise the function will produce
1434    incorrect results. */
1435 static Py_UNICODE *
1436 find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1437 {
1438     Py_UNICODE *s = start;
1439     for (;;) {
1440         while (*s > ch)
1441             s++;
1442         if (*s == ch)
1443             return s;
1444         if (s == end)
1445             return NULL;
1446         s++;
1447     }
1448 }
1449
1450 Py_ssize_t
1451 _PyIO_find_line_ending(
1452     int translated, int universal, PyObject *readnl,
1453     Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1454 {
1455     Py_ssize_t len = end - start;
1456
1457     if (translated) {
1458         /* Newlines are already translated, only search for \n */
1459         Py_UNICODE *pos = find_control_char(start, end, '\n');
1460         if (pos != NULL)
1461             return pos - start + 1;
1462         else {
1463             *consumed = len;
1464             return -1;
1465         }
1466     }
1467     else if (universal) {
1468         /* Universal newline search. Find any of \r, \r\n, \n
1469          * The decoder ensures that \r\n are not split in two pieces
1470          */
1471         Py_UNICODE *s = start;
1472         for (;;) {
1473             Py_UNICODE ch;
1474             /* Fast path for non-control chars. The loop always ends
1475                since the Py_UNICODE storage is NUL-terminated. */
1476             while (*s > '\r')
1477                 s++;
1478             if (s >= end) {
1479                 *consumed = len;
1480                 return -1;
1481             }
1482             ch = *s++;
1483             if (ch == '\n')
1484                 return s - start;
1485             if (ch == '\r') {
1486                 if (*s == '\n')
1487                     return s - start + 1;
1488                 else
1489                     return s - start;
1490             }
1491         }
1492     }
1493     else {
1494         /* Non-universal mode. */
1495         Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
1496         Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
1497         if (readnl_len == 1) {
1498             Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1499             if (pos != NULL)
1500                 return pos - start + 1;
1501             *consumed = len;
1502             return -1;
1503         }
1504         else {
1505             Py_UNICODE *s = start;
1506             Py_UNICODE *e = end - readnl_len + 1;
1507             Py_UNICODE *pos;
1508             if (e < s)
1509                 e = s;
1510             while (s < e) {
1511                 Py_ssize_t i;
1512                 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1513                 if (pos == NULL || pos >= e)
1514                     break;
1515                 for (i = 1; i < readnl_len; i++) {
1516                     if (pos[i] != nl[i])
1517                         break;
1518                 }
1519                 if (i == readnl_len)
1520                     return pos - start + readnl_len;
1521                 s = pos + 1;
1522             }
1523             pos = find_control_char(e, end, nl[0]);
1524             if (pos == NULL)
1525                 *consumed = len;
1526             else
1527                 *consumed = pos - start;
1528             return -1;
1529         }
1530     }
1531 }
1532
1533 static PyObject *
1534 _TextIOWrapper_readline(PyTextIOWrapperObject *self, Py_ssize_t limit)
1535 {
1536     PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1537     Py_ssize_t start, endpos, chunked, offset_to_buffer;
1538     int res;
1539
1540     CHECK_CLOSED(self);
1541
1542     if (_TextIOWrapper_writeflush(self) < 0)
1543         return NULL;
1544
1545     chunked = 0;
1546
1547     while (1) {
1548         Py_UNICODE *ptr;
1549         Py_ssize_t line_len;
1550         Py_ssize_t consumed = 0;
1551
1552         /* First, get some data if necessary */
1553         res = 1;
1554         while (!self->decoded_chars ||
1555                !PyUnicode_GET_SIZE(self->decoded_chars)) {
1556             res = TextIOWrapper_read_chunk(self);
1557             if (res < 0)
1558                 goto error;
1559             if (res == 0)
1560                 break;
1561         }
1562         if (res == 0) {
1563             /* end of file */
1564             TextIOWrapper_set_decoded_chars(self, NULL);
1565             Py_CLEAR(self->snapshot);
1566             start = endpos = offset_to_buffer = 0;
1567             break;
1568         }
1569
1570         if (remaining == NULL) {
1571             line = self->decoded_chars;
1572             start = self->decoded_chars_used;
1573             offset_to_buffer = 0;
1574             Py_INCREF(line);
1575         }
1576         else {
1577             assert(self->decoded_chars_used == 0);
1578             line = PyUnicode_Concat(remaining, self->decoded_chars);
1579             start = 0;
1580             offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1581             Py_CLEAR(remaining);
1582             if (line == NULL)
1583                 goto error;
1584         }
1585
1586         ptr = PyUnicode_AS_UNICODE(line);
1587         line_len = PyUnicode_GET_SIZE(line);
1588
1589         endpos = _PyIO_find_line_ending(
1590             self->readtranslate, self->readuniversal, self->readnl,
1591             ptr + start, ptr + line_len, &consumed);
1592         if (endpos >= 0) {
1593             endpos += start;
1594             if (limit >= 0 && (endpos - start) + chunked >= limit)
1595                 endpos = start + limit - chunked;
1596             break;
1597         }
1598
1599         /* We can put aside up to `endpos` */
1600         endpos = consumed + start;
1601         if (limit >= 0 && (endpos - start) + chunked >= limit) {
1602             /* Didn't find line ending, but reached length limit */
1603             endpos = start + limit - chunked;
1604             break;
1605         }
1606
1607         if (endpos > start) {
1608             /* No line ending seen yet - put aside current data */
1609             PyObject *s;
1610             if (chunks == NULL) {
1611                 chunks = PyList_New(0);
1612                 if (chunks == NULL)
1613                     goto error;
1614             }
1615             s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1616             if (s == NULL)
1617                 goto error;
1618             if (PyList_Append(chunks, s) < 0) {
1619                 Py_DECREF(s);
1620                 goto error;
1621             }
1622             chunked += PyUnicode_GET_SIZE(s);
1623             Py_DECREF(s);
1624         }
1625         /* There may be some remaining bytes we'll have to prepend to the
1626            next chunk of data */
1627         if (endpos < line_len) {
1628             remaining = PyUnicode_FromUnicode(
1629                     ptr + endpos, line_len - endpos);
1630             if (remaining == NULL)
1631                 goto error;
1632         }
1633         Py_CLEAR(line);
1634         /* We have consumed the buffer */
1635         TextIOWrapper_set_decoded_chars(self, NULL);
1636     }
1637
1638     if (line != NULL) {
1639         /* Our line ends in the current buffer */
1640         self->decoded_chars_used = endpos - offset_to_buffer;
1641         if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1642             if (start == 0 && Py_REFCNT(line) == 1) {
1643                 if (PyUnicode_Resize(&line, endpos) < 0)
1644                     goto error;
1645             }
1646             else {
1647                 PyObject *s = PyUnicode_FromUnicode(
1648                         PyUnicode_AS_UNICODE(line) + start, endpos - start);
1649                 Py_CLEAR(line);
1650                 if (s == NULL)
1651                     goto error;
1652                 line = s;
1653             }
1654         }
1655     }
1656     if (remaining != NULL) {
1657         if (chunks == NULL) {
1658             chunks = PyList_New(0);
1659             if (chunks == NULL)
1660                 goto error;
1661         }
1662         if (PyList_Append(chunks, remaining) < 0)
1663             goto error;
1664         Py_CLEAR(remaining);
1665     }
1666     if (chunks != NULL) {
1667         if (line != NULL && PyList_Append(chunks, line) < 0)
1668             goto error;
1669         Py_CLEAR(line);
1670         line = PyUnicode_Join(_PyIO_empty_str, chunks);
1671         if (line == NULL)
1672             goto error;
1673         Py_DECREF(chunks);
1674     }
1675     if (line == NULL)
1676         line = PyUnicode_FromStringAndSize(NULL, 0);
1677
1678     return line;
1679
1680   error:
1681     Py_XDECREF(chunks);
1682     Py_XDECREF(remaining);
1683     Py_XDECREF(line);
1684     return NULL;
1685 }
1686
1687 static PyObject *
1688 TextIOWrapper_readline(PyTextIOWrapperObject *self, PyObject *args)
1689 {
1690     Py_ssize_t limit = -1;
1691
1692     CHECK_INITIALIZED(self);
1693     if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1694         return NULL;
1695     }
1696     return _TextIOWrapper_readline(self, limit);
1697 }
1698
1699 /* Seek and Tell */
1700
/* Unpacked form of the integer "cookie" handled by
 * TextIOWrapper_parseCookie() / TextIOWrapper_buildCookie().
 *
 * NOTE(review): the field meanings below are inferred from their names and
 * from _TextIOWrapper_decoder_setstate(); confirm against seek()/tell().
 */
typedef struct {
    Py_off_t start_pos;     /* presumably a byte position in the buffer */
    int dec_flags;          /* flags value passed to decoder.setstate() */
    int bytes_to_feed;      /* presumably bytes to re-feed to the decoder */
    int chars_to_skip;      /* presumably decoded characters to discard */
    char need_eof;          /* presumably whether to decode with final=True */
} CookieStruct;
1708
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
   The following macros define at which offsets in the intermediary byte
   string the various CookieStruct fields will be stored.

   COOKIE_BUF_LEN is the sum of the sizes of all CookieStruct fields (no
   padding), and the OFF_* offsets lay the fields out back to back.
   IS_LITTLE_ENDIAN is only a helper for the pack/unpack functions and is
   #undef'ed right after them.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1746
1747 static int
1748 TextIOWrapper_parseCookie(CookieStruct *cookie, PyObject *cookieObj)
1749 {
1750     unsigned char buffer[COOKIE_BUF_LEN];
1751     PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1752     if (cookieLong == NULL)
1753         return -1;
1754
1755     if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1756                             IS_LITTLE_ENDIAN, 0) < 0) {
1757         Py_DECREF(cookieLong);
1758         return -1;
1759     }
1760     Py_DECREF(cookieLong);
1761
1762     memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1763     memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1764     memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1765     memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1766     memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1767
1768     return 0;
1769 }
1770
1771 static PyObject *
1772 TextIOWrapper_buildCookie(CookieStruct *cookie)
1773 {
1774     unsigned char buffer[COOKIE_BUF_LEN];
1775
1776     memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1777     memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1778     memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1779     memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1780     memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1781
1782     return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1783 }
1784 #undef IS_LITTLE_ENDIAN
1785
1786 static int
1787 _TextIOWrapper_decoder_setstate(PyTextIOWrapperObject *self,
1788                                 CookieStruct *cookie)
1789 {
1790     PyObject *res;
1791     /* When seeking to the start of the stream, we call decoder.reset()
1792        rather than decoder.getstate().
1793        This is for a few decoders such as utf-16 for which the state value
1794        at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1795        utf-16, that we are expecting a BOM).
1796     */
1797     if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1798         res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1799     else
1800         res = PyObject_CallMethod(self->decoder, "setstate",
1801                                   "((yi))", "", cookie->dec_flags);
1802     if (res == NULL)
1803         return -1;
1804     Py_DECREF(res);
1805     return 0;
1806 }
1807
1808 static PyObject *
1809 TextIOWrapper_seek(PyTextIOWrapperObject *self, PyObject *args)
1810 {
1811     PyObject *cookieObj, *posobj;
1812     CookieStruct cookie;
1813     int whence = 0;
1814     static PyObject *zero = NULL;
1815     PyObject *res;
1816     int cmp;
1817
1818     CHECK_INITIALIZED(self);
1819
1820     if (zero == NULL) {
1821         zero = PyLong_FromLong(0L);
1822         if (zero == NULL)
1823             return NULL;
1824     }
1825
1826     if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1827         return NULL;
1828     CHECK_CLOSED(self);
1829
1830     Py_INCREF(cookieObj);
1831
1832     if (!self->seekable) {
1833         PyErr_SetString(PyExc_IOError,
1834                         "underlying stream is not seekable");
1835         goto fail;
1836     }
1837
1838     if (whence == 1) {
1839         /* seek relative to current position */
1840         cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
1841         if (cmp < 0)
1842             goto fail;
1843
1844         if (cmp == 0) {
1845             PyErr_SetString(PyExc_IOError,
1846                             "can't do nonzero cur-relative seeks");
1847             goto fail;
1848         }
1849
1850         /* Seeking to the current position should attempt to
1851          * sync the underlying buffer with the current position.
1852          */
1853         Py_DECREF(cookieObj);
1854         cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
1855         if (cookieObj == NULL)
1856             goto fail;
1857     }
1858     else if (whence == 2) {
1859         /* seek relative to end of file */
1860
1861         cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
1862         if (cmp < 0)
1863             goto fail;
1864
1865         if (cmp == 0) {
1866             PyErr_SetString(PyExc_IOError,
1867                             "can't do nonzero end-relative seeks");
1868             goto fail;
1869         }
1870
1871         res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
1872         if (res == NULL)
1873             goto fail;
1874         Py_DECREF(res);
1875
1876         TextIOWrapper_set_decoded_chars(self, NULL);
1877         Py_CLEAR(self->snapshot);
1878         if (self->decoder) {
1879             res = PyObject_CallMethod(self->decoder, "reset", NULL);
1880             if (res == NULL)
1881                 goto fail;
1882             Py_DECREF(res);
1883         }
1884
1885         res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
1886         Py_XDECREF(cookieObj);
1887         return res;
1888     }
1889     else if (whence != 0) {
1890         PyErr_Format(PyExc_ValueError,
1891                      "invalid whence (%d, should be 0, 1 or 2)", whence);
1892         goto fail;
1893     }
1894
1895     cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
1896     if (cmp < 0)
1897         goto fail;
1898
1899     if (cmp == 1) {
1900         PyErr_Format(PyExc_ValueError,
1901                      "negative seek position %R", cookieObj);
1902         goto fail;
1903     }
1904
1905     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1906     if (res == NULL)
1907         goto fail;
1908     Py_DECREF(res);
1909
1910     /* The strategy of seek() is to go back to the safe start point
1911      * and replay the effect of read(chars_to_skip) from there.
1912      */
1913     if (TextIOWrapper_parseCookie(&cookie, cookieObj) < 0)
1914         goto fail;
1915
1916     /* Seek back to the safe start point. */
1917     posobj = PyLong_FromOff_t(cookie.start_pos);
1918     if (posobj == NULL)
1919         goto fail;
1920     res = PyObject_CallMethodObjArgs(self->buffer,
1921                                      _PyIO_str_seek, posobj, NULL);
1922     Py_DECREF(posobj);
1923     if (res == NULL)
1924         goto fail;
1925     Py_DECREF(res);
1926
1927     TextIOWrapper_set_decoded_chars(self, NULL);
1928     Py_CLEAR(self->snapshot);
1929
1930     /* Restore the decoder to its state from the safe start point. */
1931     if (self->decoder) {
1932         if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
1933             goto fail;
1934     }
1935
1936     if (cookie.chars_to_skip) {
1937         /* Just like _read_chunk, feed the decoder and save a snapshot. */
1938         PyObject *input_chunk = PyObject_CallMethod(
1939             self->buffer, "read", "i", cookie.bytes_to_feed);
1940         PyObject *decoded;
1941
1942         if (input_chunk == NULL)
1943             goto fail;
1944
1945         assert (PyBytes_Check(input_chunk));
1946
1947         self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
1948         if (self->snapshot == NULL) {
1949             Py_DECREF(input_chunk);
1950             goto fail;
1951         }
1952
1953         decoded = PyObject_CallMethod(self->decoder, "decode",
1954                                       "Oi", input_chunk, (int)cookie.need_eof);
1955
1956         if (decoded == NULL)
1957             goto fail;
1958
1959         TextIOWrapper_set_decoded_chars(self, decoded);
1960
1961         /* Skip chars_to_skip of the decoded characters. */
1962         if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
1963             PyErr_SetString(PyExc_IOError, "can't restore logical file position");
1964             goto fail;
1965         }
1966         self->decoded_chars_used = cookie.chars_to_skip;
1967     }
1968     else {
1969         self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
1970         if (self->snapshot == NULL)
1971             goto fail;
1972     }
1973
1974     return cookieObj;
1975   fail:
1976     Py_XDECREF(cookieObj);
1977     return NULL;
1978
1979 }
1980
1981 static PyObject *
1982 TextIOWrapper_tell(PyTextIOWrapperObject *self, PyObject *args)
1983 {
1984     PyObject *res;
1985     PyObject *posobj = NULL;
1986     CookieStruct cookie = {0,0,0,0,0};
1987     PyObject *next_input;
1988     Py_ssize_t chars_to_skip, chars_decoded;
1989     PyObject *saved_state = NULL;
1990     char *input, *input_end;
1991
1992     CHECK_INITIALIZED(self);
1993     CHECK_CLOSED(self);
1994
1995     if (!self->seekable) {
1996         PyErr_SetString(PyExc_IOError,
1997                         "underlying stream is not seekable");
1998         goto fail;
1999     }
2000     if (!self->telling) {
2001         PyErr_SetString(PyExc_IOError,
2002                         "telling position disabled by next() call");
2003         goto fail;
2004     }
2005
2006     if (_TextIOWrapper_writeflush(self) < 0)
2007         return NULL;
2008     res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2009     if (res == NULL)
2010         goto fail;
2011     Py_DECREF(res);
2012
2013     posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2014     if (posobj == NULL)
2015         goto fail;
2016
2017     if (self->decoder == NULL || self->snapshot == NULL) {
2018         assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2019         return posobj;
2020     }
2021
2022 #if defined(HAVE_LARGEFILE_SUPPORT)
2023     cookie.start_pos = PyLong_AsLongLong(posobj);
2024 #else
2025     cookie.start_pos = PyLong_AsLong(posobj);
2026 #endif
2027     if (PyErr_Occurred())
2028         goto fail;
2029
2030     /* Skip backward to the snapshot point (see _read_chunk). */
2031     if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2032         goto fail;
2033
2034     assert (PyBytes_Check(next_input));
2035
2036     cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2037
2038     /* How many decoded characters have been used up since the snapshot? */
2039     if (self->decoded_chars_used == 0)  {
2040         /* We haven't moved from the snapshot point. */
2041         Py_DECREF(posobj);
2042         return TextIOWrapper_buildCookie(&cookie);
2043     }
2044
2045     chars_to_skip = self->decoded_chars_used;
2046
2047     /* Starting from the snapshot position, we will walk the decoder
2048      * forward until it gives us enough decoded characters.
2049      */
2050     saved_state = PyObject_CallMethodObjArgs(self->decoder,
2051                                              _PyIO_str_getstate, NULL);
2052     if (saved_state == NULL)
2053         goto fail;
2054
2055     /* Note our initial start point. */
2056     if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
2057         goto fail;
2058
2059     /* Feed the decoder one byte at a time.  As we go, note the
2060      * nearest "safe start point" before the current location
2061      * (a point where the decoder has nothing buffered, so seek()
2062      * can safely start from there and advance to this location).
2063      */
2064     chars_decoded = 0;
2065     input = PyBytes_AS_STRING(next_input);
2066     input_end = input + PyBytes_GET_SIZE(next_input);
2067     while (input < input_end) {
2068         PyObject *state;
2069         char *dec_buffer;
2070         Py_ssize_t dec_buffer_len;
2071         int dec_flags;
2072
2073         PyObject *decoded = PyObject_CallMethod(
2074             self->decoder, "decode", "y#", input, 1);
2075         if (decoded == NULL)
2076             goto fail;
2077         assert (PyUnicode_Check(decoded));
2078         chars_decoded += PyUnicode_GET_SIZE(decoded);
2079         Py_DECREF(decoded);
2080
2081         cookie.bytes_to_feed += 1;
2082
2083         state = PyObject_CallMethodObjArgs(self->decoder,
2084                                            _PyIO_str_getstate, NULL);
2085         if (state == NULL)
2086             goto fail;
2087         if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2088             Py_DECREF(state);
2089             goto fail;
2090         }
2091         Py_DECREF(state);
2092
2093         if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2094             /* Decoder buffer is empty, so this is a safe start point. */
2095             cookie.start_pos += cookie.bytes_to_feed;
2096             chars_to_skip -= chars_decoded;
2097             cookie.dec_flags = dec_flags;
2098             cookie.bytes_to_feed = 0;
2099             chars_decoded = 0;
2100         }
2101         if (chars_decoded >= chars_to_skip)
2102             break;
2103         input++;
2104     }
2105     if (input == input_end) {
2106         /* We didn't get enough decoded data; signal EOF to get more. */
2107         PyObject *decoded = PyObject_CallMethod(
2108             self->decoder, "decode", "yi", "", /* final = */ 1);
2109         if (decoded == NULL)
2110             goto fail;
2111         assert (PyUnicode_Check(decoded));
2112         chars_decoded += PyUnicode_GET_SIZE(decoded);
2113         Py_DECREF(decoded);
2114         cookie.need_eof = 1;
2115
2116         if (chars_decoded < chars_to_skip) {
2117             PyErr_SetString(PyExc_IOError,
2118                             "can't reconstruct logical file position");
2119             goto fail;
2120         }
2121     }
2122
2123     /* finally */
2124     Py_XDECREF(posobj);
2125     res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2126     Py_DECREF(saved_state);
2127     if (res == NULL)
2128         return NULL;
2129     Py_DECREF(res);
2130
2131     /* The returned cookie corresponds to the last safe start point. */
2132     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2133     return TextIOWrapper_buildCookie(&cookie);
2134
2135   fail:
2136     Py_XDECREF(posobj);
2137     if (saved_state) {
2138         PyObject *type, *value, *traceback;
2139         PyErr_Fetch(&type, &value, &traceback);
2140
2141         res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2142         Py_DECREF(saved_state);
2143         if (res == NULL)
2144             return NULL;
2145         Py_DECREF(res);
2146
2147         PyErr_Restore(type, value, traceback);
2148     }
2149     return NULL;
2150 }
2151
2152 static PyObject *
2153 TextIOWrapper_truncate(PyTextIOWrapperObject *self, PyObject *args)
2154 {
2155     PyObject *pos = Py_None;
2156     PyObject *res;
2157
2158     CHECK_INITIALIZED(self)
2159     if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2160         return NULL;
2161     }
2162
2163     res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2164     if (res == NULL)
2165         return NULL;
2166     Py_DECREF(res);
2167
2168     if (pos != Py_None) {
2169         res = PyObject_CallMethodObjArgs((PyObject *) self,
2170                                           _PyIO_str_seek, pos, NULL);
2171         if (res == NULL)
2172             return NULL;
2173         Py_DECREF(res);
2174     }
2175
2176     return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL);
2177 }
2178
2179 static PyObject *
2180 TextIOWrapper_repr(PyTextIOWrapperObject *self)
2181 {
2182   CHECK_INITIALIZED(self);
2183   return PyUnicode_FromFormat("<TextIOWrapper encoding=%S>", self->encoding);
2184 }
2185
2186
2187 /* Inquiries */
2188
2189 static PyObject *
2190 TextIOWrapper_fileno(PyTextIOWrapperObject *self, PyObject *args)
2191 {
2192     CHECK_INITIALIZED(self);
2193     return PyObject_CallMethod(self->buffer, "fileno", NULL);
2194 }
2195
2196 static PyObject *
2197 TextIOWrapper_seekable(PyTextIOWrapperObject *self, PyObject *args)
2198 {
2199     CHECK_INITIALIZED(self);
2200     return PyObject_CallMethod(self->buffer, "seekable", NULL);
2201 }
2202
2203 static PyObject *
2204 TextIOWrapper_readable(PyTextIOWrapperObject *self, PyObject *args)
2205 {
2206     CHECK_INITIALIZED(self);
2207     return PyObject_CallMethod(self->buffer, "readable", NULL);
2208 }
2209
2210 static PyObject *
2211 TextIOWrapper_writable(PyTextIOWrapperObject *self, PyObject *args)
2212 {
2213     CHECK_INITIALIZED(self);
2214     return PyObject_CallMethod(self->buffer, "writable", NULL);
2215 }
2216
2217 static PyObject *
2218 TextIOWrapper_isatty(PyTextIOWrapperObject *self, PyObject *args)
2219 {
2220     CHECK_INITIALIZED(self);
2221     return PyObject_CallMethod(self->buffer, "isatty", NULL);
2222 }
2223
2224 static PyObject *
2225 TextIOWrapper_flush(PyTextIOWrapperObject *self, PyObject *args)
2226 {
2227     CHECK_INITIALIZED(self);
2228     CHECK_CLOSED(self);
2229     self->telling = self->seekable;
2230     if (_TextIOWrapper_writeflush(self) < 0)
2231         return NULL;
2232     return PyObject_CallMethod(self->buffer, "flush", NULL);
2233 }
2234
2235 static PyObject *
2236 TextIOWrapper_close(PyTextIOWrapperObject *self, PyObject *args)
2237 {
2238     PyObject *res;
2239     CHECK_INITIALIZED(self);
2240     res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2241     if (res == NULL) {
2242         /* If flush() fails, just give up */
2243         PyErr_Clear();
2244     }
2245     else
2246         Py_DECREF(res);
2247
2248     return PyObject_CallMethod(self->buffer, "close", NULL);
2249 }
2250
2251 static PyObject *
2252 TextIOWrapper_iternext(PyTextIOWrapperObject *self)
2253 {
2254     PyObject *line;
2255
2256     CHECK_INITIALIZED(self);
2257
2258     self->telling = 0;
2259     if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2260         /* Skip method call overhead for speed */
2261         line = _TextIOWrapper_readline(self, -1);
2262     }
2263     else {
2264         line = PyObject_CallMethodObjArgs((PyObject *)self,
2265                                            _PyIO_str_readline, NULL);
2266         if (line && !PyUnicode_Check(line)) {
2267             PyErr_Format(PyExc_IOError,
2268                          "readline() should have returned an str object, "
2269                          "not '%.200s'", Py_TYPE(line)->tp_name);
2270             Py_DECREF(line);
2271             return NULL;
2272         }
2273     }
2274
2275     if (line == NULL)
2276         return NULL;
2277
2278     if (PyUnicode_GET_SIZE(line) == 0) {
2279         /* Reached EOF or would have blocked */
2280         Py_DECREF(line);
2281         Py_CLEAR(self->snapshot);
2282         self->telling = self->seekable;
2283         return NULL;
2284     }
2285
2286     return line;
2287 }
2288
2289 static PyObject *
2290 TextIOWrapper_name_get(PyTextIOWrapperObject *self, void *context)
2291 {
2292     CHECK_INITIALIZED(self);
2293     return PyObject_GetAttrString(self->buffer, "name");
2294 }
2295
2296 static PyObject *
2297 TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context)
2298 {
2299     CHECK_INITIALIZED(self);
2300     return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2301 }
2302
2303 static PyObject *
2304 TextIOWrapper_newlines_get(PyTextIOWrapperObject *self, void *context)
2305 {
2306     PyObject *res;
2307     CHECK_INITIALIZED(self);
2308     if (self->decoder == NULL)
2309         Py_RETURN_NONE;
2310     res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2311     if (res == NULL) {
2312         PyErr_Clear();
2313         Py_RETURN_NONE;
2314     }
2315     return res;
2316 }
2317
2318 static PyObject *
2319 TextIOWrapper_chunk_size_get(PyTextIOWrapperObject *self, void *context)
2320 {
2321     CHECK_INITIALIZED(self);
2322     return PyLong_FromSsize_t(self->chunk_size);
2323 }
2324
2325 static int
2326 TextIOWrapper_chunk_size_set(PyTextIOWrapperObject *self,
2327                              PyObject *arg, void *context)
2328 {
2329     Py_ssize_t n;
2330     CHECK_INITIALIZED_INT(self);
2331     n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2332     if (n == -1 && PyErr_Occurred())
2333         return -1;
2334     if (n <= 0) {
2335         PyErr_SetString(PyExc_ValueError,
2336                         "a strictly positive integer is required");
2337         return -1;
2338     }
2339     self->chunk_size = n;
2340     return 0;
2341 }
2342
2343 static PyMethodDef TextIOWrapper_methods[] = {
2344     {"write", (PyCFunction)TextIOWrapper_write, METH_VARARGS},
2345     {"read", (PyCFunction)TextIOWrapper_read, METH_VARARGS},
2346     {"readline", (PyCFunction)TextIOWrapper_readline, METH_VARARGS},
2347     {"flush", (PyCFunction)TextIOWrapper_flush, METH_NOARGS},
2348     {"close", (PyCFunction)TextIOWrapper_close, METH_NOARGS},
2349
2350     {"fileno", (PyCFunction)TextIOWrapper_fileno, METH_NOARGS},
2351     {"seekable", (PyCFunction)TextIOWrapper_seekable, METH_NOARGS},
2352     {"readable", (PyCFunction)TextIOWrapper_readable, METH_NOARGS},
2353     {"writable", (PyCFunction)TextIOWrapper_writable, METH_NOARGS},
2354     {"isatty", (PyCFunction)TextIOWrapper_isatty, METH_NOARGS},
2355
2356     {"seek", (PyCFunction)TextIOWrapper_seek, METH_VARARGS},
2357     {"tell", (PyCFunction)TextIOWrapper_tell, METH_NOARGS},
2358     {"truncate", (PyCFunction)TextIOWrapper_truncate, METH_VARARGS},
2359     {NULL, NULL}
2360 };
2361
2362 static PyMemberDef TextIOWrapper_members[] = {
2363     {"encoding", T_OBJECT, offsetof(PyTextIOWrapperObject, encoding), READONLY},
2364     {"buffer", T_OBJECT, offsetof(PyTextIOWrapperObject, buffer), READONLY},
2365     {"line_buffering", T_BOOL, offsetof(PyTextIOWrapperObject, line_buffering), READONLY},
2366     {NULL}
2367 };
2368
2369 static PyGetSetDef TextIOWrapper_getset[] = {
2370     {"name", (getter)TextIOWrapper_name_get, NULL, NULL},
2371     {"closed", (getter)TextIOWrapper_closed_get, NULL, NULL},
2372 /*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2373 */
2374     {"newlines", (getter)TextIOWrapper_newlines_get, NULL, NULL},
2375     {"_CHUNK_SIZE", (getter)TextIOWrapper_chunk_size_get,
2376                     (setter)TextIOWrapper_chunk_size_set, NULL},
2377     {0}
2378 };
2379
2380 PyTypeObject PyTextIOWrapper_Type = {
2381     PyVarObject_HEAD_INIT(NULL, 0)
2382     "_io.TextIOWrapper",        /*tp_name*/
2383     sizeof(PyTextIOWrapperObject), /*tp_basicsize*/
2384     0,                          /*tp_itemsize*/
2385     (destructor)TextIOWrapper_dealloc, /*tp_dealloc*/
2386     0,                          /*tp_print*/
2387     0,                          /*tp_getattr*/
2388     0,                          /*tps_etattr*/
2389     0,                          /*tp_compare */
2390     (reprfunc)TextIOWrapper_repr,/*tp_repr*/
2391     0,                          /*tp_as_number*/
2392     0,                          /*tp_as_sequence*/
2393     0,                          /*tp_as_mapping*/
2394     0,                          /*tp_hash */
2395     0,                          /*tp_call*/
2396     0,                          /*tp_str*/
2397     0,                          /*tp_getattro*/
2398     0,                          /*tp_setattro*/
2399     0,                          /*tp_as_buffer*/
2400     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2401             | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
2402     TextIOWrapper_doc,          /* tp_doc */
2403     (traverseproc)TextIOWrapper_traverse, /* tp_traverse */
2404     (inquiry)TextIOWrapper_clear, /* tp_clear */
2405     0,                          /* tp_richcompare */
2406     offsetof(PyTextIOWrapperObject, weakreflist), /*tp_weaklistoffset*/
2407     0,                          /* tp_iter */
2408     (iternextfunc)TextIOWrapper_iternext, /* tp_iternext */
2409     TextIOWrapper_methods,      /* tp_methods */
2410     TextIOWrapper_members,      /* tp_members */
2411     TextIOWrapper_getset,       /* tp_getset */
2412     0,                          /* tp_base */
2413     0,                          /* tp_dict */
2414     0,                          /* tp_descr_get */
2415     0,                          /* tp_descr_set */
2416     offsetof(PyTextIOWrapperObject, dict), /*tp_dictoffset*/
2417     (initproc)TextIOWrapper_init, /* tp_init */
2418     0,                          /* tp_alloc */
2419     PyType_GenericNew,          /* tp_new */
2420 };