Issue #6215: backport the 3.1 io lib
[python.git] / Modules / _io / textio.c
blobc129303a3db239f60a7a0df93e94c93e5e8b393d
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "structmember.h"
12 #include "_iomodule.h"
/* TextIOBase */
16 PyDoc_STRVAR(textiobase_doc,
17 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
24 static PyObject *
25 _unsupported(const char *message)
27 PyErr_SetString(_PyIO_unsupported_operation, message);
28 return NULL;
31 PyDoc_STRVAR(textiobase_detach_doc,
32 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
38 static PyObject *
39 textiobase_detach(PyObject *self)
41 return _unsupported("detach");
44 PyDoc_STRVAR(textiobase_read_doc,
45 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
51 static PyObject *
52 textiobase_read(PyObject *self, PyObject *args)
54 return _unsupported("read");
57 PyDoc_STRVAR(textiobase_readline_doc,
58 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
63 static PyObject *
64 textiobase_readline(PyObject *self, PyObject *args)
66 return _unsupported("readline");
69 PyDoc_STRVAR(textiobase_write_doc,
70 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
75 static PyObject *
76 textiobase_write(PyObject *self, PyObject *args)
78 return _unsupported("write");
81 PyDoc_STRVAR(textiobase_encoding_doc,
82 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
87 static PyObject *
88 textiobase_encoding_get(PyObject *self, void *context)
90 Py_RETURN_NONE;
93 PyDoc_STRVAR(textiobase_newlines_doc,
94 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
101 static PyObject *
102 textiobase_newlines_get(PyObject *self, void *context)
104 Py_RETURN_NONE;
107 PyDoc_STRVAR(textiobase_errors_doc,
108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
113 static PyObject *
114 textiobase_errors_get(PyObject *self, void *context)
116 Py_RETURN_NONE;
120 static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
125 {NULL, NULL}
128 static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
132 {NULL}
135 PyTypeObject PyTextIOBase_Type = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "_io._TextIOBase", /*tp_name*/
138 0, /*tp_basicsize*/
139 0, /*tp_itemsize*/
140 0, /*tp_dealloc*/
141 0, /*tp_print*/
142 0, /*tp_getattr*/
143 0, /*tp_setattr*/
144 0, /*tp_compare */
145 0, /*tp_repr*/
146 0, /*tp_as_number*/
147 0, /*tp_as_sequence*/
148 0, /*tp_as_mapping*/
149 0, /*tp_hash */
150 0, /*tp_call*/
151 0, /*tp_str*/
152 0, /*tp_getattro*/
153 0, /*tp_setattro*/
154 0, /*tp_as_buffer*/
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
156 textiobase_doc, /* tp_doc */
157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 0, /* tp_iter */
162 0, /* tp_iternext */
163 textiobase_methods, /* tp_methods */
164 0, /* tp_members */
165 textiobase_getset, /* tp_getset */
166 &PyIOBase_Type, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 0, /* tp_new */
/* IncrementalNewlineDecoder */
179 PyDoc_STRVAR(incrementalnewlinedecoder_doc,
180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
186 "decoder.\n"
189 typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
193 int pendingcr:1;
194 int translate:1;
195 unsigned int seennl:3;
196 } nldecoder_object;
198 static int
199 incrementalnewlinedecoder_init(nldecoder_object *self,
200 PyObject *args, PyObject *kwds)
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
211 self->decoder = decoder;
212 Py_INCREF(decoder);
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
228 return 0;
231 static void
232 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
/* Bit flags recording which newline conventions have been encountered;
   SEEN_ALL is the union of the three. */
#define SEEN_CR   0x1
#define SEEN_LF   0x2
#define SEEN_CRLF 0x4
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
244 PyObject *
245 _PyIncrementalNewlineDecoder_decode(PyObject *_self,
246 PyObject *input, int final)
248 PyObject *output;
249 Py_ssize_t output_len;
250 nldecoder_object *self = (nldecoder_object *) _self;
252 if (self->decoder == NULL) {
253 PyErr_SetString(PyExc_ValueError,
254 "IncrementalNewlineDecoder.__init__ not called");
255 return NULL;
258 /* decode input (with the eventual \r from a previous pass) */
259 if (self->decoder != Py_None) {
260 output = PyObject_CallMethodObjArgs(self->decoder,
261 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
263 else {
264 output = input;
265 Py_INCREF(output);
268 if (output == NULL)
269 return NULL;
271 if (!PyUnicode_Check(output)) {
272 PyErr_SetString(PyExc_TypeError,
273 "decoder should return a string result");
274 goto error;
277 output_len = PyUnicode_GET_SIZE(output);
278 if (self->pendingcr && (final || output_len > 0)) {
279 Py_UNICODE *out;
280 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
281 if (modified == NULL)
282 goto error;
283 out = PyUnicode_AS_UNICODE(modified);
284 out[0] = '\r';
285 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
286 output_len * sizeof(Py_UNICODE));
287 Py_DECREF(output);
288 output = modified;
289 self->pendingcr = 0;
290 output_len++;
293 /* retain last \r even when not translating data:
294 * then readline() is sure to get \r\n in one pass
296 if (!final) {
297 if (output_len > 0
298 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
300 if (Py_REFCNT(output) == 1) {
301 if (PyUnicode_Resize(&output, output_len - 1) < 0)
302 goto error;
304 else {
305 PyObject *modified = PyUnicode_FromUnicode(
306 PyUnicode_AS_UNICODE(output),
307 output_len - 1);
308 if (modified == NULL)
309 goto error;
310 Py_DECREF(output);
311 output = modified;
313 self->pendingcr = 1;
317 /* Record which newlines are read and do newline translation if desired,
318 all in one pass. */
320 Py_UNICODE *in_str;
321 Py_ssize_t len;
322 int seennl = self->seennl;
323 int only_lf = 0;
325 in_str = PyUnicode_AS_UNICODE(output);
326 len = PyUnicode_GET_SIZE(output);
328 if (len == 0)
329 return output;
331 /* If, up to now, newlines are consistently \n, do a quick check
332 for the \r *byte* with the libc's optimized memchr.
334 if (seennl == SEEN_LF || seennl == 0) {
335 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
338 if (only_lf) {
339 /* If not already seen, quick scan for a possible "\n" character.
340 (there's nothing else to be done, even when in translation mode)
342 if (seennl == 0 &&
343 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
344 Py_UNICODE *s, *end;
345 s = in_str;
346 end = in_str + len;
347 for (;;) {
348 Py_UNICODE c;
349 /* Fast loop for non-control characters */
350 while (*s > '\n')
351 s++;
352 c = *s++;
353 if (c == '\n') {
354 seennl |= SEEN_LF;
355 break;
357 if (s > end)
358 break;
361 /* Finished: we have scanned for newlines, and none of them
362 need translating */
364 else if (!self->translate) {
365 Py_UNICODE *s, *end;
366 /* We have already seen all newline types, no need to scan again */
367 if (seennl == SEEN_ALL)
368 goto endscan;
369 s = in_str;
370 end = in_str + len;
371 for (;;) {
372 Py_UNICODE c;
373 /* Fast loop for non-control characters */
374 while (*s > '\r')
375 s++;
376 c = *s++;
377 if (c == '\n')
378 seennl |= SEEN_LF;
379 else if (c == '\r') {
380 if (*s == '\n') {
381 seennl |= SEEN_CRLF;
382 s++;
384 else
385 seennl |= SEEN_CR;
387 if (s > end)
388 break;
389 if (seennl == SEEN_ALL)
390 break;
392 endscan:
395 else {
396 PyObject *translated = NULL;
397 Py_UNICODE *out_str;
398 Py_UNICODE *in, *out, *end;
399 if (Py_REFCNT(output) != 1) {
400 /* We could try to optimize this so that we only do a copy
401 when there is something to translate. On the other hand,
402 most decoders should only output non-shared strings, i.e.
403 translation is done in place. */
404 translated = PyUnicode_FromUnicode(NULL, len);
405 if (translated == NULL)
406 goto error;
407 assert(Py_REFCNT(translated) == 1);
408 memcpy(PyUnicode_AS_UNICODE(translated),
409 PyUnicode_AS_UNICODE(output),
410 len * sizeof(Py_UNICODE));
412 else {
413 translated = output;
415 out_str = PyUnicode_AS_UNICODE(translated);
416 in = in_str;
417 out = out_str;
418 end = in_str + len;
419 for (;;) {
420 Py_UNICODE c;
421 /* Fast loop for non-control characters */
422 while ((c = *in++) > '\r')
423 *out++ = c;
424 if (c == '\n') {
425 *out++ = c;
426 seennl |= SEEN_LF;
427 continue;
429 if (c == '\r') {
430 if (*in == '\n') {
431 in++;
432 seennl |= SEEN_CRLF;
434 else
435 seennl |= SEEN_CR;
436 *out++ = '\n';
437 continue;
439 if (in > end)
440 break;
441 *out++ = c;
443 if (translated != output) {
444 Py_DECREF(output);
445 output = translated;
447 if (out - out_str != len) {
448 if (PyUnicode_Resize(&output, out - out_str) < 0)
449 goto error;
452 self->seennl |= seennl;
455 return output;
457 error:
458 Py_DECREF(output);
459 return NULL;
462 static PyObject *
463 incrementalnewlinedecoder_decode(nldecoder_object *self,
464 PyObject *args, PyObject *kwds)
466 char *kwlist[] = {"input", "final", NULL};
467 PyObject *input;
468 int final = 0;
470 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
471 kwlist, &input, &final))
472 return NULL;
473 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
476 static PyObject *
477 incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
479 PyObject *buffer;
480 unsigned PY_LONG_LONG flag;
482 if (self->decoder != Py_None) {
483 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
484 _PyIO_str_getstate, NULL);
485 if (state == NULL)
486 return NULL;
487 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
488 Py_DECREF(state);
489 return NULL;
491 Py_INCREF(buffer);
492 Py_DECREF(state);
494 else {
495 buffer = PyBytes_FromString("");
496 flag = 0;
498 flag <<= 1;
499 if (self->pendingcr)
500 flag |= 1;
501 return Py_BuildValue("NK", buffer, flag);
504 static PyObject *
505 incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
507 PyObject *buffer;
508 unsigned PY_LONG_LONG flag;
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
511 return NULL;
513 self->pendingcr = (int) flag & 1;
514 flag >>= 1;
516 if (self->decoder != Py_None)
517 return PyObject_CallMethod(self->decoder,
518 "setstate", "((OK))", buffer, flag);
519 else
520 Py_RETURN_NONE;
523 static PyObject *
524 incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
526 self->seennl = 0;
527 self->pendingcr = 0;
528 if (self->decoder != Py_None)
529 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
530 else
531 Py_RETURN_NONE;
534 static PyObject *
535 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
537 switch (self->seennl) {
538 case SEEN_CR:
539 return PyUnicode_FromString("\r");
540 case SEEN_LF:
541 return PyUnicode_FromString("\n");
542 case SEEN_CRLF:
543 return PyUnicode_FromString("\r\n");
544 case SEEN_CR | SEEN_LF:
545 return Py_BuildValue("ss", "\r", "\n");
546 case SEEN_CR | SEEN_CRLF:
547 return Py_BuildValue("ss", "\r", "\r\n");
548 case SEEN_LF | SEEN_CRLF:
549 return Py_BuildValue("ss", "\n", "\r\n");
550 case SEEN_CR | SEEN_LF | SEEN_CRLF:
551 return Py_BuildValue("sss", "\r", "\n", "\r\n");
552 default:
553 Py_RETURN_NONE;
559 static PyMethodDef incrementalnewlinedecoder_methods[] = {
560 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
561 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
562 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
563 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
564 {NULL}
567 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
568 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
569 {NULL}
572 PyTypeObject PyIncrementalNewlineDecoder_Type = {
573 PyVarObject_HEAD_INIT(NULL, 0)
574 "_io.IncrementalNewlineDecoder", /*tp_name*/
575 sizeof(nldecoder_object), /*tp_basicsize*/
576 0, /*tp_itemsize*/
577 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
578 0, /*tp_print*/
579 0, /*tp_getattr*/
580 0, /*tp_setattr*/
581 0, /*tp_compare */
582 0, /*tp_repr*/
583 0, /*tp_as_number*/
584 0, /*tp_as_sequence*/
585 0, /*tp_as_mapping*/
586 0, /*tp_hash */
587 0, /*tp_call*/
588 0, /*tp_str*/
589 0, /*tp_getattro*/
590 0, /*tp_setattro*/
591 0, /*tp_as_buffer*/
592 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
593 incrementalnewlinedecoder_doc, /* tp_doc */
594 0, /* tp_traverse */
595 0, /* tp_clear */
596 0, /* tp_richcompare */
597 0, /*tp_weaklistoffset*/
598 0, /* tp_iter */
599 0, /* tp_iternext */
600 incrementalnewlinedecoder_methods, /* tp_methods */
601 0, /* tp_members */
602 incrementalnewlinedecoder_getset, /* tp_getset */
603 0, /* tp_base */
604 0, /* tp_dict */
605 0, /* tp_descr_get */
606 0, /* tp_descr_set */
607 0, /* tp_dictoffset */
608 (initproc)incrementalnewlinedecoder_init, /* tp_init */
609 0, /* tp_alloc */
610 PyType_GenericNew, /* tp_new */
/* TextIOWrapper */
616 PyDoc_STRVAR(textiowrapper_doc,
617 "Character and line based layer over a BufferedIOBase object, buffer.\n"
618 "\n"
619 "encoding gives the name of the encoding that the stream will be\n"
620 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
621 "\n"
622 "errors determines the strictness of encoding and decoding (see the\n"
623 "codecs.register) and defaults to \"strict\".\n"
624 "\n"
625 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
626 "handling of line endings. If it is None, universal newlines is\n"
627 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
628 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
629 "caller. Conversely, on output, '\\n' is translated to the system\n"
630 "default line seperator, os.linesep. If newline is any other of its\n"
631 "legal values, that newline becomes the newline when the file is read\n"
632 "and it is returned untranslated. On output, '\\n' is converted to the\n"
633 "newline.\n"
634 "\n"
635 "If line_buffering is True, a call to flush is implied when a call to\n"
636 "write contains a newline character."
639 typedef PyObject *
640 (*encodefunc_t)(PyObject *, PyObject *);
642 typedef struct
644 PyObject_HEAD
645 int ok; /* initialized? */
646 int detached;
647 Py_ssize_t chunk_size;
648 PyObject *buffer;
649 PyObject *encoding;
650 PyObject *encoder;
651 PyObject *decoder;
652 PyObject *readnl;
653 PyObject *errors;
654 const char *writenl; /* utf-8 encoded, NULL stands for \n */
655 char line_buffering;
656 char readuniversal;
657 char readtranslate;
658 char writetranslate;
659 char seekable;
660 char telling;
661 /* Specialized encoding func (see below) */
662 encodefunc_t encodefunc;
663 /* Whether or not it's the start of the stream */
664 char encoding_start_of_stream;
666 /* Reads and writes are internally buffered in order to speed things up.
667 However, any read will first flush the write buffer if itsn't empty.
669 Please also note that text to be written is first encoded before being
670 buffered. This is necessary so that encoding errors are immediately
671 reported to the caller, but it unfortunately means that the
672 IncrementalEncoder (whose encode() method is always written in Python)
673 becomes a bottleneck for small writes.
675 PyObject *decoded_chars; /* buffer for text returned from decoder */
676 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
677 PyObject *pending_bytes; /* list of bytes objects waiting to be
678 written, or NULL */
679 Py_ssize_t pending_bytes_count;
680 PyObject *snapshot;
681 /* snapshot is either None, or a tuple (dec_flags, next_input) where
682 * dec_flags is the second (integer) item of the decoder state and
683 * next_input is the chunk of input bytes that comes next after the
684 * snapshot point. We use this to reconstruct decoder states in tell().
687 /* Cache raw object if it's a FileIO object */
688 PyObject *raw;
690 PyObject *weakreflist;
691 PyObject *dict;
692 } textio;
695 /* A couple of specialized cases in order to bypass the slow incremental
696 encoding methods for the most popular encodings. */
698 static PyObject *
699 ascii_encode(textio *self, PyObject *text)
701 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
702 PyUnicode_GET_SIZE(text),
703 PyBytes_AS_STRING(self->errors));
706 static PyObject *
707 utf16be_encode(textio *self, PyObject *text)
709 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
710 PyUnicode_GET_SIZE(text),
711 PyBytes_AS_STRING(self->errors), 1);
714 static PyObject *
715 utf16le_encode(textio *self, PyObject *text)
717 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
718 PyUnicode_GET_SIZE(text),
719 PyBytes_AS_STRING(self->errors), -1);
722 static PyObject *
723 utf16_encode(textio *self, PyObject *text)
725 if (!self->encoding_start_of_stream) {
726 /* Skip the BOM and use native byte ordering */
727 #if defined(WORDS_BIGENDIAN)
728 return utf16be_encode(self, text);
729 #else
730 return utf16le_encode(self, text);
731 #endif
733 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
734 PyUnicode_GET_SIZE(text),
735 PyBytes_AS_STRING(self->errors), 0);
738 static PyObject *
739 utf32be_encode(textio *self, PyObject *text)
741 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
742 PyUnicode_GET_SIZE(text),
743 PyBytes_AS_STRING(self->errors), 1);
746 static PyObject *
747 utf32le_encode(textio *self, PyObject *text)
749 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
750 PyUnicode_GET_SIZE(text),
751 PyBytes_AS_STRING(self->errors), -1);
754 static PyObject *
755 utf32_encode(textio *self, PyObject *text)
757 if (!self->encoding_start_of_stream) {
758 /* Skip the BOM and use native byte ordering */
759 #if defined(WORDS_BIGENDIAN)
760 return utf32be_encode(self, text);
761 #else
762 return utf32le_encode(self, text);
763 #endif
765 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
766 PyUnicode_GET_SIZE(text),
767 PyBytes_AS_STRING(self->errors), 0);
770 static PyObject *
771 utf8_encode(textio *self, PyObject *text)
773 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
774 PyUnicode_GET_SIZE(text),
775 PyBytes_AS_STRING(self->errors));
778 static PyObject *
779 latin1_encode(textio *self, PyObject *text)
781 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
782 PyUnicode_GET_SIZE(text),
783 PyBytes_AS_STRING(self->errors));
786 /* Map normalized encoding names onto the specialized encoding funcs */
788 typedef struct {
789 const char *name;
790 encodefunc_t encodefunc;
791 } encodefuncentry;
793 static encodefuncentry encodefuncs[] = {
794 {"ascii", (encodefunc_t) ascii_encode},
795 {"iso8859-1", (encodefunc_t) latin1_encode},
796 {"utf-8", (encodefunc_t) utf8_encode},
797 {"utf-16-be", (encodefunc_t) utf16be_encode},
798 {"utf-16-le", (encodefunc_t) utf16le_encode},
799 {"utf-16", (encodefunc_t) utf16_encode},
800 {"utf-32-be", (encodefunc_t) utf32be_encode},
801 {"utf-32-le", (encodefunc_t) utf32le_encode},
802 {"utf-32", (encodefunc_t) utf32_encode},
803 {NULL, NULL}
807 static int
808 textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
810 char *kwlist[] = {"buffer", "encoding", "errors",
811 "newline", "line_buffering",
812 NULL};
813 PyObject *buffer, *raw;
814 char *encoding = NULL;
815 char *errors = NULL;
816 char *newline = NULL;
817 int line_buffering = 0;
819 PyObject *res;
820 int r;
822 self->ok = 0;
823 self->detached = 0;
824 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
825 kwlist, &buffer, &encoding, &errors,
826 &newline, &line_buffering))
827 return -1;
829 if (newline && newline[0] != '\0'
830 && !(newline[0] == '\n' && newline[1] == '\0')
831 && !(newline[0] == '\r' && newline[1] == '\0')
832 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
833 PyErr_Format(PyExc_ValueError,
834 "illegal newline value: %s", newline);
835 return -1;
838 Py_CLEAR(self->buffer);
839 Py_CLEAR(self->encoding);
840 Py_CLEAR(self->encoder);
841 Py_CLEAR(self->decoder);
842 Py_CLEAR(self->readnl);
843 Py_CLEAR(self->decoded_chars);
844 Py_CLEAR(self->pending_bytes);
845 Py_CLEAR(self->snapshot);
846 Py_CLEAR(self->errors);
847 Py_CLEAR(self->raw);
848 self->decoded_chars_used = 0;
849 self->pending_bytes_count = 0;
850 self->encodefunc = NULL;
851 self->writenl = NULL;
853 if (encoding == NULL && self->encoding == NULL) {
854 if (_PyIO_locale_module == NULL) {
855 _PyIO_locale_module = PyImport_ImportModule("locale");
856 if (_PyIO_locale_module == NULL)
857 goto catch_ImportError;
858 else
859 goto use_locale;
861 else {
862 use_locale:
863 self->encoding = PyObject_CallMethod(
864 _PyIO_locale_module, "getpreferredencoding", NULL);
865 if (self->encoding == NULL) {
866 catch_ImportError:
868 Importing locale can raise a ImportError because of
869 _functools, and locale.getpreferredencoding can raise a
870 ImportError if _locale is not available. These will happen
871 during module building.
873 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
874 PyErr_Clear();
875 self->encoding = PyString_FromString("ascii");
877 else
878 goto error;
880 else if (!PyString_Check(self->encoding))
881 Py_CLEAR(self->encoding);
884 if (self->encoding != NULL)
885 encoding = PyString_AsString(self->encoding);
886 else if (encoding != NULL) {
887 self->encoding = PyString_FromString(encoding);
888 if (self->encoding == NULL)
889 goto error;
891 else {
892 PyErr_SetString(PyExc_IOError,
893 "could not determine default encoding");
896 if (errors == NULL)
897 errors = "strict";
898 self->errors = PyBytes_FromString(errors);
899 if (self->errors == NULL)
900 goto error;
902 self->chunk_size = 8192;
903 self->readuniversal = (newline == NULL || newline[0] == '\0');
904 self->line_buffering = line_buffering;
905 self->readtranslate = (newline == NULL);
906 if (newline) {
907 self->readnl = PyString_FromString(newline);
908 if (self->readnl == NULL)
909 return -1;
911 self->writetranslate = (newline == NULL || newline[0] != '\0');
912 if (!self->readuniversal && self->writetranslate) {
913 self->writenl = PyString_AsString(self->readnl);
914 if (!strcmp(self->writenl, "\n"))
915 self->writenl = NULL;
917 #ifdef MS_WINDOWS
918 else
919 self->writenl = "\r\n";
920 #endif
922 /* Build the decoder object */
923 res = PyObject_CallMethod(buffer, "readable", NULL);
924 if (res == NULL)
925 goto error;
926 r = PyObject_IsTrue(res);
927 Py_DECREF(res);
928 if (r == -1)
929 goto error;
930 if (r == 1) {
931 self->decoder = PyCodec_IncrementalDecoder(
932 encoding, errors);
933 if (self->decoder == NULL)
934 goto error;
936 if (self->readuniversal) {
937 PyObject *incrementalDecoder = PyObject_CallFunction(
938 (PyObject *)&PyIncrementalNewlineDecoder_Type,
939 "Oi", self->decoder, (int)self->readtranslate);
940 if (incrementalDecoder == NULL)
941 goto error;
942 Py_CLEAR(self->decoder);
943 self->decoder = incrementalDecoder;
947 /* Build the encoder object */
948 res = PyObject_CallMethod(buffer, "writable", NULL);
949 if (res == NULL)
950 goto error;
951 r = PyObject_IsTrue(res);
952 Py_DECREF(res);
953 if (r == -1)
954 goto error;
955 if (r == 1) {
956 PyObject *ci;
957 self->encoder = PyCodec_IncrementalEncoder(
958 encoding, errors);
959 if (self->encoder == NULL)
960 goto error;
961 /* Get the normalized named of the codec */
962 ci = _PyCodec_Lookup(encoding);
963 if (ci == NULL)
964 goto error;
965 res = PyObject_GetAttrString(ci, "name");
966 Py_DECREF(ci);
967 if (res == NULL) {
968 if (PyErr_ExceptionMatches(PyExc_AttributeError))
969 PyErr_Clear();
970 else
971 goto error;
973 else if (PyString_Check(res)) {
974 encodefuncentry *e = encodefuncs;
975 while (e->name != NULL) {
976 if (!strcmp(PyString_AS_STRING(res), e->name)) {
977 self->encodefunc = e->encodefunc;
978 break;
980 e++;
983 Py_XDECREF(res);
986 self->buffer = buffer;
987 Py_INCREF(buffer);
989 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
990 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
991 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
992 raw = PyObject_GetAttrString(buffer, "raw");
993 /* Cache the raw FileIO object to speed up 'closed' checks */
994 if (raw == NULL) {
995 if (PyErr_ExceptionMatches(PyExc_AttributeError))
996 PyErr_Clear();
997 else
998 goto error;
1000 else if (Py_TYPE(raw) == &PyFileIO_Type)
1001 self->raw = raw;
1002 else
1003 Py_DECREF(raw);
1006 res = PyObject_CallMethod(buffer, "seekable", NULL);
1007 if (res == NULL)
1008 goto error;
1009 self->seekable = self->telling = PyObject_IsTrue(res);
1010 Py_DECREF(res);
1012 self->encoding_start_of_stream = 0;
1013 if (self->seekable && self->encoder) {
1014 PyObject *cookieObj;
1015 int cmp;
1017 self->encoding_start_of_stream = 1;
1019 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1020 if (cookieObj == NULL)
1021 goto error;
1023 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1024 Py_DECREF(cookieObj);
1025 if (cmp < 0) {
1026 goto error;
1029 if (cmp == 0) {
1030 self->encoding_start_of_stream = 0;
1031 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1032 _PyIO_zero, NULL);
1033 if (res == NULL)
1034 goto error;
1035 Py_DECREF(res);
1039 self->ok = 1;
1040 return 0;
1042 error:
1043 return -1;
1046 static int
1047 _textiowrapper_clear(textio *self)
1049 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1050 return -1;
1051 self->ok = 0;
1052 Py_CLEAR(self->buffer);
1053 Py_CLEAR(self->encoding);
1054 Py_CLEAR(self->encoder);
1055 Py_CLEAR(self->decoder);
1056 Py_CLEAR(self->readnl);
1057 Py_CLEAR(self->decoded_chars);
1058 Py_CLEAR(self->pending_bytes);
1059 Py_CLEAR(self->snapshot);
1060 Py_CLEAR(self->errors);
1061 Py_CLEAR(self->raw);
1062 return 0;
1065 static void
1066 textiowrapper_dealloc(textio *self)
1068 if (_textiowrapper_clear(self) < 0)
1069 return;
1070 _PyObject_GC_UNTRACK(self);
1071 if (self->weakreflist != NULL)
1072 PyObject_ClearWeakRefs((PyObject *)self);
1073 Py_CLEAR(self->dict);
1074 Py_TYPE(self)->tp_free((PyObject *)self);
1077 static int
1078 textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1080 Py_VISIT(self->buffer);
1081 Py_VISIT(self->encoding);
1082 Py_VISIT(self->encoder);
1083 Py_VISIT(self->decoder);
1084 Py_VISIT(self->readnl);
1085 Py_VISIT(self->decoded_chars);
1086 Py_VISIT(self->pending_bytes);
1087 Py_VISIT(self->snapshot);
1088 Py_VISIT(self->errors);
1089 Py_VISIT(self->raw);
1091 Py_VISIT(self->dict);
1092 return 0;
1095 static int
1096 textiowrapper_clear(textio *self)
1098 if (_textiowrapper_clear(self) < 0)
1099 return -1;
1100 Py_CLEAR(self->dict);
1101 return 0;
1104 static PyObject *
1105 textiowrapper_closed_get(textio *self, void *context);
1107 /* This macro takes some shortcuts to make the common case faster. */
1108 #define CHECK_CLOSED(self) \
1109 do { \
1110 int r; \
1111 PyObject *_res; \
1112 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1113 if (self->raw != NULL) \
1114 r = _PyFileIO_closed(self->raw); \
1115 else { \
1116 _res = textiowrapper_closed_get(self, NULL); \
1117 if (_res == NULL) \
1118 return NULL; \
1119 r = PyObject_IsTrue(_res); \
1120 Py_DECREF(_res); \
1121 if (r < 0) \
1122 return NULL; \
1124 if (r > 0) { \
1125 PyErr_SetString(PyExc_ValueError, \
1126 "I/O operation on closed file."); \
1127 return NULL; \
1130 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1131 return NULL; \
1132 } while (0)
/* Raise ValueError and return NULL from the enclosing function when the
   wrapper is not usable; the message distinguishes a detached buffer from
   an object whose __init__ never completed. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                        ? "underlying buffer has been detached" \
                        : "I/O operation on uninitialized object"); \
        return NULL; \
    }
/* Same check as CHECK_INITIALIZED, for functions that report failure by
   returning -1. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                        ? "underlying buffer has been detached" \
                        : "I/O operation on uninitialized object"); \
        return -1; \
    }
1159 static PyObject *
1160 textiowrapper_detach(textio *self)
1162 PyObject *buffer, *res;
1163 CHECK_INITIALIZED(self);
1164 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1165 if (res == NULL)
1166 return NULL;
1167 Py_DECREF(res);
1168 buffer = self->buffer;
1169 self->buffer = NULL;
1170 self->detached = 1;
1171 self->ok = 0;
1172 return buffer;
1175 Py_LOCAL_INLINE(const Py_UNICODE *)
1176 findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1178 /* like wcschr, but doesn't stop at NULL characters */
1179 while (size-- > 0) {
1180 if (*s == ch)
1181 return s;
1182 s++;
1184 return NULL;
1187 /* Flush the internal write buffer. This doesn't explicitly flush the
1188 underlying buffered object, though. */
1189 static int
1190 _textiowrapper_writeflush(textio *self)
1192 PyObject *b, *ret;
1194 if (self->pending_bytes == NULL)
1195 return 0;
1196 b = _PyBytes_Join(_PyIO_empty_bytes, self->pending_bytes);
1197 if (b == NULL)
1198 return -1;
1199 ret = PyObject_CallMethodObjArgs(self->buffer,
1200 _PyIO_str_write, b, NULL);
1201 Py_DECREF(b);
1202 if (ret == NULL)
1203 return -1;
1204 Py_DECREF(ret);
1205 Py_CLEAR(self->pending_bytes);
1206 self->pending_bytes_count = 0;
1207 return 0;
1210 static PyObject *
1211 textiowrapper_write(textio *self, PyObject *args)
1213 PyObject *ret;
1214 PyObject *text; /* owned reference */
1215 PyObject *b;
1216 Py_ssize_t textlen;
1217 int haslf = 0;
1218 int needflush = 0;
1220 CHECK_INITIALIZED(self);
1222 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1223 return NULL;
1226 CHECK_CLOSED(self);
1228 if (self->encoder == NULL) {
1229 PyErr_SetString(PyExc_IOError, "not writable");
1230 return NULL;
1233 Py_INCREF(text);
1235 textlen = PyUnicode_GetSize(text);
1237 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1238 if (findchar(PyUnicode_AS_UNICODE(text),
1239 PyUnicode_GET_SIZE(text), '\n'))
1240 haslf = 1;
1242 if (haslf && self->writetranslate && self->writenl != NULL) {
1243 PyObject *newtext = PyObject_CallMethod(
1244 text, "replace", "ss", "\n", self->writenl);
1245 Py_DECREF(text);
1246 if (newtext == NULL)
1247 return NULL;
1248 text = newtext;
1251 if (self->line_buffering &&
1252 (haslf ||
1253 findchar(PyUnicode_AS_UNICODE(text),
1254 PyUnicode_GET_SIZE(text), '\r')))
1255 needflush = 1;
1257 /* XXX What if we were just reading? */
1258 if (self->encodefunc != NULL) {
1259 b = (*self->encodefunc)((PyObject *) self, text);
1260 self->encoding_start_of_stream = 0;
1262 else
1263 b = PyObject_CallMethodObjArgs(self->encoder,
1264 _PyIO_str_encode, text, NULL);
1265 Py_DECREF(text);
1266 if (b == NULL)
1267 return NULL;
1269 if (self->pending_bytes == NULL) {
1270 self->pending_bytes = PyList_New(0);
1271 if (self->pending_bytes == NULL) {
1272 Py_DECREF(b);
1273 return NULL;
1275 self->pending_bytes_count = 0;
1277 if (PyList_Append(self->pending_bytes, b) < 0) {
1278 Py_DECREF(b);
1279 return NULL;
1281 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1282 Py_DECREF(b);
1283 if (self->pending_bytes_count > self->chunk_size || needflush) {
1284 if (_textiowrapper_writeflush(self) < 0)
1285 return NULL;
1288 if (needflush) {
1289 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1290 if (ret == NULL)
1291 return NULL;
1292 Py_DECREF(ret);
1295 Py_CLEAR(self->snapshot);
1297 if (self->decoder) {
1298 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1299 if (ret == NULL)
1300 return NULL;
1301 Py_DECREF(ret);
1304 return PyLong_FromSsize_t(textlen);
1307 /* Steal a reference to chars and store it in the decoded_char buffer;
1309 static void
1310 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1312 Py_CLEAR(self->decoded_chars);
1313 self->decoded_chars = chars;
1314 self->decoded_chars_used = 0;
1317 static PyObject *
1318 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1320 PyObject *chars;
1321 Py_ssize_t avail;
1323 if (self->decoded_chars == NULL)
1324 return PyUnicode_FromStringAndSize(NULL, 0);
1326 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1327 - self->decoded_chars_used);
1329 assert(avail >= 0);
1331 if (n < 0 || n > avail)
1332 n = avail;
1334 if (self->decoded_chars_used > 0 || n < avail) {
1335 chars = PyUnicode_FromUnicode(
1336 PyUnicode_AS_UNICODE(self->decoded_chars)
1337 + self->decoded_chars_used, n);
1338 if (chars == NULL)
1339 return NULL;
1341 else {
1342 chars = self->decoded_chars;
1343 Py_INCREF(chars);
1346 self->decoded_chars_used += n;
1347 return chars;
1350 /* Read and decode the next chunk of data from the BufferedReader.
1352 static int
1353 textiowrapper_read_chunk(textio *self)
1355 PyObject *dec_buffer = NULL;
1356 PyObject *dec_flags = NULL;
1357 PyObject *input_chunk = NULL;
1358 PyObject *decoded_chars, *chunk_size;
1359 int eof;
1361 /* The return value is True unless EOF was reached. The decoded string is
1362 * placed in self._decoded_chars (replacing its previous value). The
1363 * entire input chunk is sent to the decoder, though some of it may remain
1364 * buffered in the decoder, yet to be converted.
1367 if (self->decoder == NULL) {
1368 PyErr_SetString(PyExc_IOError, "not readable");
1369 return -1;
1372 if (self->telling) {
1373 /* To prepare for tell(), we need to snapshot a point in the file
1374 * where the decoder's input buffer is empty.
1377 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1378 _PyIO_str_getstate, NULL);
1379 if (state == NULL)
1380 return -1;
1381 /* Given this, we know there was a valid snapshot point
1382 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1384 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1385 Py_DECREF(state);
1386 return -1;
1388 Py_INCREF(dec_buffer);
1389 Py_INCREF(dec_flags);
1390 Py_DECREF(state);
1393 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1394 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1395 if (chunk_size == NULL)
1396 goto fail;
1397 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1398 _PyIO_str_read1, chunk_size, NULL);
1399 Py_DECREF(chunk_size);
1400 if (input_chunk == NULL)
1401 goto fail;
1402 assert(PyBytes_Check(input_chunk));
1404 eof = (PyBytes_Size(input_chunk) == 0);
1406 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1407 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1408 self->decoder, input_chunk, eof);
1410 else {
1411 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1412 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1415 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1416 if (decoded_chars == NULL)
1417 goto fail;
1418 textiowrapper_set_decoded_chars(self, decoded_chars);
1419 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1420 eof = 0;
1422 if (self->telling) {
1423 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1424 * next input to be decoded is dec_buffer + input_chunk.
1426 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1427 if (next_input == NULL)
1428 goto fail;
1429 assert (PyBytes_Check(next_input));
1430 Py_DECREF(dec_buffer);
1431 Py_CLEAR(self->snapshot);
1432 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1434 Py_DECREF(input_chunk);
1436 return (eof == 0);
1438 fail:
1439 Py_XDECREF(dec_buffer);
1440 Py_XDECREF(dec_flags);
1441 Py_XDECREF(input_chunk);
1442 return -1;
1445 static PyObject *
1446 textiowrapper_read(textio *self, PyObject *args)
1448 Py_ssize_t n = -1;
1449 PyObject *result = NULL, *chunks = NULL;
1451 CHECK_INITIALIZED(self);
1453 if (!PyArg_ParseTuple(args, "|n:read", &n))
1454 return NULL;
1456 CHECK_CLOSED(self);
1458 if (self->decoder == NULL) {
1459 PyErr_SetString(PyExc_IOError, "not readable");
1460 return NULL;
1463 if (_textiowrapper_writeflush(self) < 0)
1464 return NULL;
1466 if (n < 0) {
1467 /* Read everything */
1468 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1469 PyObject *decoded, *final;
1470 if (bytes == NULL)
1471 goto fail;
1472 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1473 bytes, Py_True, NULL);
1474 Py_DECREF(bytes);
1475 if (decoded == NULL)
1476 goto fail;
1478 result = textiowrapper_get_decoded_chars(self, -1);
1480 if (result == NULL) {
1481 Py_DECREF(decoded);
1482 return NULL;
1485 final = PyUnicode_Concat(result, decoded);
1486 Py_DECREF(result);
1487 Py_DECREF(decoded);
1488 if (final == NULL)
1489 goto fail;
1491 Py_CLEAR(self->snapshot);
1492 return final;
1494 else {
1495 int res = 1;
1496 Py_ssize_t remaining = n;
1498 result = textiowrapper_get_decoded_chars(self, n);
1499 if (result == NULL)
1500 goto fail;
1501 remaining -= PyUnicode_GET_SIZE(result);
1503 /* Keep reading chunks until we have n characters to return */
1504 while (remaining > 0) {
1505 res = textiowrapper_read_chunk(self);
1506 if (res < 0)
1507 goto fail;
1508 if (res == 0) /* EOF */
1509 break;
1510 if (chunks == NULL) {
1511 chunks = PyList_New(0);
1512 if (chunks == NULL)
1513 goto fail;
1515 if (PyList_Append(chunks, result) < 0)
1516 goto fail;
1517 Py_DECREF(result);
1518 result = textiowrapper_get_decoded_chars(self, remaining);
1519 if (result == NULL)
1520 goto fail;
1521 remaining -= PyUnicode_GET_SIZE(result);
1523 if (chunks != NULL) {
1524 if (result != NULL && PyList_Append(chunks, result) < 0)
1525 goto fail;
1526 Py_CLEAR(result);
1527 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1528 if (result == NULL)
1529 goto fail;
1530 Py_CLEAR(chunks);
1532 return result;
1534 fail:
1535 Py_XDECREF(result);
1536 Py_XDECREF(chunks);
1537 return NULL;
1541 /* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1542 that is to the NUL character. Otherwise the function will produce
1543 incorrect results. */
1544 static Py_UNICODE *
1545 find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1547 Py_UNICODE *s = start;
1548 for (;;) {
1549 while (*s > ch)
1550 s++;
1551 if (*s == ch)
1552 return s;
1553 if (s == end)
1554 return NULL;
1555 s++;
1559 Py_ssize_t
1560 _PyIO_find_line_ending(
1561 int translated, int universal, PyObject *readnl,
1562 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1564 Py_ssize_t len = end - start;
1566 if (translated) {
1567 /* Newlines are already translated, only search for \n */
1568 Py_UNICODE *pos = find_control_char(start, end, '\n');
1569 if (pos != NULL)
1570 return pos - start + 1;
1571 else {
1572 *consumed = len;
1573 return -1;
1576 else if (universal) {
1577 /* Universal newline search. Find any of \r, \r\n, \n
1578 * The decoder ensures that \r\n are not split in two pieces
1580 Py_UNICODE *s = start;
1581 for (;;) {
1582 Py_UNICODE ch;
1583 /* Fast path for non-control chars. The loop always ends
1584 since the Py_UNICODE storage is NUL-terminated. */
1585 while (*s > '\r')
1586 s++;
1587 if (s >= end) {
1588 *consumed = len;
1589 return -1;
1591 ch = *s++;
1592 if (ch == '\n')
1593 return s - start;
1594 if (ch == '\r') {
1595 if (*s == '\n')
1596 return s - start + 1;
1597 else
1598 return s - start;
1602 else {
1603 /* Non-universal mode. */
1604 Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
1605 unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
1606 if (readnl_len == 1) {
1607 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1608 if (pos != NULL)
1609 return pos - start + 1;
1610 *consumed = len;
1611 return -1;
1613 else {
1614 Py_UNICODE *s = start;
1615 Py_UNICODE *e = end - readnl_len + 1;
1616 Py_UNICODE *pos;
1617 if (e < s)
1618 e = s;
1619 while (s < e) {
1620 Py_ssize_t i;
1621 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1622 if (pos == NULL || pos >= e)
1623 break;
1624 for (i = 1; i < readnl_len; i++) {
1625 if (pos[i] != nl[i])
1626 break;
1628 if (i == readnl_len)
1629 return pos - start + readnl_len;
1630 s = pos + 1;
1632 pos = find_control_char(e, end, nl[0]);
1633 if (pos == NULL)
1634 *consumed = len;
1635 else
1636 *consumed = pos - start;
1637 return -1;
1642 static PyObject *
1643 _textiowrapper_readline(textio *self, Py_ssize_t limit)
1645 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1646 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1647 int res;
1649 CHECK_CLOSED(self);
1651 if (_textiowrapper_writeflush(self) < 0)
1652 return NULL;
1654 chunked = 0;
1656 while (1) {
1657 Py_UNICODE *ptr;
1658 Py_ssize_t line_len;
1659 Py_ssize_t consumed = 0;
1661 /* First, get some data if necessary */
1662 res = 1;
1663 while (!self->decoded_chars ||
1664 !PyUnicode_GET_SIZE(self->decoded_chars)) {
1665 res = textiowrapper_read_chunk(self);
1666 if (res < 0)
1667 goto error;
1668 if (res == 0)
1669 break;
1671 if (res == 0) {
1672 /* end of file */
1673 textiowrapper_set_decoded_chars(self, NULL);
1674 Py_CLEAR(self->snapshot);
1675 start = endpos = offset_to_buffer = 0;
1676 break;
1679 if (remaining == NULL) {
1680 line = self->decoded_chars;
1681 start = self->decoded_chars_used;
1682 offset_to_buffer = 0;
1683 Py_INCREF(line);
1685 else {
1686 assert(self->decoded_chars_used == 0);
1687 line = PyUnicode_Concat(remaining, self->decoded_chars);
1688 start = 0;
1689 offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1690 Py_CLEAR(remaining);
1691 if (line == NULL)
1692 goto error;
1695 ptr = PyUnicode_AS_UNICODE(line);
1696 line_len = PyUnicode_GET_SIZE(line);
1698 endpos = _PyIO_find_line_ending(
1699 self->readtranslate, self->readuniversal, self->readnl,
1700 ptr + start, ptr + line_len, &consumed);
1701 if (endpos >= 0) {
1702 endpos += start;
1703 if (limit >= 0 && (endpos - start) + chunked >= limit)
1704 endpos = start + limit - chunked;
1705 break;
1708 /* We can put aside up to `endpos` */
1709 endpos = consumed + start;
1710 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1711 /* Didn't find line ending, but reached length limit */
1712 endpos = start + limit - chunked;
1713 break;
1716 if (endpos > start) {
1717 /* No line ending seen yet - put aside current data */
1718 PyObject *s;
1719 if (chunks == NULL) {
1720 chunks = PyList_New(0);
1721 if (chunks == NULL)
1722 goto error;
1724 s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1725 if (s == NULL)
1726 goto error;
1727 if (PyList_Append(chunks, s) < 0) {
1728 Py_DECREF(s);
1729 goto error;
1731 chunked += PyUnicode_GET_SIZE(s);
1732 Py_DECREF(s);
1734 /* There may be some remaining bytes we'll have to prepend to the
1735 next chunk of data */
1736 if (endpos < line_len) {
1737 remaining = PyUnicode_FromUnicode(
1738 ptr + endpos, line_len - endpos);
1739 if (remaining == NULL)
1740 goto error;
1742 Py_CLEAR(line);
1743 /* We have consumed the buffer */
1744 textiowrapper_set_decoded_chars(self, NULL);
1747 if (line != NULL) {
1748 /* Our line ends in the current buffer */
1749 self->decoded_chars_used = endpos - offset_to_buffer;
1750 if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1751 if (start == 0 && Py_REFCNT(line) == 1) {
1752 if (PyUnicode_Resize(&line, endpos) < 0)
1753 goto error;
1755 else {
1756 PyObject *s = PyUnicode_FromUnicode(
1757 PyUnicode_AS_UNICODE(line) + start, endpos - start);
1758 Py_CLEAR(line);
1759 if (s == NULL)
1760 goto error;
1761 line = s;
1765 if (remaining != NULL) {
1766 if (chunks == NULL) {
1767 chunks = PyList_New(0);
1768 if (chunks == NULL)
1769 goto error;
1771 if (PyList_Append(chunks, remaining) < 0)
1772 goto error;
1773 Py_CLEAR(remaining);
1775 if (chunks != NULL) {
1776 if (line != NULL && PyList_Append(chunks, line) < 0)
1777 goto error;
1778 Py_CLEAR(line);
1779 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1780 if (line == NULL)
1781 goto error;
1782 Py_DECREF(chunks);
1784 if (line == NULL)
1785 line = PyUnicode_FromStringAndSize(NULL, 0);
1787 return line;
1789 error:
1790 Py_XDECREF(chunks);
1791 Py_XDECREF(remaining);
1792 Py_XDECREF(line);
1793 return NULL;
1796 static PyObject *
1797 textiowrapper_readline(textio *self, PyObject *args)
1799 PyObject *limitobj = NULL;
1800 Py_ssize_t limit = -1;
1802 CHECK_INITIALIZED(self);
1803 if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1804 return NULL;
1806 if (limitobj) {
1807 if (!PyNumber_Check(limitobj)) {
1808 PyErr_Format(PyExc_TypeError,
1809 "integer argument expected, got '%.200s'",
1810 Py_TYPE(limitobj)->tp_name);
1811 return NULL;
1813 limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1814 if (limit == -1 && PyErr_Occurred())
1815 return NULL;
1817 return _textiowrapper_readline(self, limit);
1820 /* Seek and Tell */
1822 typedef struct {
1823 Py_off_t start_pos;
1824 int dec_flags;
1825 int bytes_to_feed;
1826 int chars_to_skip;
1827 char need_eof;
1828 } cookie_type;
/*
   Cookies are packed and unpacked through a temporary byte string that is
   converted with _PyLong_FromByteArray() / _PyLong_AsByteArray().  The
   macros below give the size of that string and the offset at which each
   cookie_type field is stored in it.
*/

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#ifdef WORDS_BIGENDIAN

# define IS_LITTLE_ENDIAN   0

/* The least significant byte of start_pos must also be the least
   significant byte of the cookie, so on big-endian machines the fields
   are laid out in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian: with the fields in declaration order, the least
   significant byte of start_pos is naturally the least significant byte
   of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif

1868 static int
1869 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1871 unsigned char buffer[COOKIE_BUF_LEN];
1872 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1873 if (cookieLong == NULL)
1874 return -1;
1876 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1877 IS_LITTLE_ENDIAN, 0) < 0) {
1878 Py_DECREF(cookieLong);
1879 return -1;
1881 Py_DECREF(cookieLong);
1883 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1884 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1885 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1886 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1887 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1889 return 0;
1892 static PyObject *
1893 textiowrapper_build_cookie(cookie_type *cookie)
1895 unsigned char buffer[COOKIE_BUF_LEN];
1897 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1898 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1899 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1900 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1901 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1903 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1905 #undef IS_LITTLE_ENDIAN
1907 static int
1908 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1910 PyObject *res;
1911 /* When seeking to the start of the stream, we call decoder.reset()
1912 rather than decoder.getstate().
1913 This is for a few decoders such as utf-16 for which the state value
1914 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1915 utf-16, that we are expecting a BOM).
1917 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1918 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1919 else
1920 res = PyObject_CallMethod(self->decoder, "setstate",
1921 "((si))", "", cookie->dec_flags);
1922 if (res == NULL)
1923 return -1;
1924 Py_DECREF(res);
1925 return 0;
1928 static int
1929 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1931 PyObject *res;
1932 /* Same as _textiowrapper_decoder_setstate() above. */
1933 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1934 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1935 self->encoding_start_of_stream = 1;
1937 else {
1938 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1939 _PyIO_zero, NULL);
1940 self->encoding_start_of_stream = 0;
1942 if (res == NULL)
1943 return -1;
1944 Py_DECREF(res);
1945 return 0;
1948 static PyObject *
1949 textiowrapper_seek(textio *self, PyObject *args)
1951 PyObject *cookieObj, *posobj;
1952 cookie_type cookie;
1953 int whence = 0;
1954 PyObject *res;
1955 int cmp;
1957 CHECK_INITIALIZED(self);
1959 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1960 return NULL;
1961 CHECK_CLOSED(self);
1963 Py_INCREF(cookieObj);
1965 if (!self->seekable) {
1966 PyErr_SetString(PyExc_IOError,
1967 "underlying stream is not seekable");
1968 goto fail;
1971 if (whence == 1) {
1972 /* seek relative to current position */
1973 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1974 if (cmp < 0)
1975 goto fail;
1977 if (cmp == 0) {
1978 PyErr_SetString(PyExc_IOError,
1979 "can't do nonzero cur-relative seeks");
1980 goto fail;
1983 /* Seeking to the current position should attempt to
1984 * sync the underlying buffer with the current position.
1986 Py_DECREF(cookieObj);
1987 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
1988 if (cookieObj == NULL)
1989 goto fail;
1991 else if (whence == 2) {
1992 /* seek relative to end of file */
1994 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1995 if (cmp < 0)
1996 goto fail;
1998 if (cmp == 0) {
1999 PyErr_SetString(PyExc_IOError,
2000 "can't do nonzero end-relative seeks");
2001 goto fail;
2004 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2005 if (res == NULL)
2006 goto fail;
2007 Py_DECREF(res);
2009 textiowrapper_set_decoded_chars(self, NULL);
2010 Py_CLEAR(self->snapshot);
2011 if (self->decoder) {
2012 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2013 if (res == NULL)
2014 goto fail;
2015 Py_DECREF(res);
2018 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2019 Py_XDECREF(cookieObj);
2020 return res;
2022 else if (whence != 0) {
2023 PyErr_Format(PyExc_ValueError,
2024 "invalid whence (%d, should be 0, 1 or 2)", whence);
2025 goto fail;
2028 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2029 if (cmp < 0)
2030 goto fail;
2032 if (cmp == 1) {
2033 PyObject *repr = PyObject_Repr(cookieObj);
2034 if (repr != NULL) {
2035 PyErr_Format(PyExc_ValueError,
2036 "negative seek position %s",
2037 PyString_AS_STRING(repr));
2038 Py_DECREF(repr);
2040 goto fail;
2043 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2044 if (res == NULL)
2045 goto fail;
2046 Py_DECREF(res);
2048 /* The strategy of seek() is to go back to the safe start point
2049 * and replay the effect of read(chars_to_skip) from there.
2051 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2052 goto fail;
2054 /* Seek back to the safe start point. */
2055 posobj = PyLong_FromOff_t(cookie.start_pos);
2056 if (posobj == NULL)
2057 goto fail;
2058 res = PyObject_CallMethodObjArgs(self->buffer,
2059 _PyIO_str_seek, posobj, NULL);
2060 Py_DECREF(posobj);
2061 if (res == NULL)
2062 goto fail;
2063 Py_DECREF(res);
2065 textiowrapper_set_decoded_chars(self, NULL);
2066 Py_CLEAR(self->snapshot);
2068 /* Restore the decoder to its state from the safe start point. */
2069 if (self->decoder) {
2070 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2071 goto fail;
2074 if (cookie.chars_to_skip) {
2075 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2076 PyObject *input_chunk = PyObject_CallMethod(
2077 self->buffer, "read", "i", cookie.bytes_to_feed);
2078 PyObject *decoded;
2080 if (input_chunk == NULL)
2081 goto fail;
2083 assert (PyBytes_Check(input_chunk));
2085 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2086 if (self->snapshot == NULL) {
2087 Py_DECREF(input_chunk);
2088 goto fail;
2091 decoded = PyObject_CallMethod(self->decoder, "decode",
2092 "Oi", input_chunk, (int)cookie.need_eof);
2094 if (decoded == NULL)
2095 goto fail;
2097 textiowrapper_set_decoded_chars(self, decoded);
2099 /* Skip chars_to_skip of the decoded characters. */
2100 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2101 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2102 goto fail;
2104 self->decoded_chars_used = cookie.chars_to_skip;
2106 else {
2107 self->snapshot = Py_BuildValue("is", cookie.dec_flags, "");
2108 if (self->snapshot == NULL)
2109 goto fail;
2112 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2113 if (self->encoder) {
2114 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2115 goto fail;
2117 return cookieObj;
2118 fail:
2119 Py_XDECREF(cookieObj);
2120 return NULL;
2124 static PyObject *
2125 textiowrapper_tell(textio *self, PyObject *args)
2127 PyObject *res;
2128 PyObject *posobj = NULL;
2129 cookie_type cookie = {0,0,0,0,0};
2130 PyObject *next_input;
2131 Py_ssize_t chars_to_skip, chars_decoded;
2132 PyObject *saved_state = NULL;
2133 char *input, *input_end;
2135 CHECK_INITIALIZED(self);
2136 CHECK_CLOSED(self);
2138 if (!self->seekable) {
2139 PyErr_SetString(PyExc_IOError,
2140 "underlying stream is not seekable");
2141 goto fail;
2143 if (!self->telling) {
2144 PyErr_SetString(PyExc_IOError,
2145 "telling position disabled by next() call");
2146 goto fail;
2149 if (_textiowrapper_writeflush(self) < 0)
2150 return NULL;
2151 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2152 if (res == NULL)
2153 goto fail;
2154 Py_DECREF(res);
2156 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2157 if (posobj == NULL)
2158 goto fail;
2160 if (self->decoder == NULL || self->snapshot == NULL) {
2161 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2162 return posobj;
2165 #if defined(HAVE_LARGEFILE_SUPPORT)
2166 cookie.start_pos = PyLong_AsLongLong(posobj);
2167 #else
2168 cookie.start_pos = PyLong_AsLong(posobj);
2169 #endif
2170 if (PyErr_Occurred())
2171 goto fail;
2173 /* Skip backward to the snapshot point (see _read_chunk). */
2174 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2175 goto fail;
2177 assert (PyBytes_Check(next_input));
2179 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2181 /* How many decoded characters have been used up since the snapshot? */
2182 if (self->decoded_chars_used == 0) {
2183 /* We haven't moved from the snapshot point. */
2184 Py_DECREF(posobj);
2185 return textiowrapper_build_cookie(&cookie);
2188 chars_to_skip = self->decoded_chars_used;
2190 /* Starting from the snapshot position, we will walk the decoder
2191 * forward until it gives us enough decoded characters.
2193 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2194 _PyIO_str_getstate, NULL);
2195 if (saved_state == NULL)
2196 goto fail;
2198 /* Note our initial start point. */
2199 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2200 goto fail;
2202 /* Feed the decoder one byte at a time. As we go, note the
2203 * nearest "safe start point" before the current location
2204 * (a point where the decoder has nothing buffered, so seek()
2205 * can safely start from there and advance to this location).
2207 chars_decoded = 0;
2208 input = PyBytes_AS_STRING(next_input);
2209 input_end = input + PyBytes_GET_SIZE(next_input);
2210 while (input < input_end) {
2211 PyObject *state;
2212 char *dec_buffer;
2213 Py_ssize_t dec_buffer_len;
2214 int dec_flags;
2216 PyObject *decoded = PyObject_CallMethod(
2217 self->decoder, "decode", "s#", input, 1);
2218 if (decoded == NULL)
2219 goto fail;
2220 assert (PyUnicode_Check(decoded));
2221 chars_decoded += PyUnicode_GET_SIZE(decoded);
2222 Py_DECREF(decoded);
2224 cookie.bytes_to_feed += 1;
2226 state = PyObject_CallMethodObjArgs(self->decoder,
2227 _PyIO_str_getstate, NULL);
2228 if (state == NULL)
2229 goto fail;
2230 if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2231 Py_DECREF(state);
2232 goto fail;
2234 Py_DECREF(state);
2236 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2237 /* Decoder buffer is empty, so this is a safe start point. */
2238 cookie.start_pos += cookie.bytes_to_feed;
2239 chars_to_skip -= chars_decoded;
2240 cookie.dec_flags = dec_flags;
2241 cookie.bytes_to_feed = 0;
2242 chars_decoded = 0;
2244 if (chars_decoded >= chars_to_skip)
2245 break;
2246 input++;
2248 if (input == input_end) {
2249 /* We didn't get enough decoded data; signal EOF to get more. */
2250 PyObject *decoded = PyObject_CallMethod(
2251 self->decoder, "decode", "si", "", /* final = */ 1);
2252 if (decoded == NULL)
2253 goto fail;
2254 assert (PyUnicode_Check(decoded));
2255 chars_decoded += PyUnicode_GET_SIZE(decoded);
2256 Py_DECREF(decoded);
2257 cookie.need_eof = 1;
2259 if (chars_decoded < chars_to_skip) {
2260 PyErr_SetString(PyExc_IOError,
2261 "can't reconstruct logical file position");
2262 goto fail;
2266 /* finally */
2267 Py_XDECREF(posobj);
2268 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2269 Py_DECREF(saved_state);
2270 if (res == NULL)
2271 return NULL;
2272 Py_DECREF(res);
2274 /* The returned cookie corresponds to the last safe start point. */
2275 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2276 return textiowrapper_build_cookie(&cookie);
2278 fail:
2279 Py_XDECREF(posobj);
2280 if (saved_state) {
2281 PyObject *type, *value, *traceback;
2282 PyErr_Fetch(&type, &value, &traceback);
2284 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2285 Py_DECREF(saved_state);
2286 if (res == NULL)
2287 return NULL;
2288 Py_DECREF(res);
2290 PyErr_Restore(type, value, traceback);
2292 return NULL;
2295 static PyObject *
2296 textiowrapper_truncate(textio *self, PyObject *args)
2298 PyObject *pos = Py_None;
2299 PyObject *res;
2301 CHECK_INITIALIZED(self)
2302 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2303 return NULL;
2306 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2307 if (res == NULL)
2308 return NULL;
2309 Py_DECREF(res);
2311 if (pos != Py_None) {
2312 res = PyObject_CallMethodObjArgs((PyObject *) self,
2313 _PyIO_str_seek, pos, NULL);
2314 if (res == NULL)
2315 return NULL;
2316 Py_DECREF(res);
2319 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL);
2322 static PyObject *
2323 textiowrapper_repr(textio *self)
2325 PyObject *nameobj, *res;
2326 PyObject *namerepr = NULL, *encrepr = NULL;
2328 CHECK_INITIALIZED(self);
2330 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2331 if (nameobj == NULL) {
2332 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2333 PyErr_Clear();
2334 else
2335 goto error;
2336 encrepr = PyObject_Repr(self->encoding);
2337 res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2338 PyString_AS_STRING(encrepr));
2340 else {
2341 encrepr = PyObject_Repr(self->encoding);
2342 namerepr = PyObject_Repr(nameobj);
2343 res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2344 PyString_AS_STRING(namerepr),
2345 PyString_AS_STRING(encrepr));
2346 Py_DECREF(nameobj);
2348 Py_XDECREF(namerepr);
2349 Py_XDECREF(encrepr);
2350 return res;
2352 error:
2353 Py_XDECREF(namerepr);
2354 Py_XDECREF(encrepr);
2355 return NULL;
2359 /* Inquiries */
2361 static PyObject *
2362 textiowrapper_fileno(textio *self, PyObject *args)
2364 CHECK_INITIALIZED(self);
2365 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2368 static PyObject *
2369 textiowrapper_seekable(textio *self, PyObject *args)
2371 CHECK_INITIALIZED(self);
2372 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2375 static PyObject *
2376 textiowrapper_readable(textio *self, PyObject *args)
2378 CHECK_INITIALIZED(self);
2379 return PyObject_CallMethod(self->buffer, "readable", NULL);
2382 static PyObject *
2383 textiowrapper_writable(textio *self, PyObject *args)
2385 CHECK_INITIALIZED(self);
2386 return PyObject_CallMethod(self->buffer, "writable", NULL);
2389 static PyObject *
2390 textiowrapper_isatty(textio *self, PyObject *args)
2392 CHECK_INITIALIZED(self);
2393 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2396 static PyObject *
2397 textiowrapper_flush(textio *self, PyObject *args)
2399 CHECK_INITIALIZED(self);
2400 CHECK_CLOSED(self);
2401 self->telling = self->seekable;
2402 if (_textiowrapper_writeflush(self) < 0)
2403 return NULL;
2404 return PyObject_CallMethod(self->buffer, "flush", NULL);
2407 static PyObject *
2408 textiowrapper_close(textio *self, PyObject *args)
2410 PyObject *res;
2411 CHECK_INITIALIZED(self);
2412 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2413 if (res == NULL) {
2414 /* If flush() fails, just give up */
2415 PyErr_Clear();
2417 else
2418 Py_DECREF(res);
2420 return PyObject_CallMethod(self->buffer, "close", NULL);
2423 static PyObject *
2424 textiowrapper_iternext(textio *self)
2426 PyObject *line;
2428 CHECK_INITIALIZED(self);
2430 self->telling = 0;
2431 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2432 /* Skip method call overhead for speed */
2433 line = _textiowrapper_readline(self, -1);
2435 else {
2436 line = PyObject_CallMethodObjArgs((PyObject *)self,
2437 _PyIO_str_readline, NULL);
2438 if (line && !PyUnicode_Check(line)) {
2439 PyErr_Format(PyExc_IOError,
2440 "readline() should have returned an str object, "
2441 "not '%.200s'", Py_TYPE(line)->tp_name);
2442 Py_DECREF(line);
2443 return NULL;
2447 if (line == NULL)
2448 return NULL;
2450 if (PyUnicode_GET_SIZE(line) == 0) {
2451 /* Reached EOF or would have blocked */
2452 Py_DECREF(line);
2453 Py_CLEAR(self->snapshot);
2454 self->telling = self->seekable;
2455 return NULL;
2458 return line;
2461 static PyObject *
2462 textiowrapper_name_get(textio *self, void *context)
2464 CHECK_INITIALIZED(self);
2465 return PyObject_GetAttrString(self->buffer, "name");
2468 static PyObject *
2469 textiowrapper_closed_get(textio *self, void *context)
2471 CHECK_INITIALIZED(self);
2472 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2475 static PyObject *
2476 textiowrapper_newlines_get(textio *self, void *context)
2478 PyObject *res;
2479 CHECK_INITIALIZED(self);
2480 if (self->decoder == NULL)
2481 Py_RETURN_NONE;
2482 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2483 if (res == NULL) {
2484 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2485 PyErr_Clear();
2486 Py_RETURN_NONE;
2488 else {
2489 return NULL;
2492 return res;
2495 static PyObject *
2496 textiowrapper_errors_get(textio *self, void *context)
2498 CHECK_INITIALIZED(self);
2499 Py_INCREF(self->errors);
2500 return self->errors;
2503 static PyObject *
2504 textiowrapper_chunk_size_get(textio *self, void *context)
2506 CHECK_INITIALIZED(self);
2507 return PyLong_FromSsize_t(self->chunk_size);
2510 static int
2511 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2513 Py_ssize_t n;
2514 CHECK_INITIALIZED_INT(self);
2515 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2516 if (n == -1 && PyErr_Occurred())
2517 return -1;
2518 if (n <= 0) {
2519 PyErr_SetString(PyExc_ValueError,
2520 "a strictly positive integer is required");
2521 return -1;
2523 self->chunk_size = n;
2524 return 0;
2527 static PyMethodDef textiowrapper_methods[] = {
2528 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2529 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2530 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2531 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2532 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2533 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
2535 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2536 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2537 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2538 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2539 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
2541 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2542 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2543 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
2544 {NULL, NULL}
2547 static PyMemberDef textiowrapper_members[] = {
2548 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2549 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2550 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
2551 {NULL}
2554 static PyGetSetDef textiowrapper_getset[] = {
2555 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2556 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
2557 /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2559 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2560 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2561 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2562 (setter)textiowrapper_chunk_size_set, NULL},
2563 {NULL}
2566 PyTypeObject PyTextIOWrapper_Type = {
2567 PyVarObject_HEAD_INIT(NULL, 0)
2568 "_io.TextIOWrapper", /*tp_name*/
2569 sizeof(textio), /*tp_basicsize*/
2570 0, /*tp_itemsize*/
2571 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
2572 0, /*tp_print*/
2573 0, /*tp_getattr*/
2574 0, /*tp_setattr*/
2575 0, /*tp_compare */
2576 (reprfunc)textiowrapper_repr,/*tp_repr*/
2577 0, /*tp_as_number*/
2578 0, /*tp_as_sequence*/
2579 0, /*tp_as_mapping*/
2580 0, /*tp_hash */
2581 0, /*tp_call*/
2582 0, /*tp_str*/
2583 0, /*tp_getattro*/
2584 0, /*tp_setattro*/
2585 0, /*tp_as_buffer*/
2586 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2587 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
2588 textiowrapper_doc, /* tp_doc */
2589 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2590 (inquiry)textiowrapper_clear, /* tp_clear */
2591 0, /* tp_richcompare */
2592 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
2593 0, /* tp_iter */
2594 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2595 textiowrapper_methods, /* tp_methods */
2596 textiowrapper_members, /* tp_members */
2597 textiowrapper_getset, /* tp_getset */
2598 0, /* tp_base */
2599 0, /* tp_dict */
2600 0, /* tp_descr_get */
2601 0, /* tp_descr_set */
2602 offsetof(textio, dict), /*tp_dictoffset*/
2603 (initproc)textiowrapper_init, /* tp_init */
2604 0, /* tp_alloc */
2605 PyType_GenericNew, /* tp_new */