2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
9 #define PY_SSIZE_T_CLEAN
11 #include "structmember.h"
12 #include "_iomodule.h"
16 PyDoc_STRVAR(TextIOBase_doc
,
17 "Base class for text I/O.\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
/* Helper shared by the TextIOBase stubs: sets the pending Python exception
   to IO_STATE->unsupported_operation, using `message` as its text.
   NOTE(review): the return statement is not visible in this listing; the
   stubs below return this helper's result directly. */
25 _unsupported(const char *message
)
27 PyErr_SetString(IO_STATE
->unsupported_operation
, message
);
31 PyDoc_STRVAR(TextIOBase_read_doc
,
32 "Read at most n characters from stream.\n"
34 "Read from underlying buffer until we have n characters or we hit EOF.\n"
35 "If n is negative or omitted, read until EOF.\n"
/* Base-class read(): not implemented at this level.  The visible body only
   reports the operation as unsupported via _unsupported("read"); concrete
   subclasses are expected to override it. */
39 TextIOBase_read(PyObject
*self
, PyObject
*args
)
41 return _unsupported("read");
44 PyDoc_STRVAR(TextIOBase_readline_doc
,
45 "Read until newline or EOF.\n"
47 "Returns an empty string if EOF is hit immediately.\n"
/* Base-class readline(): not implemented at this level.  The visible body
   only reports the operation as unsupported via _unsupported("readline"). */
51 TextIOBase_readline(PyObject
*self
, PyObject
*args
)
53 return _unsupported("readline");
56 PyDoc_STRVAR(TextIOBase_write_doc
,
57 "Write string to stream.\n"
58 "Returns the number of characters written (which is always equal to\n"
59 "the length of the string).\n"
/* Base-class write(): not implemented at this level.  The visible body only
   reports the operation as unsupported via _unsupported("write"). */
63 TextIOBase_write(PyObject
*self
, PyObject
*args
)
65 return _unsupported("write");
68 PyDoc_STRVAR(TextIOBase_encoding_doc
,
69 "Encoding of the text stream.\n"
71 "Subclasses should override.\n"
75 TextIOBase_encoding_get(PyObject
*self
, void *context
)
80 PyDoc_STRVAR(TextIOBase_newlines_doc
,
81 "Line endings translated so far.\n"
83 "Only line endings translated during reading are considered.\n"
85 "Subclasses should override.\n"
89 TextIOBase_newlines_get(PyObject
*self
, void *context
)
95 static PyMethodDef TextIOBase_methods
[] = {
96 {"read", TextIOBase_read
, METH_VARARGS
, TextIOBase_read_doc
},
97 {"readline", TextIOBase_readline
, METH_VARARGS
, TextIOBase_readline_doc
},
98 {"write", TextIOBase_write
, METH_VARARGS
, TextIOBase_write_doc
},
102 static PyGetSetDef TextIOBase_getset
[] = {
103 {"encoding", (getter
)TextIOBase_encoding_get
, NULL
, TextIOBase_encoding_doc
},
104 {"newlines", (getter
)TextIOBase_newlines_get
, NULL
, TextIOBase_newlines_doc
},
108 PyTypeObject PyTextIOBase_Type
= {
109 PyVarObject_HEAD_INIT(NULL
, 0)
110 "_io._TextIOBase", /*tp_name*/
120 0, /*tp_as_sequence*/
128 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /*tp_flags*/
129 TextIOBase_doc
, /* tp_doc */
132 0, /* tp_richcompare */
133 0, /* tp_weaklistoffset */
136 TextIOBase_methods
, /* tp_methods */
138 TextIOBase_getset
, /* tp_getset */
139 &PyIOBase_Type
, /* tp_base */
141 0, /* tp_descr_get */
142 0, /* tp_descr_set */
143 0, /* tp_dictoffset */
150 /* IncrementalNewlineDecoder */
152 PyDoc_STRVAR(IncrementalNewlineDecoder_doc
,
153 "Codec used when reading a file in universal newlines mode. It wraps\n"
154 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
155 "records the types of newlines encountered. When used with\n"
156 "translate=False, it ensures that the newline sequence is returned in\n"
157 "one piece. When used with decoder=None, it expects unicode strings as\n"
158 "decode input and translates newlines without first invoking an external\n"
168 unsigned int seennl
:3;
169 } PyNewLineDecoderObject
;
172 IncrementalNewlineDecoder_init(PyNewLineDecoderObject
*self
,
173 PyObject
*args
, PyObject
*kwds
)
177 PyObject
*errors
= NULL
;
178 char *kwlist
[] = {"decoder", "translate", "errors", NULL
};
180 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "Oi|O:IncrementalNewlineDecoder",
181 kwlist
, &decoder
, &translate
, &errors
))
184 self
->decoder
= decoder
;
187 if (errors
== NULL
) {
188 self
->errors
= PyUnicode_FromString("strict");
189 if (self
->errors
== NULL
)
194 self
->errors
= errors
;
197 self
->translate
= translate
;
/* Destructor for IncrementalNewlineDecoder objects (installed as tp_dealloc
   in PyIncrementalNewlineDecoder_Type). */
205 IncrementalNewlineDecoder_dealloc(PyNewLineDecoderObject
*self
)
/* Drop the references held on the wrapped decoder and the errors object. */
207 Py_CLEAR(self
->decoder
);
208 Py_CLEAR(self
->errors
);
/* Release the object's memory through the type's own tp_free slot. */
209 Py_TYPE(self
)->tp_free((PyObject
*)self
);
215 #define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
218 _PyIncrementalNewlineDecoder_decode(PyObject
*_self
,
219 PyObject
*input
, int final
)
222 Py_ssize_t output_len
;
223 PyNewLineDecoderObject
*self
= (PyNewLineDecoderObject
*) _self
;
225 if (self
->decoder
== NULL
) {
226 PyErr_SetString(PyExc_ValueError
,
227 "IncrementalNewlineDecoder.__init__ not called");
231 /* decode input (with the possible \r from a previous pass) */
232 if (self
->decoder
!= Py_None
) {
233 output
= PyObject_CallMethodObjArgs(self
->decoder
,
234 _PyIO_str_decode
, input
, final
? Py_True
: Py_False
, NULL
);
244 if (!PyUnicode_Check(output
)) {
245 PyErr_SetString(PyExc_TypeError
,
246 "decoder should return a string result");
250 output_len
= PyUnicode_GET_SIZE(output
);
251 if (self
->pendingcr
&& (final
|| output_len
> 0)) {
253 PyObject
*modified
= PyUnicode_FromUnicode(NULL
, output_len
+ 1);
254 if (modified
== NULL
)
256 out
= PyUnicode_AS_UNICODE(modified
);
258 memcpy(out
+ 1, PyUnicode_AS_UNICODE(output
),
259 output_len
* sizeof(Py_UNICODE
));
266 /* retain last \r even when not translating data:
267 * then readline() is sure to get \r\n in one pass
271 && PyUnicode_AS_UNICODE(output
)[output_len
- 1] == '\r') {
273 if (Py_REFCNT(output
) == 1) {
274 if (PyUnicode_Resize(&output
, output_len
- 1) < 0)
278 PyObject
*modified
= PyUnicode_FromUnicode(
279 PyUnicode_AS_UNICODE(output
),
281 if (modified
== NULL
)
290 /* Record which newlines are read and do newline translation if desired,
295 int seennl
= self
->seennl
;
298 in_str
= PyUnicode_AS_UNICODE(output
);
299 len
= PyUnicode_GET_SIZE(output
);
304 /* If, up to now, newlines are consistently \n, do a quick check
305 for the \r *byte* with the libc's optimized memchr.
307 if (seennl
== SEEN_LF
|| seennl
== 0) {
308 only_lf
= (memchr(in_str
, '\r', len
* sizeof(Py_UNICODE
)) == NULL
);
312 /* If not already seen, quick scan for a possible "\n" character.
313 (there's nothing else to be done, even when in translation mode)
316 memchr(in_str
, '\n', len
* sizeof(Py_UNICODE
)) != NULL
) {
322 /* Fast loop for non-control characters */
334 /* Finished: we have scanned for newlines, and none of them
337 else if (!self
->translate
) {
339 /* We have already seen all newline types, no need to scan again */
340 if (seennl
== SEEN_ALL
)
346 /* Fast loop for non-control characters */
352 else if (c
== '\r') {
362 if (seennl
== SEEN_ALL
)
369 PyObject
*translated
= NULL
;
371 Py_UNICODE
*in
, *out
, *end
;
372 if (Py_REFCNT(output
) != 1) {
373 /* We could try to optimize this so that we only do a copy
374 when there is something to translate. On the other hand,
375 most decoders should only output non-shared strings, i.e.
376 translation is done in place. */
377 translated
= PyUnicode_FromUnicode(NULL
, len
);
378 if (translated
== NULL
)
380 assert(Py_REFCNT(translated
) == 1);
381 memcpy(PyUnicode_AS_UNICODE(translated
),
382 PyUnicode_AS_UNICODE(output
),
383 len
* sizeof(Py_UNICODE
));
388 out_str
= PyUnicode_AS_UNICODE(translated
);
394 /* Fast loop for non-control characters */
395 while ((c
= *in
++) > '\r')
416 if (translated
!= output
) {
420 if (out
- out_str
!= len
) {
421 if (PyUnicode_Resize(&output
, out
- out_str
) < 0)
425 self
->seennl
|= seennl
;
/* Python-level decode(input, final) method.
   Parses the arguments and delegates the actual work to
   _PyIncrementalNewlineDecoder_decode().
     input - required object (format "O"), passed through unchanged.
     final - optional C int (format "i" after the "|" marker).
   NOTE(review): the declarations of `input`/`final`, including the default
   used when `final` is omitted, are not visible in this listing. */
436 IncrementalNewlineDecoder_decode(PyNewLineDecoderObject
*self
,
437 PyObject
*args
, PyObject
*kwds
)
439 char *kwlist
[] = {"input", "final", NULL
};
443 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "O|i:IncrementalNewlineDecoder",
444 kwlist
, &input
, &final
))
446 return _PyIncrementalNewlineDecoder_decode((PyObject
*) self
, input
, final
);
450 IncrementalNewlineDecoder_getstate(PyNewLineDecoderObject
*self
, PyObject
*args
)
453 unsigned PY_LONG_LONG flag
;
455 if (self
->decoder
!= Py_None
) {
456 PyObject
*state
= PyObject_CallMethodObjArgs(self
->decoder
,
457 _PyIO_str_getstate
, NULL
);
460 if (!PyArg_Parse(state
, "(OK)", &buffer
, &flag
)) {
468 buffer
= PyBytes_FromString("");
474 return Py_BuildValue("NK", buffer
, flag
);
/* Python-level setstate(state) method (registered with METH_O).
   `state` must unpack as a 2-tuple (buffer, flag), where flag is converted
   to an unsigned long long by the "K" format unit. */
478 IncrementalNewlineDecoder_setstate(PyNewLineDecoderObject
*self
, PyObject
*state
)
481 unsigned PY_LONG_LONG flag
;
483 if (!PyArg_Parse(state
, "(OK)", &buffer
, &flag
))
/* The lowest bit of flag carries this object's own pending-\r marker. */
486 self
->pendingcr
= (int) flag
& 1;
/* When a real decoder is wrapped, hand (buffer, flag) on to its setstate().
   NOTE(review): any adjustment made to `flag` between the assignment above
   and this call is not visible in this listing. */
489 if (self
->decoder
!= Py_None
)
490 return PyObject_CallMethod(self
->decoder
,
491 "setstate", "((OK))", buffer
, flag
);
/* Python-level reset() method (registered with METH_NOARGS).
   When a real decoder is wrapped (decoder is not None), the result of
   calling its reset method is returned.
   NOTE(review): the statements that reset this object's own fields, and the
   return used when decoder is None, are not visible in this listing. */
497 IncrementalNewlineDecoder_reset(PyNewLineDecoderObject
*self
, PyObject
*args
)
501 if (self
->decoder
!= Py_None
)
502 return PyObject_CallMethodObjArgs(self
->decoder
, _PyIO_str_reset
, NULL
);
/* Getter for the `newlines` attribute.
   Translates the self->seennl bitmask (built from SEEN_CR, SEEN_LF and
   SEEN_CRLF) into a Python value: a single string when one kind of line
   ending is recorded, or a group of strings built with Py_BuildValue when
   several kinds are recorded. */
508 IncrementalNewlineDecoder_newlines_get(PyNewLineDecoderObject
*self
, void *context
)
510 switch (self
->seennl
) {
/* NOTE(review): the case labels for the three single-kind returns below are
   not visible in this listing; presumably SEEN_CR, SEEN_LF and SEEN_CRLF. */
512 return PyUnicode_FromString("\r");
514 return PyUnicode_FromString("\n");
516 return PyUnicode_FromString("\r\n");
/* Combinations: one string per recorded kind, always ordered \r, \n, \r\n. */
517 case SEEN_CR
| SEEN_LF
:
518 return Py_BuildValue("ss", "\r", "\n");
519 case SEEN_CR
| SEEN_CRLF
:
520 return Py_BuildValue("ss", "\r", "\r\n");
521 case SEEN_LF
| SEEN_CRLF
:
522 return Py_BuildValue("ss", "\n", "\r\n");
523 case SEEN_CR
| SEEN_LF
| SEEN_CRLF
:
524 return Py_BuildValue("sss", "\r", "\n", "\r\n");
532 static PyMethodDef IncrementalNewlineDecoder_methods
[] = {
533 {"decode", (PyCFunction
)IncrementalNewlineDecoder_decode
, METH_VARARGS
|METH_KEYWORDS
},
534 {"getstate", (PyCFunction
)IncrementalNewlineDecoder_getstate
, METH_NOARGS
},
535 {"setstate", (PyCFunction
)IncrementalNewlineDecoder_setstate
, METH_O
},
536 {"reset", (PyCFunction
)IncrementalNewlineDecoder_reset
, METH_NOARGS
},
540 static PyGetSetDef IncrementalNewlineDecoder_getset
[] = {
541 {"newlines", (getter
)IncrementalNewlineDecoder_newlines_get
, NULL
, NULL
},
545 PyTypeObject PyIncrementalNewlineDecoder_Type
= {
546 PyVarObject_HEAD_INIT(NULL
, 0)
547 "_io.IncrementalNewlineDecoder", /*tp_name*/
548 sizeof(PyNewLineDecoderObject
), /*tp_basicsize*/
550 (destructor
)IncrementalNewlineDecoder_dealloc
, /*tp_dealloc*/
557 0, /*tp_as_sequence*/
565 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /*tp_flags*/
566 IncrementalNewlineDecoder_doc
, /* tp_doc */
569 0, /* tp_richcompare */
570 0, /*tp_weaklistoffset*/
573 IncrementalNewlineDecoder_methods
, /* tp_methods */
575 IncrementalNewlineDecoder_getset
, /* tp_getset */
578 0, /* tp_descr_get */
579 0, /* tp_descr_set */
580 0, /* tp_dictoffset */
581 (initproc
)IncrementalNewlineDecoder_init
, /* tp_init */
583 PyType_GenericNew
, /* tp_new */
589 PyDoc_STRVAR(TextIOWrapper_doc
,
590 "Character and line based layer over a BufferedIOBase object, buffer.\n"
592 "encoding gives the name of the encoding that the stream will be\n"
593 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
595 "errors determines the strictness of encoding and decoding (see the\n"
596 "codecs.register) and defaults to \"strict\".\n"
598 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
599 "handling of line endings. If it is None, universal newlines is\n"
600 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
601 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
602 "caller. Conversely, on output, '\\n' is translated to the system\n"
603 "default line seperator, os.linesep. If newline is any other of its\n"
604 "legal values, that newline becomes the newline when the file is read\n"
605 "and it is returned untranslated. On output, '\\n' is converted to the\n"
608 "If line_buffering is True, a call to flush is implied when a call to\n"
609 "write contains a newline character."
613 (*encodefunc_t
)(PyObject
*, PyObject
*);
618 int ok
; /* initialized? */
619 Py_ssize_t chunk_size
;
626 const char *writenl
; /* utf-8 encoded, NULL stands for \n */
633 /* Specialized encoding func (see below) */
634 encodefunc_t encodefunc
;
636 /* Reads and writes are internally buffered in order to speed things up.
637 However, any read will first flush the write buffer if it isn't empty.
639 Please also note that text to be written is first encoded before being
640 buffered. This is necessary so that encoding errors are immediately
641 reported to the caller, but it unfortunately means that the
642 IncrementalEncoder (whose encode() method is always written in Python)
643 becomes a bottleneck for small writes.
645 PyObject
*decoded_chars
; /* buffer for text returned from decoder */
646 Py_ssize_t decoded_chars_used
; /* offset into _decoded_chars for read() */
647 PyObject
*pending_bytes
; /* list of bytes objects waiting to be
649 Py_ssize_t pending_bytes_count
;
651 /* snapshot is either None, or a tuple (dec_flags, next_input) where
652 * dec_flags is the second (integer) item of the decoder state and
653 * next_input is the chunk of input bytes that comes next after the
654 * snapshot point. We use this to reconstruct decoder states in tell().
657 /* Cache raw object if it's a FileIO object */
660 PyObject
*weakreflist
;
662 } PyTextIOWrapperObject
;
665 /* A couple of specialized cases in order to bypass the slow incremental
666 encoding methods for the most popular encodings. */
/* Fast path for the "ascii" codec: encodes the unicode object `text`
   directly with PyUnicode_EncodeASCII.  The error-handler name is taken
   from self->errors, which is read here as a bytes object's C string. */
669 ascii_encode(PyTextIOWrapperObject
*self
, PyObject
*text
)
671 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text
),
672 PyUnicode_GET_SIZE(text
),
673 PyBytes_AS_STRING(self
->errors
));
/* Fast path for "utf-16-be": encodes `text` with PyUnicode_EncodeUTF16,
   passing byteorder 1 (big-endian) and the error-handler name stored in
   self->errors. */
677 utf16be_encode(PyTextIOWrapperObject
*self
, PyObject
*text
)
679 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text
),
680 PyUnicode_GET_SIZE(text
),
681 PyBytes_AS_STRING(self
->errors
), 1);
/* Fast path for "utf-16-le": encodes `text` with PyUnicode_EncodeUTF16,
   passing byteorder -1 (little-endian) and the error-handler name stored in
   self->errors. */
685 utf16le_encode(PyTextIOWrapperObject
*self
, PyObject
*text
)
687 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text
),
688 PyUnicode_GET_SIZE(text
),
689 PyBytes_AS_STRING(self
->errors
), -1);
/* Fast path for plain "utf-16".  The first call encodes with byteorder 0,
   then replaces self->encodefunc with the fixed-endianness encoder so that
   later writes go straight to that variant. */
693 utf16_encode(PyTextIOWrapperObject
*self
, PyObject
*text
)
696 res
= PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text
),
697 PyUnicode_GET_SIZE(text
),
698 PyBytes_AS_STRING(self
->errors
), 0);
701 /* Next writes will skip the BOM and use native byte ordering */
/* Pick the encoder matching the platform byte order.
   NOTE(review): the #else/#endif separating the two assignments below are
   not visible in this listing. */
702 #if defined(WORDS_BIGENDIAN)
703 self
->encodefunc
= (encodefunc_t
) utf16be_encode
;
705 self
->encodefunc
= (encodefunc_t
) utf16le_encode
;
/* Fast path for "utf-8": encodes `text` directly with PyUnicode_EncodeUTF8,
   using the error-handler name stored in self->errors. */
712 utf8_encode(PyTextIOWrapperObject
*self
, PyObject
*text
)
714 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text
),
715 PyUnicode_GET_SIZE(text
),
716 PyBytes_AS_STRING(self
->errors
));
/* Fast path for Latin-1 (listed in the encodefuncs table under the
   normalized name "iso8859-1"): encodes `text` directly with
   PyUnicode_EncodeLatin1, using the error-handler name in self->errors. */
720 latin1_encode(PyTextIOWrapperObject
*self
, PyObject
*text
)
722 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text
),
723 PyUnicode_GET_SIZE(text
),
724 PyBytes_AS_STRING(self
->errors
));
727 /* Map normalized encoding names onto the specialized encoding funcs */
731 encodefunc_t encodefunc
;
734 static encodefuncentry encodefuncs
[] = {
735 {"ascii", (encodefunc_t
) ascii_encode
},
736 {"iso8859-1", (encodefunc_t
) latin1_encode
},
737 {"utf-16-be", (encodefunc_t
) utf16be_encode
},
738 {"utf-16-le", (encodefunc_t
) utf16le_encode
},
739 {"utf-16", (encodefunc_t
) utf16_encode
},
740 {"utf-8", (encodefunc_t
) utf8_encode
},
746 TextIOWrapper_init(PyTextIOWrapperObject
*self
, PyObject
*args
, PyObject
*kwds
)
748 char *kwlist
[] = {"buffer", "encoding", "errors",
749 "newline", "line_buffering",
751 PyObject
*buffer
, *raw
;
752 char *encoding
= NULL
;
754 char *newline
= NULL
;
755 int line_buffering
= 0;
756 _PyIO_State
*state
= IO_STATE
;
762 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "O|zzzi:fileio",
763 kwlist
, &buffer
, &encoding
, &errors
,
764 &newline
, &line_buffering
))
767 if (newline
&& newline
[0] != '\0'
768 && !(newline
[0] == '\n' && newline
[1] == '\0')
769 && !(newline
[0] == '\r' && newline
[1] == '\0')
770 && !(newline
[0] == '\r' && newline
[1] == '\n' && newline
[2] == '\0')) {
771 PyErr_Format(PyExc_ValueError
,
772 "illegal newline value: %s", newline
);
776 Py_CLEAR(self
->buffer
);
777 Py_CLEAR(self
->encoding
);
778 Py_CLEAR(self
->encoder
);
779 Py_CLEAR(self
->decoder
);
780 Py_CLEAR(self
->readnl
);
781 Py_CLEAR(self
->decoded_chars
);
782 Py_CLEAR(self
->pending_bytes
);
783 Py_CLEAR(self
->snapshot
);
784 Py_CLEAR(self
->errors
);
786 self
->decoded_chars_used
= 0;
787 self
->pending_bytes_count
= 0;
788 self
->encodefunc
= NULL
;
790 if (encoding
== NULL
) {
791 /* Try os.device_encoding(fileno) */
793 fileno
= PyObject_CallMethod(buffer
, "fileno", NULL
);
794 /* Ignore only AttributeError and UnsupportedOperation */
795 if (fileno
== NULL
) {
796 if (PyErr_ExceptionMatches(PyExc_AttributeError
) ||
797 PyErr_ExceptionMatches(state
->unsupported_operation
)) {
805 self
->encoding
= PyObject_CallMethod(state
->os_module
,
808 if (self
->encoding
== NULL
)
810 else if (!PyUnicode_Check(self
->encoding
))
811 Py_CLEAR(self
->encoding
);
814 if (encoding
== NULL
&& self
->encoding
== NULL
) {
815 if (state
->locale_module
== NULL
) {
816 state
->locale_module
= PyImport_ImportModule("locale");
817 if (state
->locale_module
== NULL
)
818 goto catch_ImportError
;
824 self
->encoding
= PyObject_CallMethod(
825 state
->locale_module
, "getpreferredencoding", NULL
);
826 if (self
->encoding
== NULL
) {
829 Importing locale can raise an ImportError because of
830 _functools, and locale.getpreferredencoding can raise a
831 ImportError if _locale is not available. These will happen
832 during module building.
834 if (PyErr_ExceptionMatches(PyExc_ImportError
)) {
836 self
->encoding
= PyUnicode_FromString("ascii");
841 else if (!PyUnicode_Check(self
->encoding
))
842 Py_CLEAR(self
->encoding
);
845 if (self
->encoding
!= NULL
)
846 encoding
= _PyUnicode_AsString(self
->encoding
);
847 else if (encoding
!= NULL
) {
848 self
->encoding
= PyUnicode_FromString(encoding
);
849 if (self
->encoding
== NULL
)
853 PyErr_SetString(PyExc_IOError
,
854 "could not determine default encoding");
859 self
->errors
= PyBytes_FromString(errors
);
860 if (self
->errors
== NULL
)
863 self
->chunk_size
= 8192;
864 self
->readuniversal
= (newline
== NULL
|| newline
[0] == '\0');
865 self
->line_buffering
= line_buffering
;
866 self
->readtranslate
= (newline
== NULL
);
868 self
->readnl
= PyUnicode_FromString(newline
);
869 if (self
->readnl
== NULL
)
872 self
->writetranslate
= (newline
== NULL
|| newline
[0] != '\0');
873 if (!self
->readuniversal
&& self
->readnl
) {
874 self
->writenl
= _PyUnicode_AsString(self
->readnl
);
875 if (!strcmp(self
->writenl
, "\n"))
876 self
->writenl
= NULL
;
880 self
->writenl
= "\r\n";
883 /* Build the decoder object */
884 res
= PyObject_CallMethod(buffer
, "readable", NULL
);
887 r
= PyObject_IsTrue(res
);
892 self
->decoder
= PyCodec_IncrementalDecoder(
894 if (self
->decoder
== NULL
)
897 if (self
->readuniversal
) {
898 PyObject
*incrementalDecoder
= PyObject_CallFunction(
899 (PyObject
*)&PyIncrementalNewlineDecoder_Type
,
900 "Oi", self
->decoder
, (int)self
->readtranslate
);
901 if (incrementalDecoder
== NULL
)
903 Py_CLEAR(self
->decoder
);
904 self
->decoder
= incrementalDecoder
;
908 /* Build the encoder object */
909 res
= PyObject_CallMethod(buffer
, "writable", NULL
);
912 r
= PyObject_IsTrue(res
);
918 self
->encoder
= PyCodec_IncrementalEncoder(
920 if (self
->encoder
== NULL
)
922 /* Get the normalized name of the codec */
923 ci
= _PyCodec_Lookup(encoding
);
926 res
= PyObject_GetAttrString(ci
, "name");
930 else if (PyUnicode_Check(res
)) {
931 encodefuncentry
*e
= encodefuncs
;
932 while (e
->name
!= NULL
) {
933 if (!PyUnicode_CompareWithASCIIString(res
, e
->name
)) {
934 self
->encodefunc
= e
->encodefunc
;
943 self
->buffer
= buffer
;
946 if (Py_TYPE(buffer
) == &PyBufferedReader_Type
||
947 Py_TYPE(buffer
) == &PyBufferedWriter_Type
||
948 Py_TYPE(buffer
) == &PyBufferedRandom_Type
) {
949 raw
= PyObject_GetAttrString(buffer
, "raw");
950 /* Cache the raw FileIO object to speed up 'closed' checks */
953 else if (Py_TYPE(raw
) == &PyFileIO_Type
)
959 res
= PyObject_CallMethod(buffer
, "seekable", NULL
);
962 self
->seekable
= self
->telling
= PyObject_IsTrue(res
);
/* Shared teardown used by both TextIOWrapper_dealloc and TextIOWrapper_clear.
   Both callers test the result with `< 0`, so a negative return signals
   failure.
   NOTE(review): the return statements themselves are not visible in this
   listing. */
973 _TextIOWrapper_clear(PyTextIOWrapperObject
*self
)
/* Only an initialized wrapper (self->ok set) is finalized through the
   IOBase machinery; a negative result is treated as an error. */
975 if (self
->ok
&& _PyIOBase_finalize((PyObject
*) self
) < 0)
/* Drop every object reference the wrapper holds. */
978 Py_CLEAR(self
->buffer
);
979 Py_CLEAR(self
->encoding
);
980 Py_CLEAR(self
->encoder
);
981 Py_CLEAR(self
->decoder
);
982 Py_CLEAR(self
->readnl
);
983 Py_CLEAR(self
->decoded_chars
);
984 Py_CLEAR(self
->pending_bytes
);
985 Py_CLEAR(self
->snapshot
);
986 Py_CLEAR(self
->errors
);
/* Destructor for TextIOWrapper objects. */
992 TextIOWrapper_dealloc(PyTextIOWrapperObject
*self
)
/* Run the shared teardown first.
   NOTE(review): what happens when it fails is not visible in this listing. */
994 if (_TextIOWrapper_clear(self
) < 0)
/* Stop the garbage collector from tracking the object while it is torn down. */
996 _PyObject_GC_UNTRACK(self
);
/* Clear weak references only if any were ever created. */
997 if (self
->weakreflist
!= NULL
)
998 PyObject_ClearWeakRefs((PyObject
*)self
);
999 Py_CLEAR(self
->dict
);
/* Release the object's memory through the type's own tp_free slot. */
1000 Py_TYPE(self
)->tp_free((PyObject
*)self
);
/* GC traversal function: reports every object reference held by the wrapper
   to `visit` (with `arg`) via Py_VISIT, so the cyclic garbage collector can
   follow them. */
1004 TextIOWrapper_traverse(PyTextIOWrapperObject
*self
, visitproc visit
, void *arg
)
1006 Py_VISIT(self
->buffer
);
1007 Py_VISIT(self
->encoding
);
1008 Py_VISIT(self
->encoder
);
1009 Py_VISIT(self
->decoder
);
1010 Py_VISIT(self
->readnl
);
1011 Py_VISIT(self
->decoded_chars
);
1012 Py_VISIT(self
->pending_bytes
);
1013 Py_VISIT(self
->snapshot
);
1014 Py_VISIT(self
->errors
);
1015 Py_VISIT(self
->raw
);
1017 Py_VISIT(self
->dict
);
/* GC clear function: runs the shared teardown in _TextIOWrapper_clear and
   then drops the instance dictionary.
   NOTE(review): the return statements are not visible in this listing. */
1022 TextIOWrapper_clear(PyTextIOWrapperObject
*self
)
1024 if (_TextIOWrapper_clear(self
) < 0)
1026 Py_CLEAR(self
->dict
);
1031 TextIOWrapper_closed_get(PyTextIOWrapperObject
*self
, void *context
);
1033 /* This macro takes some shortcuts to make the common case faster. */
1034 #define CHECK_CLOSED(self) \
1038 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1039 if (self->raw != NULL) \
1040 r = _PyFileIO_closed(self->raw); \
1042 _res = TextIOWrapper_closed_get(self, NULL); \
1045 r = PyObject_IsTrue(_res); \
1051 PyErr_SetString(PyExc_ValueError, \
1052 "I/O operation on closed file."); \
1056 else if (_PyIOBase_checkClosed((PyObject *)self, Py_True) == NULL) \
1060 #define CHECK_INITIALIZED(self) \
1061 if (self->ok <= 0) { \
1062 PyErr_SetString(PyExc_ValueError, \
1063 "I/O operation on uninitialized object"); \
1067 #define CHECK_INITIALIZED_INT(self) \
1068 if (self->ok <= 0) { \
1069 PyErr_SetString(PyExc_ValueError, \
1070 "I/O operation on uninitialized object"); \
/* Scan the `size` code units starting at `s` for `ch`.  The buffer is
   bounded by its explicit length rather than by a terminator.
   Callers in this file use the result only as a found / not-found test.
   NOTE(review): the loop body and return statements are not visible in this
   listing. */
1075 Py_LOCAL_INLINE(const Py_UNICODE
*)
1076 findchar(const Py_UNICODE
*s
, Py_ssize_t size
, Py_UNICODE ch
)
1078 /* like wcschr, but doesn't stop at NUL characters */
1079 while (size
-- > 0) {
1087 /* Flush the internal write buffer. This doesn't explicitly flush the
1088 underlying buffered object, though. */
/* Push any pending encoded bytes down to self->buffer.
   Callers test the result with `< 0`, so a negative return signals failure.
   NOTE(review): error checks and return statements are not visible in this
   listing. */
1090 _TextIOWrapper_writeflush(PyTextIOWrapperObject
*self
)
/* Nothing has been queued since the last flush. */
1094 if (self
->pending_bytes
== NULL
)
/* Concatenate the queued bytes objects into a single bytes object. */
1096 b
= _PyBytes_Join(_PyIO_empty_bytes
, self
->pending_bytes
);
/* Hand the joined data to the underlying buffer's write method. */
1099 ret
= PyObject_CallMethodObjArgs(self
->buffer
,
1100 _PyIO_str_write
, b
, NULL
);
/* Reset the queue and its byte counter. */
1105 Py_CLEAR(self
->pending_bytes
);
1106 self
->pending_bytes_count
= 0;
1111 TextIOWrapper_write(PyTextIOWrapperObject
*self
, PyObject
*args
)
1114 PyObject
*text
; /* owned reference */
1120 CHECK_INITIALIZED(self
);
1122 if (!PyArg_ParseTuple(args
, "U:write", &text
)) {
1130 textlen
= PyUnicode_GetSize(text
);
1132 if ((self
->writetranslate
&& self
->writenl
!= NULL
) || self
->line_buffering
)
1133 if (findchar(PyUnicode_AS_UNICODE(text
),
1134 PyUnicode_GET_SIZE(text
), '\n'))
1137 if (haslf
&& self
->writetranslate
&& self
->writenl
!= NULL
) {
1138 PyObject
*newtext
= PyObject_CallMethod(
1139 text
, "replace", "ss", "\n", self
->writenl
);
1141 if (newtext
== NULL
)
1146 if (self
->line_buffering
&&
1148 findchar(PyUnicode_AS_UNICODE(text
),
1149 PyUnicode_GET_SIZE(text
), '\r')))
1152 /* XXX What if we were just reading? */
1153 if (self
->encodefunc
!= NULL
)
1154 b
= (*self
->encodefunc
)((PyObject
*) self
, text
);
1156 b
= PyObject_CallMethodObjArgs(self
->encoder
,
1157 _PyIO_str_encode
, text
, NULL
);
1162 if (self
->pending_bytes
== NULL
) {
1163 self
->pending_bytes
= PyList_New(0);
1164 if (self
->pending_bytes
== NULL
) {
1168 self
->pending_bytes_count
= 0;
1170 if (PyList_Append(self
->pending_bytes
, b
) < 0) {
1174 self
->pending_bytes_count
+= PyBytes_GET_SIZE(b
);
1176 if (self
->pending_bytes_count
> self
->chunk_size
|| needflush
) {
1177 if (_TextIOWrapper_writeflush(self
) < 0)
1182 ret
= PyObject_CallMethodObjArgs(self
->buffer
, _PyIO_str_flush
, NULL
);
1188 Py_CLEAR(self
->snapshot
);
1190 if (self
->decoder
) {
1191 ret
= PyObject_CallMethod(self
->decoder
, "reset", NULL
);
1197 return PyLong_FromSsize_t(textlen
);
1200 /* Steal a reference to chars and store it in the decoded_chars buffer;
/* Replace the decoded-text buffer with `chars` and rewind the read offset.
   `chars` is stored as-is with no Py_INCREF, so ownership of the caller's
   reference passes to the wrapper.  Other code in this file passes NULL to
   discard the buffer. */
1203 TextIOWrapper_set_decoded_chars(PyTextIOWrapperObject
*self
, PyObject
*chars
)
/* Release the previous buffer, if any, before installing the new one. */
1205 Py_CLEAR(self
->decoded_chars
);
1206 self
->decoded_chars
= chars
;
/* Nothing from the new buffer has been consumed yet. */
1207 self
->decoded_chars_used
= 0;
1211 TextIOWrapper_get_decoded_chars(PyTextIOWrapperObject
*self
, Py_ssize_t n
)
1216 if (self
->decoded_chars
== NULL
)
1217 return PyUnicode_FromStringAndSize(NULL
, 0);
1219 avail
= (PyUnicode_GET_SIZE(self
->decoded_chars
)
1220 - self
->decoded_chars_used
);
1224 if (n
< 0 || n
> avail
)
1227 if (self
->decoded_chars_used
> 0 || n
< avail
) {
1228 chars
= PyUnicode_FromUnicode(
1229 PyUnicode_AS_UNICODE(self
->decoded_chars
)
1230 + self
->decoded_chars_used
, n
);
1235 chars
= self
->decoded_chars
;
1239 self
->decoded_chars_used
+= n
;
1243 /* Read and decode the next chunk of data from the BufferedReader.
1246 TextIOWrapper_read_chunk(PyTextIOWrapperObject
*self
)
1248 PyObject
*dec_buffer
= NULL
;
1249 PyObject
*dec_flags
= NULL
;
1250 PyObject
*input_chunk
= NULL
;
1251 PyObject
*decoded_chars
, *chunk_size
;
1254 /* The return value is True unless EOF was reached. The decoded string is
1255 * placed in self._decoded_chars (replacing its previous value). The
1256 * entire input chunk is sent to the decoder, though some of it may remain
1257 * buffered in the decoder, yet to be converted.
1260 if (self
->decoder
== NULL
) {
1261 PyErr_SetString(PyExc_ValueError
, "no decoder");
1265 if (self
->telling
) {
1266 /* To prepare for tell(), we need to snapshot a point in the file
1267 * where the decoder's input buffer is empty.
1270 PyObject
*state
= PyObject_CallMethodObjArgs(self
->decoder
,
1271 _PyIO_str_getstate
, NULL
);
1274 /* Given this, we know there was a valid snapshot point
1275 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1277 if (PyArg_Parse(state
, "(OO)", &dec_buffer
, &dec_flags
) < 0) {
1281 Py_INCREF(dec_buffer
);
1282 Py_INCREF(dec_flags
);
1286 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1287 chunk_size
= PyLong_FromSsize_t(self
->chunk_size
);
1288 if (chunk_size
== NULL
)
1290 input_chunk
= PyObject_CallMethodObjArgs(self
->buffer
,
1291 _PyIO_str_read1
, chunk_size
, NULL
);
1292 Py_DECREF(chunk_size
);
1293 if (input_chunk
== NULL
)
1295 assert(PyBytes_Check(input_chunk
));
1297 eof
= (PyBytes_Size(input_chunk
) == 0);
1299 if (Py_TYPE(self
->decoder
) == &PyIncrementalNewlineDecoder_Type
) {
1300 decoded_chars
= _PyIncrementalNewlineDecoder_decode(
1301 self
->decoder
, input_chunk
, eof
);
1304 decoded_chars
= PyObject_CallMethodObjArgs(self
->decoder
,
1305 _PyIO_str_decode
, input_chunk
, eof
? Py_True
: Py_False
, NULL
);
1308 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1309 if (decoded_chars
== NULL
)
1311 TextIOWrapper_set_decoded_chars(self
, decoded_chars
);
1312 if (PyUnicode_GET_SIZE(decoded_chars
) > 0)
1315 if (self
->telling
) {
1316 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1317 * next input to be decoded is dec_buffer + input_chunk.
1319 PyObject
*next_input
= PyNumber_Add(dec_buffer
, input_chunk
);
1320 if (next_input
== NULL
)
1322 assert (PyBytes_Check(next_input
));
1323 Py_DECREF(dec_buffer
);
1324 Py_CLEAR(self
->snapshot
);
1325 self
->snapshot
= Py_BuildValue("NN", dec_flags
, next_input
);
1327 Py_DECREF(input_chunk
);
1332 Py_XDECREF(dec_buffer
);
1333 Py_XDECREF(dec_flags
);
1334 Py_XDECREF(input_chunk
);
1339 TextIOWrapper_read(PyTextIOWrapperObject
*self
, PyObject
*args
)
1342 PyObject
*result
= NULL
, *chunks
= NULL
;
1344 CHECK_INITIALIZED(self
);
1346 if (!PyArg_ParseTuple(args
, "|n:read", &n
))
1351 if (self
->decoder
== NULL
) {
1352 PyErr_SetString(PyExc_IOError
, "not readable");
1356 if (_TextIOWrapper_writeflush(self
) < 0)
1360 /* Read everything */
1361 PyObject
*bytes
= PyObject_CallMethod(self
->buffer
, "read", NULL
);
1365 decoded
= PyObject_CallMethodObjArgs(self
->decoder
, _PyIO_str_decode
,
1366 bytes
, Py_True
, NULL
);
1368 if (decoded
== NULL
)
1371 result
= TextIOWrapper_get_decoded_chars(self
, -1);
1373 if (result
== NULL
) {
1378 PyUnicode_AppendAndDel(&result
, decoded
);
1382 Py_CLEAR(self
->snapshot
);
1387 Py_ssize_t remaining
= n
;
1389 result
= TextIOWrapper_get_decoded_chars(self
, n
);
1392 remaining
-= PyUnicode_GET_SIZE(result
);
1394 /* Keep reading chunks until we have n characters to return */
1395 while (remaining
> 0) {
1396 res
= TextIOWrapper_read_chunk(self
);
1399 if (res
== 0) /* EOF */
1401 if (chunks
== NULL
) {
1402 chunks
= PyList_New(0);
1406 if (PyList_Append(chunks
, result
) < 0)
1409 result
= TextIOWrapper_get_decoded_chars(self
, remaining
);
1412 remaining
-= PyUnicode_GET_SIZE(result
);
1414 if (chunks
!= NULL
) {
1415 if (result
!= NULL
&& PyList_Append(chunks
, result
) < 0)
1418 result
= PyUnicode_Join(_PyIO_empty_str
, chunks
);
1432 /* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1433 that is to the NUL character. Otherwise the function will produce
1434 incorrect results. */
1436 find_control_char(Py_UNICODE
*start
, Py_UNICODE
*end
, Py_UNICODE ch
)
1438 Py_UNICODE
*s
= start
;
1451 _PyIO_find_line_ending(
1452 int translated
, int universal
, PyObject
*readnl
,
1453 Py_UNICODE
*start
, Py_UNICODE
*end
, Py_ssize_t
*consumed
)
1455 Py_ssize_t len
= end
- start
;
1458 /* Newlines are already translated, only search for \n */
1459 Py_UNICODE
*pos
= find_control_char(start
, end
, '\n');
1461 return pos
- start
+ 1;
1467 else if (universal
) {
1468 /* Universal newline search. Find any of \r, \r\n, \n
1469 * The decoder ensures that \r\n are not split in two pieces
1471 Py_UNICODE
*s
= start
;
1474 /* Fast path for non-control chars. The loop always ends
1475 since the Py_UNICODE storage is NUL-terminated. */
1487 return s
- start
+ 1;
1494 /* Non-universal mode. */
1495 Py_ssize_t readnl_len
= PyUnicode_GET_SIZE(readnl
);
1496 Py_UNICODE
*nl
= PyUnicode_AS_UNICODE(readnl
);
1497 if (readnl_len
== 1) {
1498 Py_UNICODE
*pos
= find_control_char(start
, end
, nl
[0]);
1500 return pos
- start
+ 1;
1505 Py_UNICODE
*s
= start
;
1506 Py_UNICODE
*e
= end
- readnl_len
+ 1;
1512 Py_UNICODE
*pos
= find_control_char(s
, end
, nl
[0]);
1513 if (pos
== NULL
|| pos
>= e
)
1515 for (i
= 1; i
< readnl_len
; i
++) {
1516 if (pos
[i
] != nl
[i
])
1519 if (i
== readnl_len
)
1520 return pos
- start
+ readnl_len
;
1523 pos
= find_control_char(e
, end
, nl
[0]);
1527 *consumed
= pos
- start
;
1534 _TextIOWrapper_readline(PyTextIOWrapperObject
*self
, Py_ssize_t limit
)
1536 PyObject
*line
= NULL
, *chunks
= NULL
, *remaining
= NULL
;
1537 Py_ssize_t start
, endpos
, chunked
, offset_to_buffer
;
1542 if (_TextIOWrapper_writeflush(self
) < 0)
1549 Py_ssize_t line_len
;
1550 Py_ssize_t consumed
= 0;
1552 /* First, get some data if necessary */
1554 while (!self
->decoded_chars
||
1555 !PyUnicode_GET_SIZE(self
->decoded_chars
)) {
1556 res
= TextIOWrapper_read_chunk(self
);
1564 TextIOWrapper_set_decoded_chars(self
, NULL
);
1565 Py_CLEAR(self
->snapshot
);
1566 start
= endpos
= offset_to_buffer
= 0;
1570 if (remaining
== NULL
) {
1571 line
= self
->decoded_chars
;
1572 start
= self
->decoded_chars_used
;
1573 offset_to_buffer
= 0;
1577 assert(self
->decoded_chars_used
== 0);
1578 line
= PyUnicode_Concat(remaining
, self
->decoded_chars
);
1580 offset_to_buffer
= PyUnicode_GET_SIZE(remaining
);
1581 Py_CLEAR(remaining
);
1586 ptr
= PyUnicode_AS_UNICODE(line
);
1587 line_len
= PyUnicode_GET_SIZE(line
);
1589 endpos
= _PyIO_find_line_ending(
1590 self
->readtranslate
, self
->readuniversal
, self
->readnl
,
1591 ptr
+ start
, ptr
+ line_len
, &consumed
);
1594 if (limit
>= 0 && (endpos
- start
) + chunked
>= limit
)
1595 endpos
= start
+ limit
- chunked
;
1599 /* We can put aside up to `endpos` */
1600 endpos
= consumed
+ start
;
1601 if (limit
>= 0 && (endpos
- start
) + chunked
>= limit
) {
1602 /* Didn't find line ending, but reached length limit */
1603 endpos
= start
+ limit
- chunked
;
1607 if (endpos
> start
) {
1608 /* No line ending seen yet - put aside current data */
1610 if (chunks
== NULL
) {
1611 chunks
= PyList_New(0);
1615 s
= PyUnicode_FromUnicode(ptr
+ start
, endpos
- start
);
1618 if (PyList_Append(chunks
, s
) < 0) {
1622 chunked
+= PyUnicode_GET_SIZE(s
);
1625 /* There may be some remaining bytes we'll have to prepend to the
1626 next chunk of data */
1627 if (endpos
< line_len
) {
1628 remaining
= PyUnicode_FromUnicode(
1629 ptr
+ endpos
, line_len
- endpos
);
1630 if (remaining
== NULL
)
1634 /* We have consumed the buffer */
1635 TextIOWrapper_set_decoded_chars(self
, NULL
);
1639 /* Our line ends in the current buffer */
1640 self
->decoded_chars_used
= endpos
- offset_to_buffer
;
1641 if (start
> 0 || endpos
< PyUnicode_GET_SIZE(line
)) {
1642 if (start
== 0 && Py_REFCNT(line
) == 1) {
1643 if (PyUnicode_Resize(&line
, endpos
) < 0)
1647 PyObject
*s
= PyUnicode_FromUnicode(
1648 PyUnicode_AS_UNICODE(line
) + start
, endpos
- start
);
1656 if (remaining
!= NULL
) {
1657 if (chunks
== NULL
) {
1658 chunks
= PyList_New(0);
1662 if (PyList_Append(chunks
, remaining
) < 0)
1664 Py_CLEAR(remaining
);
1666 if (chunks
!= NULL
) {
1667 if (line
!= NULL
&& PyList_Append(chunks
, line
) < 0)
1670 line
= PyUnicode_Join(_PyIO_empty_str
, chunks
);
1676 line
= PyUnicode_FromStringAndSize(NULL
, 0);
1682 Py_XDECREF(remaining
);
1688 TextIOWrapper_readline(PyTextIOWrapperObject
*self
, PyObject
*args
)
1690 Py_ssize_t limit
= -1;
1692 CHECK_INITIALIZED(self
);
1693 if (!PyArg_ParseTuple(args
, "|n:readline", &limit
)) {
1696 return _TextIOWrapper_readline(self
, limit
);
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   respectively.  The following macros define at which offsets in the
   intermediary byte string the various CookieStruct fields are stored.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1748 TextIOWrapper_parseCookie(CookieStruct
*cookie
, PyObject
*cookieObj
)
1750 unsigned char buffer
[COOKIE_BUF_LEN
];
1751 PyLongObject
*cookieLong
= (PyLongObject
*)PyNumber_Long(cookieObj
);
1752 if (cookieLong
== NULL
)
1755 if (_PyLong_AsByteArray(cookieLong
, buffer
, sizeof(buffer
),
1756 IS_LITTLE_ENDIAN
, 0) < 0) {
1757 Py_DECREF(cookieLong
);
1760 Py_DECREF(cookieLong
);
1762 memcpy(&cookie
->start_pos
, buffer
+ OFF_START_POS
, sizeof(cookie
->start_pos
));
1763 memcpy(&cookie
->dec_flags
, buffer
+ OFF_DEC_FLAGS
, sizeof(cookie
->dec_flags
));
1764 memcpy(&cookie
->bytes_to_feed
, buffer
+ OFF_BYTES_TO_FEED
, sizeof(cookie
->bytes_to_feed
));
1765 memcpy(&cookie
->chars_to_skip
, buffer
+ OFF_CHARS_TO_SKIP
, sizeof(cookie
->chars_to_skip
));
1766 memcpy(&cookie
->need_eof
, buffer
+ OFF_NEED_EOF
, sizeof(cookie
->need_eof
));
1772 TextIOWrapper_buildCookie(CookieStruct
*cookie
)
1774 unsigned char buffer
[COOKIE_BUF_LEN
];
1776 memcpy(buffer
+ OFF_START_POS
, &cookie
->start_pos
, sizeof(cookie
->start_pos
));
1777 memcpy(buffer
+ OFF_DEC_FLAGS
, &cookie
->dec_flags
, sizeof(cookie
->dec_flags
));
1778 memcpy(buffer
+ OFF_BYTES_TO_FEED
, &cookie
->bytes_to_feed
, sizeof(cookie
->bytes_to_feed
));
1779 memcpy(buffer
+ OFF_CHARS_TO_SKIP
, &cookie
->chars_to_skip
, sizeof(cookie
->chars_to_skip
));
1780 memcpy(buffer
+ OFF_NEED_EOF
, &cookie
->need_eof
, sizeof(cookie
->need_eof
));
1782 return _PyLong_FromByteArray(buffer
, sizeof(buffer
), IS_LITTLE_ENDIAN
, 0);
1784 #undef IS_LITTLE_ENDIAN
1787 _TextIOWrapper_decoder_setstate(PyTextIOWrapperObject
*self
,
1788 CookieStruct
*cookie
)
1791 /* When seeking to the start of the stream, we call decoder.reset()
1792 rather than decoder.getstate().
1793 This is for a few decoders such as utf-16 for which the state value
1794 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1795 utf-16, that we are expecting a BOM).
1797 if (cookie
->start_pos
== 0 && cookie
->dec_flags
== 0)
1798 res
= PyObject_CallMethodObjArgs(self
->decoder
, _PyIO_str_reset
, NULL
);
1800 res
= PyObject_CallMethod(self
->decoder
, "setstate",
1801 "((yi))", "", cookie
->dec_flags
);
1809 TextIOWrapper_seek(PyTextIOWrapperObject
*self
, PyObject
*args
)
1811 PyObject
*cookieObj
, *posobj
;
1812 CookieStruct cookie
;
1814 static PyObject
*zero
= NULL
;
1818 CHECK_INITIALIZED(self
);
1821 zero
= PyLong_FromLong(0L);
1826 if (!PyArg_ParseTuple(args
, "O|i:seek", &cookieObj
, &whence
))
1830 Py_INCREF(cookieObj
);
1832 if (!self
->seekable
) {
1833 PyErr_SetString(PyExc_IOError
,
1834 "underlying stream is not seekable");
1839 /* seek relative to current position */
1840 cmp
= PyObject_RichCompareBool(cookieObj
, zero
, Py_EQ
);
1845 PyErr_SetString(PyExc_IOError
,
1846 "can't do nonzero cur-relative seeks");
1850 /* Seeking to the current position should attempt to
1851 * sync the underlying buffer with the current position.
1853 Py_DECREF(cookieObj
);
1854 cookieObj
= PyObject_CallMethod((PyObject
*)self
, "tell", NULL
);
1855 if (cookieObj
== NULL
)
1858 else if (whence
== 2) {
1859 /* seek relative to end of file */
1861 cmp
= PyObject_RichCompareBool(cookieObj
, zero
, Py_EQ
);
1866 PyErr_SetString(PyExc_IOError
,
1867 "can't do nonzero end-relative seeks");
1871 res
= PyObject_CallMethod((PyObject
*)self
, "flush", NULL
);
1876 TextIOWrapper_set_decoded_chars(self
, NULL
);
1877 Py_CLEAR(self
->snapshot
);
1878 if (self
->decoder
) {
1879 res
= PyObject_CallMethod(self
->decoder
, "reset", NULL
);
1885 res
= PyObject_CallMethod(self
->buffer
, "seek", "ii", 0, 2);
1886 Py_XDECREF(cookieObj
);
1889 else if (whence
!= 0) {
1890 PyErr_Format(PyExc_ValueError
,
1891 "invalid whence (%d, should be 0, 1 or 2)", whence
);
1895 cmp
= PyObject_RichCompareBool(cookieObj
, zero
, Py_LT
);
1900 PyErr_Format(PyExc_ValueError
,
1901 "negative seek position %R", cookieObj
);
1905 res
= PyObject_CallMethodObjArgs((PyObject
*)self
, _PyIO_str_flush
, NULL
);
1910 /* The strategy of seek() is to go back to the safe start point
1911 * and replay the effect of read(chars_to_skip) from there.
1913 if (TextIOWrapper_parseCookie(&cookie
, cookieObj
) < 0)
1916 /* Seek back to the safe start point. */
1917 posobj
= PyLong_FromOff_t(cookie
.start_pos
);
1920 res
= PyObject_CallMethodObjArgs(self
->buffer
,
1921 _PyIO_str_seek
, posobj
, NULL
);
1927 TextIOWrapper_set_decoded_chars(self
, NULL
);
1928 Py_CLEAR(self
->snapshot
);
1930 /* Restore the decoder to its state from the safe start point. */
1931 if (self
->decoder
) {
1932 if (_TextIOWrapper_decoder_setstate(self
, &cookie
) < 0)
1936 if (cookie
.chars_to_skip
) {
1937 /* Just like _read_chunk, feed the decoder and save a snapshot. */
1938 PyObject
*input_chunk
= PyObject_CallMethod(
1939 self
->buffer
, "read", "i", cookie
.bytes_to_feed
);
1942 if (input_chunk
== NULL
)
1945 assert (PyBytes_Check(input_chunk
));
1947 self
->snapshot
= Py_BuildValue("iN", cookie
.dec_flags
, input_chunk
);
1948 if (self
->snapshot
== NULL
) {
1949 Py_DECREF(input_chunk
);
1953 decoded
= PyObject_CallMethod(self
->decoder
, "decode",
1954 "Oi", input_chunk
, (int)cookie
.need_eof
);
1956 if (decoded
== NULL
)
1959 TextIOWrapper_set_decoded_chars(self
, decoded
);
1961 /* Skip chars_to_skip of the decoded characters. */
1962 if (PyUnicode_GetSize(self
->decoded_chars
) < cookie
.chars_to_skip
) {
1963 PyErr_SetString(PyExc_IOError
, "can't restore logical file position");
1966 self
->decoded_chars_used
= cookie
.chars_to_skip
;
1969 self
->snapshot
= Py_BuildValue("iy", cookie
.dec_flags
, "");
1970 if (self
->snapshot
== NULL
)
1976 Py_XDECREF(cookieObj
);
1982 TextIOWrapper_tell(PyTextIOWrapperObject
*self
, PyObject
*args
)
1985 PyObject
*posobj
= NULL
;
1986 CookieStruct cookie
= {0,0,0,0,0};
1987 PyObject
*next_input
;
1988 Py_ssize_t chars_to_skip
, chars_decoded
;
1989 PyObject
*saved_state
= NULL
;
1990 char *input
, *input_end
;
1992 CHECK_INITIALIZED(self
);
1995 if (!self
->seekable
) {
1996 PyErr_SetString(PyExc_IOError
,
1997 "underlying stream is not seekable");
2000 if (!self
->telling
) {
2001 PyErr_SetString(PyExc_IOError
,
2002 "telling position disabled by next() call");
2006 if (_TextIOWrapper_writeflush(self
) < 0)
2008 res
= PyObject_CallMethod((PyObject
*)self
, "flush", NULL
);
2013 posobj
= PyObject_CallMethod(self
->buffer
, "tell", NULL
);
2017 if (self
->decoder
== NULL
|| self
->snapshot
== NULL
) {
2018 assert (self
->decoded_chars
== NULL
|| PyUnicode_GetSize(self
->decoded_chars
) == 0);
2022 #if defined(HAVE_LARGEFILE_SUPPORT)
2023 cookie
.start_pos
= PyLong_AsLongLong(posobj
);
2025 cookie
.start_pos
= PyLong_AsLong(posobj
);
2027 if (PyErr_Occurred())
2030 /* Skip backward to the snapshot point (see _read_chunk). */
2031 if (!PyArg_Parse(self
->snapshot
, "(iO)", &cookie
.dec_flags
, &next_input
))
2034 assert (PyBytes_Check(next_input
));
2036 cookie
.start_pos
-= PyBytes_GET_SIZE(next_input
);
2038 /* How many decoded characters have been used up since the snapshot? */
2039 if (self
->decoded_chars_used
== 0) {
2040 /* We haven't moved from the snapshot point. */
2042 return TextIOWrapper_buildCookie(&cookie
);
2045 chars_to_skip
= self
->decoded_chars_used
;
2047 /* Starting from the snapshot position, we will walk the decoder
2048 * forward until it gives us enough decoded characters.
2050 saved_state
= PyObject_CallMethodObjArgs(self
->decoder
,
2051 _PyIO_str_getstate
, NULL
);
2052 if (saved_state
== NULL
)
2055 /* Note our initial start point. */
2056 if (_TextIOWrapper_decoder_setstate(self
, &cookie
) < 0)
2059 /* Feed the decoder one byte at a time. As we go, note the
2060 * nearest "safe start point" before the current location
2061 * (a point where the decoder has nothing buffered, so seek()
2062 * can safely start from there and advance to this location).
2065 input
= PyBytes_AS_STRING(next_input
);
2066 input_end
= input
+ PyBytes_GET_SIZE(next_input
);
2067 while (input
< input_end
) {
2070 Py_ssize_t dec_buffer_len
;
2073 PyObject
*decoded
= PyObject_CallMethod(
2074 self
->decoder
, "decode", "y#", input
, 1);
2075 if (decoded
== NULL
)
2077 assert (PyUnicode_Check(decoded
));
2078 chars_decoded
+= PyUnicode_GET_SIZE(decoded
);
2081 cookie
.bytes_to_feed
+= 1;
2083 state
= PyObject_CallMethodObjArgs(self
->decoder
,
2084 _PyIO_str_getstate
, NULL
);
2087 if (!PyArg_Parse(state
, "(y#i)", &dec_buffer
, &dec_buffer_len
, &dec_flags
)) {
2093 if (dec_buffer_len
== 0 && chars_decoded
<= chars_to_skip
) {
2094 /* Decoder buffer is empty, so this is a safe start point. */
2095 cookie
.start_pos
+= cookie
.bytes_to_feed
;
2096 chars_to_skip
-= chars_decoded
;
2097 cookie
.dec_flags
= dec_flags
;
2098 cookie
.bytes_to_feed
= 0;
2101 if (chars_decoded
>= chars_to_skip
)
2105 if (input
== input_end
) {
2106 /* We didn't get enough decoded data; signal EOF to get more. */
2107 PyObject
*decoded
= PyObject_CallMethod(
2108 self
->decoder
, "decode", "yi", "", /* final = */ 1);
2109 if (decoded
== NULL
)
2111 assert (PyUnicode_Check(decoded
));
2112 chars_decoded
+= PyUnicode_GET_SIZE(decoded
);
2114 cookie
.need_eof
= 1;
2116 if (chars_decoded
< chars_to_skip
) {
2117 PyErr_SetString(PyExc_IOError
,
2118 "can't reconstruct logical file position");
2125 res
= PyObject_CallMethod(self
->decoder
, "setstate", "(O)", saved_state
);
2126 Py_DECREF(saved_state
);
2131 /* The returned cookie corresponds to the last safe start point. */
2132 cookie
.chars_to_skip
= Py_SAFE_DOWNCAST(chars_to_skip
, Py_ssize_t
, int);
2133 return TextIOWrapper_buildCookie(&cookie
);
2138 PyObject
*type
, *value
, *traceback
;
2139 PyErr_Fetch(&type
, &value
, &traceback
);
2141 res
= PyObject_CallMethod(self
->decoder
, "setstate", "(O)", saved_state
);
2142 Py_DECREF(saved_state
);
2147 PyErr_Restore(type
, value
, traceback
);
2153 TextIOWrapper_truncate(PyTextIOWrapperObject
*self
, PyObject
*args
)
2155 PyObject
*pos
= Py_None
;
2158 CHECK_INITIALIZED(self
)
2159 if (!PyArg_ParseTuple(args
, "|O:truncate", &pos
)) {
2163 res
= PyObject_CallMethodObjArgs((PyObject
*) self
, _PyIO_str_flush
, NULL
);
2168 if (pos
!= Py_None
) {
2169 res
= PyObject_CallMethodObjArgs((PyObject
*) self
,
2170 _PyIO_str_seek
, pos
, NULL
);
2176 return PyObject_CallMethodObjArgs(self
->buffer
, _PyIO_str_truncate
, NULL
);
2180 TextIOWrapper_repr(PyTextIOWrapperObject
*self
)
2182 CHECK_INITIALIZED(self
);
2183 return PyUnicode_FromFormat("<TextIOWrapper encoding=%S>", self
->encoding
);
2190 TextIOWrapper_fileno(PyTextIOWrapperObject
*self
, PyObject
*args
)
2192 CHECK_INITIALIZED(self
);
2193 return PyObject_CallMethod(self
->buffer
, "fileno", NULL
);
2197 TextIOWrapper_seekable(PyTextIOWrapperObject
*self
, PyObject
*args
)
2199 CHECK_INITIALIZED(self
);
2200 return PyObject_CallMethod(self
->buffer
, "seekable", NULL
);
2204 TextIOWrapper_readable(PyTextIOWrapperObject
*self
, PyObject
*args
)
2206 CHECK_INITIALIZED(self
);
2207 return PyObject_CallMethod(self
->buffer
, "readable", NULL
);
2211 TextIOWrapper_writable(PyTextIOWrapperObject
*self
, PyObject
*args
)
2213 CHECK_INITIALIZED(self
);
2214 return PyObject_CallMethod(self
->buffer
, "writable", NULL
);
2218 TextIOWrapper_isatty(PyTextIOWrapperObject
*self
, PyObject
*args
)
2220 CHECK_INITIALIZED(self
);
2221 return PyObject_CallMethod(self
->buffer
, "isatty", NULL
);
2225 TextIOWrapper_flush(PyTextIOWrapperObject
*self
, PyObject
*args
)
2227 CHECK_INITIALIZED(self
);
2229 self
->telling
= self
->seekable
;
2230 if (_TextIOWrapper_writeflush(self
) < 0)
2232 return PyObject_CallMethod(self
->buffer
, "flush", NULL
);
2236 TextIOWrapper_close(PyTextIOWrapperObject
*self
, PyObject
*args
)
2239 CHECK_INITIALIZED(self
);
2240 res
= PyObject_CallMethod((PyObject
*)self
, "flush", NULL
);
2242 /* If flush() fails, just give up */
2248 return PyObject_CallMethod(self
->buffer
, "close", NULL
);
2252 TextIOWrapper_iternext(PyTextIOWrapperObject
*self
)
2256 CHECK_INITIALIZED(self
);
2259 if (Py_TYPE(self
) == &PyTextIOWrapper_Type
) {
2260 /* Skip method call overhead for speed */
2261 line
= _TextIOWrapper_readline(self
, -1);
2264 line
= PyObject_CallMethodObjArgs((PyObject
*)self
,
2265 _PyIO_str_readline
, NULL
);
2266 if (line
&& !PyUnicode_Check(line
)) {
2267 PyErr_Format(PyExc_IOError
,
2268 "readline() should have returned an str object, "
2269 "not '%.200s'", Py_TYPE(line
)->tp_name
);
2278 if (PyUnicode_GET_SIZE(line
) == 0) {
2279 /* Reached EOF or would have blocked */
2281 Py_CLEAR(self
->snapshot
);
2282 self
->telling
= self
->seekable
;
2290 TextIOWrapper_name_get(PyTextIOWrapperObject
*self
, void *context
)
2292 CHECK_INITIALIZED(self
);
2293 return PyObject_GetAttrString(self
->buffer
, "name");
2297 TextIOWrapper_closed_get(PyTextIOWrapperObject
*self
, void *context
)
2299 CHECK_INITIALIZED(self
);
2300 return PyObject_GetAttr(self
->buffer
, _PyIO_str_closed
);
2304 TextIOWrapper_newlines_get(PyTextIOWrapperObject
*self
, void *context
)
2307 CHECK_INITIALIZED(self
);
2308 if (self
->decoder
== NULL
)
2310 res
= PyObject_GetAttr(self
->decoder
, _PyIO_str_newlines
);
2319 TextIOWrapper_chunk_size_get(PyTextIOWrapperObject
*self
, void *context
)
2321 CHECK_INITIALIZED(self
);
2322 return PyLong_FromSsize_t(self
->chunk_size
);
2326 TextIOWrapper_chunk_size_set(PyTextIOWrapperObject
*self
,
2327 PyObject
*arg
, void *context
)
2330 CHECK_INITIALIZED_INT(self
);
2331 n
= PyNumber_AsSsize_t(arg
, PyExc_TypeError
);
2332 if (n
== -1 && PyErr_Occurred())
2335 PyErr_SetString(PyExc_ValueError
,
2336 "a strictly positive integer is required");
2339 self
->chunk_size
= n
;
2343 static PyMethodDef TextIOWrapper_methods
[] = {
2344 {"write", (PyCFunction
)TextIOWrapper_write
, METH_VARARGS
},
2345 {"read", (PyCFunction
)TextIOWrapper_read
, METH_VARARGS
},
2346 {"readline", (PyCFunction
)TextIOWrapper_readline
, METH_VARARGS
},
2347 {"flush", (PyCFunction
)TextIOWrapper_flush
, METH_NOARGS
},
2348 {"close", (PyCFunction
)TextIOWrapper_close
, METH_NOARGS
},
2350 {"fileno", (PyCFunction
)TextIOWrapper_fileno
, METH_NOARGS
},
2351 {"seekable", (PyCFunction
)TextIOWrapper_seekable
, METH_NOARGS
},
2352 {"readable", (PyCFunction
)TextIOWrapper_readable
, METH_NOARGS
},
2353 {"writable", (PyCFunction
)TextIOWrapper_writable
, METH_NOARGS
},
2354 {"isatty", (PyCFunction
)TextIOWrapper_isatty
, METH_NOARGS
},
2356 {"seek", (PyCFunction
)TextIOWrapper_seek
, METH_VARARGS
},
2357 {"tell", (PyCFunction
)TextIOWrapper_tell
, METH_NOARGS
},
2358 {"truncate", (PyCFunction
)TextIOWrapper_truncate
, METH_VARARGS
},
2362 static PyMemberDef TextIOWrapper_members
[] = {
2363 {"encoding", T_OBJECT
, offsetof(PyTextIOWrapperObject
, encoding
), READONLY
},
2364 {"buffer", T_OBJECT
, offsetof(PyTextIOWrapperObject
, buffer
), READONLY
},
2365 {"line_buffering", T_BOOL
, offsetof(PyTextIOWrapperObject
, line_buffering
), READONLY
},
2369 static PyGetSetDef TextIOWrapper_getset
[] = {
2370 {"name", (getter
)TextIOWrapper_name_get
, NULL
, NULL
},
2371 {"closed", (getter
)TextIOWrapper_closed_get
, NULL
, NULL
},
2372 /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2374 {"newlines", (getter
)TextIOWrapper_newlines_get
, NULL
, NULL
},
2375 {"_CHUNK_SIZE", (getter
)TextIOWrapper_chunk_size_get
,
2376 (setter
)TextIOWrapper_chunk_size_set
, NULL
},
2380 PyTypeObject PyTextIOWrapper_Type
= {
2381 PyVarObject_HEAD_INIT(NULL
, 0)
2382 "_io.TextIOWrapper", /*tp_name*/
2383 sizeof(PyTextIOWrapperObject
), /*tp_basicsize*/
2385 (destructor
)TextIOWrapper_dealloc
, /*tp_dealloc*/
2390 (reprfunc
)TextIOWrapper_repr
,/*tp_repr*/
2392 0, /*tp_as_sequence*/
2393 0, /*tp_as_mapping*/
2400 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
2401 | Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
2402 TextIOWrapper_doc
, /* tp_doc */
2403 (traverseproc
)TextIOWrapper_traverse
, /* tp_traverse */
2404 (inquiry
)TextIOWrapper_clear
, /* tp_clear */
2405 0, /* tp_richcompare */
2406 offsetof(PyTextIOWrapperObject
, weakreflist
), /*tp_weaklistoffset*/
2408 (iternextfunc
)TextIOWrapper_iternext
, /* tp_iternext */
2409 TextIOWrapper_methods
, /* tp_methods */
2410 TextIOWrapper_members
, /* tp_members */
2411 TextIOWrapper_getset
, /* tp_getset */
2414 0, /* tp_descr_get */
2415 0, /* tp_descr_set */
2416 offsetof(PyTextIOWrapperObject
, dict
), /*tp_dictoffset*/
2417 (initproc
)TextIOWrapper_init
, /* tp_init */
2419 PyType_GenericNew
, /* tp_new */