2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
9 #define PY_SSIZE_T_CLEAN
11 #include "structmember.h"
12 #include "_iomodule.h"
16 PyDoc_STRVAR(textiobase_doc
,
17 "Base class for text I/O.\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
25 _unsupported(const char *message
)
27 PyErr_SetString(_PyIO_unsupported_operation
, message
);
31 PyDoc_STRVAR(textiobase_detach_doc
,
32 "Separate the underlying buffer from the TextIOBase and return it.\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
39 textiobase_detach(PyObject
*self
)
41 return _unsupported("detach");
44 PyDoc_STRVAR(textiobase_read_doc
,
45 "Read at most n characters from stream.\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
52 textiobase_read(PyObject
*self
, PyObject
*args
)
54 return _unsupported("read");
57 PyDoc_STRVAR(textiobase_readline_doc
,
58 "Read until newline or EOF.\n"
60 "Returns an empty string if EOF is hit immediately.\n"
64 textiobase_readline(PyObject
*self
, PyObject
*args
)
66 return _unsupported("readline");
69 PyDoc_STRVAR(textiobase_write_doc
,
70 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
76 textiobase_write(PyObject
*self
, PyObject
*args
)
78 return _unsupported("write");
81 PyDoc_STRVAR(textiobase_encoding_doc
,
82 "Encoding of the text stream.\n"
84 "Subclasses should override.\n"
88 textiobase_encoding_get(PyObject
*self
, void *context
)
93 PyDoc_STRVAR(textiobase_newlines_doc
,
94 "Line endings translated so far.\n"
96 "Only line endings translated during reading are considered.\n"
98 "Subclasses should override.\n"
102 textiobase_newlines_get(PyObject
*self
, void *context
)
107 PyDoc_STRVAR(textiobase_errors_doc
,
108 "The error setting of the decoder or encoder.\n"
110 "Subclasses should override.\n"
114 textiobase_errors_get(PyObject
*self
, void *context
)
120 static PyMethodDef textiobase_methods
[] = {
121 {"detach", (PyCFunction
)textiobase_detach
, METH_NOARGS
, textiobase_detach_doc
},
122 {"read", textiobase_read
, METH_VARARGS
, textiobase_read_doc
},
123 {"readline", textiobase_readline
, METH_VARARGS
, textiobase_readline_doc
},
124 {"write", textiobase_write
, METH_VARARGS
, textiobase_write_doc
},
128 static PyGetSetDef textiobase_getset
[] = {
129 {"encoding", (getter
)textiobase_encoding_get
, NULL
, textiobase_encoding_doc
},
130 {"newlines", (getter
)textiobase_newlines_get
, NULL
, textiobase_newlines_doc
},
131 {"errors", (getter
)textiobase_errors_get
, NULL
, textiobase_errors_doc
},
135 PyTypeObject PyTextIOBase_Type
= {
136 PyVarObject_HEAD_INIT(NULL
, 0)
137 "_io._TextIOBase", /*tp_name*/
147 0, /*tp_as_sequence*/
155 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /*tp_flags*/
156 textiobase_doc
, /* tp_doc */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
163 textiobase_methods
, /* tp_methods */
165 textiobase_getset
, /* tp_getset */
166 &PyIOBase_Type
, /* tp_base */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
177 /* IncrementalNewlineDecoder */
179 PyDoc_STRVAR(incrementalnewlinedecoder_doc
,
180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
195 unsigned int seennl
:3;
199 incrementalnewlinedecoder_init(nldecoder_object
*self
,
200 PyObject
*args
, PyObject
*kwds
)
204 PyObject
*errors
= NULL
;
205 char *kwlist
[] = {"decoder", "translate", "errors", NULL
};
207 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "Oi|O:IncrementalNewlineDecoder",
208 kwlist
, &decoder
, &translate
, &errors
))
211 self
->decoder
= decoder
;
214 if (errors
== NULL
) {
215 self
->errors
= PyUnicode_FromString("strict");
216 if (self
->errors
== NULL
)
221 self
->errors
= errors
;
224 self
->translate
= translate
;
232 incrementalnewlinedecoder_dealloc(nldecoder_object
*self
)
234 Py_CLEAR(self
->decoder
);
235 Py_CLEAR(self
->errors
);
236 Py_TYPE(self
)->tp_free((PyObject
*)self
);
242 #define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
245 _PyIncrementalNewlineDecoder_decode(PyObject
*_self
,
246 PyObject
*input
, int final
)
249 Py_ssize_t output_len
;
250 nldecoder_object
*self
= (nldecoder_object
*) _self
;
252 if (self
->decoder
== NULL
) {
253 PyErr_SetString(PyExc_ValueError
,
254 "IncrementalNewlineDecoder.__init__ not called");
258 /* decode input (with the eventual \r from a previous pass) */
259 if (self
->decoder
!= Py_None
) {
260 output
= PyObject_CallMethodObjArgs(self
->decoder
,
261 _PyIO_str_decode
, input
, final
? Py_True
: Py_False
, NULL
);
271 if (!PyUnicode_Check(output
)) {
272 PyErr_SetString(PyExc_TypeError
,
273 "decoder should return a string result");
277 output_len
= PyUnicode_GET_SIZE(output
);
278 if (self
->pendingcr
&& (final
|| output_len
> 0)) {
280 PyObject
*modified
= PyUnicode_FromUnicode(NULL
, output_len
+ 1);
281 if (modified
== NULL
)
283 out
= PyUnicode_AS_UNICODE(modified
);
285 memcpy(out
+ 1, PyUnicode_AS_UNICODE(output
),
286 output_len
* sizeof(Py_UNICODE
));
293 /* retain last \r even when not translating data:
294 * then readline() is sure to get \r\n in one pass
298 && PyUnicode_AS_UNICODE(output
)[output_len
- 1] == '\r') {
300 if (Py_REFCNT(output
) == 1) {
301 if (PyUnicode_Resize(&output
, output_len
- 1) < 0)
305 PyObject
*modified
= PyUnicode_FromUnicode(
306 PyUnicode_AS_UNICODE(output
),
308 if (modified
== NULL
)
317 /* Record which newlines are read and do newline translation if desired,
322 int seennl
= self
->seennl
;
325 in_str
= PyUnicode_AS_UNICODE(output
);
326 len
= PyUnicode_GET_SIZE(output
);
331 /* If, up to now, newlines are consistently \n, do a quick check
332 for the \r *byte* with the libc's optimized memchr.
334 if (seennl
== SEEN_LF
|| seennl
== 0) {
335 only_lf
= (memchr(in_str
, '\r', len
* sizeof(Py_UNICODE
)) == NULL
);
339 /* If not already seen, quick scan for a possible "\n" character.
340 (there's nothing else to be done, even when in translation mode)
343 memchr(in_str
, '\n', len
* sizeof(Py_UNICODE
)) != NULL
) {
349 /* Fast loop for non-control characters */
361 /* Finished: we have scanned for newlines, and none of them
364 else if (!self
->translate
) {
366 /* We have already seen all newline types, no need to scan again */
367 if (seennl
== SEEN_ALL
)
373 /* Fast loop for non-control characters */
379 else if (c
== '\r') {
389 if (seennl
== SEEN_ALL
)
396 PyObject
*translated
= NULL
;
398 Py_UNICODE
*in
, *out
, *end
;
399 if (Py_REFCNT(output
) != 1) {
400 /* We could try to optimize this so that we only do a copy
401 when there is something to translate. On the other hand,
402 most decoders should only output non-shared strings, i.e.
403 translation is done in place. */
404 translated
= PyUnicode_FromUnicode(NULL
, len
);
405 if (translated
== NULL
)
407 assert(Py_REFCNT(translated
) == 1);
408 memcpy(PyUnicode_AS_UNICODE(translated
),
409 PyUnicode_AS_UNICODE(output
),
410 len
* sizeof(Py_UNICODE
));
415 out_str
= PyUnicode_AS_UNICODE(translated
);
421 /* Fast loop for non-control characters */
422 while ((c
= *in
++) > '\r')
443 if (translated
!= output
) {
447 if (out
- out_str
!= len
) {
448 if (PyUnicode_Resize(&output
, out
- out_str
) < 0)
452 self
->seennl
|= seennl
;
463 incrementalnewlinedecoder_decode(nldecoder_object
*self
,
464 PyObject
*args
, PyObject
*kwds
)
466 char *kwlist
[] = {"input", "final", NULL
};
470 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "O|i:IncrementalNewlineDecoder",
471 kwlist
, &input
, &final
))
473 return _PyIncrementalNewlineDecoder_decode((PyObject
*) self
, input
, final
);
477 incrementalnewlinedecoder_getstate(nldecoder_object
*self
, PyObject
*args
)
480 unsigned PY_LONG_LONG flag
;
482 if (self
->decoder
!= Py_None
) {
483 PyObject
*state
= PyObject_CallMethodObjArgs(self
->decoder
,
484 _PyIO_str_getstate
, NULL
);
487 if (!PyArg_Parse(state
, "(OK)", &buffer
, &flag
)) {
495 buffer
= PyBytes_FromString("");
501 return Py_BuildValue("NK", buffer
, flag
);
505 incrementalnewlinedecoder_setstate(nldecoder_object
*self
, PyObject
*state
)
508 unsigned PY_LONG_LONG flag
;
510 if (!PyArg_Parse(state
, "(OK)", &buffer
, &flag
))
513 self
->pendingcr
= (int) flag
& 1;
516 if (self
->decoder
!= Py_None
)
517 return PyObject_CallMethod(self
->decoder
,
518 "setstate", "((OK))", buffer
, flag
);
524 incrementalnewlinedecoder_reset(nldecoder_object
*self
, PyObject
*args
)
528 if (self
->decoder
!= Py_None
)
529 return PyObject_CallMethodObjArgs(self
->decoder
, _PyIO_str_reset
, NULL
);
535 incrementalnewlinedecoder_newlines_get(nldecoder_object
*self
, void *context
)
537 switch (self
->seennl
) {
539 return PyUnicode_FromString("\r");
541 return PyUnicode_FromString("\n");
543 return PyUnicode_FromString("\r\n");
544 case SEEN_CR
| SEEN_LF
:
545 return Py_BuildValue("ss", "\r", "\n");
546 case SEEN_CR
| SEEN_CRLF
:
547 return Py_BuildValue("ss", "\r", "\r\n");
548 case SEEN_LF
| SEEN_CRLF
:
549 return Py_BuildValue("ss", "\n", "\r\n");
550 case SEEN_CR
| SEEN_LF
| SEEN_CRLF
:
551 return Py_BuildValue("sss", "\r", "\n", "\r\n");
559 static PyMethodDef incrementalnewlinedecoder_methods
[] = {
560 {"decode", (PyCFunction
)incrementalnewlinedecoder_decode
, METH_VARARGS
|METH_KEYWORDS
},
561 {"getstate", (PyCFunction
)incrementalnewlinedecoder_getstate
, METH_NOARGS
},
562 {"setstate", (PyCFunction
)incrementalnewlinedecoder_setstate
, METH_O
},
563 {"reset", (PyCFunction
)incrementalnewlinedecoder_reset
, METH_NOARGS
},
567 static PyGetSetDef incrementalnewlinedecoder_getset
[] = {
568 {"newlines", (getter
)incrementalnewlinedecoder_newlines_get
, NULL
, NULL
},
572 PyTypeObject PyIncrementalNewlineDecoder_Type
= {
573 PyVarObject_HEAD_INIT(NULL
, 0)
574 "_io.IncrementalNewlineDecoder", /*tp_name*/
575 sizeof(nldecoder_object
), /*tp_basicsize*/
577 (destructor
)incrementalnewlinedecoder_dealloc
, /*tp_dealloc*/
584 0, /*tp_as_sequence*/
592 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /*tp_flags*/
593 incrementalnewlinedecoder_doc
, /* tp_doc */
596 0, /* tp_richcompare */
597 0, /*tp_weaklistoffset*/
600 incrementalnewlinedecoder_methods
, /* tp_methods */
602 incrementalnewlinedecoder_getset
, /* tp_getset */
605 0, /* tp_descr_get */
606 0, /* tp_descr_set */
607 0, /* tp_dictoffset */
608 (initproc
)incrementalnewlinedecoder_init
, /* tp_init */
610 PyType_GenericNew
, /* tp_new */
616 PyDoc_STRVAR(textiowrapper_doc
,
617 "Character and line based layer over a BufferedIOBase object, buffer.\n"
619 "encoding gives the name of the encoding that the stream will be\n"
620 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
622 "errors determines the strictness of encoding and decoding (see the\n"
623 "codecs.register) and defaults to \"strict\".\n"
625 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
626 "handling of line endings. If it is None, universal newlines is\n"
627 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
628 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
629 "caller. Conversely, on output, '\\n' is translated to the system\n"
630 "default line seperator, os.linesep. If newline is any other of its\n"
631 "legal values, that newline becomes the newline when the file is read\n"
632 "and it is returned untranslated. On output, '\\n' is converted to the\n"
635 "If line_buffering is True, a call to flush is implied when a call to\n"
636 "write contains a newline character."
640 (*encodefunc_t
)(PyObject
*, PyObject
*);
645 int ok
; /* initialized? */
647 Py_ssize_t chunk_size
;
654 const char *writenl
; /* utf-8 encoded, NULL stands for \n */
661 /* Specialized encoding func (see below) */
662 encodefunc_t encodefunc
;
663 /* Whether or not it's the start of the stream */
664 char encoding_start_of_stream
;
666 /* Reads and writes are internally buffered in order to speed things up.
667 However, any read will first flush the write buffer if it isn't empty.
669 Please also note that text to be written is first encoded before being
670 buffered. This is necessary so that encoding errors are immediately
671 reported to the caller, but it unfortunately means that the
672 IncrementalEncoder (whose encode() method is always written in Python)
673 becomes a bottleneck for small writes.
675 PyObject
*decoded_chars
; /* buffer for text returned from decoder */
676 Py_ssize_t decoded_chars_used
; /* offset into _decoded_chars for read() */
677 PyObject
*pending_bytes
; /* list of bytes objects waiting to be
679 Py_ssize_t pending_bytes_count
;
681 /* snapshot is either None, or a tuple (dec_flags, next_input) where
682 * dec_flags is the second (integer) item of the decoder state and
683 * next_input is the chunk of input bytes that comes next after the
684 * snapshot point. We use this to reconstruct decoder states in tell().
687 /* Cache raw object if it's a FileIO object */
690 PyObject
*weakreflist
;
695 /* A couple of specialized cases in order to bypass the slow incremental
696 encoding methods for the most popular encodings. */
699 ascii_encode(textio
*self
, PyObject
*text
)
701 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text
),
702 PyUnicode_GET_SIZE(text
),
703 PyBytes_AS_STRING(self
->errors
));
707 utf16be_encode(textio
*self
, PyObject
*text
)
709 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text
),
710 PyUnicode_GET_SIZE(text
),
711 PyBytes_AS_STRING(self
->errors
), 1);
715 utf16le_encode(textio
*self
, PyObject
*text
)
717 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text
),
718 PyUnicode_GET_SIZE(text
),
719 PyBytes_AS_STRING(self
->errors
), -1);
723 utf16_encode(textio
*self
, PyObject
*text
)
725 if (!self
->encoding_start_of_stream
) {
726 /* Skip the BOM and use native byte ordering */
727 #if defined(WORDS_BIGENDIAN)
728 return utf16be_encode(self
, text
);
730 return utf16le_encode(self
, text
);
733 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text
),
734 PyUnicode_GET_SIZE(text
),
735 PyBytes_AS_STRING(self
->errors
), 0);
739 utf32be_encode(textio
*self
, PyObject
*text
)
741 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text
),
742 PyUnicode_GET_SIZE(text
),
743 PyBytes_AS_STRING(self
->errors
), 1);
747 utf32le_encode(textio
*self
, PyObject
*text
)
749 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text
),
750 PyUnicode_GET_SIZE(text
),
751 PyBytes_AS_STRING(self
->errors
), -1);
755 utf32_encode(textio
*self
, PyObject
*text
)
757 if (!self
->encoding_start_of_stream
) {
758 /* Skip the BOM and use native byte ordering */
759 #if defined(WORDS_BIGENDIAN)
760 return utf32be_encode(self
, text
);
762 return utf32le_encode(self
, text
);
765 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text
),
766 PyUnicode_GET_SIZE(text
),
767 PyBytes_AS_STRING(self
->errors
), 0);
771 utf8_encode(textio
*self
, PyObject
*text
)
773 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text
),
774 PyUnicode_GET_SIZE(text
),
775 PyBytes_AS_STRING(self
->errors
));
779 latin1_encode(textio
*self
, PyObject
*text
)
781 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text
),
782 PyUnicode_GET_SIZE(text
),
783 PyBytes_AS_STRING(self
->errors
));
786 /* Map normalized encoding names onto the specialized encoding funcs */
790 encodefunc_t encodefunc
;
793 static encodefuncentry encodefuncs
[] = {
794 {"ascii", (encodefunc_t
) ascii_encode
},
795 {"iso8859-1", (encodefunc_t
) latin1_encode
},
796 {"utf-8", (encodefunc_t
) utf8_encode
},
797 {"utf-16-be", (encodefunc_t
) utf16be_encode
},
798 {"utf-16-le", (encodefunc_t
) utf16le_encode
},
799 {"utf-16", (encodefunc_t
) utf16_encode
},
800 {"utf-32-be", (encodefunc_t
) utf32be_encode
},
801 {"utf-32-le", (encodefunc_t
) utf32le_encode
},
802 {"utf-32", (encodefunc_t
) utf32_encode
},
808 textiowrapper_init(textio
*self
, PyObject
*args
, PyObject
*kwds
)
810 char *kwlist
[] = {"buffer", "encoding", "errors",
811 "newline", "line_buffering",
813 PyObject
*buffer
, *raw
;
814 char *encoding
= NULL
;
816 char *newline
= NULL
;
817 int line_buffering
= 0;
824 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "O|zzzi:fileio",
825 kwlist
, &buffer
, &encoding
, &errors
,
826 &newline
, &line_buffering
))
829 if (newline
&& newline
[0] != '\0'
830 && !(newline
[0] == '\n' && newline
[1] == '\0')
831 && !(newline
[0] == '\r' && newline
[1] == '\0')
832 && !(newline
[0] == '\r' && newline
[1] == '\n' && newline
[2] == '\0')) {
833 PyErr_Format(PyExc_ValueError
,
834 "illegal newline value: %s", newline
);
838 Py_CLEAR(self
->buffer
);
839 Py_CLEAR(self
->encoding
);
840 Py_CLEAR(self
->encoder
);
841 Py_CLEAR(self
->decoder
);
842 Py_CLEAR(self
->readnl
);
843 Py_CLEAR(self
->decoded_chars
);
844 Py_CLEAR(self
->pending_bytes
);
845 Py_CLEAR(self
->snapshot
);
846 Py_CLEAR(self
->errors
);
848 self
->decoded_chars_used
= 0;
849 self
->pending_bytes_count
= 0;
850 self
->encodefunc
= NULL
;
851 self
->writenl
= NULL
;
853 if (encoding
== NULL
&& self
->encoding
== NULL
) {
854 if (_PyIO_locale_module
== NULL
) {
855 _PyIO_locale_module
= PyImport_ImportModule("locale");
856 if (_PyIO_locale_module
== NULL
)
857 goto catch_ImportError
;
863 self
->encoding
= PyObject_CallMethod(
864 _PyIO_locale_module
, "getpreferredencoding", NULL
);
865 if (self
->encoding
== NULL
) {
868 Importing locale can raise an ImportError because of
869 _functools, and locale.getpreferredencoding can raise a
870 ImportError if _locale is not available. These will happen
871 during module building.
873 if (PyErr_ExceptionMatches(PyExc_ImportError
)) {
875 self
->encoding
= PyString_FromString("ascii");
880 else if (!PyString_Check(self
->encoding
))
881 Py_CLEAR(self
->encoding
);
884 if (self
->encoding
!= NULL
)
885 encoding
= PyString_AsString(self
->encoding
);
886 else if (encoding
!= NULL
) {
887 self
->encoding
= PyString_FromString(encoding
);
888 if (self
->encoding
== NULL
)
892 PyErr_SetString(PyExc_IOError
,
893 "could not determine default encoding");
898 self
->errors
= PyBytes_FromString(errors
);
899 if (self
->errors
== NULL
)
902 self
->chunk_size
= 8192;
903 self
->readuniversal
= (newline
== NULL
|| newline
[0] == '\0');
904 self
->line_buffering
= line_buffering
;
905 self
->readtranslate
= (newline
== NULL
);
907 self
->readnl
= PyString_FromString(newline
);
908 if (self
->readnl
== NULL
)
911 self
->writetranslate
= (newline
== NULL
|| newline
[0] != '\0');
912 if (!self
->readuniversal
&& self
->writetranslate
) {
913 self
->writenl
= PyString_AsString(self
->readnl
);
914 if (!strcmp(self
->writenl
, "\n"))
915 self
->writenl
= NULL
;
919 self
->writenl
= "\r\n";
922 /* Build the decoder object */
923 res
= PyObject_CallMethod(buffer
, "readable", NULL
);
926 r
= PyObject_IsTrue(res
);
931 self
->decoder
= PyCodec_IncrementalDecoder(
933 if (self
->decoder
== NULL
)
936 if (self
->readuniversal
) {
937 PyObject
*incrementalDecoder
= PyObject_CallFunction(
938 (PyObject
*)&PyIncrementalNewlineDecoder_Type
,
939 "Oi", self
->decoder
, (int)self
->readtranslate
);
940 if (incrementalDecoder
== NULL
)
942 Py_CLEAR(self
->decoder
);
943 self
->decoder
= incrementalDecoder
;
947 /* Build the encoder object */
948 res
= PyObject_CallMethod(buffer
, "writable", NULL
);
951 r
= PyObject_IsTrue(res
);
957 self
->encoder
= PyCodec_IncrementalEncoder(
959 if (self
->encoder
== NULL
)
961 /* Get the normalized name of the codec */
962 ci
= _PyCodec_Lookup(encoding
);
965 res
= PyObject_GetAttrString(ci
, "name");
968 if (PyErr_ExceptionMatches(PyExc_AttributeError
))
973 else if (PyString_Check(res
)) {
974 encodefuncentry
*e
= encodefuncs
;
975 while (e
->name
!= NULL
) {
976 if (!strcmp(PyString_AS_STRING(res
), e
->name
)) {
977 self
->encodefunc
= e
->encodefunc
;
986 self
->buffer
= buffer
;
989 if (Py_TYPE(buffer
) == &PyBufferedReader_Type
||
990 Py_TYPE(buffer
) == &PyBufferedWriter_Type
||
991 Py_TYPE(buffer
) == &PyBufferedRandom_Type
) {
992 raw
= PyObject_GetAttrString(buffer
, "raw");
993 /* Cache the raw FileIO object to speed up 'closed' checks */
995 if (PyErr_ExceptionMatches(PyExc_AttributeError
))
1000 else if (Py_TYPE(raw
) == &PyFileIO_Type
)
1006 res
= PyObject_CallMethod(buffer
, "seekable", NULL
);
1009 self
->seekable
= self
->telling
= PyObject_IsTrue(res
);
1012 self
->encoding_start_of_stream
= 0;
1013 if (self
->seekable
&& self
->encoder
) {
1014 PyObject
*cookieObj
;
1017 self
->encoding_start_of_stream
= 1;
1019 cookieObj
= PyObject_CallMethodObjArgs(buffer
, _PyIO_str_tell
, NULL
);
1020 if (cookieObj
== NULL
)
1023 cmp
= PyObject_RichCompareBool(cookieObj
, _PyIO_zero
, Py_EQ
);
1024 Py_DECREF(cookieObj
);
1030 self
->encoding_start_of_stream
= 0;
1031 res
= PyObject_CallMethodObjArgs(self
->encoder
, _PyIO_str_setstate
,
1047 _textiowrapper_clear(textio
*self
)
1049 if (self
->ok
&& _PyIOBase_finalize((PyObject
*) self
) < 0)
1052 Py_CLEAR(self
->buffer
);
1053 Py_CLEAR(self
->encoding
);
1054 Py_CLEAR(self
->encoder
);
1055 Py_CLEAR(self
->decoder
);
1056 Py_CLEAR(self
->readnl
);
1057 Py_CLEAR(self
->decoded_chars
);
1058 Py_CLEAR(self
->pending_bytes
);
1059 Py_CLEAR(self
->snapshot
);
1060 Py_CLEAR(self
->errors
);
1061 Py_CLEAR(self
->raw
);
1066 textiowrapper_dealloc(textio
*self
)
1068 if (_textiowrapper_clear(self
) < 0)
1070 _PyObject_GC_UNTRACK(self
);
1071 if (self
->weakreflist
!= NULL
)
1072 PyObject_ClearWeakRefs((PyObject
*)self
);
1073 Py_CLEAR(self
->dict
);
1074 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1078 textiowrapper_traverse(textio
*self
, visitproc visit
, void *arg
)
1080 Py_VISIT(self
->buffer
);
1081 Py_VISIT(self
->encoding
);
1082 Py_VISIT(self
->encoder
);
1083 Py_VISIT(self
->decoder
);
1084 Py_VISIT(self
->readnl
);
1085 Py_VISIT(self
->decoded_chars
);
1086 Py_VISIT(self
->pending_bytes
);
1087 Py_VISIT(self
->snapshot
);
1088 Py_VISIT(self
->errors
);
1089 Py_VISIT(self
->raw
);
1091 Py_VISIT(self
->dict
);
1096 textiowrapper_clear(textio
*self
)
1098 if (_textiowrapper_clear(self
) < 0)
1100 Py_CLEAR(self
->dict
);
1105 textiowrapper_closed_get(textio
*self
, void *context
);
1107 /* This macro takes some shortcuts to make the common case faster. */
1108 #define CHECK_CLOSED(self) \
1112 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1113 if (self->raw != NULL) \
1114 r = _PyFileIO_closed(self->raw); \
1116 _res = textiowrapper_closed_get(self, NULL); \
1119 r = PyObject_IsTrue(_res); \
1125 PyErr_SetString(PyExc_ValueError, \
1126 "I/O operation on closed file."); \
1130 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1134 #define CHECK_INITIALIZED(self) \
1135 if (self->ok <= 0) { \
1136 if (self->detached) { \
1137 PyErr_SetString(PyExc_ValueError, \
1138 "underlying buffer has been detached"); \
1140 PyErr_SetString(PyExc_ValueError, \
1141 "I/O operation on uninitialized object"); \
1146 #define CHECK_INITIALIZED_INT(self) \
1147 if (self->ok <= 0) { \
1148 if (self->detached) { \
1149 PyErr_SetString(PyExc_ValueError, \
1150 "underlying buffer has been detached"); \
1152 PyErr_SetString(PyExc_ValueError, \
1153 "I/O operation on uninitialized object"); \
1160 textiowrapper_detach(textio
*self
)
1162 PyObject
*buffer
, *res
;
1163 CHECK_INITIALIZED(self
);
1164 res
= PyObject_CallMethodObjArgs((PyObject
*)self
, _PyIO_str_flush
, NULL
);
1168 buffer
= self
->buffer
;
1169 self
->buffer
= NULL
;
1175 Py_LOCAL_INLINE(const Py_UNICODE
*)
1176 findchar(const Py_UNICODE
*s
, Py_ssize_t size
, Py_UNICODE ch
)
1178 /* like wcschr, but doesn't stop at NULL characters */
1179 while (size
-- > 0) {
1187 /* Flush the internal write buffer. This doesn't explicitly flush the
1188 underlying buffered object, though. */
1190 _textiowrapper_writeflush(textio
*self
)
1194 if (self
->pending_bytes
== NULL
)
1196 b
= _PyBytes_Join(_PyIO_empty_bytes
, self
->pending_bytes
);
1199 ret
= PyObject_CallMethodObjArgs(self
->buffer
,
1200 _PyIO_str_write
, b
, NULL
);
1205 Py_CLEAR(self
->pending_bytes
);
1206 self
->pending_bytes_count
= 0;
1211 textiowrapper_write(textio
*self
, PyObject
*args
)
1214 PyObject
*text
; /* owned reference */
1220 CHECK_INITIALIZED(self
);
1222 if (!PyArg_ParseTuple(args
, "U:write", &text
)) {
1228 if (self
->encoder
== NULL
) {
1229 PyErr_SetString(PyExc_IOError
, "not writable");
1235 textlen
= PyUnicode_GetSize(text
);
1237 if ((self
->writetranslate
&& self
->writenl
!= NULL
) || self
->line_buffering
)
1238 if (findchar(PyUnicode_AS_UNICODE(text
),
1239 PyUnicode_GET_SIZE(text
), '\n'))
1242 if (haslf
&& self
->writetranslate
&& self
->writenl
!= NULL
) {
1243 PyObject
*newtext
= PyObject_CallMethod(
1244 text
, "replace", "ss", "\n", self
->writenl
);
1246 if (newtext
== NULL
)
1251 if (self
->line_buffering
&&
1253 findchar(PyUnicode_AS_UNICODE(text
),
1254 PyUnicode_GET_SIZE(text
), '\r')))
1257 /* XXX What if we were just reading? */
1258 if (self
->encodefunc
!= NULL
) {
1259 b
= (*self
->encodefunc
)((PyObject
*) self
, text
);
1260 self
->encoding_start_of_stream
= 0;
1263 b
= PyObject_CallMethodObjArgs(self
->encoder
,
1264 _PyIO_str_encode
, text
, NULL
);
1269 if (self
->pending_bytes
== NULL
) {
1270 self
->pending_bytes
= PyList_New(0);
1271 if (self
->pending_bytes
== NULL
) {
1275 self
->pending_bytes_count
= 0;
1277 if (PyList_Append(self
->pending_bytes
, b
) < 0) {
1281 self
->pending_bytes_count
+= PyBytes_GET_SIZE(b
);
1283 if (self
->pending_bytes_count
> self
->chunk_size
|| needflush
) {
1284 if (_textiowrapper_writeflush(self
) < 0)
1289 ret
= PyObject_CallMethodObjArgs(self
->buffer
, _PyIO_str_flush
, NULL
);
1295 Py_CLEAR(self
->snapshot
);
1297 if (self
->decoder
) {
1298 ret
= PyObject_CallMethod(self
->decoder
, "reset", NULL
);
1304 return PyLong_FromSsize_t(textlen
);
1307 /* Steal a reference to chars and store it in the decoded_char buffer;
1310 textiowrapper_set_decoded_chars(textio
*self
, PyObject
*chars
)
1312 Py_CLEAR(self
->decoded_chars
);
1313 self
->decoded_chars
= chars
;
1314 self
->decoded_chars_used
= 0;
1318 textiowrapper_get_decoded_chars(textio
*self
, Py_ssize_t n
)
1323 if (self
->decoded_chars
== NULL
)
1324 return PyUnicode_FromStringAndSize(NULL
, 0);
1326 avail
= (PyUnicode_GET_SIZE(self
->decoded_chars
)
1327 - self
->decoded_chars_used
);
1331 if (n
< 0 || n
> avail
)
1334 if (self
->decoded_chars_used
> 0 || n
< avail
) {
1335 chars
= PyUnicode_FromUnicode(
1336 PyUnicode_AS_UNICODE(self
->decoded_chars
)
1337 + self
->decoded_chars_used
, n
);
1342 chars
= self
->decoded_chars
;
1346 self
->decoded_chars_used
+= n
;
1350 /* Read and decode the next chunk of data from the BufferedReader.
1353 textiowrapper_read_chunk(textio
*self
)
1355 PyObject
*dec_buffer
= NULL
;
1356 PyObject
*dec_flags
= NULL
;
1357 PyObject
*input_chunk
= NULL
;
1358 PyObject
*decoded_chars
, *chunk_size
;
1361 /* The return value is True unless EOF was reached. The decoded string is
1362 * placed in self._decoded_chars (replacing its previous value). The
1363 * entire input chunk is sent to the decoder, though some of it may remain
1364 * buffered in the decoder, yet to be converted.
1367 if (self
->decoder
== NULL
) {
1368 PyErr_SetString(PyExc_IOError
, "not readable");
1372 if (self
->telling
) {
1373 /* To prepare for tell(), we need to snapshot a point in the file
1374 * where the decoder's input buffer is empty.
1377 PyObject
*state
= PyObject_CallMethodObjArgs(self
->decoder
,
1378 _PyIO_str_getstate
, NULL
);
1381 /* Given this, we know there was a valid snapshot point
1382 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1384 if (PyArg_Parse(state
, "(OO)", &dec_buffer
, &dec_flags
) < 0) {
1388 Py_INCREF(dec_buffer
);
1389 Py_INCREF(dec_flags
);
1393 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1394 chunk_size
= PyLong_FromSsize_t(self
->chunk_size
);
1395 if (chunk_size
== NULL
)
1397 input_chunk
= PyObject_CallMethodObjArgs(self
->buffer
,
1398 _PyIO_str_read1
, chunk_size
, NULL
);
1399 Py_DECREF(chunk_size
);
1400 if (input_chunk
== NULL
)
1402 assert(PyBytes_Check(input_chunk
));
1404 eof
= (PyBytes_Size(input_chunk
) == 0);
1406 if (Py_TYPE(self
->decoder
) == &PyIncrementalNewlineDecoder_Type
) {
1407 decoded_chars
= _PyIncrementalNewlineDecoder_decode(
1408 self
->decoder
, input_chunk
, eof
);
1411 decoded_chars
= PyObject_CallMethodObjArgs(self
->decoder
,
1412 _PyIO_str_decode
, input_chunk
, eof
? Py_True
: Py_False
, NULL
);
1415 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1416 if (decoded_chars
== NULL
)
1418 textiowrapper_set_decoded_chars(self
, decoded_chars
);
1419 if (PyUnicode_GET_SIZE(decoded_chars
) > 0)
1422 if (self
->telling
) {
1423 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1424 * next input to be decoded is dec_buffer + input_chunk.
1426 PyObject
*next_input
= PyNumber_Add(dec_buffer
, input_chunk
);
1427 if (next_input
== NULL
)
1429 assert (PyBytes_Check(next_input
));
1430 Py_DECREF(dec_buffer
);
1431 Py_CLEAR(self
->snapshot
);
1432 self
->snapshot
= Py_BuildValue("NN", dec_flags
, next_input
);
1434 Py_DECREF(input_chunk
);
1439 Py_XDECREF(dec_buffer
);
1440 Py_XDECREF(dec_flags
);
1441 Py_XDECREF(input_chunk
);
1446 textiowrapper_read(textio
*self
, PyObject
*args
)
1449 PyObject
*result
= NULL
, *chunks
= NULL
;
1451 CHECK_INITIALIZED(self
);
1453 if (!PyArg_ParseTuple(args
, "|n:read", &n
))
1458 if (self
->decoder
== NULL
) {
1459 PyErr_SetString(PyExc_IOError
, "not readable");
1463 if (_textiowrapper_writeflush(self
) < 0)
1467 /* Read everything */
1468 PyObject
*bytes
= PyObject_CallMethod(self
->buffer
, "read", NULL
);
1469 PyObject
*decoded
, *final
;
1472 decoded
= PyObject_CallMethodObjArgs(self
->decoder
, _PyIO_str_decode
,
1473 bytes
, Py_True
, NULL
);
1475 if (decoded
== NULL
)
1478 result
= textiowrapper_get_decoded_chars(self
, -1);
1480 if (result
== NULL
) {
1485 final
= PyUnicode_Concat(result
, decoded
);
1491 Py_CLEAR(self
->snapshot
);
1496 Py_ssize_t remaining
= n
;
1498 result
= textiowrapper_get_decoded_chars(self
, n
);
1501 remaining
-= PyUnicode_GET_SIZE(result
);
1503 /* Keep reading chunks until we have n characters to return */
1504 while (remaining
> 0) {
1505 res
= textiowrapper_read_chunk(self
);
1508 if (res
== 0) /* EOF */
1510 if (chunks
== NULL
) {
1511 chunks
= PyList_New(0);
1515 if (PyList_Append(chunks
, result
) < 0)
1518 result
= textiowrapper_get_decoded_chars(self
, remaining
);
1521 remaining
-= PyUnicode_GET_SIZE(result
);
1523 if (chunks
!= NULL
) {
1524 if (result
!= NULL
&& PyList_Append(chunks
, result
) < 0)
1527 result
= PyUnicode_Join(_PyIO_empty_str
, chunks
);
1541 /* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1542 that is to the NUL character. Otherwise the function will produce
1543 incorrect results. */
1545 find_control_char(Py_UNICODE
*start
, Py_UNICODE
*end
, Py_UNICODE ch
)
1547 Py_UNICODE
*s
= start
;
/* _PyIO_find_line_ending: locate the first line ending in [start, end].
 *
 *   translated -- nonzero: newlines were already translated, so only '\n'
 *                 is searched for (via find_control_char).
 *   universal  -- nonzero (and not translated): any of \r, \r\n, \n ends
 *                 a line.
 *   readnl     -- otherwise, the line terminator as a byte string; its
 *                 size and bytes are read with PyString_GET_SIZE and
 *                 PyString_AS_STRING.
 *   start, end -- the Py_UNICODE range to search; `end` is handed on to
 *                 find_control_char as the end of the storage.
 *   consumed   -- output; written with a position relative to `start`.
 *
 * On a match the visible return statements yield the offset just past the
 * line ending, relative to `start` (pos - start + 1, or + readnl_len for a
 * multi-character terminator).
 * NOTE(review): the not-found return value is not visible in this listing;
 * confirm it against the caller, which falls back to `consumed`.
 */
1560 _PyIO_find_line_ending(
1561 int translated
, int universal
, PyObject
*readnl
,
1562 Py_UNICODE
*start
, Py_UNICODE
*end
, Py_ssize_t
*consumed
)
1564 Py_ssize_t len
= end
- start
;
1567 /* Newlines are already translated, only search for \n */
1568 Py_UNICODE
*pos
= find_control_char(start
, end
, '\n');
1570 return pos
- start
+ 1;
1576 else if (universal
) {
1577 /* Universal newline search. Find any of \r, \r\n, \n
1578 * The decoder ensures that \r\n are not split in two pieces
1580 Py_UNICODE
*s
= start
;
1583 /* Fast path for non-control chars. The loop always ends
1584 since the Py_UNICODE storage is NUL-terminated. */
1596 return s
- start
+ 1;
1603 /* Non-universal mode. */
1604 Py_ssize_t readnl_len
= PyString_GET_SIZE(readnl
);
1605 unsigned char *nl
= (unsigned char *) PyString_AS_STRING(readnl
);
1606 if (readnl_len
== 1) {
1607 Py_UNICODE
*pos
= find_control_char(start
, end
, nl
[0]);
1609 return pos
- start
+ 1;
/* Multi-character terminator: `e` is end - readnl_len + 1, and a candidate
   first character found at or beyond `e` is rejected by the test below. */
1614 Py_UNICODE
*s
= start
;
1615 Py_UNICODE
*e
= end
- readnl_len
+ 1;
1621 Py_UNICODE
*pos
= find_control_char(s
, end
, nl
[0]);
1622 if (pos
== NULL
|| pos
>= e
)
1624 for (i
= 1; i
< readnl_len
; i
++) {
1625 if (pos
[i
] != nl
[i
])
1628 if (i
== readnl_len
)
1629 return pos
- start
+ readnl_len
;
1632 pos
= find_control_char(e
, end
, nl
[0]);
1636 *consumed
= pos
- start
;
/* _textiowrapper_readline: read one line from the wrapper.
 *
 *   self  -- the TextIOWrapper state.
 *   limit -- maximum number of characters; the cap is only applied when
 *            limit >= 0.
 *
 * Outline of the visible steps:
 *   - pending writes are flushed with _textiowrapper_writeflush;
 *   - while self->decoded_chars is missing or empty, another chunk is
 *     pulled in with textiowrapper_read_chunk;
 *   - a leftover piece (`remaining`) from the previous buffer is prepended
 *     to decoded_chars with PyUnicode_Concat, and its length is kept in
 *     offset_to_buffer;
 *   - _PyIO_find_line_ending searches from `start`; if no ending is found
 *     the data up to `endpos` is appended to the `chunks` list and
 *     `chunked` counts the characters set aside so far;
 *   - finally the pieces are joined with _PyIO_empty_str, or an empty
 *     object is created with PyUnicode_FromStringAndSize(NULL, 0).
 *
 * NOTE(review): the error and return paths are not visible in this
 * listing; only the Py_XDECREF(remaining) cleanup is.
 */
1643 _textiowrapper_readline(textio
*self
, Py_ssize_t limit
)
1645 PyObject
*line
= NULL
, *chunks
= NULL
, *remaining
= NULL
;
1646 Py_ssize_t start
, endpos
, chunked
, offset_to_buffer
;
1651 if (_textiowrapper_writeflush(self
) < 0)
1658 Py_ssize_t line_len
;
1659 Py_ssize_t consumed
= 0;
1661 /* First, get some data if necessary */
1663 while (!self
->decoded_chars
||
1664 !PyUnicode_GET_SIZE(self
->decoded_chars
)) {
1665 res
= textiowrapper_read_chunk(self
);
1673 textiowrapper_set_decoded_chars(self
, NULL
);
1674 Py_CLEAR(self
->snapshot
);
1675 start
= endpos
= offset_to_buffer
= 0;
1679 if (remaining
== NULL
) {
1680 line
= self
->decoded_chars
;
1681 start
= self
->decoded_chars_used
;
1682 offset_to_buffer
= 0;
1686 assert(self
->decoded_chars_used
== 0);
1687 line
= PyUnicode_Concat(remaining
, self
->decoded_chars
);
1689 offset_to_buffer
= PyUnicode_GET_SIZE(remaining
);
1690 Py_CLEAR(remaining
);
1695 ptr
= PyUnicode_AS_UNICODE(line
);
1696 line_len
= PyUnicode_GET_SIZE(line
);
/* Search from `start` to the end of `line`; `consumed` is passed by
   address so the callee can fill it in. */
1698 endpos
= _PyIO_find_line_ending(
1699 self
->readtranslate
, self
->readuniversal
, self
->readnl
,
1700 ptr
+ start
, ptr
+ line_len
, &consumed
);
1703 if (limit
>= 0 && (endpos
- start
) + chunked
>= limit
)
1704 endpos
= start
+ limit
- chunked
;
1708 /* We can put aside up to `endpos` */
1709 endpos
= consumed
+ start
;
1710 if (limit
>= 0 && (endpos
- start
) + chunked
>= limit
) {
1711 /* Didn't find line ending, but reached length limit */
1712 endpos
= start
+ limit
- chunked
;
1716 if (endpos
> start
) {
1717 /* No line ending seen yet - put aside current data */
1719 if (chunks
== NULL
) {
1720 chunks
= PyList_New(0);
1724 s
= PyUnicode_FromUnicode(ptr
+ start
, endpos
- start
);
1727 if (PyList_Append(chunks
, s
) < 0) {
1731 chunked
+= PyUnicode_GET_SIZE(s
);
1734 /* There may be some remaining bytes we'll have to prepend to the
1735 next chunk of data */
1736 if (endpos
< line_len
) {
1737 remaining
= PyUnicode_FromUnicode(
1738 ptr
+ endpos
, line_len
- endpos
);
1739 if (remaining
== NULL
)
1743 /* We have consumed the buffer */
1744 textiowrapper_set_decoded_chars(self
, NULL
);
1748 /* Our line ends in the current buffer */
/* endpos indexes `line`, which may start with offset_to_buffer prepended
   characters; subtracting them gives an index into decoded_chars. */
1749 self
->decoded_chars_used
= endpos
- offset_to_buffer
;
1750 if (start
> 0 || endpos
< PyUnicode_GET_SIZE(line
)) {
1751 if (start
== 0 && Py_REFCNT(line
) == 1) {
1752 if (PyUnicode_Resize(&line
, endpos
) < 0)
1756 PyObject
*s
= PyUnicode_FromUnicode(
1757 PyUnicode_AS_UNICODE(line
) + start
, endpos
- start
);
1765 if (remaining
!= NULL
) {
1766 if (chunks
== NULL
) {
1767 chunks
= PyList_New(0);
1771 if (PyList_Append(chunks
, remaining
) < 0)
1773 Py_CLEAR(remaining
);
1775 if (chunks
!= NULL
) {
1776 if (line
!= NULL
&& PyList_Append(chunks
, line
) < 0)
1779 line
= PyUnicode_Join(_PyIO_empty_str
, chunks
);
1785 line
= PyUnicode_FromStringAndSize(NULL
, 0);
1791 Py_XDECREF(remaining
);
/* textiowrapper_readline: Python-level readline([limit]).
 *
 * Parses one optional argument ("|O:readline").  The argument is rejected
 * with TypeError when PyNumber_Check fails, otherwise converted with
 * PyNumber_AsSsize_t (overflow reported as OverflowError).  `limit` starts
 * at -1 and the actual reading is delegated to _textiowrapper_readline.
 *
 * NOTE(review): the guard deciding whether limitobj was supplied is not
 * visible in this listing.
 */
1797 textiowrapper_readline(textio
*self
, PyObject
*args
)
1799 PyObject
*limitobj
= NULL
;
1800 Py_ssize_t limit
= -1;
1802 CHECK_INITIALIZED(self
);
1803 if (!PyArg_ParseTuple(args
, "|O:readline", &limitobj
)) {
1807 if (!PyNumber_Check(limitobj
)) {
1808 PyErr_Format(PyExc_TypeError
,
1809 "integer argument expected, got '%.200s'",
1810 Py_TYPE(limitobj
)->tp_name
);
1813 limit
= PyNumber_AsSsize_t(limitobj
, PyExc_OverflowError
);
1814 if (limit
== -1 && PyErr_Occurred())
1817 return _textiowrapper_readline(self
, limit
);
/* Layout of the intermediate cookie byte buffer.
 *
 * COOKIE_BUF_LEN is the size of one Py_off_t, three ints and one char.
 * The OFF_* macros give the byte offset of each field inside that buffer,
 * and IS_LITTLE_ENDIAN is the flag later passed to the _PyLong byte-array
 * conversion calls.  Two layouts exist, selected by WORDS_BIGENDIAN: the
 * big-endian one stores the fields in reverse order so that start_pos
 * still supplies the least significant bytes of the cookie.
 *
 * NOTE(review): the first four lines below are the tail of a comment
 * whose opening delimiter is not part of this listing.
 */
1831 To speed up cookie packing/unpacking, we store the fields in a temporary
1832 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1833 The following macros define at which offsets in the intermediary byte
1834 string the various CookieStruct fields will be stored.
1837 #define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1839 #if defined(WORDS_BIGENDIAN)
1841 # define IS_LITTLE_ENDIAN 0
1843 /* We want the least significant byte of start_pos to also be the least
1844 significant byte of the cookie, which means that in big-endian mode we
1845 must copy the fields in reverse order. */
1847 # define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1848 # define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1849 # define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1850 # define OFF_CHARS_TO_SKIP (sizeof(char))
1851 # define OFF_NEED_EOF 0
1855 # define IS_LITTLE_ENDIAN 1
1857 /* Little-endian mode: the least significant byte of start_pos will
1858 naturally end up the least significant byte of the cookie. */
1860 # define OFF_START_POS 0
1861 # define OFF_DEC_FLAGS (sizeof(Py_off_t))
1862 # define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1863 # define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1864 # define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
/* textiowrapper_parse_cookie: unpack a Python-level cookie into *cookie.
 *
 *   cookie    -- output structure; start_pos, dec_flags, bytes_to_feed,
 *                chars_to_skip and need_eof are all overwritten.
 *   cookieObj -- any object accepted by PyNumber_Long.
 *
 * The object is converted to a long, serialised into a COOKIE_BUF_LEN byte
 * buffer with _PyLong_AsByteArray (byte order IS_LITTLE_ENDIAN, last
 * argument 0), and each field is then copied out of the buffer with memcpy
 * at its OFF_* offset.  The temporary long is released on both the failure
 * and the success path of the conversion.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * the caller in textiowrapper_seek tests the result with `< 0`.
 */
1869 textiowrapper_parse_cookie(cookie_type
*cookie
, PyObject
*cookieObj
)
1871 unsigned char buffer
[COOKIE_BUF_LEN
];
1872 PyLongObject
*cookieLong
= (PyLongObject
*)PyNumber_Long(cookieObj
);
1873 if (cookieLong
== NULL
)
1876 if (_PyLong_AsByteArray(cookieLong
, buffer
, sizeof(buffer
),
1877 IS_LITTLE_ENDIAN
, 0) < 0) {
1878 Py_DECREF(cookieLong
);
1881 Py_DECREF(cookieLong
);
1883 memcpy(&cookie
->start_pos
, buffer
+ OFF_START_POS
, sizeof(cookie
->start_pos
));
1884 memcpy(&cookie
->dec_flags
, buffer
+ OFF_DEC_FLAGS
, sizeof(cookie
->dec_flags
));
1885 memcpy(&cookie
->bytes_to_feed
, buffer
+ OFF_BYTES_TO_FEED
, sizeof(cookie
->bytes_to_feed
));
1886 memcpy(&cookie
->chars_to_skip
, buffer
+ OFF_CHARS_TO_SKIP
, sizeof(cookie
->chars_to_skip
));
1887 memcpy(&cookie
->need_eof
, buffer
+ OFF_NEED_EOF
, sizeof(cookie
->need_eof
));
/* textiowrapper_build_cookie: pack *cookie into a Python long.
 *
 * The five fields (start_pos, dec_flags, bytes_to_feed, chars_to_skip,
 * need_eof) are copied with memcpy into a COOKIE_BUF_LEN byte buffer at
 * their OFF_* offsets, and the buffer is converted with
 * _PyLong_FromByteArray (byte order IS_LITTLE_ENDIAN, last argument 0).
 * The conversion result is returned directly.  This is the inverse of
 * the field-by-field unpacking done in textiowrapper_parse_cookie.
 *
 * IS_LITTLE_ENDIAN is undefined again right after this function.
 */
1893 textiowrapper_build_cookie(cookie_type
*cookie
)
1895 unsigned char buffer
[COOKIE_BUF_LEN
];
1897 memcpy(buffer
+ OFF_START_POS
, &cookie
->start_pos
, sizeof(cookie
->start_pos
));
1898 memcpy(buffer
+ OFF_DEC_FLAGS
, &cookie
->dec_flags
, sizeof(cookie
->dec_flags
));
1899 memcpy(buffer
+ OFF_BYTES_TO_FEED
, &cookie
->bytes_to_feed
, sizeof(cookie
->bytes_to_feed
));
1900 memcpy(buffer
+ OFF_CHARS_TO_SKIP
, &cookie
->chars_to_skip
, sizeof(cookie
->chars_to_skip
));
1901 memcpy(buffer
+ OFF_NEED_EOF
, &cookie
->need_eof
, sizeof(cookie
->need_eof
));
1903 return _PyLong_FromByteArray(buffer
, sizeof(buffer
), IS_LITTLE_ENDIAN
, 0);
1905 #undef IS_LITTLE_ENDIAN
/* _textiowrapper_decoder_setstate: put self->decoder into the state
 * described by *cookie.
 *
 * When both cookie->start_pos and cookie->dec_flags are zero the decoder
 * is reset through its reset method; otherwise its "setstate" method is
 * called with the tuple ("", cookie->dec_flags), built from the format
 * "((si))".
 *
 * NOTE(review): the handling of `res` and the return value are not
 * visible in this listing; callers test the result with `< 0`.
 */
1908 _textiowrapper_decoder_setstate(textio
*self
, cookie_type
*cookie
)
1911 /* When seeking to the start of the stream, we call decoder.reset()
1912 rather than decoder.getstate().
1913 This is for a few decoders such as utf-16 for which the state value
1914 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1915 utf-16, that we are expecting a BOM).
1917 if (cookie
->start_pos
== 0 && cookie
->dec_flags
== 0)
1918 res
= PyObject_CallMethodObjArgs(self
->decoder
, _PyIO_str_reset
, NULL
);
1920 res
= PyObject_CallMethod(self
->decoder
, "setstate",
1921 "((si))", "", cookie
->dec_flags
);
1929 _textiowrapper_encoder_setstate(textio
*self
, cookie_type
*cookie
)
1932 /* Same as _textiowrapper_decoder_setstate() above. */
/* Encoder counterpart: when cookie->start_pos and cookie->dec_flags are
   both zero the encoder is reset and self->encoding_start_of_stream is
   set to 1; otherwise the encoder setstate method is called and the flag
   is cleared to 0.
   NOTE(review): the argument passed to setstate and the return value are
   not visible in this listing. */
1933 if (cookie
->start_pos
== 0 && cookie
->dec_flags
== 0) {
1934 res
= PyObject_CallMethodObjArgs(self
->encoder
, _PyIO_str_reset
, NULL
);
1935 self
->encoding_start_of_stream
= 1;
1938 res
= PyObject_CallMethodObjArgs(self
->encoder
, _PyIO_str_setstate
,
1940 self
->encoding_start_of_stream
= 0;
/* textiowrapper_seek: Python-level seek(cookie[, whence]).
 *
 * Arguments are parsed with "O|i:seek".  IOError is raised when
 * self->seekable is false.  Visible behaviour per branch:
 *
 *   current-relative -- only a zero offset is accepted (IOError
 *       otherwise); the cookie is replaced by the result of calling the
 *       "tell" method on self.
 *   whence == 2 -- only a zero offset is accepted (IOError otherwise);
 *       self is flushed, decoded characters and the snapshot are dropped,
 *       the decoder (if any) is reset, and the buffer is asked to
 *       seek(0, 2).
 *   any other non-zero whence -- ValueError.
 *   absolute -- a negative cookie raises ValueError.  Otherwise self is
 *       flushed, the cookie is unpacked with textiowrapper_parse_cookie,
 *       the buffer is moved to cookie.start_pos, cached state is dropped
 *       and the decoder state is restored.  If cookie.chars_to_skip is
 *       set, cookie.bytes_to_feed bytes are read, a snapshot is stored,
 *       the bytes are decoded and chars_to_skip decoded characters are
 *       marked as used; IOError is raised when fewer characters were
 *       decoded than must be skipped.  Without chars_to_skip the snapshot
 *       is (cookie.dec_flags, "").  Finally the encoder state, if an
 *       encoder exists, is set from the cookie.
 *
 * NOTE(review): the return statements and the error label are not visible
 * in this listing.
 */
1949 textiowrapper_seek(textio
*self
, PyObject
*args
)
1951 PyObject
*cookieObj
, *posobj
;
1957 CHECK_INITIALIZED(self
);
1959 if (!PyArg_ParseTuple(args
, "O|i:seek", &cookieObj
, &whence
))
1963 Py_INCREF(cookieObj
);
1965 if (!self
->seekable
) {
1966 PyErr_SetString(PyExc_IOError
,
1967 "underlying stream is not seekable");
1972 /* seek relative to current position */
1973 cmp
= PyObject_RichCompareBool(cookieObj
, _PyIO_zero
, Py_EQ
);
1978 PyErr_SetString(PyExc_IOError
,
1979 "can't do nonzero cur-relative seeks");
1983 /* Seeking to the current position should attempt to
1984 * sync the underlying buffer with the current position.
1986 Py_DECREF(cookieObj
);
1987 cookieObj
= PyObject_CallMethod((PyObject
*)self
, "tell", NULL
);
1988 if (cookieObj
== NULL
)
1991 else if (whence
== 2) {
1992 /* seek relative to end of file */
1994 cmp
= PyObject_RichCompareBool(cookieObj
, _PyIO_zero
, Py_EQ
);
1999 PyErr_SetString(PyExc_IOError
,
2000 "can't do nonzero end-relative seeks");
2004 res
= PyObject_CallMethod((PyObject
*)self
, "flush", NULL
);
2009 textiowrapper_set_decoded_chars(self
, NULL
);
2010 Py_CLEAR(self
->snapshot
);
2011 if (self
->decoder
) {
2012 res
= PyObject_CallMethod(self
->decoder
, "reset", NULL
);
2018 res
= PyObject_CallMethod(self
->buffer
, "seek", "ii", 0, 2);
2019 Py_XDECREF(cookieObj
);
2022 else if (whence
!= 0) {
2023 PyErr_Format(PyExc_ValueError
,
2024 "invalid whence (%d, should be 0, 1 or 2)", whence
);
2028 cmp
= PyObject_RichCompareBool(cookieObj
, _PyIO_zero
, Py_LT
);
2033 PyObject
*repr
= PyObject_Repr(cookieObj
);
2035 PyErr_Format(PyExc_ValueError
,
2036 "negative seek position %s",
2037 PyString_AS_STRING(repr
));
2043 res
= PyObject_CallMethodObjArgs((PyObject
*)self
, _PyIO_str_flush
, NULL
);
2048 /* The strategy of seek() is to go back to the safe start point
2049 * and replay the effect of read(chars_to_skip) from there.
2051 if (textiowrapper_parse_cookie(&cookie
, cookieObj
) < 0)
2054 /* Seek back to the safe start point. */
2055 posobj
= PyLong_FromOff_t(cookie
.start_pos
);
2058 res
= PyObject_CallMethodObjArgs(self
->buffer
,
2059 _PyIO_str_seek
, posobj
, NULL
);
2065 textiowrapper_set_decoded_chars(self
, NULL
);
2066 Py_CLEAR(self
->snapshot
);
2068 /* Restore the decoder to its state from the safe start point. */
2069 if (self
->decoder
) {
2070 if (_textiowrapper_decoder_setstate(self
, &cookie
) < 0)
2074 if (cookie
.chars_to_skip
) {
2075 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2076 PyObject
*input_chunk
= PyObject_CallMethod(
2077 self
->buffer
, "read", "i", cookie
.bytes_to_feed
);
2080 if (input_chunk
== NULL
)
2083 assert (PyBytes_Check(input_chunk
));
/* NOTE(review): the "N" format unit hands the input_chunk reference over
   to Py_BuildValue.  Confirm that the Py_DECREF(input_chunk) on the
   failure path below does not release that reference a second time. */
2085 self
->snapshot
= Py_BuildValue("iN", cookie
.dec_flags
, input_chunk
);
2086 if (self
->snapshot
== NULL
) {
2087 Py_DECREF(input_chunk
);
2091 decoded
= PyObject_CallMethod(self
->decoder
, "decode",
2092 "Oi", input_chunk
, (int)cookie
.need_eof
);
2094 if (decoded
== NULL
)
2097 textiowrapper_set_decoded_chars(self
, decoded
);
2099 /* Skip chars_to_skip of the decoded characters. */
2100 if (PyUnicode_GetSize(self
->decoded_chars
) < cookie
.chars_to_skip
) {
2101 PyErr_SetString(PyExc_IOError
, "can't restore logical file position");
2104 self
->decoded_chars_used
= cookie
.chars_to_skip
;
2107 self
->snapshot
= Py_BuildValue("is", cookie
.dec_flags
, "");
2108 if (self
->snapshot
== NULL
)
2112 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2113 if (self
->encoder
) {
2114 if (_textiowrapper_encoder_setstate(self
, &cookie
) < 0)
2119 Py_XDECREF(cookieObj
);
/* textiowrapper_tell: Python-level tell(); returns a cookie built with
 * textiowrapper_build_cookie.
 *
 * Visible steps:
 *   - IOError when self->seekable is false, or when self->telling is
 *     false ("telling position disabled by next() call");
 *   - pending writes are flushed, self is flushed, and the buffer is
 *     asked for its position (posobj);
 *   - without a decoder or snapshot, decoded_chars is asserted to be
 *     absent or empty;
 *   - cookie.start_pos is taken from posobj and moved back by the size of
 *     the snapshot input bytes (next_input); cookie.dec_flags comes from
 *     the snapshot as well;
 *   - if no decoded characters were used since the snapshot, the cookie
 *     is returned as is;
 *   - otherwise the decoder state is saved, the decoder is set to the
 *     snapshot state, and next_input is fed one byte at a time.  Whenever
 *     the decoder reports an empty internal buffer and no more than
 *     chars_to_skip characters have been decoded, that position becomes
 *     the new safe start point;
 *   - if the input runs out, an empty final decode is issued and
 *     cookie.need_eof is set; IOError is raised when still too few
 *     characters were produced;
 *   - the saved decoder state is restored on the success path and, with
 *     the pending exception preserved via PyErr_Fetch and PyErr_Restore,
 *     on the failure path.
 *
 * NOTE(review): the initialisation of chars_decoded and several return
 * statements are not visible in this listing.
 */
2125 textiowrapper_tell(textio
*self
, PyObject
*args
)
2128 PyObject
*posobj
= NULL
;
2129 cookie_type cookie
= {0,0,0,0,0};
2130 PyObject
*next_input
;
2131 Py_ssize_t chars_to_skip
, chars_decoded
;
2132 PyObject
*saved_state
= NULL
;
2133 char *input
, *input_end
;
2135 CHECK_INITIALIZED(self
);
2138 if (!self
->seekable
) {
2139 PyErr_SetString(PyExc_IOError
,
2140 "underlying stream is not seekable");
2143 if (!self
->telling
) {
2144 PyErr_SetString(PyExc_IOError
,
2145 "telling position disabled by next() call");
2149 if (_textiowrapper_writeflush(self
) < 0)
2151 res
= PyObject_CallMethod((PyObject
*)self
, "flush", NULL
);
2156 posobj
= PyObject_CallMethod(self
->buffer
, "tell", NULL
);
2160 if (self
->decoder
== NULL
|| self
->snapshot
== NULL
) {
2161 assert (self
->decoded_chars
== NULL
|| PyUnicode_GetSize(self
->decoded_chars
) == 0);
2165 #if defined(HAVE_LARGEFILE_SUPPORT)
2166 cookie
.start_pos
= PyLong_AsLongLong(posobj
);
2168 cookie
.start_pos
= PyLong_AsLong(posobj
);
2170 if (PyErr_Occurred())
2173 /* Skip backward to the snapshot point (see _read_chunk). */
2174 if (!PyArg_Parse(self
->snapshot
, "(iO)", &cookie
.dec_flags
, &next_input
))
2177 assert (PyBytes_Check(next_input
));
2179 cookie
.start_pos
-= PyBytes_GET_SIZE(next_input
);
2181 /* How many decoded characters have been used up since the snapshot? */
2182 if (self
->decoded_chars_used
== 0) {
2183 /* We haven't moved from the snapshot point. */
2185 return textiowrapper_build_cookie(&cookie
);
2188 chars_to_skip
= self
->decoded_chars_used
;
2190 /* Starting from the snapshot position, we will walk the decoder
2191 * forward until it gives us enough decoded characters.
2193 saved_state
= PyObject_CallMethodObjArgs(self
->decoder
,
2194 _PyIO_str_getstate
, NULL
);
2195 if (saved_state
== NULL
)
2198 /* Note our initial start point. */
2199 if (_textiowrapper_decoder_setstate(self
, &cookie
) < 0)
2202 /* Feed the decoder one byte at a time. As we go, note the
2203 * nearest "safe start point" before the current location
2204 * (a point where the decoder has nothing buffered, so seek()
2205 * can safely start from there and advance to this location).
2208 input
= PyBytes_AS_STRING(next_input
);
2209 input_end
= input
+ PyBytes_GET_SIZE(next_input
);
2210 while (input
< input_end
) {
2213 Py_ssize_t dec_buffer_len
;
2216 PyObject
*decoded
= PyObject_CallMethod(
2217 self
->decoder
, "decode", "s#", input
, 1);
2218 if (decoded
== NULL
)
2220 assert (PyUnicode_Check(decoded
));
2221 chars_decoded
+= PyUnicode_GET_SIZE(decoded
);
2224 cookie
.bytes_to_feed
+= 1;
2226 state
= PyObject_CallMethodObjArgs(self
->decoder
,
2227 _PyIO_str_getstate
, NULL
);
2230 if (!PyArg_Parse(state
, "(s#i)", &dec_buffer
, &dec_buffer_len
, &dec_flags
)) {
2236 if (dec_buffer_len
== 0 && chars_decoded
<= chars_to_skip
) {
2237 /* Decoder buffer is empty, so this is a safe start point. */
2238 cookie
.start_pos
+= cookie
.bytes_to_feed
;
2239 chars_to_skip
-= chars_decoded
;
2240 cookie
.dec_flags
= dec_flags
;
2241 cookie
.bytes_to_feed
= 0;
2244 if (chars_decoded
>= chars_to_skip
)
2248 if (input
== input_end
) {
2249 /* We didn't get enough decoded data; signal EOF to get more. */
2250 PyObject
*decoded
= PyObject_CallMethod(
2251 self
->decoder
, "decode", "si", "", /* final = */ 1);
2252 if (decoded
== NULL
)
2254 assert (PyUnicode_Check(decoded
));
2255 chars_decoded
+= PyUnicode_GET_SIZE(decoded
);
2257 cookie
.need_eof
= 1;
2259 if (chars_decoded
< chars_to_skip
) {
2260 PyErr_SetString(PyExc_IOError
,
2261 "can't reconstruct logical file position");
/* Put the decoder back into the state captured in saved_state. */
2268 res
= PyObject_CallMethod(self
->decoder
, "setstate", "(O)", saved_state
);
2269 Py_DECREF(saved_state
);
2274 /* The returned cookie corresponds to the last safe start point. */
2275 cookie
.chars_to_skip
= Py_SAFE_DOWNCAST(chars_to_skip
, Py_ssize_t
, int);
2276 return textiowrapper_build_cookie(&cookie
);
2281 PyObject
*type
, *value
, *traceback
;
2282 PyErr_Fetch(&type
, &value
, &traceback
);
2284 res
= PyObject_CallMethod(self
->decoder
, "setstate", "(O)", saved_state
);
2285 Py_DECREF(saved_state
);
2290 PyErr_Restore(type
, value
, traceback
);
/* textiowrapper_truncate: Python-level truncate([pos]).
 *
 * `pos` defaults to Py_None and is parsed with "|O:truncate".  self is
 * flushed first; when a position other than None was given, the wrapper
 * is moved there through its own seek method.  The buffer truncate method
 * is then called without arguments and its result is returned.
 */
2296 textiowrapper_truncate(textio
*self
, PyObject
*args
)
2298 PyObject
*pos
= Py_None
;
2301 CHECK_INITIALIZED(self
)
2302 if (!PyArg_ParseTuple(args
, "|O:truncate", &pos
)) {
2306 res
= PyObject_CallMethodObjArgs((PyObject
*) self
, _PyIO_str_flush
, NULL
);
2311 if (pos
!= Py_None
) {
2312 res
= PyObject_CallMethodObjArgs((PyObject
*) self
,
2313 _PyIO_str_seek
, pos
, NULL
);
2319 return PyObject_CallMethodObjArgs(self
->buffer
, _PyIO_str_truncate
, NULL
);
/* textiowrapper_repr: build the repr string of the wrapper.
 *
 * The "name" attribute is looked up on self.  If the lookup fails, the
 * failure is tested against AttributeError and the short form
 * "<_io.TextIOWrapper encoding=...>" is produced; otherwise the long form
 * with both name and encoding is produced.  Both forms embed the repr of
 * self->encoding.  namerepr and encrepr are released with Py_XDECREF on
 * both exit paths.
 *
 * NOTE(review): the results of PyObject_Repr are handed straight to
 * PyString_AS_STRING; no NULL check is visible between those statements.
 * Confirm a failing repr cannot reach the macro with a NULL pointer.
 */
2323 textiowrapper_repr(textio
*self
)
2325 PyObject
*nameobj
, *res
;
2326 PyObject
*namerepr
= NULL
, *encrepr
= NULL
;
2328 CHECK_INITIALIZED(self
);
2330 nameobj
= PyObject_GetAttrString((PyObject
*) self
, "name");
2331 if (nameobj
== NULL
) {
2332 if (PyErr_ExceptionMatches(PyExc_AttributeError
))
2336 encrepr
= PyObject_Repr(self
->encoding
);
2337 res
= PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2338 PyString_AS_STRING(encrepr
));
2341 encrepr
= PyObject_Repr(self
->encoding
);
2342 namerepr
= PyObject_Repr(nameobj
);
2343 res
= PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2344 PyString_AS_STRING(namerepr
),
2345 PyString_AS_STRING(encrepr
));
2348 Py_XDECREF(namerepr
);
2349 Py_XDECREF(encrepr
);
2353 Py_XDECREF(namerepr
);
2354 Py_XDECREF(encrepr
);
/* textiowrapper_fileno: after the CHECK_INITIALIZED guard, call the
 * "fileno" method of self->buffer and return its result unchanged.
 * `args` is not referenced in the visible lines. */
2362 textiowrapper_fileno(textio
*self
, PyObject
*args
)
2364 CHECK_INITIALIZED(self
);
2365 return PyObject_CallMethod(self
->buffer
, "fileno", NULL
);
/* textiowrapper_seekable: after the CHECK_INITIALIZED guard, call the
 * "seekable" method of self->buffer and return its result unchanged.
 * `args` is not referenced in the visible lines. */
2369 textiowrapper_seekable(textio
*self
, PyObject
*args
)
2371 CHECK_INITIALIZED(self
);
2372 return PyObject_CallMethod(self
->buffer
, "seekable", NULL
);
/* textiowrapper_readable: after the CHECK_INITIALIZED guard, call the
 * "readable" method of self->buffer and return its result unchanged.
 * `args` is not referenced in the visible lines. */
2376 textiowrapper_readable(textio
*self
, PyObject
*args
)
2378 CHECK_INITIALIZED(self
);
2379 return PyObject_CallMethod(self
->buffer
, "readable", NULL
);
/* textiowrapper_writable: after the CHECK_INITIALIZED guard, call the
 * "writable" method of self->buffer and return its result unchanged.
 * `args` is not referenced in the visible lines. */
2383 textiowrapper_writable(textio
*self
, PyObject
*args
)
2385 CHECK_INITIALIZED(self
);
2386 return PyObject_CallMethod(self
->buffer
, "writable", NULL
);
/* textiowrapper_isatty: after the CHECK_INITIALIZED guard, call the
 * "isatty" method of self->buffer and return its result unchanged.
 * `args` is not referenced in the visible lines. */
2390 textiowrapper_isatty(textio
*self
, PyObject
*args
)
2392 CHECK_INITIALIZED(self
);
2393 return PyObject_CallMethod(self
->buffer
, "isatty", NULL
);
/* textiowrapper_flush: Python-level flush().
 *
 * Sets self->telling to self->seekable, pushes pending text out with
 * _textiowrapper_writeflush, then calls the "flush" method of
 * self->buffer and returns its result.
 */
2397 textiowrapper_flush(textio
*self
, PyObject
*args
)
2399 CHECK_INITIALIZED(self
);
2401 self
->telling
= self
->seekable
;
2402 if (_textiowrapper_writeflush(self
) < 0)
2404 return PyObject_CallMethod(self
->buffer
, "flush", NULL
);
/* textiowrapper_close: Python-level close().
 *
 * The wrapper is flushed through a "flush" method call on self, so an
 * overriding flush in a subclass is honoured.  According to the comment
 * below, a failing flush ends the operation; otherwise the "close" method
 * of self->buffer is called and its result returned.
 */
2408 textiowrapper_close(textio
*self
, PyObject
*args
)
2411 CHECK_INITIALIZED(self
);
2412 res
= PyObject_CallMethod((PyObject
*)self
, "flush", NULL
);
2414 /* If flush() fails, just give up */
2420 return PyObject_CallMethod(self
->buffer
, "close", NULL
);
/* textiowrapper_iternext: tp_iternext slot; yields the next line.
 *
 * For an exact PyTextIOWrapper_Type instance the line is read directly
 * with _textiowrapper_readline(self, -1).  Otherwise the readline method
 * is called on self, and a result that is not a unicode object is
 * reported as IOError.  When the line is empty (EOF or would have
 * blocked, per the comment below) the snapshot is cleared and
 * self->telling is set back to self->seekable.
 *
 * NOTE(review): the return statements, including whatever is returned
 * for an empty line, are not visible in this listing.
 */
2424 textiowrapper_iternext(textio
*self
)
2428 CHECK_INITIALIZED(self
);
2431 if (Py_TYPE(self
) == &PyTextIOWrapper_Type
) {
2432 /* Skip method call overhead for speed */
2433 line
= _textiowrapper_readline(self
, -1);
2436 line
= PyObject_CallMethodObjArgs((PyObject
*)self
,
2437 _PyIO_str_readline
, NULL
);
2438 if (line
&& !PyUnicode_Check(line
)) {
2439 PyErr_Format(PyExc_IOError
,
2440 "readline() should have returned an str object, "
2441 "not '%.200s'", Py_TYPE(line
)->tp_name
);
2450 if (PyUnicode_GET_SIZE(line
) == 0) {
2451 /* Reached EOF or would have blocked */
2453 Py_CLEAR(self
->snapshot
);
2454 self
->telling
= self
->seekable
;
/* textiowrapper_name_get: getter for "name"; returns the "name"
 * attribute of self->buffer.  `context` is not referenced in the
 * visible lines. */
2462 textiowrapper_name_get(textio
*self
, void *context
)
2464 CHECK_INITIALIZED(self
);
2465 return PyObject_GetAttrString(self
->buffer
, "name");
/* textiowrapper_closed_get: getter for "closed"; returns the attribute
 * named by _PyIO_str_closed looked up on self->buffer.  `context` is not
 * referenced in the visible lines. */
2469 textiowrapper_closed_get(textio
*self
, void *context
)
2471 CHECK_INITIALIZED(self
);
2472 return PyObject_GetAttr(self
->buffer
, _PyIO_str_closed
);
/* textiowrapper_newlines_get: getter for "newlines".
 *
 * A wrapper without a decoder is handled separately; otherwise the
 * attribute named by _PyIO_str_newlines is fetched from self->decoder,
 * and a failure is tested against AttributeError.
 *
 * NOTE(review): the values returned for the no-decoder case and for the
 * AttributeError case are not visible in this listing.
 */
2476 textiowrapper_newlines_get(textio
*self
, void *context
)
2479 CHECK_INITIALIZED(self
);
2480 if (self
->decoder
== NULL
)
2482 res
= PyObject_GetAttr(self
->decoder
, _PyIO_str_newlines
);
2484 if (PyErr_ExceptionMatches(PyExc_AttributeError
)) {
/* textiowrapper_errors_get: getter for "errors"; takes a new reference
 * to self->errors with Py_INCREF and returns it. */
2496 textiowrapper_errors_get(textio
*self
, void *context
)
2498 CHECK_INITIALIZED(self
);
2499 Py_INCREF(self
->errors
);
2500 return self
->errors
;
/* textiowrapper_chunk_size_get: getter for "_CHUNK_SIZE"; returns
 * self->chunk_size converted with PyLong_FromSsize_t. */
2504 textiowrapper_chunk_size_get(textio
*self
, void *context
)
2506 CHECK_INITIALIZED(self
);
2507 return PyLong_FromSsize_t(self
->chunk_size
);
/* textiowrapper_chunk_size_set: setter for "_CHUNK_SIZE".
 *
 * `arg` is converted with PyNumber_AsSsize_t, which reports an overflow
 * as TypeError here.  A rejected value raises ValueError ("a strictly
 * positive integer is required"); an accepted one is stored in
 * self->chunk_size.  The guard macro is the _INT variant because setters
 * return an int status.
 *
 * NOTE(review): the range test guarding the ValueError is not visible in
 * this listing; presumably it rejects n <= 0 -- confirm.
 */
2511 textiowrapper_chunk_size_set(textio
*self
, PyObject
*arg
, void *context
)
2514 CHECK_INITIALIZED_INT(self
);
2515 n
= PyNumber_AsSsize_t(arg
, PyExc_TypeError
);
2516 if (n
== -1 && PyErr_Occurred())
2519 PyErr_SetString(PyExc_ValueError
,
2520 "a strictly positive integer is required");
2523 self
->chunk_size
= n
;
/* Method table of the TextIOWrapper type.  Each entry maps a Python-level
 * method name to its C implementation and calling convention:
 * METH_VARARGS for write, read, readline, seek and truncate, METH_NOARGS
 * for all the others.  No entry in this table supplies a docstring. */
2527 static PyMethodDef textiowrapper_methods
[] = {
2528 {"detach", (PyCFunction
)textiowrapper_detach
, METH_NOARGS
},
2529 {"write", (PyCFunction
)textiowrapper_write
, METH_VARARGS
},
2530 {"read", (PyCFunction
)textiowrapper_read
, METH_VARARGS
},
2531 {"readline", (PyCFunction
)textiowrapper_readline
, METH_VARARGS
},
2532 {"flush", (PyCFunction
)textiowrapper_flush
, METH_NOARGS
},
2533 {"close", (PyCFunction
)textiowrapper_close
, METH_NOARGS
},
2535 {"fileno", (PyCFunction
)textiowrapper_fileno
, METH_NOARGS
},
2536 {"seekable", (PyCFunction
)textiowrapper_seekable
, METH_NOARGS
},
2537 {"readable", (PyCFunction
)textiowrapper_readable
, METH_NOARGS
},
2538 {"writable", (PyCFunction
)textiowrapper_writable
, METH_NOARGS
},
2539 {"isatty", (PyCFunction
)textiowrapper_isatty
, METH_NOARGS
},
2541 {"seek", (PyCFunction
)textiowrapper_seek
, METH_VARARGS
},
2542 {"tell", (PyCFunction
)textiowrapper_tell
, METH_NOARGS
},
2543 {"truncate", (PyCFunction
)textiowrapper_truncate
, METH_VARARGS
},
/* Member table of the TextIOWrapper type: fields of the textio structure
 * exposed directly as read-only attributes.  "encoding" and "buffer" are
 * object members, "line_buffering" is a boolean member. */
2547 static PyMemberDef textiowrapper_members
[] = {
2548 {"encoding", T_OBJECT
, offsetof(textio
, encoding
), READONLY
},
2549 {"buffer", T_OBJECT
, offsetof(textio
, buffer
), READONLY
},
2550 {"line_buffering", T_BOOL
, offsetof(textio
, line_buffering
), READONLY
},
/* Computed-attribute table of the TextIOWrapper type.  "name", "closed",
 * "newlines" and "errors" have a getter only (setter slot NULL);
 * "_CHUNK_SIZE" has both a getter and a setter.  The "mode" entry is
 * commented out in the original. */
2554 static PyGetSetDef textiowrapper_getset
[] = {
2555 {"name", (getter
)textiowrapper_name_get
, NULL
, NULL
},
2556 {"closed", (getter
)textiowrapper_closed_get
, NULL
, NULL
},
2557 /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2559 {"newlines", (getter
)textiowrapper_newlines_get
, NULL
, NULL
},
2560 {"errors", (getter
)textiowrapper_errors_get
, NULL
, NULL
},
2561 {"_CHUNK_SIZE", (getter
)textiowrapper_chunk_size_get
,
2562 (setter
)textiowrapper_chunk_size_set
, NULL
},
2566 PyTypeObject PyTextIOWrapper_Type
= {
2567 PyVarObject_HEAD_INIT(NULL
, 0)
2568 "_io.TextIOWrapper", /*tp_name*/
2569 sizeof(textio
), /*tp_basicsize*/
2571 (destructor
)textiowrapper_dealloc
, /*tp_dealloc*/
2576 (reprfunc
)textiowrapper_repr
,/*tp_repr*/
2578 0, /*tp_as_sequence*/
2579 0, /*tp_as_mapping*/
2586 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
2587 | Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
2588 textiowrapper_doc
, /* tp_doc */
2589 (traverseproc
)textiowrapper_traverse
, /* tp_traverse */
2590 (inquiry
)textiowrapper_clear
, /* tp_clear */
2591 0, /* tp_richcompare */
2592 offsetof(textio
, weakreflist
), /*tp_weaklistoffset*/
2594 (iternextfunc
)textiowrapper_iternext
, /* tp_iternext */
2595 textiowrapper_methods
, /* tp_methods */
2596 textiowrapper_members
, /* tp_members */
2597 textiowrapper_getset
, /* tp_getset */
2600 0, /* tp_descr_get */
2601 0, /* tp_descr_set */
2602 offsetof(textio
, dict
), /*tp_dictoffset*/
2603 (initproc
)textiowrapper_init
, /* tp_init */
2605 PyType_GenericNew
, /* tp_new */