2 #include "structmember.h" /* for offsetof() */
9 Py_ssize_t string_size
;
12 PyObject
*weakreflist
;
15 #define CHECK_CLOSED(self) \
16 if ((self)->buf == NULL) { \
17 PyErr_SetString(PyExc_ValueError, \
18 "I/O operation on closed file."); \
22 /* Internal routine to get a line from the buffer of a BytesIO
23 object. Returns the length from the current position to the
24 next newline character. */
26 get_line(bytesio
*self
, char **output
)
32 assert(self
->buf
!= NULL
);
34 /* Move to the end of the line, up to the end of the string, s. */
35 str_end
= self
->buf
+ self
->string_size
;
36 for (n
= self
->buf
+ self
->pos
;
37 n
< str_end
&& *n
!= '\n';
40 /* Skip the newline character */
44 /* Get the length from the current position to the end of the line. */
45 len
= n
- (self
->buf
+ self
->pos
);
46 *output
= self
->buf
+ self
->pos
;
49 assert(self
->pos
< PY_SSIZE_T_MAX
- len
);
55 /* Internal routine for changing the size of the buffer of BytesIO objects.
56 The caller should ensure that the 'size' argument is non-negative. Returns
57 0 on success, -1 otherwise. */
59 resize_buffer(bytesio
*self
, size_t size
)
61 /* Here, unsigned types are used to avoid dealing with signed integer
62 overflow, which is undefined in C. */
63 size_t alloc
= self
->buf_size
;
66 assert(self
->buf
!= NULL
);
68 /* For simplicity, stay in the range of the signed type. Anyway, Python
69 doesn't allow strings to be longer than this. */
70 if (size
> PY_SSIZE_T_MAX
)
73 if (size
< alloc
/ 2) {
74 /* Major downsize; resize down to exact size. */
77 else if (size
< alloc
) {
78 /* Within allocated size; quick exit */
81 else if (size
<= alloc
* 1.125) {
82 /* Moderate upsize; overallocate similar to list_resize() */
83 alloc
= size
+ (size
>> 3) + (size
< 9 ? 3 : 6);
86 /* Major upsize; resize up to exact size */
90 if (alloc
> ((size_t)-1) / sizeof(char))
92 new_buf
= (char *)PyMem_Realloc(self
->buf
, alloc
* sizeof(char));
93 if (new_buf
== NULL
) {
97 self
->buf_size
= alloc
;
103 PyErr_SetString(PyExc_OverflowError
,
104 "new buffer size too large");
108 /* Internal routine for writing a string of bytes to the buffer of a BytesIO
109 object. Returns the number of bytes written, or -1 on error. */
111 write_bytes(bytesio
*self
, const char *bytes
, Py_ssize_t len
)
113 assert(self
->buf
!= NULL
);
114 assert(self
->pos
>= 0);
117 if ((size_t)self
->pos
+ len
> self
->buf_size
) {
118 if (resize_buffer(self
, (size_t)self
->pos
+ len
) < 0)
122 if (self
->pos
> self
->string_size
) {
123 /* In case of overseek, pad the buffer region between the end of the
124 stream and the current position with null bytes.
127 | |<---used--->|<----------available----------->|
128 | | <--to pad-->|<---to write---> |
131 memset(self
->buf
+ self
->string_size
, '\0',
132 (self
->pos
- self
->string_size
) * sizeof(char));
135 /* Copy the data to the internal buffer, overwriting some of the existing
136 data if self->pos < self->string_size. */
137 memcpy(self
->buf
+ self
->pos
, bytes
, len
);
140 /* Set the new length of the internal string if it has changed. */
141 if (self
->string_size
< self
->pos
) {
142 self
->string_size
= self
->pos
;
149 bytesio_get_closed(bytesio
*self
)
151 if (self
->buf
== NULL
) {
159 /* Generic getter for the writable, readable and seekable properties */
161 return_true(bytesio
*self
)
166 PyDoc_STRVAR(flush_doc
,
167 "flush() -> None. Does nothing.");
170 bytesio_flush(bytesio
*self
)
175 PyDoc_STRVAR(getval_doc
,
176 "getvalue() -> bytes.\n"
178 "Retrieve the entire contents of the BytesIO object.");
181 bytesio_getvalue(bytesio
*self
)
184 return PyBytes_FromStringAndSize(self
->buf
, self
->string_size
);
187 PyDoc_STRVAR(isatty_doc
,
188 "isatty() -> False.\n"
190 "Always returns False since BytesIO objects are not connected\n"
191 "to a tty-like device.");
194 bytesio_isatty(bytesio
*self
)
200 PyDoc_STRVAR(tell_doc
,
201 "tell() -> current file position, an integer\n");
204 bytesio_tell(bytesio
*self
)
207 return PyLong_FromSsize_t(self
->pos
);
210 PyDoc_STRVAR(read_doc
,
211 "read([size]) -> read at most size bytes, returned as a string.\n"
213 "If the size argument is negative, read until EOF is reached.\n"
214 "Return an empty string at EOF.");
217 bytesio_read(bytesio
*self
, PyObject
*args
)
221 PyObject
*arg
= Py_None
;
225 if (!PyArg_ParseTuple(args
, "|O:read", &arg
))
228 if (PyNumber_Check(arg
)) {
229 size
= PyNumber_AsSsize_t(arg
, PyExc_OverflowError
);
230 if (size
== -1 && PyErr_Occurred())
233 else if (arg
== Py_None
) {
234 /* Read until EOF is reached, by default. */
238 PyErr_Format(PyExc_TypeError
, "integer argument expected, got '%s'",
239 Py_TYPE(arg
)->tp_name
);
243 /* adjust invalid sizes */
244 n
= self
->string_size
- self
->pos
;
245 if (size
< 0 || size
> n
) {
251 assert(self
->buf
!= NULL
);
252 output
= self
->buf
+ self
->pos
;
255 return PyBytes_FromStringAndSize(output
, size
);
259 PyDoc_STRVAR(read1_doc
,
260 "read1(size) -> read at most size bytes, returned as a string.\n"
262 "If the size argument is negative or omitted, read until EOF is reached.\n"
263 "Return an empty string at EOF.");
266 bytesio_read1(bytesio
*self
, PyObject
*n
)
270 arg
= PyTuple_Pack(1, n
);
273 res
= bytesio_read(self
, arg
);
278 PyDoc_STRVAR(readline_doc
,
279 "readline([size]) -> next line from the file, as a string.\n"
281 "Retain newline. A non-negative size argument limits the maximum\n"
282 "number of bytes to return (an incomplete line may be returned then).\n"
283 "Return an empty string at EOF.\n");
286 bytesio_readline(bytesio
*self
, PyObject
*args
)
290 PyObject
*arg
= Py_None
;
294 if (!PyArg_ParseTuple(args
, "|O:readline", &arg
))
297 if (PyNumber_Check(arg
)) {
298 size
= PyNumber_AsSsize_t(arg
, PyExc_OverflowError
);
299 if (size
== -1 && PyErr_Occurred())
302 else if (arg
== Py_None
) {
303 /* No size limit, by default. */
307 PyErr_Format(PyExc_TypeError
, "integer argument expected, got '%s'",
308 Py_TYPE(arg
)->tp_name
);
312 n
= get_line(self
, &output
);
314 if (size
>= 0 && size
< n
) {
320 return PyBytes_FromStringAndSize(output
, n
);
323 PyDoc_STRVAR(readlines_doc
,
324 "readlines([size]) -> list of strings, each a line from the file.\n"
326 "Call readline() repeatedly and return a list of the lines so read.\n"
327 "The optional size argument, if given, is an approximate bound on the\n"
328 "total number of bytes in the lines returned.\n");
331 bytesio_readlines(bytesio
*self
, PyObject
*args
)
333 Py_ssize_t maxsize
, size
, n
;
334 PyObject
*result
, *line
;
336 PyObject
*arg
= Py_None
;
340 if (!PyArg_ParseTuple(args
, "|O:readlines", &arg
))
343 if (PyNumber_Check(arg
)) {
344 maxsize
= PyNumber_AsSsize_t(arg
, PyExc_OverflowError
);
345 if (maxsize
== -1 && PyErr_Occurred())
348 else if (arg
== Py_None
) {
349 /* No size limit, by default. */
353 PyErr_Format(PyExc_TypeError
, "integer argument expected, got '%s'",
354 Py_TYPE(arg
)->tp_name
);
359 result
= PyList_New(0);
363 while ((n
= get_line(self
, &output
)) != 0) {
364 line
= PyBytes_FromStringAndSize(output
, n
);
367 if (PyList_Append(result
, line
) == -1) {
373 if (maxsize
> 0 && size
>= maxsize
)
383 PyDoc_STRVAR(readinto_doc
,
384 "readinto(bytearray) -> int. Read up to len(b) bytes into b.\n"
386 "Returns number of bytes read (0 for EOF), or None if the object\n"
387 "is set not to block as has no data to read.");
390 bytesio_readinto(bytesio
*self
, PyObject
*args
)
397 if (!PyArg_ParseTuple(args
, "w*", &buf
))
401 if (self
->pos
+ len
> self
->string_size
)
402 len
= self
->string_size
- self
->pos
;
404 memcpy(buf
.buf
, self
->buf
+ self
->pos
, len
);
405 assert(self
->pos
+ len
< PY_SSIZE_T_MAX
);
409 PyBuffer_Release(&buf
);
410 return PyLong_FromSsize_t(len
);
413 PyDoc_STRVAR(truncate_doc
,
414 "truncate([size]) -> int. Truncate the file to at most size bytes.\n"
416 "Size defaults to the current file position, as returned by tell().\n"
417 "Returns the new size. Imply an absolute seek to the position size.");
420 bytesio_truncate(bytesio
*self
, PyObject
*args
)
423 PyObject
*arg
= Py_None
;
427 if (!PyArg_ParseTuple(args
, "|O:truncate", &arg
))
430 if (PyNumber_Check(arg
)) {
431 size
= PyNumber_AsSsize_t(arg
, PyExc_OverflowError
);
432 if (size
== -1 && PyErr_Occurred())
435 else if (arg
== Py_None
) {
436 /* Truncate to current position if no argument is passed. */
440 PyErr_Format(PyExc_TypeError
, "integer argument expected, got '%s'",
441 Py_TYPE(arg
)->tp_name
);
446 PyErr_Format(PyExc_ValueError
,
447 "negative size value %zd", size
);
451 if (size
< self
->string_size
) {
452 self
->string_size
= size
;
453 if (resize_buffer(self
, size
) < 0)
458 return PyLong_FromSsize_t(size
);
462 bytesio_iternext(bytesio
*self
)
469 n
= get_line(self
, &next
);
474 return PyBytes_FromStringAndSize(next
, n
);
477 PyDoc_STRVAR(seek_doc
,
478 "seek(pos, whence=0) -> int. Change stream position.\n"
480 "Seek to byte offset pos relative to position indicated by whence:\n"
481 " 0 Start of stream (the default). pos should be >= 0;\n"
482 " 1 Current position - pos may be negative;\n"
483 " 2 End of stream - pos usually negative.\n"
484 "Returns the new absolute position.");
487 bytesio_seek(bytesio
*self
, PyObject
*args
)
495 if (!PyArg_ParseTuple(args
, "O|i:seek", &posobj
, &mode
))
498 pos
= PyNumber_AsSsize_t(posobj
, PyExc_OverflowError
);
499 if (pos
== -1 && PyErr_Occurred())
502 if (pos
< 0 && mode
== 0) {
503 PyErr_Format(PyExc_ValueError
,
504 "negative seek value %zd", pos
);
508 /* mode 0: offset relative to beginning of the string.
509 mode 1: offset relative to current position.
510 mode 2: offset relative to the end of the string. */
512 if (pos
> PY_SSIZE_T_MAX
- self
->pos
) {
513 PyErr_SetString(PyExc_OverflowError
,
514 "new position too large");
519 else if (mode
== 2) {
520 if (pos
> PY_SSIZE_T_MAX
- self
->string_size
) {
521 PyErr_SetString(PyExc_OverflowError
,
522 "new position too large");
525 pos
+= self
->string_size
;
527 else if (mode
!= 0) {
528 PyErr_Format(PyExc_ValueError
,
529 "invalid whence (%i, should be 0, 1 or 2)", mode
);
537 return PyLong_FromSsize_t(self
->pos
);
540 PyDoc_STRVAR(write_doc
,
541 "write(bytes) -> int. Write bytes to file.\n"
543 "Return the number of bytes written.");
546 bytesio_write(bytesio
*self
, PyObject
*obj
)
550 PyObject
*result
= NULL
;
554 if (PyObject_GetBuffer(obj
, &buf
, PyBUF_CONTIG_RO
) < 0)
558 n
= write_bytes(self
, buf
.buf
, buf
.len
);
560 result
= PyLong_FromSsize_t(n
);
562 PyBuffer_Release(&buf
);
566 PyDoc_STRVAR(writelines_doc
,
567 "writelines(sequence_of_strings) -> None. Write strings to the file.\n"
569 "Note that newlines are not added. The sequence can be any iterable\n"
570 "object producing strings. This is equivalent to calling write() for\n"
574 bytesio_writelines(bytesio
*self
, PyObject
*v
)
581 it
= PyObject_GetIter(v
);
585 while ((item
= PyIter_Next(it
)) != NULL
) {
586 ret
= bytesio_write(self
, item
);
596 /* See if PyIter_Next failed */
597 if (PyErr_Occurred())
603 PyDoc_STRVAR(close_doc
,
604 "close() -> None. Disable all I/O operations.");
607 bytesio_close(bytesio
*self
)
609 if (self
->buf
!= NULL
) {
610 PyMem_Free(self
->buf
);
618 Note that only pickle protocol 2 and onward are supported since we use
619 extended __reduce__ API of PEP 307 to make BytesIO instances picklable.
621 Providing support for protocol < 2 would require the __reduce_ex__ method
622 which is notably long-winded when defined properly.
624 For BytesIO, the implementation would be similar to the one coded for
625 object.__reduce_ex__, but slightly less general. To be more specific, we
626 could call bytesio_getstate directly and avoid checking for the presence of
627 a fallback __reduce__ method. However, we would still need a __newobj__
628 function to use the efficient instance representation of PEP 307.
632 bytesio_getstate(bytesio
*self
)
634 PyObject
*initvalue
= bytesio_getvalue(self
);
638 if (initvalue
== NULL
)
640 if (self
->dict
== NULL
) {
645 dict
= PyDict_Copy(self
->dict
);
650 state
= Py_BuildValue("(OnN)", initvalue
, self
->pos
, dict
);
651 Py_DECREF(initvalue
);
656 bytesio_setstate(bytesio
*self
, PyObject
*state
)
659 PyObject
*position_obj
;
663 assert(state
!= NULL
);
665 /* We allow the state tuple to be longer than 3, because we may need
666 someday to extend the object's state without breaking
667 backward-compatibility. */
668 if (!PyTuple_Check(state
) || Py_SIZE(state
) < 3) {
669 PyErr_Format(PyExc_TypeError
,
670 "%.200s.__setstate__ argument should be 3-tuple, got %.200s",
671 Py_TYPE(self
)->tp_name
, Py_TYPE(state
)->tp_name
);
674 /* Reset the object to its default state. This is only needed to handle
675 the case of repeated calls to __setstate__. */
676 self
->string_size
= 0;
679 /* Set the value of the internal buffer. If state[0] does not support the
680 buffer protocol, bytesio_write will raise the appropriate TypeError. */
681 result
= bytesio_write(self
, PyTuple_GET_ITEM(state
, 0));
686 /* Carefully set the position value. Alternatively, we could use the seek
687 method instead of modifying self->pos directly to better protect the
688 object's internal state against erroneous (or malicious) inputs. */
689 position_obj
= PyTuple_GET_ITEM(state
, 1);
690 if (!PyIndex_Check(position_obj
)) {
691 PyErr_Format(PyExc_TypeError
,
692 "second item of state must be an integer, not %.200s",
693 Py_TYPE(position_obj
)->tp_name
);
696 pos
= PyNumber_AsSsize_t(position_obj
, PyExc_OverflowError
);
697 if (pos
== -1 && PyErr_Occurred())
700 PyErr_SetString(PyExc_ValueError
,
701 "position value cannot be negative");
706 /* Set the dictionary of the instance variables. */
707 dict
= PyTuple_GET_ITEM(state
, 2);
708 if (dict
!= Py_None
) {
709 if (!PyDict_Check(dict
)) {
710 PyErr_Format(PyExc_TypeError
,
711 "third item of state should be a dict, got a %.200s",
712 Py_TYPE(dict
)->tp_name
);
716 /* Alternatively, we could replace the internal dictionary
717 completely. However, it seems more practical to just update it. */
718 if (PyDict_Update(self
->dict
, dict
) < 0)
731 bytesio_dealloc(bytesio
*self
)
733 _PyObject_GC_UNTRACK(self
);
734 if (self
->buf
!= NULL
) {
735 PyMem_Free(self
->buf
);
738 Py_CLEAR(self
->dict
);
739 if (self
->weakreflist
!= NULL
)
740 PyObject_ClearWeakRefs((PyObject
*) self
);
741 Py_TYPE(self
)->tp_free(self
);
745 bytesio_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
749 assert(type
!= NULL
&& type
->tp_alloc
!= NULL
);
750 self
= (bytesio
*)type
->tp_alloc(type
, 0);
754 /* tp_alloc initializes all the fields to zero. So we don't have to
755 initialize them here. */
757 self
->buf
= (char *)PyMem_Malloc(0);
758 if (self
->buf
== NULL
) {
760 return PyErr_NoMemory();
763 return (PyObject
*)self
;
767 bytesio_init(bytesio
*self
, PyObject
*args
, PyObject
*kwds
)
769 char *kwlist
[] = {"initial_bytes", NULL
};
770 PyObject
*initvalue
= NULL
;
772 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|O:BytesIO", kwlist
,
776 /* In case __init__ is called multiple times. */
777 self
->string_size
= 0;
780 if (initvalue
&& initvalue
!= Py_None
) {
782 res
= bytesio_write(self
, initvalue
);
793 bytesio_traverse(bytesio
*self
, visitproc visit
, void *arg
)
795 Py_VISIT(self
->dict
);
800 bytesio_clear(bytesio
*self
)
802 Py_CLEAR(self
->dict
);
807 static PyGetSetDef bytesio_getsetlist
[] = {
808 {"closed", (getter
)bytesio_get_closed
, NULL
,
809 "True if the file is closed."},
810 {NULL
}, /* sentinel */
813 static struct PyMethodDef bytesio_methods
[] = {
814 {"readable", (PyCFunction
)return_true
, METH_NOARGS
, NULL
},
815 {"seekable", (PyCFunction
)return_true
, METH_NOARGS
, NULL
},
816 {"writable", (PyCFunction
)return_true
, METH_NOARGS
, NULL
},
817 {"close", (PyCFunction
)bytesio_close
, METH_NOARGS
, close_doc
},
818 {"flush", (PyCFunction
)bytesio_flush
, METH_NOARGS
, flush_doc
},
819 {"isatty", (PyCFunction
)bytesio_isatty
, METH_NOARGS
, isatty_doc
},
820 {"tell", (PyCFunction
)bytesio_tell
, METH_NOARGS
, tell_doc
},
821 {"write", (PyCFunction
)bytesio_write
, METH_O
, write_doc
},
822 {"writelines", (PyCFunction
)bytesio_writelines
, METH_O
, writelines_doc
},
823 {"read1", (PyCFunction
)bytesio_read1
, METH_O
, read1_doc
},
824 {"readinto", (PyCFunction
)bytesio_readinto
, METH_VARARGS
, readinto_doc
},
825 {"readline", (PyCFunction
)bytesio_readline
, METH_VARARGS
, readline_doc
},
826 {"readlines", (PyCFunction
)bytesio_readlines
, METH_VARARGS
, readlines_doc
},
827 {"read", (PyCFunction
)bytesio_read
, METH_VARARGS
, read_doc
},
828 {"getvalue", (PyCFunction
)bytesio_getvalue
, METH_VARARGS
, getval_doc
},
829 {"seek", (PyCFunction
)bytesio_seek
, METH_VARARGS
, seek_doc
},
830 {"truncate", (PyCFunction
)bytesio_truncate
, METH_VARARGS
, truncate_doc
},
831 {"__getstate__", (PyCFunction
)bytesio_getstate
, METH_NOARGS
, NULL
},
832 {"__setstate__", (PyCFunction
)bytesio_setstate
, METH_O
, NULL
},
833 {NULL
, NULL
} /* sentinel */
836 PyDoc_STRVAR(bytesio_doc
,
837 "BytesIO([buffer]) -> object\n"
839 "Create a buffered I/O implementation using an in-memory bytes\n"
840 "buffer, ready for reading and writing.");
842 PyTypeObject PyBytesIO_Type
= {
843 PyVarObject_HEAD_INIT(NULL
, 0)
844 "_io.BytesIO", /*tp_name*/
845 sizeof(bytesio
), /*tp_basicsize*/
847 (destructor
)bytesio_dealloc
, /*tp_dealloc*/
854 0, /*tp_as_sequence*/
862 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
863 Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
864 bytesio_doc
, /*tp_doc*/
865 (traverseproc
)bytesio_traverse
, /*tp_traverse*/
866 (inquiry
)bytesio_clear
, /*tp_clear*/
867 0, /*tp_richcompare*/
868 offsetof(bytesio
, weakreflist
), /*tp_weaklistoffset*/
869 PyObject_SelfIter
, /*tp_iter*/
870 (iternextfunc
)bytesio_iternext
, /*tp_iternext*/
871 bytesio_methods
, /*tp_methods*/
873 bytesio_getsetlist
, /*tp_getset*/
878 offsetof(bytesio
, dict
), /*tp_dictoffset*/
879 (initproc
)bytesio_init
, /*tp_init*/
881 bytesio_new
, /*tp_new*/