Issue #6215: backport the 3.1 io lib
[python.git] / Modules / _io / bytesio.c
blobed2f7cc51d7e0834a52980e7d4b76a87cc3d823a
1 #include "Python.h"
2 #include "structmember.h" /* for offsetof() */
3 #include "_iomodule.h"
5 typedef struct {
6 PyObject_HEAD
7 char *buf;
8 Py_ssize_t pos;
9 Py_ssize_t string_size;
10 size_t buf_size;
11 PyObject *dict;
12 PyObject *weakreflist;
13 } bytesio;
/* Make the calling function fail with ValueError when the stream has been
   closed, i.e. when its buffer pointer is NULL.  The expansion executes
   `return NULL`, so it can only be used in functions returning a pointer.
   The do/while(0) wrapper makes the expansion a single statement, which keeps
   it safe inside an unbraced if/else. */
#define CHECK_CLOSED(self)                                      \
    do {                                                        \
        if ((self)->buf == NULL) {                              \
            PyErr_SetString(PyExc_ValueError,                   \
                            "I/O operation on closed file.");   \
            return NULL;                                        \
        }                                                       \
    } while (0)
22 /* Internal routine to get a line from the buffer of a BytesIO
23 object. Returns the length between the current position to the
24 next newline character. */
25 static Py_ssize_t
26 get_line(bytesio *self, char **output)
28 char *n;
29 const char *str_end;
30 Py_ssize_t len;
32 assert(self->buf != NULL);
34 /* Move to the end of the line, up to the end of the string, s. */
35 str_end = self->buf + self->string_size;
36 for (n = self->buf + self->pos;
37 n < str_end && *n != '\n';
38 n++);
40 /* Skip the newline character */
41 if (n < str_end)
42 n++;
44 /* Get the length from the current position to the end of the line. */
45 len = n - (self->buf + self->pos);
46 *output = self->buf + self->pos;
48 assert(len >= 0);
49 assert(self->pos < PY_SSIZE_T_MAX - len);
50 self->pos += len;
52 return len;
55 /* Internal routine for changing the size of the buffer of BytesIO objects.
56 The caller should ensure that the 'size' argument is non-negative. Returns
57 0 on success, -1 otherwise. */
58 static int
59 resize_buffer(bytesio *self, size_t size)
61 /* Here, unsigned types are used to avoid dealing with signed integer
62 overflow, which is undefined in C. */
63 size_t alloc = self->buf_size;
64 char *new_buf = NULL;
66 assert(self->buf != NULL);
68 /* For simplicity, stay in the range of the signed type. Anyway, Python
69 doesn't allow strings to be longer than this. */
70 if (size > PY_SSIZE_T_MAX)
71 goto overflow;
73 if (size < alloc / 2) {
74 /* Major downsize; resize down to exact size. */
75 alloc = size + 1;
77 else if (size < alloc) {
78 /* Within allocated size; quick exit */
79 return 0;
81 else if (size <= alloc * 1.125) {
82 /* Moderate upsize; overallocate similar to list_resize() */
83 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
85 else {
86 /* Major upsize; resize up to exact size */
87 alloc = size + 1;
90 if (alloc > ((size_t)-1) / sizeof(char))
91 goto overflow;
92 new_buf = (char *)PyMem_Realloc(self->buf, alloc * sizeof(char));
93 if (new_buf == NULL) {
94 PyErr_NoMemory();
95 return -1;
97 self->buf_size = alloc;
98 self->buf = new_buf;
100 return 0;
102 overflow:
103 PyErr_SetString(PyExc_OverflowError,
104 "new buffer size too large");
105 return -1;
108 /* Internal routine for writing a string of bytes to the buffer of a BytesIO
109 object. Returns the number of bytes wrote, or -1 on error. */
110 static Py_ssize_t
111 write_bytes(bytesio *self, const char *bytes, Py_ssize_t len)
113 assert(self->buf != NULL);
114 assert(self->pos >= 0);
115 assert(len >= 0);
117 if ((size_t)self->pos + len > self->buf_size) {
118 if (resize_buffer(self, (size_t)self->pos + len) < 0)
119 return -1;
122 if (self->pos > self->string_size) {
123 /* In case of overseek, pad with null bytes the buffer region between
124 the end of stream and the current position.
126 0 lo string_size hi
127 | |<---used--->|<----------available----------->|
128 | | <--to pad-->|<---to write---> |
129 0 buf position
131 memset(self->buf + self->string_size, '\0',
132 (self->pos - self->string_size) * sizeof(char));
135 /* Copy the data to the internal buffer, overwriting some of the existing
136 data if self->pos < self->string_size. */
137 memcpy(self->buf + self->pos, bytes, len);
138 self->pos += len;
140 /* Set the new length of the internal string if it has changed. */
141 if (self->string_size < self->pos) {
142 self->string_size = self->pos;
145 return len;
148 static PyObject *
149 bytesio_get_closed(bytesio *self)
151 if (self->buf == NULL) {
152 Py_RETURN_TRUE;
154 else {
155 Py_RETURN_FALSE;
159 /* Generic getter for the writable, readable and seekable properties */
160 static PyObject *
161 return_true(bytesio *self)
163 Py_RETURN_TRUE;
166 PyDoc_STRVAR(flush_doc,
167 "flush() -> None. Does nothing.");
169 static PyObject *
170 bytesio_flush(bytesio *self)
172 Py_RETURN_NONE;
175 PyDoc_STRVAR(getval_doc,
176 "getvalue() -> bytes.\n"
177 "\n"
178 "Retrieve the entire contents of the BytesIO object.");
180 static PyObject *
181 bytesio_getvalue(bytesio *self)
183 CHECK_CLOSED(self);
184 return PyBytes_FromStringAndSize(self->buf, self->string_size);
187 PyDoc_STRVAR(isatty_doc,
188 "isatty() -> False.\n"
189 "\n"
190 "Always returns False since BytesIO objects are not connected\n"
191 "to a tty-like device.");
193 static PyObject *
194 bytesio_isatty(bytesio *self)
196 CHECK_CLOSED(self);
197 Py_RETURN_FALSE;
200 PyDoc_STRVAR(tell_doc,
201 "tell() -> current file position, an integer\n");
203 static PyObject *
204 bytesio_tell(bytesio *self)
206 CHECK_CLOSED(self);
207 return PyLong_FromSsize_t(self->pos);
210 PyDoc_STRVAR(read_doc,
211 "read([size]) -> read at most size bytes, returned as a string.\n"
212 "\n"
213 "If the size argument is negative, read until EOF is reached.\n"
214 "Return an empty string at EOF.");
216 static PyObject *
217 bytesio_read(bytesio *self, PyObject *args)
219 Py_ssize_t size, n;
220 char *output;
221 PyObject *arg = Py_None;
223 CHECK_CLOSED(self);
225 if (!PyArg_ParseTuple(args, "|O:read", &arg))
226 return NULL;
228 if (PyNumber_Check(arg)) {
229 size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
230 if (size == -1 && PyErr_Occurred())
231 return NULL;
233 else if (arg == Py_None) {
234 /* Read until EOF is reached, by default. */
235 size = -1;
237 else {
238 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
239 Py_TYPE(arg)->tp_name);
240 return NULL;
243 /* adjust invalid sizes */
244 n = self->string_size - self->pos;
245 if (size < 0 || size > n) {
246 size = n;
247 if (size < 0)
248 size = 0;
251 assert(self->buf != NULL);
252 output = self->buf + self->pos;
253 self->pos += size;
255 return PyBytes_FromStringAndSize(output, size);
259 PyDoc_STRVAR(read1_doc,
260 "read1(size) -> read at most size bytes, returned as a string.\n"
261 "\n"
262 "If the size argument is negative or omitted, read until EOF is reached.\n"
263 "Return an empty string at EOF.");
265 static PyObject *
266 bytesio_read1(bytesio *self, PyObject *n)
268 PyObject *arg, *res;
270 arg = PyTuple_Pack(1, n);
271 if (arg == NULL)
272 return NULL;
273 res = bytesio_read(self, arg);
274 Py_DECREF(arg);
275 return res;
278 PyDoc_STRVAR(readline_doc,
279 "readline([size]) -> next line from the file, as a string.\n"
280 "\n"
281 "Retain newline. A non-negative size argument limits the maximum\n"
282 "number of bytes to return (an incomplete line may be returned then).\n"
283 "Return an empty string at EOF.\n");
285 static PyObject *
286 bytesio_readline(bytesio *self, PyObject *args)
288 Py_ssize_t size, n;
289 char *output;
290 PyObject *arg = Py_None;
292 CHECK_CLOSED(self);
294 if (!PyArg_ParseTuple(args, "|O:readline", &arg))
295 return NULL;
297 if (PyNumber_Check(arg)) {
298 size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
299 if (size == -1 && PyErr_Occurred())
300 return NULL;
302 else if (arg == Py_None) {
303 /* No size limit, by default. */
304 size = -1;
306 else {
307 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
308 Py_TYPE(arg)->tp_name);
309 return NULL;
312 n = get_line(self, &output);
314 if (size >= 0 && size < n) {
315 size = n - size;
316 n -= size;
317 self->pos -= size;
320 return PyBytes_FromStringAndSize(output, n);
323 PyDoc_STRVAR(readlines_doc,
324 "readlines([size]) -> list of strings, each a line from the file.\n"
325 "\n"
326 "Call readline() repeatedly and return a list of the lines so read.\n"
327 "The optional size argument, if given, is an approximate bound on the\n"
328 "total number of bytes in the lines returned.\n");
330 static PyObject *
331 bytesio_readlines(bytesio *self, PyObject *args)
333 Py_ssize_t maxsize, size, n;
334 PyObject *result, *line;
335 char *output;
336 PyObject *arg = Py_None;
338 CHECK_CLOSED(self);
340 if (!PyArg_ParseTuple(args, "|O:readlines", &arg))
341 return NULL;
343 if (PyNumber_Check(arg)) {
344 maxsize = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
345 if (maxsize == -1 && PyErr_Occurred())
346 return NULL;
348 else if (arg == Py_None) {
349 /* No size limit, by default. */
350 maxsize = -1;
352 else {
353 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
354 Py_TYPE(arg)->tp_name);
355 return NULL;
358 size = 0;
359 result = PyList_New(0);
360 if (!result)
361 return NULL;
363 while ((n = get_line(self, &output)) != 0) {
364 line = PyBytes_FromStringAndSize(output, n);
365 if (!line)
366 goto on_error;
367 if (PyList_Append(result, line) == -1) {
368 Py_DECREF(line);
369 goto on_error;
371 Py_DECREF(line);
372 size += n;
373 if (maxsize > 0 && size >= maxsize)
374 break;
376 return result;
378 on_error:
379 Py_DECREF(result);
380 return NULL;
383 PyDoc_STRVAR(readinto_doc,
384 "readinto(bytearray) -> int. Read up to len(b) bytes into b.\n"
385 "\n"
386 "Returns number of bytes read (0 for EOF), or None if the object\n"
387 "is set not to block as has no data to read.");
389 static PyObject *
390 bytesio_readinto(bytesio *self, PyObject *args)
392 Py_buffer buf;
393 Py_ssize_t len;
395 CHECK_CLOSED(self);
397 if (!PyArg_ParseTuple(args, "w*", &buf))
398 return NULL;
400 len = buf.len;
401 if (self->pos + len > self->string_size)
402 len = self->string_size - self->pos;
404 memcpy(buf.buf, self->buf + self->pos, len);
405 assert(self->pos + len < PY_SSIZE_T_MAX);
406 assert(len >= 0);
407 self->pos += len;
409 PyBuffer_Release(&buf);
410 return PyLong_FromSsize_t(len);
413 PyDoc_STRVAR(truncate_doc,
414 "truncate([size]) -> int. Truncate the file to at most size bytes.\n"
415 "\n"
416 "Size defaults to the current file position, as returned by tell().\n"
417 "Returns the new size. Imply an absolute seek to the position size.");
419 static PyObject *
420 bytesio_truncate(bytesio *self, PyObject *args)
422 Py_ssize_t size;
423 PyObject *arg = Py_None;
425 CHECK_CLOSED(self);
427 if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
428 return NULL;
430 if (PyNumber_Check(arg)) {
431 size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
432 if (size == -1 && PyErr_Occurred())
433 return NULL;
435 else if (arg == Py_None) {
436 /* Truncate to current position if no argument is passed. */
437 size = self->pos;
439 else {
440 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
441 Py_TYPE(arg)->tp_name);
442 return NULL;
445 if (size < 0) {
446 PyErr_Format(PyExc_ValueError,
447 "negative size value %zd", size);
448 return NULL;
451 if (size < self->string_size) {
452 self->string_size = size;
453 if (resize_buffer(self, size) < 0)
454 return NULL;
456 self->pos = size;
458 return PyLong_FromSsize_t(size);
461 static PyObject *
462 bytesio_iternext(bytesio *self)
464 char *next;
465 Py_ssize_t n;
467 CHECK_CLOSED(self);
469 n = get_line(self, &next);
471 if (!next || n == 0)
472 return NULL;
474 return PyBytes_FromStringAndSize(next, n);
477 PyDoc_STRVAR(seek_doc,
478 "seek(pos, whence=0) -> int. Change stream position.\n"
479 "\n"
480 "Seek to byte offset pos relative to position indicated by whence:\n"
481 " 0 Start of stream (the default). pos should be >= 0;\n"
482 " 1 Current position - pos may be negative;\n"
483 " 2 End of stream - pos usually negative.\n"
484 "Returns the new absolute position.");
486 static PyObject *
487 bytesio_seek(bytesio *self, PyObject *args)
489 PyObject *posobj;
490 Py_ssize_t pos;
491 int mode = 0;
493 CHECK_CLOSED(self);
495 if (!PyArg_ParseTuple(args, "O|i:seek", &posobj, &mode))
496 return NULL;
498 pos = PyNumber_AsSsize_t(posobj, PyExc_OverflowError);
499 if (pos == -1 && PyErr_Occurred())
500 return NULL;
502 if (pos < 0 && mode == 0) {
503 PyErr_Format(PyExc_ValueError,
504 "negative seek value %zd", pos);
505 return NULL;
508 /* mode 0: offset relative to beginning of the string.
509 mode 1: offset relative to current position.
510 mode 2: offset relative the end of the string. */
511 if (mode == 1) {
512 if (pos > PY_SSIZE_T_MAX - self->pos) {
513 PyErr_SetString(PyExc_OverflowError,
514 "new position too large");
515 return NULL;
517 pos += self->pos;
519 else if (mode == 2) {
520 if (pos > PY_SSIZE_T_MAX - self->string_size) {
521 PyErr_SetString(PyExc_OverflowError,
522 "new position too large");
523 return NULL;
525 pos += self->string_size;
527 else if (mode != 0) {
528 PyErr_Format(PyExc_ValueError,
529 "invalid whence (%i, should be 0, 1 or 2)", mode);
530 return NULL;
533 if (pos < 0)
534 pos = 0;
535 self->pos = pos;
537 return PyLong_FromSsize_t(self->pos);
540 PyDoc_STRVAR(write_doc,
541 "write(bytes) -> int. Write bytes to file.\n"
542 "\n"
543 "Return the number of bytes written.");
545 static PyObject *
546 bytesio_write(bytesio *self, PyObject *obj)
548 Py_ssize_t n = 0;
549 Py_buffer buf;
550 PyObject *result = NULL;
552 CHECK_CLOSED(self);
554 if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0)
555 return NULL;
557 if (buf.len != 0)
558 n = write_bytes(self, buf.buf, buf.len);
559 if (n >= 0)
560 result = PyLong_FromSsize_t(n);
562 PyBuffer_Release(&buf);
563 return result;
566 PyDoc_STRVAR(writelines_doc,
567 "writelines(sequence_of_strings) -> None. Write strings to the file.\n"
568 "\n"
569 "Note that newlines are not added. The sequence can be any iterable\n"
570 "object producing strings. This is equivalent to calling write() for\n"
571 "each string.");
573 static PyObject *
574 bytesio_writelines(bytesio *self, PyObject *v)
576 PyObject *it, *item;
577 PyObject *ret;
579 CHECK_CLOSED(self);
581 it = PyObject_GetIter(v);
582 if (it == NULL)
583 return NULL;
585 while ((item = PyIter_Next(it)) != NULL) {
586 ret = bytesio_write(self, item);
587 Py_DECREF(item);
588 if (ret == NULL) {
589 Py_DECREF(it);
590 return NULL;
592 Py_DECREF(ret);
594 Py_DECREF(it);
596 /* See if PyIter_Next failed */
597 if (PyErr_Occurred())
598 return NULL;
600 Py_RETURN_NONE;
603 PyDoc_STRVAR(close_doc,
604 "close() -> None. Disable all I/O operations.");
606 static PyObject *
607 bytesio_close(bytesio *self)
609 if (self->buf != NULL) {
610 PyMem_Free(self->buf);
611 self->buf = NULL;
613 Py_RETURN_NONE;
616 static void
617 bytesio_dealloc(bytesio *self)
619 if (self->buf != NULL) {
620 PyMem_Free(self->buf);
621 self->buf = NULL;
623 Py_TYPE(self)->tp_clear((PyObject *)self);
624 Py_TYPE(self)->tp_free(self);
627 static PyObject *
628 bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
630 bytesio *self;
632 assert(type != NULL && type->tp_alloc != NULL);
633 self = (bytesio *)type->tp_alloc(type, 0);
634 if (self == NULL)
635 return NULL;
637 self->string_size = 0;
638 self->pos = 0;
639 self->buf_size = 0;
640 self->buf = (char *)PyMem_Malloc(0);
641 if (self->buf == NULL) {
642 Py_DECREF(self);
643 return PyErr_NoMemory();
646 return (PyObject *)self;
649 static int
650 bytesio_init(bytesio *self, PyObject *args, PyObject *kwds)
652 PyObject *initvalue = NULL;
654 if (!PyArg_ParseTuple(args, "|O:BytesIO", &initvalue))
655 return -1;
657 /* In case, __init__ is called multiple times. */
658 self->string_size = 0;
659 self->pos = 0;
661 if (initvalue && initvalue != Py_None) {
662 PyObject *res;
663 res = bytesio_write(self, initvalue);
664 if (res == NULL)
665 return -1;
666 Py_DECREF(res);
667 self->pos = 0;
670 return 0;
673 static int
674 bytesio_traverse(bytesio *self, visitproc visit, void *arg)
676 Py_VISIT(self->dict);
677 Py_VISIT(self->weakreflist);
678 return 0;
681 static int
682 bytesio_clear(bytesio *self)
684 Py_CLEAR(self->dict);
685 if (self->weakreflist != NULL)
686 PyObject_ClearWeakRefs((PyObject *)self);
687 return 0;
691 static PyGetSetDef bytesio_getsetlist[] = {
692 {"closed", (getter)bytesio_get_closed, NULL,
693 "True if the file is closed."},
694 {NULL}, /* sentinel */
697 static struct PyMethodDef bytesio_methods[] = {
698 {"readable", (PyCFunction)return_true, METH_NOARGS, NULL},
699 {"seekable", (PyCFunction)return_true, METH_NOARGS, NULL},
700 {"writable", (PyCFunction)return_true, METH_NOARGS, NULL},
701 {"close", (PyCFunction)bytesio_close, METH_NOARGS, close_doc},
702 {"flush", (PyCFunction)bytesio_flush, METH_NOARGS, flush_doc},
703 {"isatty", (PyCFunction)bytesio_isatty, METH_NOARGS, isatty_doc},
704 {"tell", (PyCFunction)bytesio_tell, METH_NOARGS, tell_doc},
705 {"write", (PyCFunction)bytesio_write, METH_O, write_doc},
706 {"writelines", (PyCFunction)bytesio_writelines, METH_O, writelines_doc},
707 {"read1", (PyCFunction)bytesio_read1, METH_O, read1_doc},
708 {"readinto", (PyCFunction)bytesio_readinto, METH_VARARGS, readinto_doc},
709 {"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc},
710 {"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc},
711 {"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc},
712 {"getvalue", (PyCFunction)bytesio_getvalue, METH_VARARGS, getval_doc},
713 {"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc},
714 {"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc},
715 {NULL, NULL} /* sentinel */
718 PyDoc_STRVAR(bytesio_doc,
719 "BytesIO([buffer]) -> object\n"
720 "\n"
721 "Create a buffered I/O implementation using an in-memory bytes\n"
722 "buffer, ready for reading and writing.");
724 PyTypeObject PyBytesIO_Type = {
725 PyVarObject_HEAD_INIT(NULL, 0)
726 "_io.BytesIO", /*tp_name*/
727 sizeof(bytesio), /*tp_basicsize*/
728 0, /*tp_itemsize*/
729 (destructor)bytesio_dealloc, /*tp_dealloc*/
730 0, /*tp_print*/
731 0, /*tp_getattr*/
732 0, /*tp_setattr*/
733 0, /*tp_reserved*/
734 0, /*tp_repr*/
735 0, /*tp_as_number*/
736 0, /*tp_as_sequence*/
737 0, /*tp_as_mapping*/
738 0, /*tp_hash*/
739 0, /*tp_call*/
740 0, /*tp_str*/
741 0, /*tp_getattro*/
742 0, /*tp_setattro*/
743 0, /*tp_as_buffer*/
744 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
745 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
746 bytesio_doc, /*tp_doc*/
747 (traverseproc)bytesio_traverse, /*tp_traverse*/
748 (inquiry)bytesio_clear, /*tp_clear*/
749 0, /*tp_richcompare*/
750 offsetof(bytesio, weakreflist), /*tp_weaklistoffset*/
751 PyObject_SelfIter, /*tp_iter*/
752 (iternextfunc)bytesio_iternext, /*tp_iternext*/
753 bytesio_methods, /*tp_methods*/
754 0, /*tp_members*/
755 bytesio_getsetlist, /*tp_getset*/
756 0, /*tp_base*/
757 0, /*tp_dict*/
758 0, /*tp_descr_get*/
759 0, /*tp_descr_set*/
760 offsetof(bytesio, dict), /*tp_dictoffset*/
761 (initproc)bytesio_init, /*tp_init*/
762 0, /*tp_alloc*/
763 bytesio_new, /*tp_new*/