Issue #5768: Change to Unicode output logic and test case for same.
[python.git] / Modules / _bytesio.c
blobc5c14b30617135d5cdd71fe7c1856f753fef168e
1 #include "Python.h"
3 typedef struct {
4 PyObject_HEAD
5 char *buf;
6 Py_ssize_t pos;
7 Py_ssize_t string_size;
8 size_t buf_size;
9 } BytesIOObject;
/* Make the calling function return NULL with a ValueError set when the
   object has been closed, i.e. when its buffer pointer is NULL.

   The body is wrapped in do { } while (0) so that the expansion is exactly
   one statement: `CHECK_CLOSED(self);` stays correct inside an unbraced
   if/else instead of capturing a following `else`. */
#define CHECK_CLOSED(self)                                      \
    do {                                                        \
        if ((self)->buf == NULL) {                              \
            PyErr_SetString(PyExc_ValueError,                   \
                            "I/O operation on closed file.");   \
            return NULL;                                        \
        }                                                       \
    } while (0)
18 /* Internal routine to get a line from the buffer of a BytesIO
19 object. Returns the length between the current position to the
20 next newline character. */
21 static Py_ssize_t
22 get_line(BytesIOObject *self, char **output)
24 char *n;
25 const char *str_end;
26 Py_ssize_t len;
28 assert(self->buf != NULL);
30 /* Move to the end of the line, up to the end of the string, s. */
31 str_end = self->buf + self->string_size;
32 for (n = self->buf + self->pos;
33 n < str_end && *n != '\n';
34 n++);
36 /* Skip the newline character */
37 if (n < str_end)
38 n++;
40 /* Get the length from the current position to the end of the line. */
41 len = n - (self->buf + self->pos);
42 *output = self->buf + self->pos;
44 assert(len >= 0);
45 assert(self->pos < PY_SSIZE_T_MAX - len);
46 self->pos += len;
48 return len;
51 /* Internal routine for changing the size of the buffer of BytesIO objects.
52 The caller should ensure that the 'size' argument is non-negative. Returns
53 0 on success, -1 otherwise. */
54 static int
55 resize_buffer(BytesIOObject *self, size_t size)
57 /* Here, unsigned types are used to avoid dealing with signed integer
58 overflow, which is undefined in C. */
59 size_t alloc = self->buf_size;
60 char *new_buf = NULL;
62 assert(self->buf != NULL);
64 /* For simplicity, stay in the range of the signed type. Anyway, Python
65 doesn't allow strings to be longer than this. */
66 if (size > PY_SSIZE_T_MAX)
67 goto overflow;
69 if (size < alloc / 2) {
70 /* Major downsize; resize down to exact size. */
71 alloc = size + 1;
73 else if (size < alloc) {
74 /* Within allocated size; quick exit */
75 return 0;
77 else if (size <= alloc * 1.125) {
78 /* Moderate upsize; overallocate similar to list_resize() */
79 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
81 else {
82 /* Major upsize; resize up to exact size */
83 alloc = size + 1;
86 if (alloc > ((size_t)-1) / sizeof(char))
87 goto overflow;
88 new_buf = (char *)PyMem_Realloc(self->buf, alloc * sizeof(char));
89 if (new_buf == NULL) {
90 PyErr_NoMemory();
91 return -1;
93 self->buf_size = alloc;
94 self->buf = new_buf;
96 return 0;
98 overflow:
99 PyErr_SetString(PyExc_OverflowError,
100 "new buffer size too large");
101 return -1;
104 /* Internal routine for writing a string of bytes to the buffer of a BytesIO
105 object. Returns the number of bytes wrote, or -1 on error. */
106 static Py_ssize_t
107 write_bytes(BytesIOObject *self, const char *bytes, Py_ssize_t len)
109 assert(self->buf != NULL);
110 assert(self->pos >= 0);
111 assert(len >= 0);
113 if ((size_t)self->pos + len > self->buf_size) {
114 if (resize_buffer(self, (size_t)self->pos + len) < 0)
115 return -1;
118 if (self->pos > self->string_size) {
119 /* In case of overseek, pad with null bytes the buffer region between
120 the end of stream and the current position.
122 0 lo string_size hi
123 | |<---used--->|<----------available----------->|
124 | | <--to pad-->|<---to write---> |
125 0 buf position
127 memset(self->buf + self->string_size, '\0',
128 (self->pos - self->string_size) * sizeof(char));
131 /* Copy the data to the internal buffer, overwriting some of the existing
132 data if self->pos < self->string_size. */
133 memcpy(self->buf + self->pos, bytes, len);
134 self->pos += len;
136 /* Set the new length of the internal string if it has changed. */
137 if (self->string_size < self->pos) {
138 self->string_size = self->pos;
141 return len;
144 static PyObject *
145 bytesio_get_closed(BytesIOObject *self)
147 if (self->buf == NULL)
148 Py_RETURN_TRUE;
149 else
150 Py_RETURN_FALSE;
153 /* Generic getter for the writable, readable and seekable properties */
154 static PyObject *
155 return_true(BytesIOObject *self)
157 Py_RETURN_TRUE;
160 PyDoc_STRVAR(flush_doc,
161 "flush() -> None. Does nothing.");
163 static PyObject *
164 bytesio_flush(BytesIOObject *self)
166 Py_RETURN_NONE;
169 PyDoc_STRVAR(getval_doc,
170 "getvalue() -> bytes.\n"
171 "\n"
172 "Retrieve the entire contents of the BytesIO object.");
174 static PyObject *
175 bytesio_getvalue(BytesIOObject *self)
177 CHECK_CLOSED(self);
178 return PyString_FromStringAndSize(self->buf, self->string_size);
181 PyDoc_STRVAR(isatty_doc,
182 "isatty() -> False.\n"
183 "\n"
184 "Always returns False since BytesIO objects are not connected\n"
185 "to a tty-like device.");
187 static PyObject *
188 bytesio_isatty(BytesIOObject *self)
190 CHECK_CLOSED(self);
191 Py_RETURN_FALSE;
194 PyDoc_STRVAR(tell_doc,
195 "tell() -> current file position, an integer\n");
197 static PyObject *
198 bytesio_tell(BytesIOObject *self)
200 CHECK_CLOSED(self);
201 return PyInt_FromSsize_t(self->pos);
204 PyDoc_STRVAR(read_doc,
205 "read([size]) -> read at most size bytes, returned as a string.\n"
206 "\n"
207 "If the size argument is negative, read until EOF is reached.\n"
208 "Return an empty string at EOF.");
210 static PyObject *
211 bytesio_read(BytesIOObject *self, PyObject *args)
213 Py_ssize_t size, n;
214 char *output;
215 PyObject *arg = Py_None;
217 CHECK_CLOSED(self);
219 if (!PyArg_ParseTuple(args, "|O:read", &arg))
220 return NULL;
222 if (PyInt_Check(arg)) {
223 size = PyInt_AsSsize_t(arg);
224 if (size == -1 && PyErr_Occurred())
225 return NULL;
227 else if (arg == Py_None) {
228 /* Read until EOF is reached, by default. */
229 size = -1;
231 else {
232 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
233 Py_TYPE(arg)->tp_name);
234 return NULL;
237 /* adjust invalid sizes */
238 n = self->string_size - self->pos;
239 if (size < 0 || size > n) {
240 size = n;
241 if (size < 0)
242 size = 0;
245 assert(self->buf != NULL);
246 output = self->buf + self->pos;
247 self->pos += size;
249 return PyString_FromStringAndSize(output, size);
253 PyDoc_STRVAR(read1_doc,
254 "read1(size) -> read at most size bytes, returned as a string.\n"
255 "\n"
256 "If the size argument is negative or omitted, read until EOF is reached.\n"
257 "Return an empty string at EOF.");
259 static PyObject *
260 bytesio_read1(BytesIOObject *self, PyObject *n)
262 PyObject *arg, *res;
264 arg = PyTuple_Pack(1, n);
265 if (arg == NULL)
266 return NULL;
267 res = bytesio_read(self, arg);
268 Py_DECREF(arg);
269 return res;
272 PyDoc_STRVAR(readline_doc,
273 "readline([size]) -> next line from the file, as a string.\n"
274 "\n"
275 "Retain newline. A non-negative size argument limits the maximum\n"
276 "number of bytes to return (an incomplete line may be returned then).\n"
277 "Return an empty string at EOF.\n");
279 static PyObject *
280 bytesio_readline(BytesIOObject *self, PyObject *args)
282 Py_ssize_t size, n;
283 char *output;
284 PyObject *arg = Py_None;
286 CHECK_CLOSED(self);
288 if (!PyArg_ParseTuple(args, "|O:readline", &arg))
289 return NULL;
291 if (PyInt_Check(arg)) {
292 size = PyInt_AsSsize_t(arg);
293 if (size == -1 && PyErr_Occurred())
294 return NULL;
296 else if (arg == Py_None) {
297 /* No size limit, by default. */
298 size = -1;
300 else {
301 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
302 Py_TYPE(arg)->tp_name);
303 return NULL;
306 n = get_line(self, &output);
308 if (size >= 0 && size < n) {
309 size = n - size;
310 n -= size;
311 self->pos -= size;
314 return PyString_FromStringAndSize(output, n);
317 PyDoc_STRVAR(readlines_doc,
318 "readlines([size]) -> list of strings, each a line from the file.\n"
319 "\n"
320 "Call readline() repeatedly and return a list of the lines so read.\n"
321 "The optional size argument, if given, is an approximate bound on the\n"
322 "total number of bytes in the lines returned.\n");
324 static PyObject *
325 bytesio_readlines(BytesIOObject *self, PyObject *args)
327 Py_ssize_t maxsize, size, n;
328 PyObject *result, *line;
329 char *output;
330 PyObject *arg = Py_None;
332 CHECK_CLOSED(self);
334 if (!PyArg_ParseTuple(args, "|O:readlines", &arg))
335 return NULL;
337 if (PyInt_Check(arg)) {
338 maxsize = PyInt_AsSsize_t(arg);
339 if (maxsize == -1 && PyErr_Occurred())
340 return NULL;
342 else if (arg == Py_None) {
343 /* No size limit, by default. */
344 maxsize = -1;
346 else {
347 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
348 Py_TYPE(arg)->tp_name);
349 return NULL;
352 size = 0;
353 result = PyList_New(0);
354 if (!result)
355 return NULL;
357 while ((n = get_line(self, &output)) != 0) {
358 line = PyString_FromStringAndSize(output, n);
359 if (!line)
360 goto on_error;
361 if (PyList_Append(result, line) == -1) {
362 Py_DECREF(line);
363 goto on_error;
365 Py_DECREF(line);
366 size += n;
367 if (maxsize > 0 && size >= maxsize)
368 break;
370 return result;
372 on_error:
373 Py_DECREF(result);
374 return NULL;
377 PyDoc_STRVAR(readinto_doc,
378 "readinto(bytearray) -> int. Read up to len(b) bytes into b.\n"
379 "\n"
380 "Returns number of bytes read (0 for EOF), or None if the object\n"
381 "is set not to block as has no data to read.");
383 static PyObject *
384 bytesio_readinto(BytesIOObject *self, PyObject *buffer)
386 void *raw_buffer;
387 Py_ssize_t len;
389 CHECK_CLOSED(self);
391 if (PyObject_AsWriteBuffer(buffer, &raw_buffer, &len) == -1)
392 return NULL;
394 if (self->pos + len > self->string_size)
395 len = self->string_size - self->pos;
397 memcpy(raw_buffer, self->buf + self->pos, len);
398 assert(self->pos + len < PY_SSIZE_T_MAX);
399 assert(len >= 0);
400 self->pos += len;
402 return PyInt_FromSsize_t(len);
405 PyDoc_STRVAR(truncate_doc,
406 "truncate([size]) -> int. Truncate the file to at most size bytes.\n"
407 "\n"
408 "Size defaults to the current file position, as returned by tell().\n"
409 "Returns the new size. Imply an absolute seek to the position size.");
411 static PyObject *
412 bytesio_truncate(BytesIOObject *self, PyObject *args)
414 Py_ssize_t size;
415 PyObject *arg = Py_None;
417 CHECK_CLOSED(self);
419 if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
420 return NULL;
422 if (PyInt_Check(arg)) {
423 size = PyInt_AsSsize_t(arg);
424 if (size == -1 && PyErr_Occurred())
425 return NULL;
427 else if (arg == Py_None) {
428 /* Truncate to current position if no argument is passed. */
429 size = self->pos;
431 else {
432 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
433 Py_TYPE(arg)->tp_name);
434 return NULL;
437 if (size < 0) {
438 PyErr_Format(PyExc_ValueError,
439 "negative size value %zd", size);
440 return NULL;
443 if (size < self->string_size) {
444 self->string_size = size;
445 if (resize_buffer(self, size) < 0)
446 return NULL;
448 self->pos = size;
450 return PyInt_FromSsize_t(size);
453 static PyObject *
454 bytesio_iternext(BytesIOObject *self)
456 char *next;
457 Py_ssize_t n;
459 CHECK_CLOSED(self);
461 n = get_line(self, &next);
463 if (!next || n == 0)
464 return NULL;
466 return PyString_FromStringAndSize(next, n);
469 PyDoc_STRVAR(seek_doc,
470 "seek(pos, whence=0) -> int. Change stream position.\n"
471 "\n"
472 "Seek to byte offset pos relative to position indicated by whence:\n"
473 " 0 Start of stream (the default). pos should be >= 0;\n"
474 " 1 Current position - pos may be negative;\n"
475 " 2 End of stream - pos usually negative.\n"
476 "Returns the new absolute position.");
478 static PyObject *
479 bytesio_seek(BytesIOObject *self, PyObject *args)
481 PyObject *pos_obj, *mode_obj;
482 Py_ssize_t pos;
483 int mode = 0;
485 CHECK_CLOSED(self);
487 /* Special-case for 2.x to prevent floats from passing through.
488 This only needed to make a test in test_io succeed. */
489 if (!PyArg_UnpackTuple(args, "seek", 1, 2, &pos_obj, &mode_obj))
490 return NULL;
491 if (PyFloat_Check(pos_obj)) {
492 PyErr_SetString(PyExc_TypeError,
493 "position argument must be an integer");
494 return NULL;
497 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
498 return NULL;
500 if (pos < 0 && mode == 0) {
501 PyErr_Format(PyExc_ValueError,
502 "negative seek value %zd", pos);
503 return NULL;
506 /* mode 0: offset relative to beginning of the string.
507 mode 1: offset relative to current position.
508 mode 2: offset relative the end of the string. */
509 if (mode == 1) {
510 if (pos > PY_SSIZE_T_MAX - self->pos) {
511 PyErr_SetString(PyExc_OverflowError,
512 "new position too large");
513 return NULL;
515 pos += self->pos;
517 else if (mode == 2) {
518 if (pos > PY_SSIZE_T_MAX - self->string_size) {
519 PyErr_SetString(PyExc_OverflowError,
520 "new position too large");
521 return NULL;
523 pos += self->string_size;
525 else if (mode != 0) {
526 PyErr_Format(PyExc_ValueError,
527 "invalid whence (%i, should be 0, 1 or 2)", mode);
528 return NULL;
531 if (pos < 0)
532 pos = 0;
533 self->pos = pos;
535 return PyInt_FromSsize_t(self->pos);
538 PyDoc_STRVAR(write_doc,
539 "write(bytes) -> int. Write bytes to file.\n"
540 "\n"
541 "Return the number of bytes written.");
543 static PyObject *
544 bytesio_write(BytesIOObject *self, PyObject *obj)
546 const char *bytes;
547 Py_ssize_t size;
548 Py_ssize_t n = 0;
550 CHECK_CLOSED(self);
552 /* Special-case in 2.x to prevent unicode objects to pass through. */
553 if (PyUnicode_Check(obj)) {
554 PyErr_SetString(PyExc_TypeError,
555 "expecting a bytes object, got unicode");
556 return NULL;
559 if (PyObject_AsReadBuffer(obj, (void *)&bytes, &size) < 0)
560 return NULL;
562 if (size != 0) {
563 n = write_bytes(self, bytes, size);
564 if (n < 0)
565 return NULL;
568 return PyInt_FromSsize_t(n);
571 PyDoc_STRVAR(writelines_doc,
572 "writelines(sequence_of_strings) -> None. Write strings to the file.\n"
573 "\n"
574 "Note that newlines are not added. The sequence can be any iterable\n"
575 "object producing strings. This is equivalent to calling write() for\n"
576 "each string.");
578 static PyObject *
579 bytesio_writelines(BytesIOObject *self, PyObject *v)
581 PyObject *it, *item;
582 PyObject *ret;
584 CHECK_CLOSED(self);
586 it = PyObject_GetIter(v);
587 if (it == NULL)
588 return NULL;
590 while ((item = PyIter_Next(it)) != NULL) {
591 ret = bytesio_write(self, item);
592 Py_DECREF(item);
593 if (ret == NULL) {
594 Py_DECREF(it);
595 return NULL;
597 Py_DECREF(ret);
599 Py_DECREF(it);
601 /* See if PyIter_Next failed */
602 if (PyErr_Occurred())
603 return NULL;
605 Py_RETURN_NONE;
608 PyDoc_STRVAR(close_doc,
609 "close() -> None. Disable all I/O operations.");
611 static PyObject *
612 bytesio_close(BytesIOObject *self)
614 if (self->buf != NULL) {
615 PyMem_Free(self->buf);
616 self->buf = NULL;
618 Py_RETURN_NONE;
621 static void
622 bytesio_dealloc(BytesIOObject *self)
624 if (self->buf != NULL) {
625 PyMem_Free(self->buf);
626 self->buf = NULL;
628 Py_TYPE(self)->tp_free(self);
631 static PyObject *
632 bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
634 BytesIOObject *self;
636 assert(type != NULL && type->tp_alloc != NULL);
637 self = (BytesIOObject *)type->tp_alloc(type, 0);
638 if (self == NULL)
639 return NULL;
641 self->string_size = 0;
642 self->pos = 0;
643 self->buf_size = 0;
644 self->buf = (char *)PyMem_Malloc(0);
645 if (self->buf == NULL) {
646 Py_DECREF(self);
647 return PyErr_NoMemory();
650 return (PyObject *)self;
653 static int
654 bytesio_init(BytesIOObject *self, PyObject *args, PyObject *kwds)
656 PyObject *initvalue = NULL;
658 if (!PyArg_ParseTuple(args, "|O:BytesIO", &initvalue))
659 return -1;
661 /* In case, __init__ is called multiple times. */
662 self->string_size = 0;
663 self->pos = 0;
665 if (initvalue && initvalue != Py_None) {
666 PyObject *res;
667 res = bytesio_write(self, initvalue);
668 if (res == NULL)
669 return -1;
670 Py_DECREF(res);
671 self->pos = 0;
674 return 0;
677 static PyGetSetDef bytesio_getsetlist[] = {
678 {"closed", (getter)bytesio_get_closed, NULL,
679 "True if the file is closed."},
680 {0}, /* sentinel */
683 static struct PyMethodDef bytesio_methods[] = {
684 {"readable", (PyCFunction)return_true, METH_NOARGS, NULL},
685 {"seekable", (PyCFunction)return_true, METH_NOARGS, NULL},
686 {"writable", (PyCFunction)return_true, METH_NOARGS, NULL},
687 {"close", (PyCFunction)bytesio_close, METH_NOARGS, close_doc},
688 {"flush", (PyCFunction)bytesio_flush, METH_NOARGS, flush_doc},
689 {"isatty", (PyCFunction)bytesio_isatty, METH_NOARGS, isatty_doc},
690 {"tell", (PyCFunction)bytesio_tell, METH_NOARGS, tell_doc},
691 {"write", (PyCFunction)bytesio_write, METH_O, write_doc},
692 {"writelines", (PyCFunction)bytesio_writelines, METH_O, writelines_doc},
693 {"read1", (PyCFunction)bytesio_read1, METH_O, read1_doc},
694 {"readinto", (PyCFunction)bytesio_readinto, METH_O, readinto_doc},
695 {"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc},
696 {"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc},
697 {"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc},
698 {"getvalue", (PyCFunction)bytesio_getvalue, METH_VARARGS, getval_doc},
699 {"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc},
700 {"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc},
701 {NULL, NULL} /* sentinel */
704 PyDoc_STRVAR(bytesio_doc,
705 "BytesIO([buffer]) -> object\n"
706 "\n"
707 "Create a buffered I/O implementation using an in-memory bytes\n"
708 "buffer, ready for reading and writing.");
710 static PyTypeObject BytesIO_Type = {
711 PyVarObject_HEAD_INIT(NULL, 0)
712 "_bytesio._BytesIO", /*tp_name*/
713 sizeof(BytesIOObject), /*tp_basicsize*/
714 0, /*tp_itemsize*/
715 (destructor)bytesio_dealloc, /*tp_dealloc*/
716 0, /*tp_print*/
717 0, /*tp_getattr*/
718 0, /*tp_setattr*/
719 0, /*tp_compare*/
720 0, /*tp_repr*/
721 0, /*tp_as_number*/
722 0, /*tp_as_sequence*/
723 0, /*tp_as_mapping*/
724 0, /*tp_hash*/
725 0, /*tp_call*/
726 0, /*tp_str*/
727 0, /*tp_getattro*/
728 0, /*tp_setattro*/
729 0, /*tp_as_buffer*/
730 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
731 bytesio_doc, /*tp_doc*/
732 0, /*tp_traverse*/
733 0, /*tp_clear*/
734 0, /*tp_richcompare*/
735 0, /*tp_weaklistoffset*/
736 PyObject_SelfIter, /*tp_iter*/
737 (iternextfunc)bytesio_iternext, /*tp_iternext*/
738 bytesio_methods, /*tp_methods*/
739 0, /*tp_members*/
740 bytesio_getsetlist, /*tp_getset*/
741 0, /*tp_base*/
742 0, /*tp_dict*/
743 0, /*tp_descr_get*/
744 0, /*tp_descr_set*/
745 0, /*tp_dictoffset*/
746 (initproc)bytesio_init, /*tp_init*/
747 0, /*tp_alloc*/
748 bytesio_new, /*tp_new*/
751 PyMODINIT_FUNC
752 init_bytesio(void)
754 PyObject *m;
756 if (PyType_Ready(&BytesIO_Type) < 0)
757 return;
758 m = Py_InitModule("_bytesio", NULL);
759 if (m == NULL)
760 return;
761 Py_INCREF(&BytesIO_Type);
762 PyModule_AddObject(m, "_BytesIO", (PyObject *)&BytesIO_Type);