Merged revisions 74356-74357 via svnmerge from
[python/dscho.git] / Modules / _io / stringio.c
blob84a15bedc22c6bcc164602ec7bdd33df7e8363b1
1 #define PY_SSIZE_T_CLEAN
2 #include "Python.h"
3 #include "structmember.h"
4 #include "_iomodule.h"
6 /* Implementation note: the buffer is always at least one character longer
7 than the enclosed string, for proper functioning of _PyIO_find_line_ending.
8 */
10 typedef struct {
11 PyObject_HEAD
12 Py_UNICODE *buf;
13 Py_ssize_t pos;
14 Py_ssize_t string_size;
15 size_t buf_size;
17 char ok; /* initialized? */
18 char closed;
19 char readuniversal;
20 char readtranslate;
21 PyObject *decoder;
22 PyObject *readnl;
23 PyObject *writenl;
25 PyObject *dict;
26 PyObject *weakreflist;
27 } stringio;
/* Make the calling function raise ValueError and return NULL when the
   object has not been (successfully) initialized.  The body is wrapped in
   do { ... } while (0) so that `CHECK_INITIALIZED(self);` expands to exactly
   one statement and cannot capture a following `else`. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
/* Make the calling function raise ValueError and return NULL when the
   object has been closed.  The body is wrapped in do { ... } while (0) so
   that `CHECK_CLOSED(self);` expands to exactly one statement and cannot
   capture a following `else`. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)
43 PyDoc_STRVAR(stringio_doc,
44 "Text I/O implementation using an in-memory buffer.\n"
45 "\n"
46 "The initial_value argument sets the value of object. The newline\n"
47 "argument is like the one of TextIOWrapper's constructor.");
50 /* Internal routine for changing the size, in terms of characters, of the
51 buffer of StringIO objects. The caller should ensure that the 'size'
52 argument is non-negative. Returns 0 on success, -1 otherwise. */
53 static int
54 resize_buffer(stringio *self, size_t size)
56 /* Here, unsigned types are used to avoid dealing with signed integer
57 overflow, which is undefined in C. */
58 size_t alloc = self->buf_size;
59 Py_UNICODE *new_buf = NULL;
61 assert(self->buf != NULL);
63 /* Reserve one more char for line ending detection. */
64 size = size + 1;
65 /* For simplicity, stay in the range of the signed type. Anyway, Python
66 doesn't allow strings to be longer than this. */
67 if (size > PY_SSIZE_T_MAX)
68 goto overflow;
70 if (size < alloc / 2) {
71 /* Major downsize; resize down to exact size. */
72 alloc = size + 1;
74 else if (size < alloc) {
75 /* Within allocated size; quick exit */
76 return 0;
78 else if (size <= alloc * 1.125) {
79 /* Moderate upsize; overallocate similar to list_resize() */
80 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
82 else {
83 /* Major upsize; resize up to exact size */
84 alloc = size + 1;
87 if (alloc > ((size_t)-1) / sizeof(Py_UNICODE))
88 goto overflow;
89 new_buf = (Py_UNICODE *)PyMem_Realloc(self->buf,
90 alloc * sizeof(Py_UNICODE));
91 if (new_buf == NULL) {
92 PyErr_NoMemory();
93 return -1;
95 self->buf_size = alloc;
96 self->buf = new_buf;
98 return 0;
100 overflow:
101 PyErr_SetString(PyExc_OverflowError,
102 "new buffer size too large");
103 return -1;
106 /* Internal routine for writing a whole PyUnicode object to the buffer of a
107 StringIO object. Returns 0 on success, or -1 on error. */
108 static Py_ssize_t
109 write_str(stringio *self, PyObject *obj)
111 Py_UNICODE *str;
112 Py_ssize_t len;
113 PyObject *decoded = NULL;
114 assert(self->buf != NULL);
115 assert(self->pos >= 0);
117 if (self->decoder != NULL) {
118 decoded = _PyIncrementalNewlineDecoder_decode(
119 self->decoder, obj, 1 /* always final */);
121 else {
122 decoded = obj;
123 Py_INCREF(decoded);
125 if (self->writenl) {
126 PyObject *translated = PyUnicode_Replace(
127 decoded, _PyIO_str_nl, self->writenl, -1);
128 Py_DECREF(decoded);
129 decoded = translated;
131 if (decoded == NULL)
132 return -1;
134 assert(PyUnicode_Check(decoded));
135 str = PyUnicode_AS_UNICODE(decoded);
136 len = PyUnicode_GET_SIZE(decoded);
138 assert(len >= 0);
140 /* This overflow check is not strictly necessary. However, it avoids us to
141 deal with funky things like comparing an unsigned and a signed
142 integer. */
143 if (self->pos > PY_SSIZE_T_MAX - len) {
144 PyErr_SetString(PyExc_OverflowError,
145 "new position too large");
146 goto fail;
148 if (self->pos + len > self->string_size) {
149 if (resize_buffer(self, self->pos + len) < 0)
150 goto fail;
153 if (self->pos > self->string_size) {
154 /* In case of overseek, pad with null bytes the buffer region between
155 the end of stream and the current position.
157 0 lo string_size hi
158 | |<---used--->|<----------available----------->|
159 | | <--to pad-->|<---to write---> |
160 0 buf positon
163 memset(self->buf + self->string_size, '\0',
164 (self->pos - self->string_size) * sizeof(Py_UNICODE));
167 /* Copy the data to the internal buffer, overwriting some of the
168 existing data if self->pos < self->string_size. */
169 memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE));
170 self->pos += len;
172 /* Set the new length of the internal string if it has changed. */
173 if (self->string_size < self->pos) {
174 self->string_size = self->pos;
177 Py_DECREF(decoded);
178 return 0;
180 fail:
181 Py_XDECREF(decoded);
182 return -1;
185 PyDoc_STRVAR(stringio_getvalue_doc,
186 "Retrieve the entire contents of the object.");
188 static PyObject *
189 stringio_getvalue(stringio *self)
191 CHECK_INITIALIZED(self);
192 CHECK_CLOSED(self);
193 return PyUnicode_FromUnicode(self->buf, self->string_size);
196 PyDoc_STRVAR(stringio_tell_doc,
197 "Tell the current file position.");
199 static PyObject *
200 stringio_tell(stringio *self)
202 CHECK_INITIALIZED(self);
203 CHECK_CLOSED(self);
204 return PyLong_FromSsize_t(self->pos);
207 PyDoc_STRVAR(stringio_read_doc,
208 "Read at most n characters, returned as a string.\n"
209 "\n"
210 "If the argument is negative or omitted, read until EOF\n"
211 "is reached. Return an empty string at EOF.\n");
213 static PyObject *
214 stringio_read(stringio *self, PyObject *args)
216 Py_ssize_t size, n;
217 Py_UNICODE *output;
218 PyObject *arg = Py_None;
220 CHECK_INITIALIZED(self);
221 if (!PyArg_ParseTuple(args, "|O:read", &arg))
222 return NULL;
223 CHECK_CLOSED(self);
225 if (PyNumber_Check(arg)) {
226 size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
227 if (size == -1 && PyErr_Occurred())
228 return NULL;
230 else if (arg == Py_None) {
231 /* Read until EOF is reached, by default. */
232 size = -1;
234 else {
235 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
236 Py_TYPE(arg)->tp_name);
237 return NULL;
240 /* adjust invalid sizes */
241 n = self->string_size - self->pos;
242 if (size < 0 || size > n) {
243 size = n;
244 if (size < 0)
245 size = 0;
248 output = self->buf + self->pos;
249 self->pos += size;
250 return PyUnicode_FromUnicode(output, size);
253 /* Internal helper, used by stringio_readline and stringio_iternext */
254 static PyObject *
255 _stringio_readline(stringio *self, Py_ssize_t limit)
257 Py_UNICODE *start, *end, old_char;
258 Py_ssize_t len, consumed;
260 /* In case of overseek, return the empty string */
261 if (self->pos >= self->string_size)
262 return PyUnicode_FromString("");
264 start = self->buf + self->pos;
265 if (limit < 0 || limit > self->string_size - self->pos)
266 limit = self->string_size - self->pos;
268 end = start + limit;
269 old_char = *end;
270 *end = '\0';
271 len = _PyIO_find_line_ending(
272 self->readtranslate, self->readuniversal, self->readnl,
273 start, end, &consumed);
274 *end = old_char;
275 /* If we haven't found any line ending, we just return everything
276 (`consumed` is ignored). */
277 if (len < 0)
278 len = limit;
279 self->pos += len;
280 return PyUnicode_FromUnicode(start, len);
283 PyDoc_STRVAR(stringio_readline_doc,
284 "Read until newline or EOF.\n"
285 "\n"
286 "Returns an empty string if EOF is hit immediately.\n");
288 static PyObject *
289 stringio_readline(stringio *self, PyObject *args)
291 PyObject *arg = Py_None;
292 Py_ssize_t limit = -1;
294 CHECK_INITIALIZED(self);
295 if (!PyArg_ParseTuple(args, "|O:readline", &arg))
296 return NULL;
297 CHECK_CLOSED(self);
299 if (PyNumber_Check(arg)) {
300 limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
301 if (limit == -1 && PyErr_Occurred())
302 return NULL;
304 else if (arg != Py_None) {
305 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
306 Py_TYPE(arg)->tp_name);
307 return NULL;
309 return _stringio_readline(self, limit);
312 static PyObject *
313 stringio_iternext(stringio *self)
315 PyObject *line;
317 CHECK_INITIALIZED(self);
318 CHECK_CLOSED(self);
320 if (Py_TYPE(self) == &PyStringIO_Type) {
321 /* Skip method call overhead for speed */
322 line = _stringio_readline(self, -1);
324 else {
325 /* XXX is subclassing StringIO really supported? */
326 line = PyObject_CallMethodObjArgs((PyObject *)self,
327 _PyIO_str_readline, NULL);
328 if (line && !PyUnicode_Check(line)) {
329 PyErr_Format(PyExc_IOError,
330 "readline() should have returned an str object, "
331 "not '%.200s'", Py_TYPE(line)->tp_name);
332 Py_DECREF(line);
333 return NULL;
337 if (line == NULL)
338 return NULL;
340 if (PyUnicode_GET_SIZE(line) == 0) {
341 /* Reached EOF */
342 Py_DECREF(line);
343 return NULL;
346 return line;
349 PyDoc_STRVAR(stringio_truncate_doc,
350 "Truncate size to pos.\n"
351 "\n"
352 "The pos argument defaults to the current file position, as\n"
353 "returned by tell(). Imply an absolute seek to pos.\n"
354 "Returns the new absolute position.\n");
356 static PyObject *
357 stringio_truncate(stringio *self, PyObject *args)
359 Py_ssize_t size;
360 PyObject *arg = Py_None;
362 CHECK_INITIALIZED(self);
363 if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
364 return NULL;
365 CHECK_CLOSED(self);
367 if (PyNumber_Check(arg)) {
368 size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
369 if (size == -1 && PyErr_Occurred())
370 return NULL;
372 else if (arg == Py_None) {
373 /* Truncate to current position if no argument is passed. */
374 size = self->pos;
376 else {
377 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
378 Py_TYPE(arg)->tp_name);
379 return NULL;
382 if (size < 0) {
383 PyErr_Format(PyExc_ValueError,
384 "Negative size value %zd", size);
385 return NULL;
388 if (size < self->string_size) {
389 if (resize_buffer(self, size) < 0)
390 return NULL;
391 self->string_size = size;
393 self->pos = size;
395 return PyLong_FromSsize_t(size);
398 PyDoc_STRVAR(stringio_seek_doc,
399 "Change stream position.\n"
400 "\n"
401 "Seek to character offset pos relative to position indicated by whence:\n"
402 " 0 Start of stream (the default). pos should be >= 0;\n"
403 " 1 Current position - pos must be 0;\n"
404 " 2 End of stream - pos must be 0.\n"
405 "Returns the new absolute position.\n");
407 static PyObject *
408 stringio_seek(stringio *self, PyObject *args)
410 Py_ssize_t pos;
411 int mode = 0;
413 CHECK_INITIALIZED(self);
414 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
415 return NULL;
416 CHECK_CLOSED(self);
418 if (mode != 0 && mode != 1 && mode != 2) {
419 PyErr_Format(PyExc_ValueError,
420 "Invalid whence (%i, should be 0, 1 or 2)", mode);
421 return NULL;
423 else if (pos < 0 && mode == 0) {
424 PyErr_Format(PyExc_ValueError,
425 "Negative seek position %zd", pos);
426 return NULL;
428 else if (mode != 0 && pos != 0) {
429 PyErr_SetString(PyExc_IOError,
430 "Can't do nonzero cur-relative seeks");
431 return NULL;
434 /* mode 0: offset relative to beginning of the string.
435 mode 1: no change to current position.
436 mode 2: change position to end of file. */
437 if (mode == 1) {
438 pos = self->pos;
440 else if (mode == 2) {
441 pos = self->string_size;
444 self->pos = pos;
446 return PyLong_FromSsize_t(self->pos);
449 PyDoc_STRVAR(stringio_write_doc,
450 "Write string to file.\n"
451 "\n"
452 "Returns the number of characters written, which is always equal to\n"
453 "the length of the string.\n");
455 static PyObject *
456 stringio_write(stringio *self, PyObject *obj)
458 Py_ssize_t size;
460 CHECK_INITIALIZED(self);
461 if (!PyUnicode_Check(obj)) {
462 PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
463 Py_TYPE(obj)->tp_name);
464 return NULL;
466 CHECK_CLOSED(self);
467 size = PyUnicode_GET_SIZE(obj);
469 if (size > 0 && write_str(self, obj) < 0)
470 return NULL;
472 return PyLong_FromSsize_t(size);
475 PyDoc_STRVAR(stringio_close_doc,
476 "Close the IO object. Attempting any further operation after the\n"
477 "object is closed will raise a ValueError.\n"
478 "\n"
479 "This method has no effect if the file is already closed.\n");
481 static PyObject *
482 stringio_close(stringio *self)
484 self->closed = 1;
485 /* Free up some memory */
486 if (resize_buffer(self, 0) < 0)
487 return NULL;
488 Py_CLEAR(self->readnl);
489 Py_CLEAR(self->writenl);
490 Py_CLEAR(self->decoder);
491 Py_RETURN_NONE;
494 static int
495 stringio_traverse(stringio *self, visitproc visit, void *arg)
497 Py_VISIT(self->dict);
498 return 0;
501 static int
502 stringio_clear(stringio *self)
504 Py_CLEAR(self->dict);
505 return 0;
508 static void
509 stringio_dealloc(stringio *self)
511 _PyObject_GC_UNTRACK(self);
512 Py_CLEAR(self->readnl);
513 Py_CLEAR(self->writenl);
514 Py_CLEAR(self->decoder);
515 if (self->buf)
516 PyMem_Free(self->buf);
517 if (self->weakreflist != NULL)
518 PyObject_ClearWeakRefs((PyObject *) self);
519 Py_TYPE(self)->tp_free(self);
522 static PyObject *
523 stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
525 stringio *self;
527 assert(type != NULL && type->tp_alloc != NULL);
528 self = (stringio *)type->tp_alloc(type, 0);
529 if (self == NULL)
530 return NULL;
532 self->string_size = 0;
533 self->pos = 0;
534 self->buf_size = 0;
535 self->buf = (Py_UNICODE *)PyMem_Malloc(0);
536 if (self->buf == NULL) {
537 Py_DECREF(self);
538 return PyErr_NoMemory();
541 return (PyObject *)self;
544 static int
545 stringio_init(stringio *self, PyObject *args, PyObject *kwds)
547 char *kwlist[] = {"initial_value", "newline", NULL};
548 PyObject *value = NULL;
549 char *newline = "\n";
551 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oz:__init__", kwlist,
552 &value, &newline))
553 return -1;
555 if (newline && newline[0] != '\0'
556 && !(newline[0] == '\n' && newline[1] == '\0')
557 && !(newline[0] == '\r' && newline[1] == '\0')
558 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
559 PyErr_Format(PyExc_ValueError,
560 "illegal newline value: %s", newline);
561 return -1;
563 if (value && value != Py_None && !PyUnicode_Check(value)) {
564 PyErr_Format(PyExc_ValueError,
565 "initial_value must be str or None, not %.200s",
566 Py_TYPE(value)->tp_name);
567 return -1;
570 self->ok = 0;
572 Py_CLEAR(self->readnl);
573 Py_CLEAR(self->writenl);
574 Py_CLEAR(self->decoder);
576 if (newline) {
577 self->readnl = PyUnicode_FromString(newline);
578 if (self->readnl == NULL)
579 return -1;
581 self->readuniversal = (newline == NULL || newline[0] == '\0');
582 self->readtranslate = (newline == NULL);
583 /* If newline == "", we don't translate anything.
584 If newline == "\n" or newline == None, we translate to "\n", which is
585 a no-op.
586 (for newline == None, TextIOWrapper translates to os.sepline, but it
587 is pointless for StringIO)
589 if (newline != NULL && newline[0] == '\r') {
590 self->writenl = self->readnl;
591 Py_INCREF(self->writenl);
594 if (self->readuniversal) {
595 self->decoder = PyObject_CallFunction(
596 (PyObject *)&PyIncrementalNewlineDecoder_Type,
597 "Oi", Py_None, (int) self->readtranslate);
598 if (self->decoder == NULL)
599 return -1;
602 /* Now everything is set up, resize buffer to size of initial value,
603 and copy it */
604 self->string_size = 0;
605 if (value && value != Py_None) {
606 Py_ssize_t len = PyUnicode_GetSize(value);
607 /* This is a heuristic, for newline translation might change
608 the string length. */
609 if (resize_buffer(self, len) < 0)
610 return -1;
611 self->pos = 0;
612 if (write_str(self, value) < 0)
613 return -1;
615 else {
616 if (resize_buffer(self, 0) < 0)
617 return -1;
619 self->pos = 0;
621 self->closed = 0;
622 self->ok = 1;
623 return 0;
626 /* Properties and pseudo-properties */
627 static PyObject *
628 stringio_seekable(stringio *self, PyObject *args)
630 CHECK_INITIALIZED(self);
631 Py_RETURN_TRUE;
634 static PyObject *
635 stringio_readable(stringio *self, PyObject *args)
637 CHECK_INITIALIZED(self);
638 Py_RETURN_TRUE;
641 static PyObject *
642 stringio_writable(stringio *self, PyObject *args)
644 CHECK_INITIALIZED(self);
645 Py_RETURN_TRUE;
648 static PyObject *
649 stringio_closed(stringio *self, void *context)
651 CHECK_INITIALIZED(self);
652 return PyBool_FromLong(self->closed);
655 static PyObject *
656 stringio_line_buffering(stringio *self, void *context)
658 CHECK_INITIALIZED(self);
659 CHECK_CLOSED(self);
660 Py_RETURN_FALSE;
663 static PyObject *
664 stringio_newlines(stringio *self, void *context)
666 CHECK_INITIALIZED(self);
667 CHECK_CLOSED(self);
668 if (self->decoder == NULL)
669 Py_RETURN_NONE;
670 return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
673 static struct PyMethodDef stringio_methods[] = {
674 {"close", (PyCFunction)stringio_close, METH_NOARGS, stringio_close_doc},
675 {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, stringio_getvalue_doc},
676 {"read", (PyCFunction)stringio_read, METH_VARARGS, stringio_read_doc},
677 {"readline", (PyCFunction)stringio_readline, METH_VARARGS, stringio_readline_doc},
678 {"tell", (PyCFunction)stringio_tell, METH_NOARGS, stringio_tell_doc},
679 {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc},
680 {"seek", (PyCFunction)stringio_seek, METH_VARARGS, stringio_seek_doc},
681 {"write", (PyCFunction)stringio_write, METH_O, stringio_write_doc},
683 {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS},
684 {"readable", (PyCFunction)stringio_readable, METH_NOARGS},
685 {"writable", (PyCFunction)stringio_writable, METH_NOARGS},
686 {NULL, NULL} /* sentinel */
689 static PyGetSetDef stringio_getset[] = {
690 {"closed", (getter)stringio_closed, NULL, NULL},
691 {"newlines", (getter)stringio_newlines, NULL, NULL},
692 /* (following comments straight off of the original Python wrapper:)
693 XXX Cruft to support the TextIOWrapper API. This would only
694 be meaningful if StringIO supported the buffer attribute.
695 Hopefully, a better solution, than adding these pseudo-attributes,
696 will be found.
698 {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
699 {NULL}
702 PyTypeObject PyStringIO_Type = {
703 PyVarObject_HEAD_INIT(NULL, 0)
704 "_io.StringIO", /*tp_name*/
705 sizeof(stringio), /*tp_basicsize*/
706 0, /*tp_itemsize*/
707 (destructor)stringio_dealloc, /*tp_dealloc*/
708 0, /*tp_print*/
709 0, /*tp_getattr*/
710 0, /*tp_setattr*/
711 0, /*tp_reserved*/
712 0, /*tp_repr*/
713 0, /*tp_as_number*/
714 0, /*tp_as_sequence*/
715 0, /*tp_as_mapping*/
716 0, /*tp_hash*/
717 0, /*tp_call*/
718 0, /*tp_str*/
719 0, /*tp_getattro*/
720 0, /*tp_setattro*/
721 0, /*tp_as_buffer*/
722 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
723 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
724 stringio_doc, /*tp_doc*/
725 (traverseproc)stringio_traverse, /*tp_traverse*/
726 (inquiry)stringio_clear, /*tp_clear*/
727 0, /*tp_richcompare*/
728 offsetof(stringio, weakreflist), /*tp_weaklistoffset*/
729 0, /*tp_iter*/
730 (iternextfunc)stringio_iternext, /*tp_iternext*/
731 stringio_methods, /*tp_methods*/
732 0, /*tp_members*/
733 stringio_getset, /*tp_getset*/
734 0, /*tp_base*/
735 0, /*tp_dict*/
736 0, /*tp_descr_get*/
737 0, /*tp_descr_set*/
738 offsetof(stringio, dict), /*tp_dictoffset*/
739 (initproc)stringio_init, /*tp_init*/
740 0, /*tp_alloc*/
741 stringio_new, /*tp_new*/