Applying patches backported from 3.1, by Gregor Lingl.
[python.git] / Objects / bytearrayobject.c
blob0390c1d994c3243a8c48cd27563deadd025f08f1
1 /* PyBytes (bytearray) implementation */
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
6 #include "bytes_methods.h"
8 static PyByteArrayObject *nullbytes = NULL;
10 void
11 PyByteArray_Fini(void)
13 Py_CLEAR(nullbytes);
16 int
17 PyByteArray_Init(void)
19 nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24 nullbytes->ob_exports = 0;
25 return 1;
28 /* end nullbytes support */
30 /* Helpers */
32 static int
33 _getbytevalue(PyObject* arg, int *value)
35 long face_value;
37 if (PyBytes_CheckExact(arg)) {
38 if (Py_SIZE(arg) != 1) {
39 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
40 return 0;
42 *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
43 return 1;
45 else if (PyInt_Check(arg) || PyLong_Check(arg)) {
46 face_value = PyLong_AsLong(arg);
48 else {
49 PyObject *index = PyNumber_Index(arg);
50 if (index == NULL) {
51 PyErr_Format(PyExc_TypeError,
52 "an integer or string of size 1 is required");
53 return 0;
55 face_value = PyLong_AsLong(index);
56 Py_DECREF(index);
59 if (face_value < 0 || face_value >= 256) {
60 /* this includes the OverflowError in case the long is too large */
61 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
62 return 0;
65 *value = face_value;
66 return 1;
69 static Py_ssize_t
70 bytearray_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
72 if ( index != 0 ) {
73 PyErr_SetString(PyExc_SystemError,
74 "accessing non-existent bytes segment");
75 return -1;
77 *ptr = (void *)self->ob_bytes;
78 return Py_SIZE(self);
81 static Py_ssize_t
82 bytearray_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
84 if ( index != 0 ) {
85 PyErr_SetString(PyExc_SystemError,
86 "accessing non-existent bytes segment");
87 return -1;
89 *ptr = (void *)self->ob_bytes;
90 return Py_SIZE(self);
93 static Py_ssize_t
94 bytearray_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
96 if ( lenp )
97 *lenp = Py_SIZE(self);
98 return 1;
101 static Py_ssize_t
102 bytearray_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
104 if ( index != 0 ) {
105 PyErr_SetString(PyExc_SystemError,
106 "accessing non-existent bytes segment");
107 return -1;
109 *ptr = self->ob_bytes;
110 return Py_SIZE(self);
113 static int
114 bytearray_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
116 int ret;
117 void *ptr;
118 if (view == NULL) {
119 obj->ob_exports++;
120 return 0;
122 if (obj->ob_bytes == NULL)
123 ptr = "";
124 else
125 ptr = obj->ob_bytes;
126 ret = PyBuffer_FillInfo(view, (PyObject*)obj, ptr, Py_SIZE(obj), 0, flags);
127 if (ret >= 0) {
128 obj->ob_exports++;
130 return ret;
133 static void
134 bytearray_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
136 obj->ob_exports--;
139 static Py_ssize_t
140 _getbuffer(PyObject *obj, Py_buffer *view)
142 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
144 if (buffer == NULL || buffer->bf_getbuffer == NULL)
146 PyErr_Format(PyExc_TypeError,
147 "Type %.100s doesn't support the buffer API",
148 Py_TYPE(obj)->tp_name);
149 return -1;
152 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
153 return -1;
154 return view->len;
157 static int
158 _canresize(PyByteArrayObject *self)
160 if (self->ob_exports > 0) {
161 PyErr_SetString(PyExc_BufferError,
162 "Existing exports of data: object cannot be re-sized");
163 return 0;
165 return 1;
168 /* Direct API functions */
170 PyObject *
171 PyByteArray_FromObject(PyObject *input)
173 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
174 input, NULL);
177 PyObject *
178 PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
180 PyByteArrayObject *new;
181 Py_ssize_t alloc;
183 if (size < 0) {
184 PyErr_SetString(PyExc_SystemError,
185 "Negative size passed to PyByteArray_FromStringAndSize");
186 return NULL;
189 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
190 if (new == NULL)
191 return NULL;
193 if (size == 0) {
194 new->ob_bytes = NULL;
195 alloc = 0;
197 else {
198 alloc = size + 1;
199 new->ob_bytes = PyMem_Malloc(alloc);
200 if (new->ob_bytes == NULL) {
201 Py_DECREF(new);
202 return PyErr_NoMemory();
204 if (bytes != NULL)
205 memcpy(new->ob_bytes, bytes, size);
206 new->ob_bytes[size] = '\0'; /* Trailing null byte */
208 Py_SIZE(new) = size;
209 new->ob_alloc = alloc;
210 new->ob_exports = 0;
212 return (PyObject *)new;
215 Py_ssize_t
216 PyByteArray_Size(PyObject *self)
218 assert(self != NULL);
219 assert(PyByteArray_Check(self));
221 return PyByteArray_GET_SIZE(self);
224 char *
225 PyByteArray_AsString(PyObject *self)
227 assert(self != NULL);
228 assert(PyByteArray_Check(self));
230 return PyByteArray_AS_STRING(self);
234 PyByteArray_Resize(PyObject *self, Py_ssize_t size)
236 void *sval;
237 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
239 assert(self != NULL);
240 assert(PyByteArray_Check(self));
241 assert(size >= 0);
243 if (size == Py_SIZE(self)) {
244 return 0;
246 if (!_canresize((PyByteArrayObject *)self)) {
247 return -1;
250 if (size < alloc / 2) {
251 /* Major downsize; resize down to exact size */
252 alloc = size + 1;
254 else if (size < alloc) {
255 /* Within allocated size; quick exit */
256 Py_SIZE(self) = size;
257 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
258 return 0;
260 else if (size <= alloc * 1.125) {
261 /* Moderate upsize; overallocate similar to list_resize() */
262 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
264 else {
265 /* Major upsize; resize up to exact size */
266 alloc = size + 1;
269 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
270 if (sval == NULL) {
271 PyErr_NoMemory();
272 return -1;
275 ((PyByteArrayObject *)self)->ob_bytes = sval;
276 Py_SIZE(self) = size;
277 ((PyByteArrayObject *)self)->ob_alloc = alloc;
278 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
280 return 0;
283 PyObject *
284 PyByteArray_Concat(PyObject *a, PyObject *b)
286 Py_ssize_t size;
287 Py_buffer va, vb;
288 PyByteArrayObject *result = NULL;
290 va.len = -1;
291 vb.len = -1;
292 if (_getbuffer(a, &va) < 0 ||
293 _getbuffer(b, &vb) < 0) {
294 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
295 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
296 goto done;
299 size = va.len + vb.len;
300 if (size < 0) {
301 PyErr_NoMemory();
302 goto done;
305 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
306 if (result != NULL) {
307 memcpy(result->ob_bytes, va.buf, va.len);
308 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
311 done:
312 if (va.len != -1)
313 PyBuffer_Release(&va);
314 if (vb.len != -1)
315 PyBuffer_Release(&vb);
316 return (PyObject *)result;
319 /* Functions stuffed into the type object */
321 static Py_ssize_t
322 bytearray_length(PyByteArrayObject *self)
324 return Py_SIZE(self);
327 static PyObject *
328 bytearray_iconcat(PyByteArrayObject *self, PyObject *other)
330 Py_ssize_t mysize;
331 Py_ssize_t size;
332 Py_buffer vo;
334 if (_getbuffer(other, &vo) < 0) {
335 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
336 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
337 return NULL;
340 mysize = Py_SIZE(self);
341 size = mysize + vo.len;
342 if (size < 0) {
343 PyBuffer_Release(&vo);
344 return PyErr_NoMemory();
346 if (size < self->ob_alloc) {
347 Py_SIZE(self) = size;
348 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
350 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
351 PyBuffer_Release(&vo);
352 return NULL;
354 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
355 PyBuffer_Release(&vo);
356 Py_INCREF(self);
357 return (PyObject *)self;
360 static PyObject *
361 bytearray_repeat(PyByteArrayObject *self, Py_ssize_t count)
363 PyByteArrayObject *result;
364 Py_ssize_t mysize;
365 Py_ssize_t size;
367 if (count < 0)
368 count = 0;
369 mysize = Py_SIZE(self);
370 size = mysize * count;
371 if (count != 0 && size / count != mysize)
372 return PyErr_NoMemory();
373 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
374 if (result != NULL && size != 0) {
375 if (mysize == 1)
376 memset(result->ob_bytes, self->ob_bytes[0], size);
377 else {
378 Py_ssize_t i;
379 for (i = 0; i < count; i++)
380 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
383 return (PyObject *)result;
386 static PyObject *
387 bytearray_irepeat(PyByteArrayObject *self, Py_ssize_t count)
389 Py_ssize_t mysize;
390 Py_ssize_t size;
392 if (count < 0)
393 count = 0;
394 mysize = Py_SIZE(self);
395 size = mysize * count;
396 if (count != 0 && size / count != mysize)
397 return PyErr_NoMemory();
398 if (size < self->ob_alloc) {
399 Py_SIZE(self) = size;
400 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
402 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
403 return NULL;
405 if (mysize == 1)
406 memset(self->ob_bytes, self->ob_bytes[0], size);
407 else {
408 Py_ssize_t i;
409 for (i = 1; i < count; i++)
410 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
413 Py_INCREF(self);
414 return (PyObject *)self;
417 static PyObject *
418 bytearray_getitem(PyByteArrayObject *self, Py_ssize_t i)
420 if (i < 0)
421 i += Py_SIZE(self);
422 if (i < 0 || i >= Py_SIZE(self)) {
423 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
424 return NULL;
426 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
429 static PyObject *
430 bytearray_subscript(PyByteArrayObject *self, PyObject *index)
432 if (PyIndex_Check(index)) {
433 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
435 if (i == -1 && PyErr_Occurred())
436 return NULL;
438 if (i < 0)
439 i += PyByteArray_GET_SIZE(self);
441 if (i < 0 || i >= Py_SIZE(self)) {
442 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
443 return NULL;
445 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
447 else if (PySlice_Check(index)) {
448 Py_ssize_t start, stop, step, slicelength, cur, i;
449 if (PySlice_GetIndicesEx((PySliceObject *)index,
450 PyByteArray_GET_SIZE(self),
451 &start, &stop, &step, &slicelength) < 0) {
452 return NULL;
455 if (slicelength <= 0)
456 return PyByteArray_FromStringAndSize("", 0);
457 else if (step == 1) {
458 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
459 slicelength);
461 else {
462 char *source_buf = PyByteArray_AS_STRING(self);
463 char *result_buf = (char *)PyMem_Malloc(slicelength);
464 PyObject *result;
466 if (result_buf == NULL)
467 return PyErr_NoMemory();
469 for (cur = start, i = 0; i < slicelength;
470 cur += step, i++) {
471 result_buf[i] = source_buf[cur];
473 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
474 PyMem_Free(result_buf);
475 return result;
478 else {
479 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
480 return NULL;
484 static int
485 bytearray_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
486 PyObject *values)
488 Py_ssize_t avail, needed;
489 void *bytes;
490 Py_buffer vbytes;
491 int res = 0;
493 vbytes.len = -1;
494 if (values == (PyObject *)self) {
495 /* Make a copy and call this function recursively */
496 int err;
497 values = PyByteArray_FromObject(values);
498 if (values == NULL)
499 return -1;
500 err = bytearray_setslice(self, lo, hi, values);
501 Py_DECREF(values);
502 return err;
504 if (values == NULL) {
505 /* del b[lo:hi] */
506 bytes = NULL;
507 needed = 0;
509 else {
510 if (_getbuffer(values, &vbytes) < 0) {
511 PyErr_Format(PyExc_TypeError,
512 "can't set bytearray slice from %.100s",
513 Py_TYPE(values)->tp_name);
514 return -1;
516 needed = vbytes.len;
517 bytes = vbytes.buf;
520 if (lo < 0)
521 lo = 0;
522 if (hi < lo)
523 hi = lo;
524 if (hi > Py_SIZE(self))
525 hi = Py_SIZE(self);
527 avail = hi - lo;
528 if (avail < 0)
529 lo = hi = avail = 0;
531 if (avail != needed) {
532 if (avail > needed) {
533 if (!_canresize(self)) {
534 res = -1;
535 goto finish;
538 0 lo hi old_size
539 | |<----avail----->|<-----tomove------>|
540 | |<-needed->|<-----tomove------>|
541 0 lo new_hi new_size
543 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
544 Py_SIZE(self) - hi);
546 /* XXX(nnorwitz): need to verify this can't overflow! */
547 if (PyByteArray_Resize((PyObject *)self,
548 Py_SIZE(self) + needed - avail) < 0) {
549 res = -1;
550 goto finish;
552 if (avail < needed) {
554 0 lo hi old_size
555 | |<-avail->|<-----tomove------>|
556 | |<----needed---->|<-----tomove------>|
557 0 lo new_hi new_size
559 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
560 Py_SIZE(self) - lo - needed);
564 if (needed > 0)
565 memcpy(self->ob_bytes + lo, bytes, needed);
568 finish:
569 if (vbytes.len != -1)
570 PyBuffer_Release(&vbytes);
571 return res;
574 static int
575 bytearray_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
577 int ival;
579 if (i < 0)
580 i += Py_SIZE(self);
582 if (i < 0 || i >= Py_SIZE(self)) {
583 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
584 return -1;
587 if (value == NULL)
588 return bytearray_setslice(self, i, i+1, NULL);
590 if (!_getbytevalue(value, &ival))
591 return -1;
593 self->ob_bytes[i] = ival;
594 return 0;
597 static int
598 bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
600 Py_ssize_t start, stop, step, slicelen, needed;
601 char *bytes;
603 if (PyIndex_Check(index)) {
604 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
606 if (i == -1 && PyErr_Occurred())
607 return -1;
609 if (i < 0)
610 i += PyByteArray_GET_SIZE(self);
612 if (i < 0 || i >= Py_SIZE(self)) {
613 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
614 return -1;
617 if (values == NULL) {
618 /* Fall through to slice assignment */
619 start = i;
620 stop = i + 1;
621 step = 1;
622 slicelen = 1;
624 else {
625 int ival;
626 if (!_getbytevalue(values, &ival))
627 return -1;
628 self->ob_bytes[i] = (char)ival;
629 return 0;
632 else if (PySlice_Check(index)) {
633 if (PySlice_GetIndicesEx((PySliceObject *)index,
634 PyByteArray_GET_SIZE(self),
635 &start, &stop, &step, &slicelen) < 0) {
636 return -1;
639 else {
640 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
641 return -1;
644 if (values == NULL) {
645 bytes = NULL;
646 needed = 0;
648 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
649 /* Make a copy an call this function recursively */
650 int err;
651 values = PyByteArray_FromObject(values);
652 if (values == NULL)
653 return -1;
654 err = bytearray_ass_subscript(self, index, values);
655 Py_DECREF(values);
656 return err;
658 else {
659 assert(PyByteArray_Check(values));
660 bytes = ((PyByteArrayObject *)values)->ob_bytes;
661 needed = Py_SIZE(values);
663 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
664 if ((step < 0 && start < stop) ||
665 (step > 0 && start > stop))
666 stop = start;
667 if (step == 1) {
668 if (slicelen != needed) {
669 if (!_canresize(self))
670 return -1;
671 if (slicelen > needed) {
673 0 start stop old_size
674 | |<---slicelen--->|<-----tomove------>|
675 | |<-needed->|<-----tomove------>|
676 0 lo new_hi new_size
678 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
679 Py_SIZE(self) - stop);
681 if (PyByteArray_Resize((PyObject *)self,
682 Py_SIZE(self) + needed - slicelen) < 0)
683 return -1;
684 if (slicelen < needed) {
686 0 lo hi old_size
687 | |<-avail->|<-----tomove------>|
688 | |<----needed---->|<-----tomove------>|
689 0 lo new_hi new_size
691 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
692 Py_SIZE(self) - start - needed);
696 if (needed > 0)
697 memcpy(self->ob_bytes + start, bytes, needed);
699 return 0;
701 else {
702 if (needed == 0) {
703 /* Delete slice */
704 Py_ssize_t cur, i;
706 if (!_canresize(self))
707 return -1;
708 if (step < 0) {
709 stop = start + 1;
710 start = stop + step * (slicelen - 1) - 1;
711 step = -step;
713 for (cur = start, i = 0;
714 i < slicelen; cur += step, i++) {
715 Py_ssize_t lim = step - 1;
717 if (cur + step >= PyByteArray_GET_SIZE(self))
718 lim = PyByteArray_GET_SIZE(self) - cur - 1;
720 memmove(self->ob_bytes + cur - i,
721 self->ob_bytes + cur + 1, lim);
723 /* Move the tail of the bytes, in one chunk */
724 cur = start + slicelen*step;
725 if (cur < PyByteArray_GET_SIZE(self)) {
726 memmove(self->ob_bytes + cur - slicelen,
727 self->ob_bytes + cur,
728 PyByteArray_GET_SIZE(self) - cur);
730 if (PyByteArray_Resize((PyObject *)self,
731 PyByteArray_GET_SIZE(self) - slicelen) < 0)
732 return -1;
734 return 0;
736 else {
737 /* Assign slice */
738 Py_ssize_t cur, i;
740 if (needed != slicelen) {
741 PyErr_Format(PyExc_ValueError,
742 "attempt to assign bytes of size %zd "
743 "to extended slice of size %zd",
744 needed, slicelen);
745 return -1;
747 for (cur = start, i = 0; i < slicelen; cur += step, i++)
748 self->ob_bytes[cur] = bytes[i];
749 return 0;
754 static int
755 bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
757 static char *kwlist[] = {"source", "encoding", "errors", 0};
758 PyObject *arg = NULL;
759 const char *encoding = NULL;
760 const char *errors = NULL;
761 Py_ssize_t count;
762 PyObject *it;
763 PyObject *(*iternext)(PyObject *);
765 if (Py_SIZE(self) != 0) {
766 /* Empty previous contents (yes, do this first of all!) */
767 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
768 return -1;
771 /* Parse arguments */
772 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
773 &arg, &encoding, &errors))
774 return -1;
776 /* Make a quick exit if no first argument */
777 if (arg == NULL) {
778 if (encoding != NULL || errors != NULL) {
779 PyErr_SetString(PyExc_TypeError,
780 "encoding or errors without sequence argument");
781 return -1;
783 return 0;
786 if (PyBytes_Check(arg)) {
787 PyObject *new, *encoded;
788 if (encoding != NULL) {
789 encoded = PyCodec_Encode(arg, encoding, errors);
790 if (encoded == NULL)
791 return -1;
792 assert(PyBytes_Check(encoded));
794 else {
795 encoded = arg;
796 Py_INCREF(arg);
798 new = bytearray_iconcat(self, arg);
799 Py_DECREF(encoded);
800 if (new == NULL)
801 return -1;
802 Py_DECREF(new);
803 return 0;
806 #ifdef Py_USING_UNICODE
807 if (PyUnicode_Check(arg)) {
808 /* Encode via the codec registry */
809 PyObject *encoded, *new;
810 if (encoding == NULL) {
811 PyErr_SetString(PyExc_TypeError,
812 "unicode argument without an encoding");
813 return -1;
815 encoded = PyCodec_Encode(arg, encoding, errors);
816 if (encoded == NULL)
817 return -1;
818 assert(PyBytes_Check(encoded));
819 new = bytearray_iconcat(self, encoded);
820 Py_DECREF(encoded);
821 if (new == NULL)
822 return -1;
823 Py_DECREF(new);
824 return 0;
826 #endif
828 /* If it's not unicode, there can't be encoding or errors */
829 if (encoding != NULL || errors != NULL) {
830 PyErr_SetString(PyExc_TypeError,
831 "encoding or errors without a string argument");
832 return -1;
835 /* Is it an int? */
836 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
837 if (count == -1 && PyErr_Occurred())
838 PyErr_Clear();
839 else {
840 if (count < 0) {
841 PyErr_SetString(PyExc_ValueError, "negative count");
842 return -1;
844 if (count > 0) {
845 if (PyByteArray_Resize((PyObject *)self, count))
846 return -1;
847 memset(self->ob_bytes, 0, count);
849 return 0;
852 /* Use the buffer API */
853 if (PyObject_CheckBuffer(arg)) {
854 Py_ssize_t size;
855 Py_buffer view;
856 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
857 return -1;
858 size = view.len;
859 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
860 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
861 goto fail;
862 PyBuffer_Release(&view);
863 return 0;
864 fail:
865 PyBuffer_Release(&view);
866 return -1;
869 /* XXX Optimize this if the arguments is a list, tuple */
871 /* Get the iterator */
872 it = PyObject_GetIter(arg);
873 if (it == NULL)
874 return -1;
875 iternext = *Py_TYPE(it)->tp_iternext;
877 /* Run the iterator to exhaustion */
878 for (;;) {
879 PyObject *item;
880 int rc, value;
882 /* Get the next item */
883 item = iternext(it);
884 if (item == NULL) {
885 if (PyErr_Occurred()) {
886 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
887 goto error;
888 PyErr_Clear();
890 break;
893 /* Interpret it as an int (__index__) */
894 rc = _getbytevalue(item, &value);
895 Py_DECREF(item);
896 if (!rc)
897 goto error;
899 /* Append the byte */
900 if (Py_SIZE(self) < self->ob_alloc)
901 Py_SIZE(self)++;
902 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
903 goto error;
904 self->ob_bytes[Py_SIZE(self)-1] = value;
907 /* Clean up and return success */
908 Py_DECREF(it);
909 return 0;
911 error:
912 /* Error handling when it != NULL */
913 Py_DECREF(it);
914 return -1;
917 /* Mostly copied from string_repr, but without the
918 "smart quote" functionality. */
919 static PyObject *
920 bytearray_repr(PyByteArrayObject *self)
922 static const char *hexdigits = "0123456789abcdef";
923 const char *quote_prefix = "bytearray(b";
924 const char *quote_postfix = ")";
925 Py_ssize_t length = Py_SIZE(self);
926 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
927 size_t newsize = 14 + 4 * length;
928 PyObject *v;
929 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
930 PyErr_SetString(PyExc_OverflowError,
931 "bytearray object is too large to make repr");
932 return NULL;
934 v = PyString_FromStringAndSize(NULL, newsize);
935 if (v == NULL) {
936 return NULL;
938 else {
939 register Py_ssize_t i;
940 register char c;
941 register char *p;
942 int quote;
944 /* Figure out which quote to use; single is preferred */
945 quote = '\'';
947 char *test, *start;
948 start = PyByteArray_AS_STRING(self);
949 for (test = start; test < start+length; ++test) {
950 if (*test == '"') {
951 quote = '\''; /* back to single */
952 goto decided;
954 else if (*test == '\'')
955 quote = '"';
957 decided:
961 p = PyString_AS_STRING(v);
962 while (*quote_prefix)
963 *p++ = *quote_prefix++;
964 *p++ = quote;
966 for (i = 0; i < length; i++) {
967 /* There's at least enough room for a hex escape
968 and a closing quote. */
969 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
970 c = self->ob_bytes[i];
971 if (c == '\'' || c == '\\')
972 *p++ = '\\', *p++ = c;
973 else if (c == '\t')
974 *p++ = '\\', *p++ = 't';
975 else if (c == '\n')
976 *p++ = '\\', *p++ = 'n';
977 else if (c == '\r')
978 *p++ = '\\', *p++ = 'r';
979 else if (c == 0)
980 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
981 else if (c < ' ' || c >= 0x7f) {
982 *p++ = '\\';
983 *p++ = 'x';
984 *p++ = hexdigits[(c & 0xf0) >> 4];
985 *p++ = hexdigits[c & 0xf];
987 else
988 *p++ = c;
990 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
991 *p++ = quote;
992 while (*quote_postfix) {
993 *p++ = *quote_postfix++;
995 *p = '\0';
996 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v)))) {
997 Py_DECREF(v);
998 return NULL;
1000 return v;
1004 static PyObject *
1005 bytearray_str(PyObject *op)
1007 #if 0
1008 if (Py_BytesWarningFlag) {
1009 if (PyErr_WarnEx(PyExc_BytesWarning,
1010 "str() on a bytearray instance", 1))
1011 return NULL;
1013 return bytearray_repr((PyByteArrayObject*)op);
1014 #endif
1015 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
1018 static PyObject *
1019 bytearray_richcompare(PyObject *self, PyObject *other, int op)
1021 Py_ssize_t self_size, other_size;
1022 Py_buffer self_bytes, other_bytes;
1023 PyObject *res;
1024 Py_ssize_t minsize;
1025 int cmp;
1027 /* Bytes can be compared to anything that supports the (binary)
1028 buffer API. Except that a comparison with Unicode is always an
1029 error, even if the comparison is for equality. */
1030 #ifdef Py_USING_UNICODE
1031 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1032 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1033 if (Py_BytesWarningFlag && op == Py_EQ) {
1034 if (PyErr_WarnEx(PyExc_BytesWarning,
1035 "Comparsion between bytearray and string", 1))
1036 return NULL;
1039 Py_INCREF(Py_NotImplemented);
1040 return Py_NotImplemented;
1042 #endif
1044 self_size = _getbuffer(self, &self_bytes);
1045 if (self_size < 0) {
1046 PyErr_Clear();
1047 Py_INCREF(Py_NotImplemented);
1048 return Py_NotImplemented;
1051 other_size = _getbuffer(other, &other_bytes);
1052 if (other_size < 0) {
1053 PyErr_Clear();
1054 PyBuffer_Release(&self_bytes);
1055 Py_INCREF(Py_NotImplemented);
1056 return Py_NotImplemented;
1059 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1060 /* Shortcut: if the lengths differ, the objects differ */
1061 cmp = (op == Py_NE);
1063 else {
1064 minsize = self_size;
1065 if (other_size < minsize)
1066 minsize = other_size;
1068 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1069 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1071 if (cmp == 0) {
1072 if (self_size < other_size)
1073 cmp = -1;
1074 else if (self_size > other_size)
1075 cmp = 1;
1078 switch (op) {
1079 case Py_LT: cmp = cmp < 0; break;
1080 case Py_LE: cmp = cmp <= 0; break;
1081 case Py_EQ: cmp = cmp == 0; break;
1082 case Py_NE: cmp = cmp != 0; break;
1083 case Py_GT: cmp = cmp > 0; break;
1084 case Py_GE: cmp = cmp >= 0; break;
1088 res = cmp ? Py_True : Py_False;
1089 PyBuffer_Release(&self_bytes);
1090 PyBuffer_Release(&other_bytes);
1091 Py_INCREF(res);
1092 return res;
1095 static void
1096 bytearray_dealloc(PyByteArrayObject *self)
1098 if (self->ob_exports > 0) {
1099 PyErr_SetString(PyExc_SystemError,
1100 "deallocated bytearray object has exported buffers");
1101 PyErr_Print();
1103 if (self->ob_bytes != 0) {
1104 PyMem_Free(self->ob_bytes);
1106 Py_TYPE(self)->tp_free((PyObject *)self);
1110 /* -------------------------------------------------------------------- */
1111 /* Methods */
1113 #define STRINGLIB_CHAR char
1114 #define STRINGLIB_CMP memcmp
1115 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1116 #define STRINGLIB_STR PyByteArray_AS_STRING
1117 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1118 #define STRINGLIB_EMPTY nullbytes
1119 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1120 #define STRINGLIB_MUTABLE 1
1121 #define FROM_BYTEARRAY 1
1123 #include "stringlib/fastsearch.h"
1124 #include "stringlib/count.h"
1125 #include "stringlib/find.h"
1126 #include "stringlib/partition.h"
1127 #include "stringlib/ctype.h"
1128 #include "stringlib/transmogrify.h"
1131 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1132 were copied from the old char* style string object. */
1134 Py_LOCAL_INLINE(void)
1135 _adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1137 if (*end > len)
1138 *end = len;
1139 else if (*end < 0)
1140 *end += len;
1141 if (*end < 0)
1142 *end = 0;
1143 if (*start < 0)
1144 *start += len;
1145 if (*start < 0)
1146 *start = 0;
1150 Py_LOCAL_INLINE(Py_ssize_t)
1151 bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1153 PyObject *subobj;
1154 Py_buffer subbuf;
1155 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1156 Py_ssize_t res;
1158 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1159 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1160 return -2;
1161 if (_getbuffer(subobj, &subbuf) < 0)
1162 return -2;
1163 if (dir > 0)
1164 res = stringlib_find_slice(
1165 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1166 subbuf.buf, subbuf.len, start, end);
1167 else
1168 res = stringlib_rfind_slice(
1169 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1170 subbuf.buf, subbuf.len, start, end);
1171 PyBuffer_Release(&subbuf);
1172 return res;
1175 PyDoc_STRVAR(find__doc__,
1176 "B.find(sub [,start [,end]]) -> int\n\
1178 Return the lowest index in B where subsection sub is found,\n\
1179 such that sub is contained within s[start,end]. Optional\n\
1180 arguments start and end are interpreted as in slice notation.\n\
1182 Return -1 on failure.");
1184 static PyObject *
1185 bytearray_find(PyByteArrayObject *self, PyObject *args)
1187 Py_ssize_t result = bytearray_find_internal(self, args, +1);
1188 if (result == -2)
1189 return NULL;
1190 return PyInt_FromSsize_t(result);
1193 PyDoc_STRVAR(count__doc__,
1194 "B.count(sub [,start [,end]]) -> int\n\
1196 Return the number of non-overlapping occurrences of subsection sub in\n\
1197 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1198 as in slice notation.");
1200 static PyObject *
1201 bytearray_count(PyByteArrayObject *self, PyObject *args)
1203 PyObject *sub_obj;
1204 const char *str = PyByteArray_AS_STRING(self);
1205 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1206 Py_buffer vsub;
1207 PyObject *count_obj;
1209 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1210 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1211 return NULL;
1213 if (_getbuffer(sub_obj, &vsub) < 0)
1214 return NULL;
1216 _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
1218 count_obj = PyInt_FromSsize_t(
1219 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1221 PyBuffer_Release(&vsub);
1222 return count_obj;
1226 PyDoc_STRVAR(index__doc__,
1227 "B.index(sub [,start [,end]]) -> int\n\
1229 Like B.find() but raise ValueError when the subsection is not found.");
1231 static PyObject *
1232 bytearray_index(PyByteArrayObject *self, PyObject *args)
1234 Py_ssize_t result = bytearray_find_internal(self, args, +1);
1235 if (result == -2)
1236 return NULL;
1237 if (result == -1) {
1238 PyErr_SetString(PyExc_ValueError,
1239 "subsection not found");
1240 return NULL;
1242 return PyInt_FromSsize_t(result);
1246 PyDoc_STRVAR(rfind__doc__,
1247 "B.rfind(sub [,start [,end]]) -> int\n\
1249 Return the highest index in B where subsection sub is found,\n\
1250 such that sub is contained within s[start,end]. Optional\n\
1251 arguments start and end are interpreted as in slice notation.\n\
1253 Return -1 on failure.");
1255 static PyObject *
1256 bytearray_rfind(PyByteArrayObject *self, PyObject *args)
1258 Py_ssize_t result = bytearray_find_internal(self, args, -1);
1259 if (result == -2)
1260 return NULL;
1261 return PyInt_FromSsize_t(result);
1265 PyDoc_STRVAR(rindex__doc__,
1266 "B.rindex(sub [,start [,end]]) -> int\n\
1268 Like B.rfind() but raise ValueError when the subsection is not found.");
1270 static PyObject *
1271 bytearray_rindex(PyByteArrayObject *self, PyObject *args)
1273 Py_ssize_t result = bytearray_find_internal(self, args, -1);
1274 if (result == -2)
1275 return NULL;
1276 if (result == -1) {
1277 PyErr_SetString(PyExc_ValueError,
1278 "subsection not found");
1279 return NULL;
1281 return PyInt_FromSsize_t(result);
1285 static int
1286 bytearray_contains(PyObject *self, PyObject *arg)
1288 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1289 if (ival == -1 && PyErr_Occurred()) {
1290 Py_buffer varg;
1291 int pos;
1292 PyErr_Clear();
1293 if (_getbuffer(arg, &varg) < 0)
1294 return -1;
1295 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1296 varg.buf, varg.len, 0);
1297 PyBuffer_Release(&varg);
1298 return pos >= 0;
1300 if (ival < 0 || ival >= 256) {
1301 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1302 return -1;
1305 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1309 /* Matches the end (direction >= 0) or start (direction < 0) of self
1310 * against substr, using the start and end arguments. Returns
1311 * -1 on error, 0 if not found and 1 if found.
1313 Py_LOCAL(int)
1314 _bytearray_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1315 Py_ssize_t end, int direction)
1317 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1318 const char* str;
1319 Py_buffer vsubstr;
1320 int rv = 0;
1322 str = PyByteArray_AS_STRING(self);
1324 if (_getbuffer(substr, &vsubstr) < 0)
1325 return -1;
1327 _adjust_indices(&start, &end, len);
1329 if (direction < 0) {
1330 /* startswith */
1331 if (start+vsubstr.len > len) {
1332 goto done;
1334 } else {
1335 /* endswith */
1336 if (end-start < vsubstr.len || start > len) {
1337 goto done;
1340 if (end-vsubstr.len > start)
1341 start = end - vsubstr.len;
1343 if (end-start >= vsubstr.len)
1344 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1346 done:
1347 PyBuffer_Release(&vsubstr);
1348 return rv;
1352 PyDoc_STRVAR(startswith__doc__,
1353 "B.startswith(prefix [,start [,end]]) -> bool\n\
1355 Return True if B starts with the specified prefix, False otherwise.\n\
1356 With optional start, test B beginning at that position.\n\
1357 With optional end, stop comparing B at that position.\n\
1358 prefix can also be a tuple of strings to try.");
1360 static PyObject *
1361 bytearray_startswith(PyByteArrayObject *self, PyObject *args)
1363 Py_ssize_t start = 0;
1364 Py_ssize_t end = PY_SSIZE_T_MAX;
1365 PyObject *subobj;
1366 int result;
1368 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1369 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1370 return NULL;
1371 if (PyTuple_Check(subobj)) {
1372 Py_ssize_t i;
1373 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1374 result = _bytearray_tailmatch(self,
1375 PyTuple_GET_ITEM(subobj, i),
1376 start, end, -1);
1377 if (result == -1)
1378 return NULL;
1379 else if (result) {
1380 Py_RETURN_TRUE;
1383 Py_RETURN_FALSE;
1385 result = _bytearray_tailmatch(self, subobj, start, end, -1);
1386 if (result == -1)
1387 return NULL;
1388 else
1389 return PyBool_FromLong(result);
1392 PyDoc_STRVAR(endswith__doc__,
1393 "B.endswith(suffix [,start [,end]]) -> bool\n\
1395 Return True if B ends with the specified suffix, False otherwise.\n\
1396 With optional start, test B beginning at that position.\n\
1397 With optional end, stop comparing B at that position.\n\
1398 suffix can also be a tuple of strings to try.");
1400 static PyObject *
1401 bytearray_endswith(PyByteArrayObject *self, PyObject *args)
1403 Py_ssize_t start = 0;
1404 Py_ssize_t end = PY_SSIZE_T_MAX;
1405 PyObject *subobj;
1406 int result;
1408 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1409 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1410 return NULL;
1411 if (PyTuple_Check(subobj)) {
1412 Py_ssize_t i;
1413 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1414 result = _bytearray_tailmatch(self,
1415 PyTuple_GET_ITEM(subobj, i),
1416 start, end, +1);
1417 if (result == -1)
1418 return NULL;
1419 else if (result) {
1420 Py_RETURN_TRUE;
1423 Py_RETURN_FALSE;
1425 result = _bytearray_tailmatch(self, subobj, start, end, +1);
1426 if (result == -1)
1427 return NULL;
1428 else
1429 return PyBool_FromLong(result);
1433 PyDoc_STRVAR(translate__doc__,
1434 "B.translate(table[, deletechars]) -> bytearray\n\
1436 Return a copy of B, where all characters occurring in the\n\
1437 optional argument deletechars are removed, and the remaining\n\
1438 characters have been mapped through the given translation\n\
1439 table, which must be a bytes object of length 256.");
1441 static PyObject *
1442 bytearray_translate(PyByteArrayObject *self, PyObject *args)
1444 register char *input, *output;
1445 register const char *table;
1446 register Py_ssize_t i, c;
1447 PyObject *input_obj = (PyObject*)self;
1448 const char *output_start;
1449 Py_ssize_t inlen;
1450 PyObject *result = NULL;
1451 int trans_table[256];
1452 PyObject *tableobj = NULL, *delobj = NULL;
1453 Py_buffer vtable, vdel;
1455 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1456 &tableobj, &delobj))
1457 return NULL;
1459 if (tableobj == Py_None) {
1460 table = NULL;
1461 tableobj = NULL;
1462 } else if (_getbuffer(tableobj, &vtable) < 0) {
1463 return NULL;
1464 } else {
1465 if (vtable.len != 256) {
1466 PyErr_SetString(PyExc_ValueError,
1467 "translation table must be 256 characters long");
1468 PyBuffer_Release(&vtable);
1469 return NULL;
1471 table = (const char*)vtable.buf;
1474 if (delobj != NULL) {
1475 if (_getbuffer(delobj, &vdel) < 0) {
1476 if (tableobj != NULL)
1477 PyBuffer_Release(&vtable);
1478 return NULL;
1481 else {
1482 vdel.buf = NULL;
1483 vdel.len = 0;
1486 inlen = PyByteArray_GET_SIZE(input_obj);
1487 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1488 if (result == NULL)
1489 goto done;
1490 output_start = output = PyByteArray_AsString(result);
1491 input = PyByteArray_AS_STRING(input_obj);
1493 if (vdel.len == 0 && table != NULL) {
1494 /* If no deletions are required, use faster code */
1495 for (i = inlen; --i >= 0; ) {
1496 c = Py_CHARMASK(*input++);
1497 *output++ = table[c];
1499 goto done;
1502 if (table == NULL) {
1503 for (i = 0; i < 256; i++)
1504 trans_table[i] = Py_CHARMASK(i);
1505 } else {
1506 for (i = 0; i < 256; i++)
1507 trans_table[i] = Py_CHARMASK(table[i]);
1510 for (i = 0; i < vdel.len; i++)
1511 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1513 for (i = inlen; --i >= 0; ) {
1514 c = Py_CHARMASK(*input++);
1515 if (trans_table[c] != -1)
1516 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1517 continue;
1519 /* Fix the size of the resulting string */
1520 if (inlen > 0)
1521 PyByteArray_Resize(result, output - output_start);
1523 done:
1524 if (tableobj != NULL)
1525 PyBuffer_Release(&vtable);
1526 if (delobj != NULL)
1527 PyBuffer_Release(&vdel);
1528 return result;
/* Search directions accepted by findstring() / countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c in target[0:target_len],
   or NULL.  Arguments are parenthesized so expressions such as
   "end-start" can be passed safely. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))

/* Don't call if length < 2 */
/* True when pattern[0:length] occurs at target+offset.  The first and
   last bytes are compared before falling back to memcmp on the middle.
   Arguments are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)        \
  ((target)[(offset)] == (pattern)[0] &&                        \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&    \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1547 /* Bytes ops must return a string, create a copy */
1548 Py_LOCAL(PyByteArrayObject *)
1549 return_self(PyByteArrayObject *self)
1551 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1552 PyByteArray_AS_STRING(self),
1553 PyByteArray_GET_SIZE(self));
1556 Py_LOCAL_INLINE(Py_ssize_t)
1557 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1559 Py_ssize_t count=0;
1560 const char *start=target;
1561 const char *end=target+target_len;
1563 while ( (start=findchar(start, end-start, c)) != NULL ) {
1564 count++;
1565 if (count >= maxcount)
1566 break;
1567 start += 1;
1569 return count;
1572 Py_LOCAL(Py_ssize_t)
1573 findstring(const char *target, Py_ssize_t target_len,
1574 const char *pattern, Py_ssize_t pattern_len,
1575 Py_ssize_t start,
1576 Py_ssize_t end,
1577 int direction)
1579 if (start < 0) {
1580 start += target_len;
1581 if (start < 0)
1582 start = 0;
1584 if (end > target_len) {
1585 end = target_len;
1586 } else if (end < 0) {
1587 end += target_len;
1588 if (end < 0)
1589 end = 0;
1592 /* zero-length substrings always match at the first attempt */
1593 if (pattern_len == 0)
1594 return (direction > 0) ? start : end;
1596 end -= pattern_len;
1598 if (direction < 0) {
1599 for (; end >= start; end--)
1600 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1601 return end;
1602 } else {
1603 for (; start <= end; start++)
1604 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1605 return start;
1607 return -1;
1610 Py_LOCAL_INLINE(Py_ssize_t)
1611 countstring(const char *target, Py_ssize_t target_len,
1612 const char *pattern, Py_ssize_t pattern_len,
1613 Py_ssize_t start,
1614 Py_ssize_t end,
1615 int direction, Py_ssize_t maxcount)
1617 Py_ssize_t count=0;
1619 if (start < 0) {
1620 start += target_len;
1621 if (start < 0)
1622 start = 0;
1624 if (end > target_len) {
1625 end = target_len;
1626 } else if (end < 0) {
1627 end += target_len;
1628 if (end < 0)
1629 end = 0;
1632 /* zero-length substrings match everywhere */
1633 if (pattern_len == 0 || maxcount == 0) {
1634 if (target_len+1 < maxcount)
1635 return target_len+1;
1636 return maxcount;
1639 end -= pattern_len;
1640 if (direction < 0) {
1641 for (; (end >= start); end--)
1642 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1643 count++;
1644 if (--maxcount <= 0) break;
1645 end -= pattern_len-1;
1647 } else {
1648 for (; (start <= end); start++)
1649 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1650 count++;
1651 if (--maxcount <= 0)
1652 break;
1653 start += pattern_len-1;
1656 return count;
1660 /* Algorithms for different cases of string replacement */
1662 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1663 Py_LOCAL(PyByteArrayObject *)
1664 replace_interleave(PyByteArrayObject *self,
1665 const char *to_s, Py_ssize_t to_len,
1666 Py_ssize_t maxcount)
1668 char *self_s, *result_s;
1669 Py_ssize_t self_len, result_len;
1670 Py_ssize_t count, i, product;
1671 PyByteArrayObject *result;
1673 self_len = PyByteArray_GET_SIZE(self);
1675 /* 1 at the end plus 1 after every character */
1676 count = self_len+1;
1677 if (maxcount < count)
1678 count = maxcount;
1680 /* Check for overflow */
1681 /* result_len = count * to_len + self_len; */
1682 product = count * to_len;
1683 if (product / to_len != count) {
1684 PyErr_SetString(PyExc_OverflowError,
1685 "replace string is too long");
1686 return NULL;
1688 result_len = product + self_len;
1689 if (result_len < 0) {
1690 PyErr_SetString(PyExc_OverflowError,
1691 "replace string is too long");
1692 return NULL;
1695 if (! (result = (PyByteArrayObject *)
1696 PyByteArray_FromStringAndSize(NULL, result_len)) )
1697 return NULL;
1699 self_s = PyByteArray_AS_STRING(self);
1700 result_s = PyByteArray_AS_STRING(result);
1702 /* TODO: special case single character, which doesn't need memcpy */
1704 /* Lay the first one down (guaranteed this will occur) */
1705 Py_MEMCPY(result_s, to_s, to_len);
1706 result_s += to_len;
1707 count -= 1;
1709 for (i=0; i<count; i++) {
1710 *result_s++ = *self_s++;
1711 Py_MEMCPY(result_s, to_s, to_len);
1712 result_s += to_len;
1715 /* Copy the rest of the original string */
1716 Py_MEMCPY(result_s, self_s, self_len-i);
1718 return result;
1721 /* Special case for deleting a single character */
1722 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1723 Py_LOCAL(PyByteArrayObject *)
1724 replace_delete_single_character(PyByteArrayObject *self,
1725 char from_c, Py_ssize_t maxcount)
1727 char *self_s, *result_s;
1728 char *start, *next, *end;
1729 Py_ssize_t self_len, result_len;
1730 Py_ssize_t count;
1731 PyByteArrayObject *result;
1733 self_len = PyByteArray_GET_SIZE(self);
1734 self_s = PyByteArray_AS_STRING(self);
1736 count = countchar(self_s, self_len, from_c, maxcount);
1737 if (count == 0) {
1738 return return_self(self);
1741 result_len = self_len - count; /* from_len == 1 */
1742 assert(result_len>=0);
1744 if ( (result = (PyByteArrayObject *)
1745 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1746 return NULL;
1747 result_s = PyByteArray_AS_STRING(result);
1749 start = self_s;
1750 end = self_s + self_len;
1751 while (count-- > 0) {
1752 next = findchar(start, end-start, from_c);
1753 if (next == NULL)
1754 break;
1755 Py_MEMCPY(result_s, start, next-start);
1756 result_s += (next-start);
1757 start = next+1;
1759 Py_MEMCPY(result_s, start, end-start);
1761 return result;
1764 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1766 Py_LOCAL(PyByteArrayObject *)
1767 replace_delete_substring(PyByteArrayObject *self,
1768 const char *from_s, Py_ssize_t from_len,
1769 Py_ssize_t maxcount)
1771 char *self_s, *result_s;
1772 char *start, *next, *end;
1773 Py_ssize_t self_len, result_len;
1774 Py_ssize_t count, offset;
1775 PyByteArrayObject *result;
1777 self_len = PyByteArray_GET_SIZE(self);
1778 self_s = PyByteArray_AS_STRING(self);
1780 count = countstring(self_s, self_len,
1781 from_s, from_len,
1782 0, self_len, 1,
1783 maxcount);
1785 if (count == 0) {
1786 /* no matches */
1787 return return_self(self);
1790 result_len = self_len - (count * from_len);
1791 assert (result_len>=0);
1793 if ( (result = (PyByteArrayObject *)
1794 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1795 return NULL;
1797 result_s = PyByteArray_AS_STRING(result);
1799 start = self_s;
1800 end = self_s + self_len;
1801 while (count-- > 0) {
1802 offset = findstring(start, end-start,
1803 from_s, from_len,
1804 0, end-start, FORWARD);
1805 if (offset == -1)
1806 break;
1807 next = start + offset;
1809 Py_MEMCPY(result_s, start, next-start);
1811 result_s += (next-start);
1812 start = next+from_len;
1814 Py_MEMCPY(result_s, start, end-start);
1815 return result;
1818 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1819 Py_LOCAL(PyByteArrayObject *)
1820 replace_single_character_in_place(PyByteArrayObject *self,
1821 char from_c, char to_c,
1822 Py_ssize_t maxcount)
1824 char *self_s, *result_s, *start, *end, *next;
1825 Py_ssize_t self_len;
1826 PyByteArrayObject *result;
1828 /* The result string will be the same size */
1829 self_s = PyByteArray_AS_STRING(self);
1830 self_len = PyByteArray_GET_SIZE(self);
1832 next = findchar(self_s, self_len, from_c);
1834 if (next == NULL) {
1835 /* No matches; return the original bytes */
1836 return return_self(self);
1839 /* Need to make a new bytes */
1840 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1841 if (result == NULL)
1842 return NULL;
1843 result_s = PyByteArray_AS_STRING(result);
1844 Py_MEMCPY(result_s, self_s, self_len);
1846 /* change everything in-place, starting with this one */
1847 start = result_s + (next-self_s);
1848 *start = to_c;
1849 start++;
1850 end = result_s + self_len;
1852 while (--maxcount > 0) {
1853 next = findchar(start, end-start, from_c);
1854 if (next == NULL)
1855 break;
1856 *next = to_c;
1857 start = next+1;
1860 return result;
1863 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1864 Py_LOCAL(PyByteArrayObject *)
1865 replace_substring_in_place(PyByteArrayObject *self,
1866 const char *from_s, Py_ssize_t from_len,
1867 const char *to_s, Py_ssize_t to_len,
1868 Py_ssize_t maxcount)
1870 char *result_s, *start, *end;
1871 char *self_s;
1872 Py_ssize_t self_len, offset;
1873 PyByteArrayObject *result;
1875 /* The result bytes will be the same size */
1877 self_s = PyByteArray_AS_STRING(self);
1878 self_len = PyByteArray_GET_SIZE(self);
1880 offset = findstring(self_s, self_len,
1881 from_s, from_len,
1882 0, self_len, FORWARD);
1883 if (offset == -1) {
1884 /* No matches; return the original bytes */
1885 return return_self(self);
1888 /* Need to make a new bytes */
1889 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1890 if (result == NULL)
1891 return NULL;
1892 result_s = PyByteArray_AS_STRING(result);
1893 Py_MEMCPY(result_s, self_s, self_len);
1895 /* change everything in-place, starting with this one */
1896 start = result_s + offset;
1897 Py_MEMCPY(start, to_s, from_len);
1898 start += from_len;
1899 end = result_s + self_len;
1901 while ( --maxcount > 0) {
1902 offset = findstring(start, end-start,
1903 from_s, from_len,
1904 0, end-start, FORWARD);
1905 if (offset==-1)
1906 break;
1907 Py_MEMCPY(start+offset, to_s, from_len);
1908 start += offset+from_len;
1911 return result;
1914 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1915 Py_LOCAL(PyByteArrayObject *)
1916 replace_single_character(PyByteArrayObject *self,
1917 char from_c,
1918 const char *to_s, Py_ssize_t to_len,
1919 Py_ssize_t maxcount)
1921 char *self_s, *result_s;
1922 char *start, *next, *end;
1923 Py_ssize_t self_len, result_len;
1924 Py_ssize_t count, product;
1925 PyByteArrayObject *result;
1927 self_s = PyByteArray_AS_STRING(self);
1928 self_len = PyByteArray_GET_SIZE(self);
1930 count = countchar(self_s, self_len, from_c, maxcount);
1931 if (count == 0) {
1932 /* no matches, return unchanged */
1933 return return_self(self);
1936 /* use the difference between current and new, hence the "-1" */
1937 /* result_len = self_len + count * (to_len-1) */
1938 product = count * (to_len-1);
1939 if (product / (to_len-1) != count) {
1940 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1941 return NULL;
1943 result_len = self_len + product;
1944 if (result_len < 0) {
1945 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1946 return NULL;
1949 if ( (result = (PyByteArrayObject *)
1950 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1951 return NULL;
1952 result_s = PyByteArray_AS_STRING(result);
1954 start = self_s;
1955 end = self_s + self_len;
1956 while (count-- > 0) {
1957 next = findchar(start, end-start, from_c);
1958 if (next == NULL)
1959 break;
1961 if (next == start) {
1962 /* replace with the 'to' */
1963 Py_MEMCPY(result_s, to_s, to_len);
1964 result_s += to_len;
1965 start += 1;
1966 } else {
1967 /* copy the unchanged old then the 'to' */
1968 Py_MEMCPY(result_s, start, next-start);
1969 result_s += (next-start);
1970 Py_MEMCPY(result_s, to_s, to_len);
1971 result_s += to_len;
1972 start = next+1;
1975 /* Copy the remainder of the remaining bytes */
1976 Py_MEMCPY(result_s, start, end-start);
1978 return result;
1981 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1982 Py_LOCAL(PyByteArrayObject *)
1983 replace_substring(PyByteArrayObject *self,
1984 const char *from_s, Py_ssize_t from_len,
1985 const char *to_s, Py_ssize_t to_len,
1986 Py_ssize_t maxcount)
1988 char *self_s, *result_s;
1989 char *start, *next, *end;
1990 Py_ssize_t self_len, result_len;
1991 Py_ssize_t count, offset, product;
1992 PyByteArrayObject *result;
1994 self_s = PyByteArray_AS_STRING(self);
1995 self_len = PyByteArray_GET_SIZE(self);
1997 count = countstring(self_s, self_len,
1998 from_s, from_len,
1999 0, self_len, FORWARD, maxcount);
2000 if (count == 0) {
2001 /* no matches, return unchanged */
2002 return return_self(self);
2005 /* Check for overflow */
2006 /* result_len = self_len + count * (to_len-from_len) */
2007 product = count * (to_len-from_len);
2008 if (product / (to_len-from_len) != count) {
2009 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2010 return NULL;
2012 result_len = self_len + product;
2013 if (result_len < 0) {
2014 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2015 return NULL;
2018 if ( (result = (PyByteArrayObject *)
2019 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
2020 return NULL;
2021 result_s = PyByteArray_AS_STRING(result);
2023 start = self_s;
2024 end = self_s + self_len;
2025 while (count-- > 0) {
2026 offset = findstring(start, end-start,
2027 from_s, from_len,
2028 0, end-start, FORWARD);
2029 if (offset == -1)
2030 break;
2031 next = start+offset;
2032 if (next == start) {
2033 /* replace with the 'to' */
2034 Py_MEMCPY(result_s, to_s, to_len);
2035 result_s += to_len;
2036 start += from_len;
2037 } else {
2038 /* copy the unchanged old then the 'to' */
2039 Py_MEMCPY(result_s, start, next-start);
2040 result_s += (next-start);
2041 Py_MEMCPY(result_s, to_s, to_len);
2042 result_s += to_len;
2043 start = next+from_len;
2046 /* Copy the remainder of the remaining bytes */
2047 Py_MEMCPY(result_s, start, end-start);
2049 return result;
2053 Py_LOCAL(PyByteArrayObject *)
2054 replace(PyByteArrayObject *self,
2055 const char *from_s, Py_ssize_t from_len,
2056 const char *to_s, Py_ssize_t to_len,
2057 Py_ssize_t maxcount)
2059 if (maxcount < 0) {
2060 maxcount = PY_SSIZE_T_MAX;
2061 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
2062 /* nothing to do; return the original bytes */
2063 return return_self(self);
2066 if (maxcount == 0 ||
2067 (from_len == 0 && to_len == 0)) {
2068 /* nothing to do; return the original bytes */
2069 return return_self(self);
2072 /* Handle zero-length special cases */
2074 if (from_len == 0) {
2075 /* insert the 'to' bytes everywhere. */
2076 /* >>> "Python".replace("", ".") */
2077 /* '.P.y.t.h.o.n.' */
2078 return replace_interleave(self, to_s, to_len, maxcount);
2081 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2082 /* point for an empty self bytes to generate a non-empty bytes */
2083 /* Special case so the remaining code always gets a non-empty bytes */
2084 if (PyByteArray_GET_SIZE(self) == 0) {
2085 return return_self(self);
2088 if (to_len == 0) {
2089 /* delete all occurances of 'from' bytes */
2090 if (from_len == 1) {
2091 return replace_delete_single_character(
2092 self, from_s[0], maxcount);
2093 } else {
2094 return replace_delete_substring(self, from_s, from_len, maxcount);
2098 /* Handle special case where both bytes have the same length */
2100 if (from_len == to_len) {
2101 if (from_len == 1) {
2102 return replace_single_character_in_place(
2103 self,
2104 from_s[0],
2105 to_s[0],
2106 maxcount);
2107 } else {
2108 return replace_substring_in_place(
2109 self, from_s, from_len, to_s, to_len, maxcount);
2113 /* Otherwise use the more generic algorithms */
2114 if (from_len == 1) {
2115 return replace_single_character(self, from_s[0],
2116 to_s, to_len, maxcount);
2117 } else {
2118 /* len('from')>=2, len('to')>=1 */
2119 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2124 PyDoc_STRVAR(replace__doc__,
2125 "B.replace(old, new[, count]) -> bytes\n\
2127 Return a copy of B with all occurrences of subsection\n\
2128 old replaced by new. If the optional argument count is\n\
2129 given, only the first count occurrences are replaced.");
2131 static PyObject *
2132 bytearray_replace(PyByteArrayObject *self, PyObject *args)
2134 Py_ssize_t count = -1;
2135 PyObject *from, *to, *res;
2136 Py_buffer vfrom, vto;
2138 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2139 return NULL;
2141 if (_getbuffer(from, &vfrom) < 0)
2142 return NULL;
2143 if (_getbuffer(to, &vto) < 0) {
2144 PyBuffer_Release(&vfrom);
2145 return NULL;
2148 res = (PyObject *)replace((PyByteArrayObject *) self,
2149 vfrom.buf, vfrom.len,
2150 vto.buf, vto.len, count);
2152 PyBuffer_Release(&vfrom);
2153 PyBuffer_Release(&vto);
2154 return res;
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesized so that any expression may be passed. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* The SPLIT_* macros below rely on these names existing at the point of
   use: locals `str` and `list`, a label `onError`, and (for SPLIT_ADD /
   FIX_PREALLOC_SIZE) a local `count`. */

/* Append data[left:right] as a new bytearray to `list`. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyByteArray_FromStringAndSize((data) + (left),       \
                                     (right) - (left));     \
    if (str == NULL)                                        \
        goto onError;                                   \
    if (PyList_Append(list, str)) {                         \
        Py_DECREF(str);                                 \
        goto onError;                                   \
    }                                                       \
    else                                                    \
        Py_DECREF(str);

/* Store data[left:right] as a new bytearray in slot `count` of `list`:
   directly into a preallocated slot while count < MAX_PREALLOC, by
   appending afterwards.  Increments `count`. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyByteArray_FromStringAndSize((data) + (left),       \
                                     (right) - (left));     \
    if (str == NULL)                                        \
        goto onError;                                   \
    if (count < MAX_PREALLOC) {                             \
        PyList_SET_ITEM(list, count, str);              \
    } else {                                                \
        if (PyList_Append(list, str)) {                 \
            Py_DECREF(str);                         \
            goto onError;                           \
        }                                               \
        else                                            \
            Py_DECREF(str);                         \
    }                                                       \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
2204 Py_LOCAL_INLINE(PyObject *)
2205 split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2207 register Py_ssize_t i, j, count = 0;
2208 PyObject *str;
2209 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2211 if (list == NULL)
2212 return NULL;
2214 i = j = 0;
2215 while ((j < len) && (maxcount-- > 0)) {
2216 for(; j < len; j++) {
2217 /* I found that using memchr makes no difference */
2218 if (s[j] == ch) {
2219 SPLIT_ADD(s, i, j);
2220 i = j = j + 1;
2221 break;
2225 if (i <= len) {
2226 SPLIT_ADD(s, i, len);
2228 FIX_PREALLOC_SIZE(list);
2229 return list;
2231 onError:
2232 Py_DECREF(list);
2233 return NULL;
2237 Py_LOCAL_INLINE(PyObject *)
2238 split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2240 register Py_ssize_t i, j, count = 0;
2241 PyObject *str;
2242 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2244 if (list == NULL)
2245 return NULL;
2247 for (i = j = 0; i < len; ) {
2248 /* find a token */
2249 while (i < len && Py_ISSPACE(s[i]))
2250 i++;
2251 j = i;
2252 while (i < len && !Py_ISSPACE(s[i]))
2253 i++;
2254 if (j < i) {
2255 if (maxcount-- <= 0)
2256 break;
2257 SPLIT_ADD(s, j, i);
2258 while (i < len && Py_ISSPACE(s[i]))
2259 i++;
2260 j = i;
2263 if (j < len) {
2264 SPLIT_ADD(s, j, len);
2266 FIX_PREALLOC_SIZE(list);
2267 return list;
2269 onError:
2270 Py_DECREF(list);
2271 return NULL;
2274 PyDoc_STRVAR(split__doc__,
2275 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2277 Return a list of the sections in B, using sep as the delimiter.\n\
2278 If sep is not given, B is split on ASCII whitespace characters\n\
2279 (space, tab, return, newline, formfeed, vertical tab).\n\
2280 If maxsplit is given, at most maxsplit splits are done.");
2282 static PyObject *
2283 bytearray_split(PyByteArrayObject *self, PyObject *args)
2285 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2286 Py_ssize_t maxsplit = -1, count = 0;
2287 const char *s = PyByteArray_AS_STRING(self), *sub;
2288 PyObject *list, *str, *subobj = Py_None;
2289 Py_buffer vsub;
2290 #ifdef USE_FAST
2291 Py_ssize_t pos;
2292 #endif
2294 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2295 return NULL;
2296 if (maxsplit < 0)
2297 maxsplit = PY_SSIZE_T_MAX;
2299 if (subobj == Py_None)
2300 return split_whitespace(s, len, maxsplit);
2302 if (_getbuffer(subobj, &vsub) < 0)
2303 return NULL;
2304 sub = vsub.buf;
2305 n = vsub.len;
2307 if (n == 0) {
2308 PyErr_SetString(PyExc_ValueError, "empty separator");
2309 PyBuffer_Release(&vsub);
2310 return NULL;
2312 if (n == 1) {
2313 list = split_char(s, len, sub[0], maxsplit);
2314 PyBuffer_Release(&vsub);
2315 return list;
2318 list = PyList_New(PREALLOC_SIZE(maxsplit));
2319 if (list == NULL) {
2320 PyBuffer_Release(&vsub);
2321 return NULL;
2324 #ifdef USE_FAST
2325 i = j = 0;
2326 while (maxsplit-- > 0) {
2327 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2328 if (pos < 0)
2329 break;
2330 j = i+pos;
2331 SPLIT_ADD(s, i, j);
2332 i = j + n;
2334 #else
2335 i = j = 0;
2336 while ((j+n <= len) && (maxsplit-- > 0)) {
2337 for (; j+n <= len; j++) {
2338 if (Py_STRING_MATCH(s, j, sub, n)) {
2339 SPLIT_ADD(s, i, j);
2340 i = j = j + n;
2341 break;
2345 #endif
2346 SPLIT_ADD(s, i, len);
2347 FIX_PREALLOC_SIZE(list);
2348 PyBuffer_Release(&vsub);
2349 return list;
2351 onError:
2352 Py_DECREF(list);
2353 PyBuffer_Release(&vsub);
2354 return NULL;
2357 /* stringlib's partition shares nullbytes in some cases.
2358 undo this, we don't want the nullbytes to be shared. */
2359 static PyObject *
2360 make_nullbytes_unique(PyObject *result)
2362 if (result != NULL) {
2363 int i;
2364 assert(PyTuple_Check(result));
2365 assert(PyTuple_GET_SIZE(result) == 3);
2366 for (i = 0; i < 3; i++) {
2367 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2368 PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
2369 if (new == NULL) {
2370 Py_DECREF(result);
2371 result = NULL;
2372 break;
2374 Py_DECREF(nullbytes);
2375 PyTuple_SET_ITEM(result, i, new);
2379 return result;
2382 PyDoc_STRVAR(partition__doc__,
2383 "B.partition(sep) -> (head, sep, tail)\n\
2385 Searches for the separator sep in B, and returns the part before it,\n\
2386 the separator itself, and the part after it. If the separator is not\n\
2387 found, returns B and two empty bytearray objects.");
2389 static PyObject *
2390 bytearray_partition(PyByteArrayObject *self, PyObject *sep_obj)
2392 PyObject *bytesep, *result;
2394 bytesep = PyByteArray_FromObject(sep_obj);
2395 if (! bytesep)
2396 return NULL;
2398 result = stringlib_partition(
2399 (PyObject*) self,
2400 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2401 bytesep,
2402 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2405 Py_DECREF(bytesep);
2406 return make_nullbytes_unique(result);
2409 PyDoc_STRVAR(rpartition__doc__,
2410 "B.rpartition(sep) -> (tail, sep, head)\n\
2412 Searches for the separator sep in B, starting at the end of B,\n\
2413 and returns the part before it, the separator itself, and the\n\
2414 part after it. If the separator is not found, returns two empty\n\
2415 bytearray objects and B.");
2417 static PyObject *
2418 bytearray_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2420 PyObject *bytesep, *result;
2422 bytesep = PyByteArray_FromObject(sep_obj);
2423 if (! bytesep)
2424 return NULL;
2426 result = stringlib_rpartition(
2427 (PyObject*) self,
2428 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2429 bytesep,
2430 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2433 Py_DECREF(bytesep);
2434 return make_nullbytes_unique(result);
2437 Py_LOCAL_INLINE(PyObject *)
2438 rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2440 register Py_ssize_t i, j, count=0;
2441 PyObject *str;
2442 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2444 if (list == NULL)
2445 return NULL;
2447 i = j = len - 1;
2448 while ((i >= 0) && (maxcount-- > 0)) {
2449 for (; i >= 0; i--) {
2450 if (s[i] == ch) {
2451 SPLIT_ADD(s, i + 1, j + 1);
2452 j = i = i - 1;
2453 break;
2457 if (j >= -1) {
2458 SPLIT_ADD(s, 0, j + 1);
2460 FIX_PREALLOC_SIZE(list);
2461 if (PyList_Reverse(list) < 0)
2462 goto onError;
2464 return list;
2466 onError:
2467 Py_DECREF(list);
2468 return NULL;
2471 Py_LOCAL_INLINE(PyObject *)
2472 rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2474 register Py_ssize_t i, j, count = 0;
2475 PyObject *str;
2476 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2478 if (list == NULL)
2479 return NULL;
2481 for (i = j = len - 1; i >= 0; ) {
2482 /* find a token */
2483 while (i >= 0 && Py_ISSPACE(s[i]))
2484 i--;
2485 j = i;
2486 while (i >= 0 && !Py_ISSPACE(s[i]))
2487 i--;
2488 if (j > i) {
2489 if (maxcount-- <= 0)
2490 break;
2491 SPLIT_ADD(s, i + 1, j + 1);
2492 while (i >= 0 && Py_ISSPACE(s[i]))
2493 i--;
2494 j = i;
2497 if (j >= 0) {
2498 SPLIT_ADD(s, 0, j + 1);
2500 FIX_PREALLOC_SIZE(list);
2501 if (PyList_Reverse(list) < 0)
2502 goto onError;
2504 return list;
2506 onError:
2507 Py_DECREF(list);
2508 return NULL;
2511 PyDoc_STRVAR(rsplit__doc__,
2512 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2514 Return a list of the sections in B, using sep as the delimiter,\n\
2515 starting at the end of B and working to the front.\n\
2516 If sep is not given, B is split on ASCII whitespace characters\n\
2517 (space, tab, return, newline, formfeed, vertical tab).\n\
2518 If maxsplit is given, at most maxsplit splits are done.");
/* Implementation of B.rsplit([sep[, maxsplit]]).
 *
 * Parses an optional separator object and an optional maxsplit count.
 * A negative maxsplit (the default is -1) is replaced by PY_SSIZE_T_MAX.
 * With no separator (None) the work is delegated to rsplit_whitespace();
 * a one-byte separator is delegated to rsplit_char(); an empty separator
 * raises ValueError.  Otherwise the buffer is scanned from the end for
 * occurrences of the separator.
 *
 * Returns a new list, or NULL with an exception set.
 *
 * NOTE(review): the locals `str' and `count' and the `onError' label are
 * not referenced directly below; presumably they are used by the
 * SPLIT_ADD / FIX_PREALLOC_SIZE macros defined earlier in the file --
 * confirm before renaming anything here.
 */
static PyObject *
bytearray_rsplit(PyByteArrayObject *self, PyObject *args)
{
    Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
    Py_ssize_t maxsplit = -1, count = 0;
    const char *s = PyByteArray_AS_STRING(self), *sub;
    PyObject *list, *str, *subobj = Py_None;
    Py_buffer vsub;

    if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
        return NULL;
    if (maxsplit < 0)
        maxsplit = PY_SSIZE_T_MAX;

    /* No separator given: split on whitespace. */
    if (subobj == Py_None)
        return rsplit_whitespace(s, len, maxsplit);

    /* From here on vsub is held and must be released on every exit path. */
    if (_getbuffer(subobj, &vsub) < 0)
        return NULL;
    sub = vsub.buf;
    n = vsub.len;

    if (n == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        PyBuffer_Release(&vsub);
        return NULL;
    }
    else if (n == 1) {
        /* Single-byte separator: use the dedicated helper. */
        list = rsplit_char(s, len, sub[0], maxsplit);
        PyBuffer_Release(&vsub);
        return list;
    }

    list = PyList_New(PREALLOC_SIZE(maxsplit));
    if (list == NULL) {
        PyBuffer_Release(&vsub);
        return NULL;
    }

    /* j is the (exclusive) end of the section still to be emitted;
       i is the candidate start of a separator match, moving leftwards. */
    j = len;
    i = j - n;

    while ( (i >= 0) && (maxsplit-- > 0) ) {
        for (; i>=0; i--) {
            if (Py_STRING_MATCH(s, i, sub, n)) {
                /* Emit the section to the right of this separator. */
                SPLIT_ADD(s, i + n, j);
                j = i;
                i -= n;
                break;
            }
        }
    }
    /* Emit whatever remains at the front. */
    SPLIT_ADD(s, 0, j);
    FIX_PREALLOC_SIZE(list);
    /* Sections were collected back-to-front; restore left-to-right order. */
    if (PyList_Reverse(list) < 0)
        goto onError;
    PyBuffer_Release(&vsub);
    return list;

  onError:
    Py_DECREF(list);
    PyBuffer_Release(&vsub);
    return NULL;
}
2585 PyDoc_STRVAR(reverse__doc__,
2586 "B.reverse() -> None\n\
2588 Reverse the order of the values in B in place.");
2589 static PyObject *
2590 bytearray_reverse(PyByteArrayObject *self, PyObject *unused)
2592 char swap, *head, *tail;
2593 Py_ssize_t i, j, n = Py_SIZE(self);
2595 j = n / 2;
2596 head = self->ob_bytes;
2597 tail = head + n - 1;
2598 for (i = 0; i < j; i++) {
2599 swap = *head;
2600 *head++ = *tail;
2601 *tail-- = swap;
2604 Py_RETURN_NONE;
2607 PyDoc_STRVAR(insert__doc__,
2608 "B.insert(index, int) -> None\n\
2610 Insert a single item into the bytearray before the given index.");
2611 static PyObject *
2612 bytearray_insert(PyByteArrayObject *self, PyObject *args)
2614 PyObject *value;
2615 int ival;
2616 Py_ssize_t where, n = Py_SIZE(self);
2618 if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
2619 return NULL;
2621 if (n == PY_SSIZE_T_MAX) {
2622 PyErr_SetString(PyExc_OverflowError,
2623 "cannot add more objects to bytearray");
2624 return NULL;
2626 if (!_getbytevalue(value, &ival))
2627 return NULL;
2628 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2629 return NULL;
2631 if (where < 0) {
2632 where += n;
2633 if (where < 0)
2634 where = 0;
2636 if (where > n)
2637 where = n;
2638 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2639 self->ob_bytes[where] = ival;
2641 Py_RETURN_NONE;
2644 PyDoc_STRVAR(append__doc__,
2645 "B.append(int) -> None\n\
2647 Append a single item to the end of B.");
2648 static PyObject *
2649 bytearray_append(PyByteArrayObject *self, PyObject *arg)
2651 int value;
2652 Py_ssize_t n = Py_SIZE(self);
2654 if (! _getbytevalue(arg, &value))
2655 return NULL;
2656 if (n == PY_SSIZE_T_MAX) {
2657 PyErr_SetString(PyExc_OverflowError,
2658 "cannot add more objects to bytearray");
2659 return NULL;
2661 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2662 return NULL;
2664 self->ob_bytes[n] = value;
2666 Py_RETURN_NONE;
2669 PyDoc_STRVAR(extend__doc__,
2670 "B.extend(iterable int) -> None\n\
2672 Append all the elements from the iterator or sequence to the\n\
2673 end of B.");
2674 static PyObject *
2675 bytearray_extend(PyByteArrayObject *self, PyObject *arg)
2677 PyObject *it, *item, *bytearray_obj;
2678 Py_ssize_t buf_size = 0, len = 0;
2679 int value;
2680 char *buf;
2682 /* bytearray_setslice code only accepts something supporting PEP 3118. */
2683 if (PyObject_CheckBuffer(arg)) {
2684 if (bytearray_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2685 return NULL;
2687 Py_RETURN_NONE;
2690 it = PyObject_GetIter(arg);
2691 if (it == NULL)
2692 return NULL;
2694 /* Try to determine the length of the argument. 32 is abitrary. */
2695 buf_size = _PyObject_LengthHint(arg, 32);
2696 if (buf_size == -1) {
2697 Py_DECREF(it);
2698 return NULL;
2701 bytearray_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2702 if (bytearray_obj == NULL)
2703 return NULL;
2704 buf = PyByteArray_AS_STRING(bytearray_obj);
2706 while ((item = PyIter_Next(it)) != NULL) {
2707 if (! _getbytevalue(item, &value)) {
2708 Py_DECREF(item);
2709 Py_DECREF(it);
2710 Py_DECREF(bytearray_obj);
2711 return NULL;
2713 buf[len++] = value;
2714 Py_DECREF(item);
2716 if (len >= buf_size) {
2717 buf_size = len + (len >> 1) + 1;
2718 if (PyByteArray_Resize((PyObject *)bytearray_obj, buf_size) < 0) {
2719 Py_DECREF(it);
2720 Py_DECREF(bytearray_obj);
2721 return NULL;
2723 /* Recompute the `buf' pointer, since the resizing operation may
2724 have invalidated it. */
2725 buf = PyByteArray_AS_STRING(bytearray_obj);
2728 Py_DECREF(it);
2730 /* Resize down to exact size. */
2731 if (PyByteArray_Resize((PyObject *)bytearray_obj, len) < 0) {
2732 Py_DECREF(bytearray_obj);
2733 return NULL;
2736 if (bytearray_setslice(self, Py_SIZE(self), Py_SIZE(self), bytearray_obj) == -1)
2737 return NULL;
2738 Py_DECREF(bytearray_obj);
2740 Py_RETURN_NONE;
2743 PyDoc_STRVAR(pop__doc__,
2744 "B.pop([index]) -> int\n\
2746 Remove and return a single item from B. If no index\n\
2747 argument is given, will pop the last value.");
2748 static PyObject *
2749 bytearray_pop(PyByteArrayObject *self, PyObject *args)
2751 int value;
2752 Py_ssize_t where = -1, n = Py_SIZE(self);
2754 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2755 return NULL;
2757 if (n == 0) {
2758 PyErr_SetString(PyExc_OverflowError,
2759 "cannot pop an empty bytearray");
2760 return NULL;
2762 if (where < 0)
2763 where += Py_SIZE(self);
2764 if (where < 0 || where >= Py_SIZE(self)) {
2765 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2766 return NULL;
2768 if (!_canresize(self))
2769 return NULL;
2771 value = self->ob_bytes[where];
2772 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2773 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2774 return NULL;
2776 return PyInt_FromLong((unsigned char)value);
2779 PyDoc_STRVAR(remove__doc__,
2780 "B.remove(int) -> None\n\
2782 Remove the first occurance of a value in B.");
2783 static PyObject *
2784 bytearray_remove(PyByteArrayObject *self, PyObject *arg)
2786 int value;
2787 Py_ssize_t where, n = Py_SIZE(self);
2789 if (! _getbytevalue(arg, &value))
2790 return NULL;
2792 for (where = 0; where < n; where++) {
2793 if (self->ob_bytes[where] == value)
2794 break;
2796 if (where == n) {
2797 PyErr_SetString(PyExc_ValueError, "value not found in bytearray");
2798 return NULL;
2800 if (!_canresize(self))
2801 return NULL;
2803 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2804 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2805 return NULL;
2807 Py_RETURN_NONE;
2810 /* XXX These two helpers could be optimized if argsize == 1 */
2812 static Py_ssize_t
2813 lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2814 void *argptr, Py_ssize_t argsize)
2816 Py_ssize_t i = 0;
2817 while (i < mysize && memchr(argptr, myptr[i], argsize))
2818 i++;
2819 return i;
2822 static Py_ssize_t
2823 rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2824 void *argptr, Py_ssize_t argsize)
2826 Py_ssize_t i = mysize - 1;
2827 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2828 i--;
2829 return i + 1;
2832 PyDoc_STRVAR(strip__doc__,
2833 "B.strip([bytes]) -> bytearray\n\
2835 Strip leading and trailing bytes contained in the argument.\n\
2836 If the argument is omitted, strip ASCII whitespace.");
2837 static PyObject *
2838 bytearray_strip(PyByteArrayObject *self, PyObject *args)
2840 Py_ssize_t left, right, mysize, argsize;
2841 void *myptr, *argptr;
2842 PyObject *arg = Py_None;
2843 Py_buffer varg;
2844 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2845 return NULL;
2846 if (arg == Py_None) {
2847 argptr = "\t\n\r\f\v ";
2848 argsize = 6;
2850 else {
2851 if (_getbuffer(arg, &varg) < 0)
2852 return NULL;
2853 argptr = varg.buf;
2854 argsize = varg.len;
2856 myptr = self->ob_bytes;
2857 mysize = Py_SIZE(self);
2858 left = lstrip_helper(myptr, mysize, argptr, argsize);
2859 if (left == mysize)
2860 right = left;
2861 else
2862 right = rstrip_helper(myptr, mysize, argptr, argsize);
2863 if (arg != Py_None)
2864 PyBuffer_Release(&varg);
2865 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2868 PyDoc_STRVAR(lstrip__doc__,
2869 "B.lstrip([bytes]) -> bytearray\n\
2871 Strip leading bytes contained in the argument.\n\
2872 If the argument is omitted, strip leading ASCII whitespace.");
2873 static PyObject *
2874 bytearray_lstrip(PyByteArrayObject *self, PyObject *args)
2876 Py_ssize_t left, right, mysize, argsize;
2877 void *myptr, *argptr;
2878 PyObject *arg = Py_None;
2879 Py_buffer varg;
2880 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2881 return NULL;
2882 if (arg == Py_None) {
2883 argptr = "\t\n\r\f\v ";
2884 argsize = 6;
2886 else {
2887 if (_getbuffer(arg, &varg) < 0)
2888 return NULL;
2889 argptr = varg.buf;
2890 argsize = varg.len;
2892 myptr = self->ob_bytes;
2893 mysize = Py_SIZE(self);
2894 left = lstrip_helper(myptr, mysize, argptr, argsize);
2895 right = mysize;
2896 if (arg != Py_None)
2897 PyBuffer_Release(&varg);
2898 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2901 PyDoc_STRVAR(rstrip__doc__,
2902 "B.rstrip([bytes]) -> bytearray\n\
2904 Strip trailing bytes contained in the argument.\n\
2905 If the argument is omitted, strip trailing ASCII whitespace.");
2906 static PyObject *
2907 bytearray_rstrip(PyByteArrayObject *self, PyObject *args)
2909 Py_ssize_t left, right, mysize, argsize;
2910 void *myptr, *argptr;
2911 PyObject *arg = Py_None;
2912 Py_buffer varg;
2913 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2914 return NULL;
2915 if (arg == Py_None) {
2916 argptr = "\t\n\r\f\v ";
2917 argsize = 6;
2919 else {
2920 if (_getbuffer(arg, &varg) < 0)
2921 return NULL;
2922 argptr = varg.buf;
2923 argsize = varg.len;
2925 myptr = self->ob_bytes;
2926 mysize = Py_SIZE(self);
2927 left = 0;
2928 right = rstrip_helper(myptr, mysize, argptr, argsize);
2929 if (arg != Py_None)
2930 PyBuffer_Release(&varg);
2931 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2934 PyDoc_STRVAR(decode_doc,
2935 "B.decode([encoding[, errors]]) -> unicode object.\n\
2937 Decodes B using the codec registered for encoding. encoding defaults\n\
2938 to the default encoding. errors may be given to set a different error\n\
2939 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2940 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2941 as well as any other name registered with codecs.register_error that is\n\
2942 able to handle UnicodeDecodeErrors.");
2944 static PyObject *
2945 bytearray_decode(PyObject *self, PyObject *args, PyObject *kwargs)
2947 const char *encoding = NULL;
2948 const char *errors = NULL;
2949 static char *kwlist[] = {"encoding", "errors", 0};
2951 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2952 return NULL;
2953 if (encoding == NULL) {
2954 #ifdef Py_USING_UNICODE
2955 encoding = PyUnicode_GetDefaultEncoding();
2956 #else
2957 PyErr_SetString(PyExc_ValueError, "no encoding specified");
2958 return NULL;
2959 #endif
2961 return PyCodec_Decode(self, encoding, errors);
2964 PyDoc_STRVAR(alloc_doc,
2965 "B.__alloc__() -> int\n\
2967 Returns the number of bytes actually allocated.");
2969 static PyObject *
2970 bytearray_alloc(PyByteArrayObject *self)
2972 return PyInt_FromSsize_t(self->ob_alloc);
2975 PyDoc_STRVAR(join_doc,
2976 "B.join(iterable_of_bytes) -> bytes\n\
2978 Concatenates any number of bytearray objects, with B in between each pair.");
2980 static PyObject *
2981 bytearray_join(PyByteArrayObject *self, PyObject *it)
2983 PyObject *seq;
2984 Py_ssize_t mysize = Py_SIZE(self);
2985 Py_ssize_t i;
2986 Py_ssize_t n;
2987 PyObject **items;
2988 Py_ssize_t totalsize = 0;
2989 PyObject *result;
2990 char *dest;
2992 seq = PySequence_Fast(it, "can only join an iterable");
2993 if (seq == NULL)
2994 return NULL;
2995 n = PySequence_Fast_GET_SIZE(seq);
2996 items = PySequence_Fast_ITEMS(seq);
2998 /* Compute the total size, and check that they are all bytes */
2999 /* XXX Shouldn't we use _getbuffer() on these items instead? */
3000 for (i = 0; i < n; i++) {
3001 PyObject *obj = items[i];
3002 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
3003 PyErr_Format(PyExc_TypeError,
3004 "can only join an iterable of bytes "
3005 "(item %ld has type '%.100s')",
3006 /* XXX %ld isn't right on Win64 */
3007 (long)i, Py_TYPE(obj)->tp_name);
3008 goto error;
3010 if (i > 0)
3011 totalsize += mysize;
3012 totalsize += Py_SIZE(obj);
3013 if (totalsize < 0) {
3014 PyErr_NoMemory();
3015 goto error;
3019 /* Allocate the result, and copy the bytes */
3020 result = PyByteArray_FromStringAndSize(NULL, totalsize);
3021 if (result == NULL)
3022 goto error;
3023 dest = PyByteArray_AS_STRING(result);
3024 for (i = 0; i < n; i++) {
3025 PyObject *obj = items[i];
3026 Py_ssize_t size = Py_SIZE(obj);
3027 char *buf;
3028 if (PyByteArray_Check(obj))
3029 buf = PyByteArray_AS_STRING(obj);
3030 else
3031 buf = PyBytes_AS_STRING(obj);
3032 if (i) {
3033 memcpy(dest, self->ob_bytes, mysize);
3034 dest += mysize;
3036 memcpy(dest, buf, size);
3037 dest += size;
3040 /* Done */
3041 Py_DECREF(seq);
3042 return result;
3044 /* Error handling */
3045 error:
3046 Py_DECREF(seq);
3047 return NULL;
3050 PyDoc_STRVAR(fromhex_doc,
3051 "bytearray.fromhex(string) -> bytearray\n\
3053 Create a bytearray object from a string of hexadecimal numbers.\n\
3054 Spaces between two numbers are accepted.\n\
3055 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
/* Convert one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F')
   to its numeric value 0..15.  Returns -1 for any other character. */
static int
hex_digit_to_int(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
3071 static PyObject *
3072 bytearray_fromhex(PyObject *cls, PyObject *args)
3074 PyObject *newbytes;
3075 char *buf;
3076 char *hex;
3077 Py_ssize_t hexlen, byteslen, i, j;
3078 int top, bot;
3080 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &hexlen))
3081 return NULL;
3082 byteslen = hexlen/2; /* This overestimates if there are spaces */
3083 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
3084 if (!newbytes)
3085 return NULL;
3086 buf = PyByteArray_AS_STRING(newbytes);
3087 for (i = j = 0; i < hexlen; i += 2) {
3088 /* skip over spaces in the input */
3089 while (hex[i] == ' ')
3090 i++;
3091 if (i >= hexlen)
3092 break;
3093 top = hex_digit_to_int(hex[i]);
3094 bot = hex_digit_to_int(hex[i+1]);
3095 if (top == -1 || bot == -1) {
3096 PyErr_Format(PyExc_ValueError,
3097 "non-hexadecimal number found in "
3098 "fromhex() arg at position %zd", i);
3099 goto error;
3101 buf[j++] = (top << 4) + bot;
3103 if (PyByteArray_Resize(newbytes, j) < 0)
3104 goto error;
3105 return newbytes;
3107 error:
3108 Py_DECREF(newbytes);
3109 return NULL;
3112 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3114 static PyObject *
3115 bytearray_reduce(PyByteArrayObject *self)
3117 PyObject *latin1, *dict;
3118 if (self->ob_bytes)
3119 #ifdef Py_USING_UNICODE
3120 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3121 Py_SIZE(self), NULL);
3122 #else
3123 latin1 = PyString_FromStringAndSize(self->ob_bytes, Py_SIZE(self))
3124 #endif
3125 else
3126 #ifdef Py_USING_UNICODE
3127 latin1 = PyUnicode_FromString("");
3128 #else
3129 latin1 = PyString_FromString("");
3130 #endif
3132 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3133 if (dict == NULL) {
3134 PyErr_Clear();
3135 dict = Py_None;
3136 Py_INCREF(dict);
3139 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3142 PyDoc_STRVAR(sizeof_doc,
3143 "B.__sizeof__() -> int\n\
3145 Returns the size of B in memory, in bytes");
3146 static PyObject *
3147 bytearray_sizeof(PyByteArrayObject *self)
3149 Py_ssize_t res;
3151 res = sizeof(PyByteArrayObject) + self->ob_alloc * sizeof(char);
3152 return PyInt_FromSsize_t(res);
/* Sequence protocol slots for bytearray.  The two slice slots are left
   empty (0); NOTE(review): slicing is presumably served through the
   mapping protocol's subscript functions instead -- confirm. */
static PySequenceMethods bytearray_as_sequence = {
    (lenfunc)bytearray_length,              /* sq_length */
    (binaryfunc)PyByteArray_Concat,         /* sq_concat */
    (ssizeargfunc)bytearray_repeat,         /* sq_repeat */
    (ssizeargfunc)bytearray_getitem,        /* sq_item */
    0,                                      /* sq_slice */
    (ssizeobjargproc)bytearray_setitem,     /* sq_ass_item */
    0,                                      /* sq_ass_slice */
    (objobjproc)bytearray_contains,         /* sq_contains */
    (binaryfunc)bytearray_iconcat,          /* sq_inplace_concat */
    (ssizeargfunc)bytearray_irepeat,        /* sq_inplace_repeat */
};
/* Mapping protocol slots for bytearray: length, item/slice read and
   item/slice assignment, in that order. */
static PyMappingMethods bytearray_as_mapping = {
    (lenfunc)bytearray_length,
    (binaryfunc)bytearray_subscript,
    (objobjargproc)bytearray_ass_subscript,
};
/* Buffer protocol slots for bytearray: the four old-style buffer
   functions (read, write, segment count, char buffer) followed by the
   new-style getbuffer/releasebuffer pair. */
static PyBufferProcs bytearray_as_buffer = {
    (readbufferproc)bytearray_buffer_getreadbuf,
    (writebufferproc)bytearray_buffer_getwritebuf,
    (segcountproc)bytearray_buffer_getsegcount,
    (charbufferproc)bytearray_buffer_getcharbuf,
    (getbufferproc)bytearray_getbuffer,
    (releasebufferproc)bytearray_releasebuffer,
};
/* Method table for bytearray.  Entries are kept in alphabetical order
   (after the three dunder methods).  Methods named stringlib_* are not
   defined in this part of the file; the table is terminated by the
   {NULL} sentinel. */
static PyMethodDef
bytearray_methods[] = {
    {"__alloc__", (PyCFunction)bytearray_alloc, METH_NOARGS, alloc_doc},
    {"__reduce__", (PyCFunction)bytearray_reduce, METH_NOARGS, reduce_doc},
    {"__sizeof__", (PyCFunction)bytearray_sizeof, METH_NOARGS, sizeof_doc},
    {"append", (PyCFunction)bytearray_append, METH_O, append__doc__},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytearray_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)bytearray_decode, METH_VARARGS | METH_KEYWORDS, decode_doc},
    {"endswith", (PyCFunction)bytearray_endswith, METH_VARARGS, endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"extend", (PyCFunction)bytearray_extend, METH_O, extend__doc__},
    {"find", (PyCFunction)bytearray_find, METH_VARARGS, find__doc__},
    /* fromhex is a class method: it receives the type, not an instance. */
    {"fromhex", (PyCFunction)bytearray_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)bytearray_index, METH_VARARGS, index__doc__},
    {"insert", (PyCFunction)bytearray_insert, METH_VARARGS, insert__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)bytearray_join, METH_O, join_doc},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)bytearray_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)bytearray_partition, METH_O, partition__doc__},
    {"pop", (PyCFunction)bytearray_pop, METH_VARARGS, pop__doc__},
    {"remove", (PyCFunction)bytearray_remove, METH_O, remove__doc__},
    {"replace", (PyCFunction)bytearray_replace, METH_VARARGS, replace__doc__},
    {"reverse", (PyCFunction)bytearray_reverse, METH_NOARGS, reverse__doc__},
    {"rfind", (PyCFunction)bytearray_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)bytearray_rpartition, METH_O, rpartition__doc__},
    {"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)bytearray_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)bytearray_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS ,
     startswith__doc__},
    {"strip", (PyCFunction)bytearray_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)bytearray_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {NULL}
};
3248 PyDoc_STRVAR(bytearray_doc,
3249 "bytearray(iterable_of_ints) -> bytearray.\n\
3250 bytearray(string, encoding[, errors]) -> bytearray.\n\
3251 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3252 bytearray(memory_view) -> bytearray.\n\
3254 Construct an mutable bytearray object from:\n\
3255 - an iterable yielding integers in range(256)\n\
3256 - a text string encoded using the specified encoding\n\
3257 - a bytes or a bytearray object\n\
3258 - any object implementing the buffer API.\n\
3260 bytearray(int) -> bytearray.\n\
3262 Construct a zero-initialized bytearray of the given length.");
3265 static PyObject *bytearray_iter(PyObject *seq);
/* Type object for bytearray.  tp_hash is left empty, instances are
   created through the generic alloc/new functions and initialized by
   bytearray_init, and the type may be subclassed
   (Py_TPFLAGS_BASETYPE). */
PyTypeObject PyByteArray_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray",                        /* tp_name */
    sizeof(PyByteArrayObject),          /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)bytearray_dealloc,      /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)bytearray_repr,           /* tp_repr */
    0,                                  /* tp_as_number */
    &bytearray_as_sequence,             /* tp_as_sequence */
    &bytearray_as_mapping,              /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    bytearray_str,                      /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    &bytearray_as_buffer,               /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    Py_TPFLAGS_HAVE_NEWBUFFER,          /* tp_flags */
    bytearray_doc,                      /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    (richcmpfunc)bytearray_richcompare, /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    bytearray_iter,                     /* tp_iter */
    0,                                  /* tp_iternext */
    bytearray_methods,                  /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)bytearray_init,           /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_Del,                       /* tp_free */
};
3310 /*********************** Bytes Iterator ****************************/
/* State of an iterator over a bytearray. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;        /* Index of the next byte to return */
    PyByteArrayObject *it_seq;  /* Set to NULL when iterator is exhausted */
} bytesiterobject;
/* Destructor for bytearray iterators: stop GC tracking, drop the
   reference to the underlying bytearray (which may already be NULL if
   the iterator was exhausted), then free the iterator itself. */
static void
bytearrayiter_dealloc(bytesiterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
/* GC traversal for bytearray iterators: the only object reference held
   is the underlying bytearray.  The parameters must keep the names
   `visit' and `arg', which the Py_VISIT macro refers to. */
static int
bytearrayiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3333 static PyObject *
3334 bytearrayiter_next(bytesiterobject *it)
3336 PyByteArrayObject *seq;
3337 PyObject *item;
3339 assert(it != NULL);
3340 seq = it->it_seq;
3341 if (seq == NULL)
3342 return NULL;
3343 assert(PyByteArray_Check(seq));
3345 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
3346 item = PyInt_FromLong(
3347 (unsigned char)seq->ob_bytes[it->it_index]);
3348 if (item != NULL)
3349 ++it->it_index;
3350 return item;
3353 Py_DECREF(seq);
3354 it->it_seq = NULL;
3355 return NULL;
3358 static PyObject *
3359 bytesarrayiter_length_hint(bytesiterobject *it)
3361 Py_ssize_t len = 0;
3362 if (it->it_seq)
3363 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3364 return PyInt_FromSsize_t(len);
3367 PyDoc_STRVAR(length_hint_doc,
3368 "Private method returning an estimate of len(list(it)).");
/* Method table for bytearray iterators; only __length_hint__ is
   exposed. */
static PyMethodDef bytearrayiter_methods[] = {
    {"__length_hint__", (PyCFunction)bytesarrayiter_length_hint, METH_NOARGS,
     length_hint_doc},
    {NULL, NULL} /* sentinel */
};
/* Type object for bytearray iterators.  Instances take part in cyclic
   garbage collection (Py_TPFLAGS_HAVE_GC) and iterate over themselves
   (PyObject_SelfIter). */
PyTypeObject PyByteArrayIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray_iterator",              /* tp_name */
    sizeof(bytesiterobject),           /* tp_basicsize */
    0,                                 /* tp_itemsize */
    /* methods */
    (destructor)bytearrayiter_dealloc, /* tp_dealloc */
    0,                                 /* tp_print */
    0,                                 /* tp_getattr */
    0,                                 /* tp_setattr */
    0,                                 /* tp_compare */
    0,                                 /* tp_repr */
    0,                                 /* tp_as_number */
    0,                                 /* tp_as_sequence */
    0,                                 /* tp_as_mapping */
    0,                                 /* tp_hash */
    0,                                 /* tp_call */
    0,                                 /* tp_str */
    PyObject_GenericGetAttr,           /* tp_getattro */
    0,                                 /* tp_setattro */
    0,                                 /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                 /* tp_doc */
    (traverseproc)bytearrayiter_traverse,  /* tp_traverse */
    0,                                 /* tp_clear */
    0,                                 /* tp_richcompare */
    0,                                 /* tp_weaklistoffset */
    PyObject_SelfIter,                 /* tp_iter */
    (iternextfunc)bytearrayiter_next,  /* tp_iternext */
    bytearrayiter_methods,             /* tp_methods */
    0,
};
3409 static PyObject *
3410 bytearray_iter(PyObject *seq)
3412 bytesiterobject *it;
3414 if (!PyByteArray_Check(seq)) {
3415 PyErr_BadInternalCall();
3416 return NULL;
3418 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3419 if (it == NULL)
3420 return NULL;
3421 it->it_index = 0;
3422 Py_INCREF(seq);
3423 it->it_seq = (PyByteArrayObject *)seq;
3424 _PyObject_GC_TRACK(it);
3425 return (PyObject *)it;