Eric Smith was missing from the issue 7117 whatsnew attribution.
[python.git] / Objects / bytearrayobject.c
blob6157c832a1f361ae70c6d9fb02592e6e71316d7e
1 /* PyBytes (bytearray) implementation */
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
6 #include "bytes_methods.h"
8 static PyByteArrayObject *nullbytes = NULL;
10 void
11 PyByteArray_Fini(void)
13 Py_CLEAR(nullbytes);
16 int
17 PyByteArray_Init(void)
19 nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24 nullbytes->ob_exports = 0;
25 return 1;
28 /* end nullbytes support */
30 /* Helpers */
32 static int
33 _getbytevalue(PyObject* arg, int *value)
35 long face_value;
37 if (PyBytes_CheckExact(arg)) {
38 if (Py_SIZE(arg) != 1) {
39 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
40 return 0;
42 *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
43 return 1;
45 else if (PyInt_Check(arg) || PyLong_Check(arg)) {
46 face_value = PyLong_AsLong(arg);
48 else {
49 PyObject *index = PyNumber_Index(arg);
50 if (index == NULL) {
51 PyErr_Format(PyExc_TypeError,
52 "an integer or string of size 1 is required");
53 return 0;
55 face_value = PyLong_AsLong(index);
56 Py_DECREF(index);
59 if (face_value < 0 || face_value >= 256) {
60 /* this includes the OverflowError in case the long is too large */
61 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
62 return 0;
65 *value = face_value;
66 return 1;
69 static Py_ssize_t
70 bytearray_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
72 if ( index != 0 ) {
73 PyErr_SetString(PyExc_SystemError,
74 "accessing non-existent bytes segment");
75 return -1;
77 *ptr = (void *)self->ob_bytes;
78 return Py_SIZE(self);
81 static Py_ssize_t
82 bytearray_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
84 if ( index != 0 ) {
85 PyErr_SetString(PyExc_SystemError,
86 "accessing non-existent bytes segment");
87 return -1;
89 *ptr = (void *)self->ob_bytes;
90 return Py_SIZE(self);
93 static Py_ssize_t
94 bytearray_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
96 if ( lenp )
97 *lenp = Py_SIZE(self);
98 return 1;
101 static Py_ssize_t
102 bytearray_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
104 if ( index != 0 ) {
105 PyErr_SetString(PyExc_SystemError,
106 "accessing non-existent bytes segment");
107 return -1;
109 *ptr = self->ob_bytes;
110 return Py_SIZE(self);
113 static int
114 bytearray_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
116 int ret;
117 void *ptr;
118 if (view == NULL) {
119 obj->ob_exports++;
120 return 0;
122 if (obj->ob_bytes == NULL)
123 ptr = "";
124 else
125 ptr = obj->ob_bytes;
126 ret = PyBuffer_FillInfo(view, (PyObject*)obj, ptr, Py_SIZE(obj), 0, flags);
127 if (ret >= 0) {
128 obj->ob_exports++;
130 return ret;
133 static void
134 bytearray_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
136 obj->ob_exports--;
139 static Py_ssize_t
140 _getbuffer(PyObject *obj, Py_buffer *view)
142 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
144 if (buffer == NULL || buffer->bf_getbuffer == NULL)
146 PyErr_Format(PyExc_TypeError,
147 "Type %.100s doesn't support the buffer API",
148 Py_TYPE(obj)->tp_name);
149 return -1;
152 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
153 return -1;
154 return view->len;
157 static int
158 _canresize(PyByteArrayObject *self)
160 if (self->ob_exports > 0) {
161 PyErr_SetString(PyExc_BufferError,
162 "Existing exports of data: object cannot be re-sized");
163 return 0;
165 return 1;
168 /* Direct API functions */
170 PyObject *
171 PyByteArray_FromObject(PyObject *input)
173 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
174 input, NULL);
177 PyObject *
178 PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
180 PyByteArrayObject *new;
181 Py_ssize_t alloc;
183 if (size < 0) {
184 PyErr_SetString(PyExc_SystemError,
185 "Negative size passed to PyByteArray_FromStringAndSize");
186 return NULL;
189 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
190 if (new == NULL)
191 return NULL;
193 if (size == 0) {
194 new->ob_bytes = NULL;
195 alloc = 0;
197 else {
198 alloc = size + 1;
199 new->ob_bytes = PyMem_Malloc(alloc);
200 if (new->ob_bytes == NULL) {
201 Py_DECREF(new);
202 return PyErr_NoMemory();
204 if (bytes != NULL)
205 memcpy(new->ob_bytes, bytes, size);
206 new->ob_bytes[size] = '\0'; /* Trailing null byte */
208 Py_SIZE(new) = size;
209 new->ob_alloc = alloc;
210 new->ob_exports = 0;
212 return (PyObject *)new;
215 Py_ssize_t
216 PyByteArray_Size(PyObject *self)
218 assert(self != NULL);
219 assert(PyByteArray_Check(self));
221 return PyByteArray_GET_SIZE(self);
224 char *
225 PyByteArray_AsString(PyObject *self)
227 assert(self != NULL);
228 assert(PyByteArray_Check(self));
230 return PyByteArray_AS_STRING(self);
234 PyByteArray_Resize(PyObject *self, Py_ssize_t size)
236 void *sval;
237 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
239 assert(self != NULL);
240 assert(PyByteArray_Check(self));
241 assert(size >= 0);
243 if (size == Py_SIZE(self)) {
244 return 0;
246 if (!_canresize((PyByteArrayObject *)self)) {
247 return -1;
250 if (size < alloc / 2) {
251 /* Major downsize; resize down to exact size */
252 alloc = size + 1;
254 else if (size < alloc) {
255 /* Within allocated size; quick exit */
256 Py_SIZE(self) = size;
257 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
258 return 0;
260 else if (size <= alloc * 1.125) {
261 /* Moderate upsize; overallocate similar to list_resize() */
262 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
264 else {
265 /* Major upsize; resize up to exact size */
266 alloc = size + 1;
269 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
270 if (sval == NULL) {
271 PyErr_NoMemory();
272 return -1;
275 ((PyByteArrayObject *)self)->ob_bytes = sval;
276 Py_SIZE(self) = size;
277 ((PyByteArrayObject *)self)->ob_alloc = alloc;
278 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
280 return 0;
283 PyObject *
284 PyByteArray_Concat(PyObject *a, PyObject *b)
286 Py_ssize_t size;
287 Py_buffer va, vb;
288 PyByteArrayObject *result = NULL;
290 va.len = -1;
291 vb.len = -1;
292 if (_getbuffer(a, &va) < 0 ||
293 _getbuffer(b, &vb) < 0) {
294 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
295 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
296 goto done;
299 size = va.len + vb.len;
300 if (size < 0) {
301 PyErr_NoMemory();
302 goto done;
305 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
306 if (result != NULL) {
307 memcpy(result->ob_bytes, va.buf, va.len);
308 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
311 done:
312 if (va.len != -1)
313 PyBuffer_Release(&va);
314 if (vb.len != -1)
315 PyBuffer_Release(&vb);
316 return (PyObject *)result;
319 /* Functions stuffed into the type object */
321 static Py_ssize_t
322 bytearray_length(PyByteArrayObject *self)
324 return Py_SIZE(self);
327 static PyObject *
328 bytearray_iconcat(PyByteArrayObject *self, PyObject *other)
330 Py_ssize_t mysize;
331 Py_ssize_t size;
332 Py_buffer vo;
334 if (_getbuffer(other, &vo) < 0) {
335 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
336 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
337 return NULL;
340 mysize = Py_SIZE(self);
341 size = mysize + vo.len;
342 if (size < 0) {
343 PyBuffer_Release(&vo);
344 return PyErr_NoMemory();
346 if (size < self->ob_alloc) {
347 Py_SIZE(self) = size;
348 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
350 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
351 PyBuffer_Release(&vo);
352 return NULL;
354 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
355 PyBuffer_Release(&vo);
356 Py_INCREF(self);
357 return (PyObject *)self;
360 static PyObject *
361 bytearray_repeat(PyByteArrayObject *self, Py_ssize_t count)
363 PyByteArrayObject *result;
364 Py_ssize_t mysize;
365 Py_ssize_t size;
367 if (count < 0)
368 count = 0;
369 mysize = Py_SIZE(self);
370 size = mysize * count;
371 if (count != 0 && size / count != mysize)
372 return PyErr_NoMemory();
373 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
374 if (result != NULL && size != 0) {
375 if (mysize == 1)
376 memset(result->ob_bytes, self->ob_bytes[0], size);
377 else {
378 Py_ssize_t i;
379 for (i = 0; i < count; i++)
380 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
383 return (PyObject *)result;
386 static PyObject *
387 bytearray_irepeat(PyByteArrayObject *self, Py_ssize_t count)
389 Py_ssize_t mysize;
390 Py_ssize_t size;
392 if (count < 0)
393 count = 0;
394 mysize = Py_SIZE(self);
395 size = mysize * count;
396 if (count != 0 && size / count != mysize)
397 return PyErr_NoMemory();
398 if (size < self->ob_alloc) {
399 Py_SIZE(self) = size;
400 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
402 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
403 return NULL;
405 if (mysize == 1)
406 memset(self->ob_bytes, self->ob_bytes[0], size);
407 else {
408 Py_ssize_t i;
409 for (i = 1; i < count; i++)
410 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
413 Py_INCREF(self);
414 return (PyObject *)self;
417 static PyObject *
418 bytearray_getitem(PyByteArrayObject *self, Py_ssize_t i)
420 if (i < 0)
421 i += Py_SIZE(self);
422 if (i < 0 || i >= Py_SIZE(self)) {
423 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
424 return NULL;
426 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
429 static PyObject *
430 bytearray_subscript(PyByteArrayObject *self, PyObject *index)
432 if (PyIndex_Check(index)) {
433 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
435 if (i == -1 && PyErr_Occurred())
436 return NULL;
438 if (i < 0)
439 i += PyByteArray_GET_SIZE(self);
441 if (i < 0 || i >= Py_SIZE(self)) {
442 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
443 return NULL;
445 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
447 else if (PySlice_Check(index)) {
448 Py_ssize_t start, stop, step, slicelength, cur, i;
449 if (PySlice_GetIndicesEx((PySliceObject *)index,
450 PyByteArray_GET_SIZE(self),
451 &start, &stop, &step, &slicelength) < 0) {
452 return NULL;
455 if (slicelength <= 0)
456 return PyByteArray_FromStringAndSize("", 0);
457 else if (step == 1) {
458 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
459 slicelength);
461 else {
462 char *source_buf = PyByteArray_AS_STRING(self);
463 char *result_buf = (char *)PyMem_Malloc(slicelength);
464 PyObject *result;
466 if (result_buf == NULL)
467 return PyErr_NoMemory();
469 for (cur = start, i = 0; i < slicelength;
470 cur += step, i++) {
471 result_buf[i] = source_buf[cur];
473 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
474 PyMem_Free(result_buf);
475 return result;
478 else {
479 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
480 return NULL;
/* Replace or delete the simple slice self[lo:hi].
   values == NULL deletes the slice; otherwise values is read through
   _getbuffer() and its bytes replace the slice.  When values is self,
   a copy is made first and the function calls itself with the copy.
   lo and hi are clamped to 0..Py_SIZE(self) with hi >= lo.
   Returns 0 on success, -1 with an exception set on failure.
   NOTE(review): this listing omits brace-only and comment-delimiter
   lines, so the two diagrams below appear without their comment
   markers. */
484 static int
485 bytearray_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
486 PyObject *values)
488 Py_ssize_t avail, needed;
489 void *bytes;
490 Py_buffer vbytes;
491 int res = 0;
/* len == -1 marks "no buffer acquired"; tested at finish: below. */
493 vbytes.len = -1;
494 if (values == (PyObject *)self) {
495 /* Make a copy and call this function recursively */
496 int err;
497 values = PyByteArray_FromObject(values);
498 if (values == NULL)
499 return -1;
500 err = bytearray_setslice(self, lo, hi, values);
501 Py_DECREF(values);
502 return err;
504 if (values == NULL) {
505 /* del b[lo:hi] */
506 bytes = NULL;
507 needed = 0;
509 else {
510 if (_getbuffer(values, &vbytes) < 0) {
511 PyErr_Format(PyExc_TypeError,
512 "can't set bytearray slice from %.100s",
513 Py_TYPE(values)->tp_name);
514 return -1;
516 needed = vbytes.len;
517 bytes = vbytes.buf;
/* Clamp the slice bounds into the current contents. */
520 if (lo < 0)
521 lo = 0;
522 if (hi < lo)
523 hi = lo;
524 if (hi > Py_SIZE(self))
525 hi = Py_SIZE(self);
527 avail = hi - lo;
528 if (avail < 0)
529 lo = hi = avail = 0;
/* Shrinking: the tail is moved down before the resize.
   Growing: the resize comes first, then the tail is moved up. */
531 if (avail != needed) {
532 if (avail > needed) {
533 if (!_canresize(self)) {
534 res = -1;
535 goto finish;
538 0 lo hi old_size
539 | |<----avail----->|<-----tomove------>|
540 | |<-needed->|<-----tomove------>|
541 0 lo new_hi new_size
543 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
544 Py_SIZE(self) - hi);
546 /* XXX(nnorwitz): need to verify this can't overflow! */
547 if (PyByteArray_Resize((PyObject *)self,
548 Py_SIZE(self) + needed - avail) < 0) {
549 res = -1;
550 goto finish;
552 if (avail < needed) {
554 0 lo hi old_size
555 | |<-avail->|<-----tomove------>|
556 | |<----needed---->|<-----tomove------>|
557 0 lo new_hi new_size
559 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
560 Py_SIZE(self) - lo - needed);
/* Copy the replacement bytes into the gap at lo. */
564 if (needed > 0)
565 memcpy(self->ob_bytes + lo, bytes, needed);
568 finish:
/* Release the buffer only if one was acquired above. */
569 if (vbytes.len != -1)
570 PyBuffer_Release(&vbytes);
571 return res;
574 static int
575 bytearray_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
577 int ival;
579 if (i < 0)
580 i += Py_SIZE(self);
582 if (i < 0 || i >= Py_SIZE(self)) {
583 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
584 return -1;
587 if (value == NULL)
588 return bytearray_setslice(self, i, i+1, NULL);
590 if (!_getbytevalue(value, &ival))
591 return -1;
593 self->ob_bytes[i] = ival;
594 return 0;
/* Mapping assignment: self[index] = values, or del self[index] when
   values is NULL.
   - Integer index with a value: store one byte and return.
   - Integer index with values == NULL: handled as deletion of the
     one-element slice [i:i+1].
   - Slice index: values that are self or not a bytearray are first
     converted with PyByteArray_FromObject() and the function calls
     itself with the copy.  step == 1 resizes as needed; any other step
     either deletes the selected bytes (needed == 0) or requires the
     replacement to have exactly slicelen bytes.
   Returns 0 on success, -1 with an exception set on failure.
   NOTE(review): this listing omits brace-only and comment-delimiter
   lines, so the diagrams below appear without their comment markers. */
597 static int
598 bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
600 Py_ssize_t start, stop, step, slicelen, needed;
601 char *bytes;
603 if (PyIndex_Check(index)) {
604 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
606 if (i == -1 && PyErr_Occurred())
607 return -1;
609 if (i < 0)
610 i += PyByteArray_GET_SIZE(self);
612 if (i < 0 || i >= Py_SIZE(self)) {
613 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
614 return -1;
617 if (values == NULL) {
618 /* Fall through to slice assignment */
619 start = i;
620 stop = i + 1;
621 step = 1;
622 slicelen = 1;
624 else {
625 int ival;
626 if (!_getbytevalue(values, &ival))
627 return -1;
628 self->ob_bytes[i] = (char)ival;
629 return 0;
632 else if (PySlice_Check(index)) {
633 if (PySlice_GetIndicesEx((PySliceObject *)index,
634 PyByteArray_GET_SIZE(self),
635 &start, &stop, &step, &slicelen) < 0) {
636 return -1;
639 else {
640 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
641 return -1;
/* From here on start/stop/step/slicelen describe the target slice. */
644 if (values == NULL) {
645 bytes = NULL;
646 needed = 0;
648 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
649 /* Make a copy an call this function recursively */
650 int err;
651 values = PyByteArray_FromObject(values);
652 if (values == NULL)
653 return -1;
654 err = bytearray_ass_subscript(self, index, values);
655 Py_DECREF(values);
656 return err;
658 else {
659 assert(PyByteArray_Check(values));
660 bytes = ((PyByteArrayObject *)values)->ob_bytes;
661 needed = Py_SIZE(values);
663 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
664 if ((step < 0 && start < stop) ||
665 (step > 0 && start > stop))
666 stop = start;
/* Contiguous slice: shrink moves the tail before resizing, grow
   resizes first and then moves the tail. */
667 if (step == 1) {
668 if (slicelen != needed) {
669 if (!_canresize(self))
670 return -1;
671 if (slicelen > needed) {
673 0 start stop old_size
674 | |<---slicelen--->|<-----tomove------>|
675 | |<-needed->|<-----tomove------>|
676 0 lo new_hi new_size
678 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
679 Py_SIZE(self) - stop);
681 if (PyByteArray_Resize((PyObject *)self,
682 Py_SIZE(self) + needed - slicelen) < 0)
683 return -1;
684 if (slicelen < needed) {
686 0 lo hi old_size
687 | |<-avail->|<-----tomove------>|
688 | |<----needed---->|<-----tomove------>|
689 0 lo new_hi new_size
691 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
692 Py_SIZE(self) - start - needed);
696 if (needed > 0)
697 memcpy(self->ob_bytes + start, bytes, needed);
699 return 0;
701 else {
702 if (needed == 0) {
703 /* Delete slice */
704 Py_ssize_t cur, i;
706 if (!_canresize(self))
707 return -1;
/* Rewrite a negative step as the equivalent ascending slice. */
708 if (step < 0) {
709 stop = start + 1;
710 start = stop + step * (slicelen - 1) - 1;
711 step = -step;
/* For each deleted position, slide the bytes that follow it (up to the
   next deleted position or the end) down by the number of positions
   deleted so far. */
713 for (cur = start, i = 0;
714 i < slicelen; cur += step, i++) {
715 Py_ssize_t lim = step - 1;
717 if (cur + step >= PyByteArray_GET_SIZE(self))
718 lim = PyByteArray_GET_SIZE(self) - cur - 1;
720 memmove(self->ob_bytes + cur - i,
721 self->ob_bytes + cur + 1, lim);
723 /* Move the tail of the bytes, in one chunk */
724 cur = start + slicelen*step;
725 if (cur < PyByteArray_GET_SIZE(self)) {
726 memmove(self->ob_bytes + cur - slicelen,
727 self->ob_bytes + cur,
728 PyByteArray_GET_SIZE(self) - cur);
730 if (PyByteArray_Resize((PyObject *)self,
731 PyByteArray_GET_SIZE(self) - slicelen) < 0)
732 return -1;
734 return 0;
736 else {
737 /* Assign slice */
738 Py_ssize_t cur, i;
740 if (needed != slicelen) {
741 PyErr_Format(PyExc_ValueError,
742 "attempt to assign bytes of size %zd "
743 "to extended slice of size %zd",
744 needed, slicelen);
745 return -1;
/* Same length: overwrite the selected positions one by one. */
747 for (cur = start, i = 0; i < slicelen; cur += step, i++)
748 self->ob_bytes[cur] = bytes[i];
749 return 0;
754 static int
755 bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
757 static char *kwlist[] = {"source", "encoding", "errors", 0};
758 PyObject *arg = NULL;
759 const char *encoding = NULL;
760 const char *errors = NULL;
761 Py_ssize_t count;
762 PyObject *it;
763 PyObject *(*iternext)(PyObject *);
765 if (Py_SIZE(self) != 0) {
766 /* Empty previous contents (yes, do this first of all!) */
767 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
768 return -1;
771 /* Parse arguments */
772 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
773 &arg, &encoding, &errors))
774 return -1;
776 /* Make a quick exit if no first argument */
777 if (arg == NULL) {
778 if (encoding != NULL || errors != NULL) {
779 PyErr_SetString(PyExc_TypeError,
780 "encoding or errors without sequence argument");
781 return -1;
783 return 0;
786 if (PyBytes_Check(arg)) {
787 PyObject *new, *encoded;
788 if (encoding != NULL) {
789 encoded = PyCodec_Encode(arg, encoding, errors);
790 if (encoded == NULL)
791 return -1;
792 assert(PyBytes_Check(encoded));
794 else {
795 encoded = arg;
796 Py_INCREF(arg);
798 new = bytearray_iconcat(self, arg);
799 Py_DECREF(encoded);
800 if (new == NULL)
801 return -1;
802 Py_DECREF(new);
803 return 0;
806 #ifdef Py_USING_UNICODE
807 if (PyUnicode_Check(arg)) {
808 /* Encode via the codec registry */
809 PyObject *encoded, *new;
810 if (encoding == NULL) {
811 PyErr_SetString(PyExc_TypeError,
812 "unicode argument without an encoding");
813 return -1;
815 encoded = PyCodec_Encode(arg, encoding, errors);
816 if (encoded == NULL)
817 return -1;
818 assert(PyBytes_Check(encoded));
819 new = bytearray_iconcat(self, encoded);
820 Py_DECREF(encoded);
821 if (new == NULL)
822 return -1;
823 Py_DECREF(new);
824 return 0;
826 #endif
828 /* If it's not unicode, there can't be encoding or errors */
829 if (encoding != NULL || errors != NULL) {
830 PyErr_SetString(PyExc_TypeError,
831 "encoding or errors without a string argument");
832 return -1;
835 /* Is it an int? */
836 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
837 if (count == -1 && PyErr_Occurred())
838 PyErr_Clear();
839 else {
840 if (count < 0) {
841 PyErr_SetString(PyExc_ValueError, "negative count");
842 return -1;
844 if (count > 0) {
845 if (PyByteArray_Resize((PyObject *)self, count))
846 return -1;
847 memset(self->ob_bytes, 0, count);
849 return 0;
852 /* Use the buffer API */
853 if (PyObject_CheckBuffer(arg)) {
854 Py_ssize_t size;
855 Py_buffer view;
856 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
857 return -1;
858 size = view.len;
859 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
860 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
861 goto fail;
862 PyBuffer_Release(&view);
863 return 0;
864 fail:
865 PyBuffer_Release(&view);
866 return -1;
869 /* XXX Optimize this if the arguments is a list, tuple */
871 /* Get the iterator */
872 it = PyObject_GetIter(arg);
873 if (it == NULL)
874 return -1;
875 iternext = *Py_TYPE(it)->tp_iternext;
877 /* Run the iterator to exhaustion */
878 for (;;) {
879 PyObject *item;
880 int rc, value;
882 /* Get the next item */
883 item = iternext(it);
884 if (item == NULL) {
885 if (PyErr_Occurred()) {
886 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
887 goto error;
888 PyErr_Clear();
890 break;
893 /* Interpret it as an int (__index__) */
894 rc = _getbytevalue(item, &value);
895 Py_DECREF(item);
896 if (!rc)
897 goto error;
899 /* Append the byte */
900 if (Py_SIZE(self) < self->ob_alloc)
901 Py_SIZE(self)++;
902 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
903 goto error;
904 self->ob_bytes[Py_SIZE(self)-1] = value;
907 /* Clean up and return success */
908 Py_DECREF(it);
909 return 0;
911 error:
912 /* Error handling when it != NULL */
913 Py_DECREF(it);
914 return -1;
/* Build the repr string for a bytearray: the prefix "bytearray(b", a
   quote character, the escaped contents, the same quote, then ")".
   The output string is allocated at its worst-case size (four output
   characters per byte plus 14) and trimmed with _PyString_Resize() at
   the end.  Returns NULL with OverflowError set when that worst-case
   size cannot be represented, or NULL if allocation/resizing fails.
   NOTE(review): this listing omits brace-only lines. */
917 /* Mostly copied from string_repr, but without the
918 "smart quote" functionality. */
919 static PyObject *
920 bytearray_repr(PyByteArrayObject *self)
922 static const char *hexdigits = "0123456789abcdef";
923 const char *quote_prefix = "bytearray(b";
924 const char *quote_postfix = ")";
925 Py_ssize_t length = Py_SIZE(self);
926 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
927 size_t newsize = 14 + 4 * length;
928 PyObject *v;
/* Reject sizes that overflowed or exceed PY_SSIZE_T_MAX. */
929 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
930 PyErr_SetString(PyExc_OverflowError,
931 "bytearray object is too large to make repr");
932 return NULL;
934 v = PyString_FromStringAndSize(NULL, newsize);
935 if (v == NULL) {
936 return NULL;
938 else {
939 register Py_ssize_t i;
940 register char c;
941 register char *p;
942 int quote;
944 /* Figure out which quote to use; single is preferred */
945 quote = '\'';
947 char *test, *start;
948 start = PyByteArray_AS_STRING(self);
/* A double quote anywhere forces single quotes and stops the scan; a
   single quote seen before that switches to double quotes. */
949 for (test = start; test < start+length; ++test) {
950 if (*test == '"') {
951 quote = '\''; /* back to single */
952 goto decided;
954 else if (*test == '\'')
955 quote = '"';
957 decided:
/* p walks the output buffer; each write below advances it. */
961 p = PyString_AS_STRING(v);
962 while (*quote_prefix)
963 *p++ = *quote_prefix++;
964 *p++ = quote;
966 for (i = 0; i < length; i++) {
967 /* There's at least enough room for a hex escape
968 and a closing quote. */
969 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
970 c = self->ob_bytes[i];
/* Backslash-escape single quotes and backslashes, use the short forms
   for tab/newline/carriage return, and \xNN for NUL and for bytes
   below ' ' or at/above 0x7f; everything else is copied as-is. */
971 if (c == '\'' || c == '\\')
972 *p++ = '\\', *p++ = c;
973 else if (c == '\t')
974 *p++ = '\\', *p++ = 't';
975 else if (c == '\n')
976 *p++ = '\\', *p++ = 'n';
977 else if (c == '\r')
978 *p++ = '\\', *p++ = 'r';
979 else if (c == 0)
980 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
981 else if (c < ' ' || c >= 0x7f) {
982 *p++ = '\\';
983 *p++ = 'x';
984 *p++ = hexdigits[(c & 0xf0) >> 4];
985 *p++ = hexdigits[c & 0xf];
987 else
988 *p++ = c;
990 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
991 *p++ = quote;
992 while (*quote_postfix) {
993 *p++ = *quote_postfix++;
995 *p = '\0';
/* Trim the string down to the number of characters actually written. */
996 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v)))) {
997 Py_DECREF(v);
998 return NULL;
1000 return v;
1004 static PyObject *
1005 bytearray_str(PyObject *op)
1007 #if 0
1008 if (Py_BytesWarningFlag) {
1009 if (PyErr_WarnEx(PyExc_BytesWarning,
1010 "str() on a bytearray instance", 1))
1011 return NULL;
1013 return bytearray_repr((PyByteArrayObject*)op);
1014 #endif
1015 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
1018 static PyObject *
1019 bytearray_richcompare(PyObject *self, PyObject *other, int op)
1021 Py_ssize_t self_size, other_size;
1022 Py_buffer self_bytes, other_bytes;
1023 PyObject *res;
1024 Py_ssize_t minsize;
1025 int cmp;
1027 /* Bytes can be compared to anything that supports the (binary)
1028 buffer API. Except that a comparison with Unicode is always an
1029 error, even if the comparison is for equality. */
1030 #ifdef Py_USING_UNICODE
1031 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1032 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1033 if (Py_BytesWarningFlag && op == Py_EQ) {
1034 if (PyErr_WarnEx(PyExc_BytesWarning,
1035 "Comparsion between bytearray and string", 1))
1036 return NULL;
1039 Py_INCREF(Py_NotImplemented);
1040 return Py_NotImplemented;
1042 #endif
1044 self_size = _getbuffer(self, &self_bytes);
1045 if (self_size < 0) {
1046 PyErr_Clear();
1047 Py_INCREF(Py_NotImplemented);
1048 return Py_NotImplemented;
1051 other_size = _getbuffer(other, &other_bytes);
1052 if (other_size < 0) {
1053 PyErr_Clear();
1054 PyBuffer_Release(&self_bytes);
1055 Py_INCREF(Py_NotImplemented);
1056 return Py_NotImplemented;
1059 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1060 /* Shortcut: if the lengths differ, the objects differ */
1061 cmp = (op == Py_NE);
1063 else {
1064 minsize = self_size;
1065 if (other_size < minsize)
1066 minsize = other_size;
1068 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1069 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1071 if (cmp == 0) {
1072 if (self_size < other_size)
1073 cmp = -1;
1074 else if (self_size > other_size)
1075 cmp = 1;
1078 switch (op) {
1079 case Py_LT: cmp = cmp < 0; break;
1080 case Py_LE: cmp = cmp <= 0; break;
1081 case Py_EQ: cmp = cmp == 0; break;
1082 case Py_NE: cmp = cmp != 0; break;
1083 case Py_GT: cmp = cmp > 0; break;
1084 case Py_GE: cmp = cmp >= 0; break;
1088 res = cmp ? Py_True : Py_False;
1089 PyBuffer_Release(&self_bytes);
1090 PyBuffer_Release(&other_bytes);
1091 Py_INCREF(res);
1092 return res;
1095 static void
1096 bytearray_dealloc(PyByteArrayObject *self)
1098 if (self->ob_exports > 0) {
1099 PyErr_SetString(PyExc_SystemError,
1100 "deallocated bytearray object has exported buffers");
1101 PyErr_Print();
1103 if (self->ob_bytes != 0) {
1104 PyMem_Free(self->ob_bytes);
1106 Py_TYPE(self)->tp_free((PyObject *)self);
1110 /* -------------------------------------------------------------------- */
1111 /* Methods */
1113 #define STRINGLIB_CHAR char
1114 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1115 #define STRINGLIB_STR PyByteArray_AS_STRING
1116 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1117 #define STRINGLIB_EMPTY nullbytes
1118 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1119 #define STRINGLIB_MUTABLE 1
1120 #define FROM_BYTEARRAY 1
1122 #include "stringlib/fastsearch.h"
1123 #include "stringlib/count.h"
1124 #include "stringlib/find.h"
1125 #include "stringlib/partition.h"
1126 #include "stringlib/ctype.h"
1127 #include "stringlib/transmogrify.h"
1130 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1131 were copied from the old char* style string object. */
1133 Py_LOCAL_INLINE(void)
1134 _adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1136 if (*end > len)
1137 *end = len;
1138 else if (*end < 0)
1139 *end += len;
1140 if (*end < 0)
1141 *end = 0;
1142 if (*start < 0)
1143 *start += len;
1144 if (*start < 0)
1145 *start = 0;
1149 Py_LOCAL_INLINE(Py_ssize_t)
1150 bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1152 PyObject *subobj;
1153 Py_buffer subbuf;
1154 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1155 Py_ssize_t res;
1157 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1158 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1159 return -2;
1160 if (_getbuffer(subobj, &subbuf) < 0)
1161 return -2;
1162 if (dir > 0)
1163 res = stringlib_find_slice(
1164 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1165 subbuf.buf, subbuf.len, start, end);
1166 else
1167 res = stringlib_rfind_slice(
1168 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1169 subbuf.buf, subbuf.len, start, end);
1170 PyBuffer_Release(&subbuf);
1171 return res;
1174 PyDoc_STRVAR(find__doc__,
1175 "B.find(sub [,start [,end]]) -> int\n\
1177 Return the lowest index in B where subsection sub is found,\n\
1178 such that sub is contained within s[start,end]. Optional\n\
1179 arguments start and end are interpreted as in slice notation.\n\
1181 Return -1 on failure.");
1183 static PyObject *
1184 bytearray_find(PyByteArrayObject *self, PyObject *args)
1186 Py_ssize_t result = bytearray_find_internal(self, args, +1);
1187 if (result == -2)
1188 return NULL;
1189 return PyInt_FromSsize_t(result);
1192 PyDoc_STRVAR(count__doc__,
1193 "B.count(sub [,start [,end]]) -> int\n\
1195 Return the number of non-overlapping occurrences of subsection sub in\n\
1196 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1197 as in slice notation.");
1199 static PyObject *
1200 bytearray_count(PyByteArrayObject *self, PyObject *args)
1202 PyObject *sub_obj;
1203 const char *str = PyByteArray_AS_STRING(self);
1204 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1205 Py_buffer vsub;
1206 PyObject *count_obj;
1208 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1209 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1210 return NULL;
1212 if (_getbuffer(sub_obj, &vsub) < 0)
1213 return NULL;
1215 _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
1217 count_obj = PyInt_FromSsize_t(
1218 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1220 PyBuffer_Release(&vsub);
1221 return count_obj;
1225 PyDoc_STRVAR(index__doc__,
1226 "B.index(sub [,start [,end]]) -> int\n\
1228 Like B.find() but raise ValueError when the subsection is not found.");
1230 static PyObject *
1231 bytearray_index(PyByteArrayObject *self, PyObject *args)
1233 Py_ssize_t result = bytearray_find_internal(self, args, +1);
1234 if (result == -2)
1235 return NULL;
1236 if (result == -1) {
1237 PyErr_SetString(PyExc_ValueError,
1238 "subsection not found");
1239 return NULL;
1241 return PyInt_FromSsize_t(result);
1245 PyDoc_STRVAR(rfind__doc__,
1246 "B.rfind(sub [,start [,end]]) -> int\n\
1248 Return the highest index in B where subsection sub is found,\n\
1249 such that sub is contained within s[start,end]. Optional\n\
1250 arguments start and end are interpreted as in slice notation.\n\
1252 Return -1 on failure.");
1254 static PyObject *
1255 bytearray_rfind(PyByteArrayObject *self, PyObject *args)
1257 Py_ssize_t result = bytearray_find_internal(self, args, -1);
1258 if (result == -2)
1259 return NULL;
1260 return PyInt_FromSsize_t(result);
1264 PyDoc_STRVAR(rindex__doc__,
1265 "B.rindex(sub [,start [,end]]) -> int\n\
1267 Like B.rfind() but raise ValueError when the subsection is not found.");
1269 static PyObject *
1270 bytearray_rindex(PyByteArrayObject *self, PyObject *args)
1272 Py_ssize_t result = bytearray_find_internal(self, args, -1);
1273 if (result == -2)
1274 return NULL;
1275 if (result == -1) {
1276 PyErr_SetString(PyExc_ValueError,
1277 "subsection not found");
1278 return NULL;
1280 return PyInt_FromSsize_t(result);
1284 static int
1285 bytearray_contains(PyObject *self, PyObject *arg)
1287 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1288 if (ival == -1 && PyErr_Occurred()) {
1289 Py_buffer varg;
1290 int pos;
1291 PyErr_Clear();
1292 if (_getbuffer(arg, &varg) < 0)
1293 return -1;
1294 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1295 varg.buf, varg.len, 0);
1296 PyBuffer_Release(&varg);
1297 return pos >= 0;
1299 if (ival < 0 || ival >= 256) {
1300 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1301 return -1;
1304 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1308 /* Matches the end (direction >= 0) or start (direction < 0) of self
1309 * against substr, using the start and end arguments. Returns
1310 * -1 on error, 0 if not found and 1 if found.
1312 Py_LOCAL(int)
1313 _bytearray_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1314 Py_ssize_t end, int direction)
1316 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1317 const char* str;
1318 Py_buffer vsubstr;
1319 int rv = 0;
1321 str = PyByteArray_AS_STRING(self);
1323 if (_getbuffer(substr, &vsubstr) < 0)
1324 return -1;
1326 _adjust_indices(&start, &end, len);
1328 if (direction < 0) {
1329 /* startswith */
1330 if (start+vsubstr.len > len) {
1331 goto done;
1333 } else {
1334 /* endswith */
1335 if (end-start < vsubstr.len || start > len) {
1336 goto done;
1339 if (end-vsubstr.len > start)
1340 start = end - vsubstr.len;
1342 if (end-start >= vsubstr.len)
1343 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1345 done:
1346 PyBuffer_Release(&vsubstr);
1347 return rv;
1351 PyDoc_STRVAR(startswith__doc__,
1352 "B.startswith(prefix [,start [,end]]) -> bool\n\
1354 Return True if B starts with the specified prefix, False otherwise.\n\
1355 With optional start, test B beginning at that position.\n\
1356 With optional end, stop comparing B at that position.\n\
1357 prefix can also be a tuple of strings to try.");
1359 static PyObject *
1360 bytearray_startswith(PyByteArrayObject *self, PyObject *args)
1362 Py_ssize_t start = 0;
1363 Py_ssize_t end = PY_SSIZE_T_MAX;
1364 PyObject *subobj;
1365 int result;
1367 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1368 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1369 return NULL;
1370 if (PyTuple_Check(subobj)) {
1371 Py_ssize_t i;
1372 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1373 result = _bytearray_tailmatch(self,
1374 PyTuple_GET_ITEM(subobj, i),
1375 start, end, -1);
1376 if (result == -1)
1377 return NULL;
1378 else if (result) {
1379 Py_RETURN_TRUE;
1382 Py_RETURN_FALSE;
1384 result = _bytearray_tailmatch(self, subobj, start, end, -1);
1385 if (result == -1)
1386 return NULL;
1387 else
1388 return PyBool_FromLong(result);
1391 PyDoc_STRVAR(endswith__doc__,
1392 "B.endswith(suffix [,start [,end]]) -> bool\n\
1394 Return True if B ends with the specified suffix, False otherwise.\n\
1395 With optional start, test B beginning at that position.\n\
1396 With optional end, stop comparing B at that position.\n\
1397 suffix can also be a tuple of strings to try.");
1399 static PyObject *
1400 bytearray_endswith(PyByteArrayObject *self, PyObject *args)
1402 Py_ssize_t start = 0;
1403 Py_ssize_t end = PY_SSIZE_T_MAX;
1404 PyObject *subobj;
1405 int result;
1407 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1408 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1409 return NULL;
1410 if (PyTuple_Check(subobj)) {
1411 Py_ssize_t i;
1412 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1413 result = _bytearray_tailmatch(self,
1414 PyTuple_GET_ITEM(subobj, i),
1415 start, end, +1);
1416 if (result == -1)
1417 return NULL;
1418 else if (result) {
1419 Py_RETURN_TRUE;
1422 Py_RETURN_FALSE;
1424 result = _bytearray_tailmatch(self, subobj, start, end, +1);
1425 if (result == -1)
1426 return NULL;
1427 else
1428 return PyBool_FromLong(result);
1432 PyDoc_STRVAR(translate__doc__,
1433 "B.translate(table[, deletechars]) -> bytearray\n\
1435 Return a copy of B, where all characters occurring in the\n\
1436 optional argument deletechars are removed, and the remaining\n\
1437 characters have been mapped through the given translation\n\
1438 table, which must be a bytes object of length 256.");
1440 static PyObject *
1441 bytearray_translate(PyByteArrayObject *self, PyObject *args)
1443 register char *input, *output;
1444 register const char *table;
1445 register Py_ssize_t i, c;
1446 PyObject *input_obj = (PyObject*)self;
1447 const char *output_start;
1448 Py_ssize_t inlen;
1449 PyObject *result = NULL;
1450 int trans_table[256];
1451 PyObject *tableobj = NULL, *delobj = NULL;
1452 Py_buffer vtable, vdel;
1454 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1455 &tableobj, &delobj))
1456 return NULL;
1458 if (tableobj == Py_None) {
1459 table = NULL;
1460 tableobj = NULL;
1461 } else if (_getbuffer(tableobj, &vtable) < 0) {
1462 return NULL;
1463 } else {
1464 if (vtable.len != 256) {
1465 PyErr_SetString(PyExc_ValueError,
1466 "translation table must be 256 characters long");
1467 PyBuffer_Release(&vtable);
1468 return NULL;
1470 table = (const char*)vtable.buf;
1473 if (delobj != NULL) {
1474 if (_getbuffer(delobj, &vdel) < 0) {
1475 if (tableobj != NULL)
1476 PyBuffer_Release(&vtable);
1477 return NULL;
1480 else {
1481 vdel.buf = NULL;
1482 vdel.len = 0;
1485 inlen = PyByteArray_GET_SIZE(input_obj);
1486 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1487 if (result == NULL)
1488 goto done;
1489 output_start = output = PyByteArray_AsString(result);
1490 input = PyByteArray_AS_STRING(input_obj);
1492 if (vdel.len == 0 && table != NULL) {
1493 /* If no deletions are required, use faster code */
1494 for (i = inlen; --i >= 0; ) {
1495 c = Py_CHARMASK(*input++);
1496 *output++ = table[c];
1498 goto done;
1501 if (table == NULL) {
1502 for (i = 0; i < 256; i++)
1503 trans_table[i] = Py_CHARMASK(i);
1504 } else {
1505 for (i = 0; i < 256; i++)
1506 trans_table[i] = Py_CHARMASK(table[i]);
1509 for (i = 0; i < vdel.len; i++)
1510 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1512 for (i = inlen; --i >= 0; ) {
1513 c = Py_CHARMASK(*input++);
1514 if (trans_table[c] != -1)
1515 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1516 continue;
1518 /* Fix the size of the resulting string */
1519 if (inlen > 0)
1520 PyByteArray_Resize(result, output - output_start);
1522 done:
1523 if (tableobj != NULL)
1524 PyBuffer_Release(&vtable);
1525 if (delobj != NULL)
1526 PyBuffer_Release(&vdel);
1527 return result;
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate byte c in the first target_len bytes of target; evaluates to a
   char * to the first occurrence, or NULL when there is none. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when the length bytes of target starting at offset equal pattern.
   The first and last bytes are compared before falling back to memcmp.
   Don't call if length < 2: the memcmp size would be negative.
   Arguments are evaluated more than once, so they must be free of side
   effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                  \
  ((target)[(offset)] == (pattern)[0] &&                                  \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&              \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1546 /* Bytes ops must return a string, create a copy */
1547 Py_LOCAL(PyByteArrayObject *)
1548 return_self(PyByteArrayObject *self)
1550 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1551 PyByteArray_AS_STRING(self),
1552 PyByteArray_GET_SIZE(self));
1555 Py_LOCAL_INLINE(Py_ssize_t)
1556 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1558 Py_ssize_t count=0;
1559 const char *start=target;
1560 const char *end=target+target_len;
1562 while ( (start=findchar(start, end-start, c)) != NULL ) {
1563 count++;
1564 if (count >= maxcount)
1565 break;
1566 start += 1;
1568 return count;
1571 Py_LOCAL(Py_ssize_t)
1572 findstring(const char *target, Py_ssize_t target_len,
1573 const char *pattern, Py_ssize_t pattern_len,
1574 Py_ssize_t start,
1575 Py_ssize_t end,
1576 int direction)
1578 if (start < 0) {
1579 start += target_len;
1580 if (start < 0)
1581 start = 0;
1583 if (end > target_len) {
1584 end = target_len;
1585 } else if (end < 0) {
1586 end += target_len;
1587 if (end < 0)
1588 end = 0;
1591 /* zero-length substrings always match at the first attempt */
1592 if (pattern_len == 0)
1593 return (direction > 0) ? start : end;
1595 end -= pattern_len;
1597 if (direction < 0) {
1598 for (; end >= start; end--)
1599 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1600 return end;
1601 } else {
1602 for (; start <= end; start++)
1603 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1604 return start;
1606 return -1;
1609 Py_LOCAL_INLINE(Py_ssize_t)
1610 countstring(const char *target, Py_ssize_t target_len,
1611 const char *pattern, Py_ssize_t pattern_len,
1612 Py_ssize_t start,
1613 Py_ssize_t end,
1614 int direction, Py_ssize_t maxcount)
1616 Py_ssize_t count=0;
1618 if (start < 0) {
1619 start += target_len;
1620 if (start < 0)
1621 start = 0;
1623 if (end > target_len) {
1624 end = target_len;
1625 } else if (end < 0) {
1626 end += target_len;
1627 if (end < 0)
1628 end = 0;
1631 /* zero-length substrings match everywhere */
1632 if (pattern_len == 0 || maxcount == 0) {
1633 if (target_len+1 < maxcount)
1634 return target_len+1;
1635 return maxcount;
1638 end -= pattern_len;
1639 if (direction < 0) {
1640 for (; (end >= start); end--)
1641 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1642 count++;
1643 if (--maxcount <= 0) break;
1644 end -= pattern_len-1;
1646 } else {
1647 for (; (start <= end); start++)
1648 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1649 count++;
1650 if (--maxcount <= 0)
1651 break;
1652 start += pattern_len-1;
1655 return count;
1659 /* Algorithms for different cases of string replacement */
1661 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1662 Py_LOCAL(PyByteArrayObject *)
1663 replace_interleave(PyByteArrayObject *self,
1664 const char *to_s, Py_ssize_t to_len,
1665 Py_ssize_t maxcount)
1667 char *self_s, *result_s;
1668 Py_ssize_t self_len, result_len;
1669 Py_ssize_t count, i, product;
1670 PyByteArrayObject *result;
1672 self_len = PyByteArray_GET_SIZE(self);
1674 /* 1 at the end plus 1 after every character */
1675 count = self_len+1;
1676 if (maxcount < count)
1677 count = maxcount;
1679 /* Check for overflow */
1680 /* result_len = count * to_len + self_len; */
1681 product = count * to_len;
1682 if (product / to_len != count) {
1683 PyErr_SetString(PyExc_OverflowError,
1684 "replace string is too long");
1685 return NULL;
1687 result_len = product + self_len;
1688 if (result_len < 0) {
1689 PyErr_SetString(PyExc_OverflowError,
1690 "replace string is too long");
1691 return NULL;
1694 if (! (result = (PyByteArrayObject *)
1695 PyByteArray_FromStringAndSize(NULL, result_len)) )
1696 return NULL;
1698 self_s = PyByteArray_AS_STRING(self);
1699 result_s = PyByteArray_AS_STRING(result);
1701 /* TODO: special case single character, which doesn't need memcpy */
1703 /* Lay the first one down (guaranteed this will occur) */
1704 Py_MEMCPY(result_s, to_s, to_len);
1705 result_s += to_len;
1706 count -= 1;
1708 for (i=0; i<count; i++) {
1709 *result_s++ = *self_s++;
1710 Py_MEMCPY(result_s, to_s, to_len);
1711 result_s += to_len;
1714 /* Copy the rest of the original string */
1715 Py_MEMCPY(result_s, self_s, self_len-i);
1717 return result;
1720 /* Special case for deleting a single character */
1721 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1722 Py_LOCAL(PyByteArrayObject *)
1723 replace_delete_single_character(PyByteArrayObject *self,
1724 char from_c, Py_ssize_t maxcount)
1726 char *self_s, *result_s;
1727 char *start, *next, *end;
1728 Py_ssize_t self_len, result_len;
1729 Py_ssize_t count;
1730 PyByteArrayObject *result;
1732 self_len = PyByteArray_GET_SIZE(self);
1733 self_s = PyByteArray_AS_STRING(self);
1735 count = countchar(self_s, self_len, from_c, maxcount);
1736 if (count == 0) {
1737 return return_self(self);
1740 result_len = self_len - count; /* from_len == 1 */
1741 assert(result_len>=0);
1743 if ( (result = (PyByteArrayObject *)
1744 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1745 return NULL;
1746 result_s = PyByteArray_AS_STRING(result);
1748 start = self_s;
1749 end = self_s + self_len;
1750 while (count-- > 0) {
1751 next = findchar(start, end-start, from_c);
1752 if (next == NULL)
1753 break;
1754 Py_MEMCPY(result_s, start, next-start);
1755 result_s += (next-start);
1756 start = next+1;
1758 Py_MEMCPY(result_s, start, end-start);
1760 return result;
1763 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1765 Py_LOCAL(PyByteArrayObject *)
1766 replace_delete_substring(PyByteArrayObject *self,
1767 const char *from_s, Py_ssize_t from_len,
1768 Py_ssize_t maxcount)
1770 char *self_s, *result_s;
1771 char *start, *next, *end;
1772 Py_ssize_t self_len, result_len;
1773 Py_ssize_t count, offset;
1774 PyByteArrayObject *result;
1776 self_len = PyByteArray_GET_SIZE(self);
1777 self_s = PyByteArray_AS_STRING(self);
1779 count = countstring(self_s, self_len,
1780 from_s, from_len,
1781 0, self_len, 1,
1782 maxcount);
1784 if (count == 0) {
1785 /* no matches */
1786 return return_self(self);
1789 result_len = self_len - (count * from_len);
1790 assert (result_len>=0);
1792 if ( (result = (PyByteArrayObject *)
1793 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1794 return NULL;
1796 result_s = PyByteArray_AS_STRING(result);
1798 start = self_s;
1799 end = self_s + self_len;
1800 while (count-- > 0) {
1801 offset = findstring(start, end-start,
1802 from_s, from_len,
1803 0, end-start, FORWARD);
1804 if (offset == -1)
1805 break;
1806 next = start + offset;
1808 Py_MEMCPY(result_s, start, next-start);
1810 result_s += (next-start);
1811 start = next+from_len;
1813 Py_MEMCPY(result_s, start, end-start);
1814 return result;
1817 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1818 Py_LOCAL(PyByteArrayObject *)
1819 replace_single_character_in_place(PyByteArrayObject *self,
1820 char from_c, char to_c,
1821 Py_ssize_t maxcount)
1823 char *self_s, *result_s, *start, *end, *next;
1824 Py_ssize_t self_len;
1825 PyByteArrayObject *result;
1827 /* The result string will be the same size */
1828 self_s = PyByteArray_AS_STRING(self);
1829 self_len = PyByteArray_GET_SIZE(self);
1831 next = findchar(self_s, self_len, from_c);
1833 if (next == NULL) {
1834 /* No matches; return the original bytes */
1835 return return_self(self);
1838 /* Need to make a new bytes */
1839 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1840 if (result == NULL)
1841 return NULL;
1842 result_s = PyByteArray_AS_STRING(result);
1843 Py_MEMCPY(result_s, self_s, self_len);
1845 /* change everything in-place, starting with this one */
1846 start = result_s + (next-self_s);
1847 *start = to_c;
1848 start++;
1849 end = result_s + self_len;
1851 while (--maxcount > 0) {
1852 next = findchar(start, end-start, from_c);
1853 if (next == NULL)
1854 break;
1855 *next = to_c;
1856 start = next+1;
1859 return result;
1862 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1863 Py_LOCAL(PyByteArrayObject *)
1864 replace_substring_in_place(PyByteArrayObject *self,
1865 const char *from_s, Py_ssize_t from_len,
1866 const char *to_s, Py_ssize_t to_len,
1867 Py_ssize_t maxcount)
1869 char *result_s, *start, *end;
1870 char *self_s;
1871 Py_ssize_t self_len, offset;
1872 PyByteArrayObject *result;
1874 /* The result bytes will be the same size */
1876 self_s = PyByteArray_AS_STRING(self);
1877 self_len = PyByteArray_GET_SIZE(self);
1879 offset = findstring(self_s, self_len,
1880 from_s, from_len,
1881 0, self_len, FORWARD);
1882 if (offset == -1) {
1883 /* No matches; return the original bytes */
1884 return return_self(self);
1887 /* Need to make a new bytes */
1888 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1889 if (result == NULL)
1890 return NULL;
1891 result_s = PyByteArray_AS_STRING(result);
1892 Py_MEMCPY(result_s, self_s, self_len);
1894 /* change everything in-place, starting with this one */
1895 start = result_s + offset;
1896 Py_MEMCPY(start, to_s, from_len);
1897 start += from_len;
1898 end = result_s + self_len;
1900 while ( --maxcount > 0) {
1901 offset = findstring(start, end-start,
1902 from_s, from_len,
1903 0, end-start, FORWARD);
1904 if (offset==-1)
1905 break;
1906 Py_MEMCPY(start+offset, to_s, from_len);
1907 start += offset+from_len;
1910 return result;
1913 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1914 Py_LOCAL(PyByteArrayObject *)
1915 replace_single_character(PyByteArrayObject *self,
1916 char from_c,
1917 const char *to_s, Py_ssize_t to_len,
1918 Py_ssize_t maxcount)
1920 char *self_s, *result_s;
1921 char *start, *next, *end;
1922 Py_ssize_t self_len, result_len;
1923 Py_ssize_t count, product;
1924 PyByteArrayObject *result;
1926 self_s = PyByteArray_AS_STRING(self);
1927 self_len = PyByteArray_GET_SIZE(self);
1929 count = countchar(self_s, self_len, from_c, maxcount);
1930 if (count == 0) {
1931 /* no matches, return unchanged */
1932 return return_self(self);
1935 /* use the difference between current and new, hence the "-1" */
1936 /* result_len = self_len + count * (to_len-1) */
1937 product = count * (to_len-1);
1938 if (product / (to_len-1) != count) {
1939 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1940 return NULL;
1942 result_len = self_len + product;
1943 if (result_len < 0) {
1944 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1945 return NULL;
1948 if ( (result = (PyByteArrayObject *)
1949 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1950 return NULL;
1951 result_s = PyByteArray_AS_STRING(result);
1953 start = self_s;
1954 end = self_s + self_len;
1955 while (count-- > 0) {
1956 next = findchar(start, end-start, from_c);
1957 if (next == NULL)
1958 break;
1960 if (next == start) {
1961 /* replace with the 'to' */
1962 Py_MEMCPY(result_s, to_s, to_len);
1963 result_s += to_len;
1964 start += 1;
1965 } else {
1966 /* copy the unchanged old then the 'to' */
1967 Py_MEMCPY(result_s, start, next-start);
1968 result_s += (next-start);
1969 Py_MEMCPY(result_s, to_s, to_len);
1970 result_s += to_len;
1971 start = next+1;
1974 /* Copy the remainder of the remaining bytes */
1975 Py_MEMCPY(result_s, start, end-start);
1977 return result;
1980 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1981 Py_LOCAL(PyByteArrayObject *)
1982 replace_substring(PyByteArrayObject *self,
1983 const char *from_s, Py_ssize_t from_len,
1984 const char *to_s, Py_ssize_t to_len,
1985 Py_ssize_t maxcount)
1987 char *self_s, *result_s;
1988 char *start, *next, *end;
1989 Py_ssize_t self_len, result_len;
1990 Py_ssize_t count, offset, product;
1991 PyByteArrayObject *result;
1993 self_s = PyByteArray_AS_STRING(self);
1994 self_len = PyByteArray_GET_SIZE(self);
1996 count = countstring(self_s, self_len,
1997 from_s, from_len,
1998 0, self_len, FORWARD, maxcount);
1999 if (count == 0) {
2000 /* no matches, return unchanged */
2001 return return_self(self);
2004 /* Check for overflow */
2005 /* result_len = self_len + count * (to_len-from_len) */
2006 product = count * (to_len-from_len);
2007 if (product / (to_len-from_len) != count) {
2008 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2009 return NULL;
2011 result_len = self_len + product;
2012 if (result_len < 0) {
2013 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2014 return NULL;
2017 if ( (result = (PyByteArrayObject *)
2018 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
2019 return NULL;
2020 result_s = PyByteArray_AS_STRING(result);
2022 start = self_s;
2023 end = self_s + self_len;
2024 while (count-- > 0) {
2025 offset = findstring(start, end-start,
2026 from_s, from_len,
2027 0, end-start, FORWARD);
2028 if (offset == -1)
2029 break;
2030 next = start+offset;
2031 if (next == start) {
2032 /* replace with the 'to' */
2033 Py_MEMCPY(result_s, to_s, to_len);
2034 result_s += to_len;
2035 start += from_len;
2036 } else {
2037 /* copy the unchanged old then the 'to' */
2038 Py_MEMCPY(result_s, start, next-start);
2039 result_s += (next-start);
2040 Py_MEMCPY(result_s, to_s, to_len);
2041 result_s += to_len;
2042 start = next+from_len;
2045 /* Copy the remainder of the remaining bytes */
2046 Py_MEMCPY(result_s, start, end-start);
2048 return result;
2052 Py_LOCAL(PyByteArrayObject *)
2053 replace(PyByteArrayObject *self,
2054 const char *from_s, Py_ssize_t from_len,
2055 const char *to_s, Py_ssize_t to_len,
2056 Py_ssize_t maxcount)
2058 if (maxcount < 0) {
2059 maxcount = PY_SSIZE_T_MAX;
2060 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
2061 /* nothing to do; return the original bytes */
2062 return return_self(self);
2065 if (maxcount == 0 ||
2066 (from_len == 0 && to_len == 0)) {
2067 /* nothing to do; return the original bytes */
2068 return return_self(self);
2071 /* Handle zero-length special cases */
2073 if (from_len == 0) {
2074 /* insert the 'to' bytes everywhere. */
2075 /* >>> "Python".replace("", ".") */
2076 /* '.P.y.t.h.o.n.' */
2077 return replace_interleave(self, to_s, to_len, maxcount);
2080 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2081 /* point for an empty self bytes to generate a non-empty bytes */
2082 /* Special case so the remaining code always gets a non-empty bytes */
2083 if (PyByteArray_GET_SIZE(self) == 0) {
2084 return return_self(self);
2087 if (to_len == 0) {
2088 /* delete all occurances of 'from' bytes */
2089 if (from_len == 1) {
2090 return replace_delete_single_character(
2091 self, from_s[0], maxcount);
2092 } else {
2093 return replace_delete_substring(self, from_s, from_len, maxcount);
2097 /* Handle special case where both bytes have the same length */
2099 if (from_len == to_len) {
2100 if (from_len == 1) {
2101 return replace_single_character_in_place(
2102 self,
2103 from_s[0],
2104 to_s[0],
2105 maxcount);
2106 } else {
2107 return replace_substring_in_place(
2108 self, from_s, from_len, to_s, to_len, maxcount);
2112 /* Otherwise use the more generic algorithms */
2113 if (from_len == 1) {
2114 return replace_single_character(self, from_s[0],
2115 to_s, to_len, maxcount);
2116 } else {
2117 /* len('from')>=2, len('to')>=1 */
2118 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2123 PyDoc_STRVAR(replace__doc__,
2124 "B.replace(old, new[, count]) -> bytes\n\
2126 Return a copy of B with all occurrences of subsection\n\
2127 old replaced by new. If the optional argument count is\n\
2128 given, only the first count occurrences are replaced.");
2130 static PyObject *
2131 bytearray_replace(PyByteArrayObject *self, PyObject *args)
2133 Py_ssize_t count = -1;
2134 PyObject *from, *to, *res;
2135 Py_buffer vfrom, vto;
2137 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2138 return NULL;
2140 if (_getbuffer(from, &vfrom) < 0)
2141 return NULL;
2142 if (_getbuffer(to, &vto) < 0) {
2143 PyBuffer_Release(&vfrom);
2144 return NULL;
2147 res = (PyObject *)replace((PyByteArrayObject *) self,
2148 vfrom.buf, vfrom.len,
2149 vto.buf, vto.len, count);
2151 PyBuffer_Release(&vfrom);
2152 PyBuffer_Release(&vto);
2153 return res;
2157 /* Overallocate the initial list to reduce the number of reallocs for small
2158 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
2159 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
2160 text (roughly 11 words per line) and field delimited data (usually 1-10
2161 fields). For large strings the split algorithms are bandwidth limited
2162 so increasing the preallocation likely will not improve things.*/
/* Upper bound on the number of list slots allocated up front. */
#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append the bytes data[left:right] to `list` as a new bytearray.
   Relies on the caller's locals `str` and `list` and on an `onError`
   label; the temporary reference is dropped after appending. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but fills the preallocated slots first: while
   `count` is below MAX_PREALLOC the new object is stored directly in
   slot `count`, otherwise it is appended.  Increments the caller's
   `count`. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2203 Py_LOCAL_INLINE(PyObject *)
2204 split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2206 register Py_ssize_t i, j, count = 0;
2207 PyObject *str;
2208 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2210 if (list == NULL)
2211 return NULL;
2213 i = j = 0;
2214 while ((j < len) && (maxcount-- > 0)) {
2215 for(; j < len; j++) {
2216 /* I found that using memchr makes no difference */
2217 if (s[j] == ch) {
2218 SPLIT_ADD(s, i, j);
2219 i = j = j + 1;
2220 break;
2224 if (i <= len) {
2225 SPLIT_ADD(s, i, len);
2227 FIX_PREALLOC_SIZE(list);
2228 return list;
2230 onError:
2231 Py_DECREF(list);
2232 return NULL;
2236 Py_LOCAL_INLINE(PyObject *)
2237 split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2239 register Py_ssize_t i, j, count = 0;
2240 PyObject *str;
2241 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2243 if (list == NULL)
2244 return NULL;
2246 for (i = j = 0; i < len; ) {
2247 /* find a token */
2248 while (i < len && Py_ISSPACE(s[i]))
2249 i++;
2250 j = i;
2251 while (i < len && !Py_ISSPACE(s[i]))
2252 i++;
2253 if (j < i) {
2254 if (maxcount-- <= 0)
2255 break;
2256 SPLIT_ADD(s, j, i);
2257 while (i < len && Py_ISSPACE(s[i]))
2258 i++;
2259 j = i;
2262 if (j < len) {
2263 SPLIT_ADD(s, j, len);
2265 FIX_PREALLOC_SIZE(list);
2266 return list;
2268 onError:
2269 Py_DECREF(list);
2270 return NULL;
2273 PyDoc_STRVAR(split__doc__,
2274 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2276 Return a list of the sections in B, using sep as the delimiter.\n\
2277 If sep is not given, B is split on ASCII whitespace characters\n\
2278 (space, tab, return, newline, formfeed, vertical tab).\n\
2279 If maxsplit is given, at most maxsplit splits are done.");
2281 static PyObject *
2282 bytearray_split(PyByteArrayObject *self, PyObject *args)
2284 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j, pos;
2285 Py_ssize_t maxsplit = -1, count = 0;
2286 const char *s = PyByteArray_AS_STRING(self), *sub;
2287 PyObject *list, *str, *subobj = Py_None;
2288 Py_buffer vsub;
2290 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2291 return NULL;
2292 if (maxsplit < 0)
2293 maxsplit = PY_SSIZE_T_MAX;
2295 if (subobj == Py_None)
2296 return split_whitespace(s, len, maxsplit);
2298 if (_getbuffer(subobj, &vsub) < 0)
2299 return NULL;
2300 sub = vsub.buf;
2301 n = vsub.len;
2303 if (n == 0) {
2304 PyErr_SetString(PyExc_ValueError, "empty separator");
2305 PyBuffer_Release(&vsub);
2306 return NULL;
2308 if (n == 1) {
2309 list = split_char(s, len, sub[0], maxsplit);
2310 PyBuffer_Release(&vsub);
2311 return list;
2314 list = PyList_New(PREALLOC_SIZE(maxsplit));
2315 if (list == NULL) {
2316 PyBuffer_Release(&vsub);
2317 return NULL;
2320 i = j = 0;
2321 while (maxsplit-- > 0) {
2322 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2323 if (pos < 0)
2324 break;
2325 j = i+pos;
2326 SPLIT_ADD(s, i, j);
2327 i = j + n;
2329 SPLIT_ADD(s, i, len);
2330 FIX_PREALLOC_SIZE(list);
2331 PyBuffer_Release(&vsub);
2332 return list;
2334 onError:
2335 Py_DECREF(list);
2336 PyBuffer_Release(&vsub);
2337 return NULL;
2340 /* stringlib's partition shares nullbytes in some cases.
2341 undo this, we don't want the nullbytes to be shared. */
2342 static PyObject *
2343 make_nullbytes_unique(PyObject *result)
2345 if (result != NULL) {
2346 int i;
2347 assert(PyTuple_Check(result));
2348 assert(PyTuple_GET_SIZE(result) == 3);
2349 for (i = 0; i < 3; i++) {
2350 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2351 PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
2352 if (new == NULL) {
2353 Py_DECREF(result);
2354 result = NULL;
2355 break;
2357 Py_DECREF(nullbytes);
2358 PyTuple_SET_ITEM(result, i, new);
2362 return result;
2365 PyDoc_STRVAR(partition__doc__,
2366 "B.partition(sep) -> (head, sep, tail)\n\
2368 Searches for the separator sep in B, and returns the part before it,\n\
2369 the separator itself, and the part after it. If the separator is not\n\
2370 found, returns B and two empty bytearray objects.");
2372 static PyObject *
2373 bytearray_partition(PyByteArrayObject *self, PyObject *sep_obj)
2375 PyObject *bytesep, *result;
2377 bytesep = PyByteArray_FromObject(sep_obj);
2378 if (! bytesep)
2379 return NULL;
2381 result = stringlib_partition(
2382 (PyObject*) self,
2383 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2384 bytesep,
2385 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2388 Py_DECREF(bytesep);
2389 return make_nullbytes_unique(result);
2392 PyDoc_STRVAR(rpartition__doc__,
2393 "B.rpartition(sep) -> (tail, sep, head)\n\
2395 Searches for the separator sep in B, starting at the end of B,\n\
2396 and returns the part before it, the separator itself, and the\n\
2397 part after it. If the separator is not found, returns two empty\n\
2398 bytearray objects and B.");
2400 static PyObject *
2401 bytearray_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2403 PyObject *bytesep, *result;
2405 bytesep = PyByteArray_FromObject(sep_obj);
2406 if (! bytesep)
2407 return NULL;
2409 result = stringlib_rpartition(
2410 (PyObject*) self,
2411 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2412 bytesep,
2413 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2416 Py_DECREF(bytesep);
2417 return make_nullbytes_unique(result);
2420 Py_LOCAL_INLINE(PyObject *)
2421 rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2423 register Py_ssize_t i, j, count=0;
2424 PyObject *str;
2425 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2427 if (list == NULL)
2428 return NULL;
2430 i = j = len - 1;
2431 while ((i >= 0) && (maxcount-- > 0)) {
2432 for (; i >= 0; i--) {
2433 if (s[i] == ch) {
2434 SPLIT_ADD(s, i + 1, j + 1);
2435 j = i = i - 1;
2436 break;
2440 if (j >= -1) {
2441 SPLIT_ADD(s, 0, j + 1);
2443 FIX_PREALLOC_SIZE(list);
2444 if (PyList_Reverse(list) < 0)
2445 goto onError;
2447 return list;
2449 onError:
2450 Py_DECREF(list);
2451 return NULL;
2454 Py_LOCAL_INLINE(PyObject *)
2455 rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2457 register Py_ssize_t i, j, count = 0;
2458 PyObject *str;
2459 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2461 if (list == NULL)
2462 return NULL;
2464 for (i = j = len - 1; i >= 0; ) {
2465 /* find a token */
2466 while (i >= 0 && Py_ISSPACE(s[i]))
2467 i--;
2468 j = i;
2469 while (i >= 0 && !Py_ISSPACE(s[i]))
2470 i--;
2471 if (j > i) {
2472 if (maxcount-- <= 0)
2473 break;
2474 SPLIT_ADD(s, i + 1, j + 1);
2475 while (i >= 0 && Py_ISSPACE(s[i]))
2476 i--;
2477 j = i;
2480 if (j >= 0) {
2481 SPLIT_ADD(s, 0, j + 1);
2483 FIX_PREALLOC_SIZE(list);
2484 if (PyList_Reverse(list) < 0)
2485 goto onError;
2487 return list;
2489 onError:
2490 Py_DECREF(list);
2491 return NULL;
2494 PyDoc_STRVAR(rsplit__doc__,
2495 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2497 Return a list of the sections in B, using sep as the delimiter,\n\
2498 starting at the end of B and working to the front.\n\
2499 If sep is not given, B is split on ASCII whitespace characters\n\
2500 (space, tab, return, newline, formfeed, vertical tab).\n\
2501 If maxsplit is given, at most maxsplit splits are done.");
2503 static PyObject *
2504 bytearray_rsplit(PyByteArrayObject *self, PyObject *args)
2506 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, j, pos;
2507 Py_ssize_t maxsplit = -1, count = 0;
2508 const char *s = PyByteArray_AS_STRING(self), *sub;
2509 PyObject *list, *str, *subobj = Py_None;
2510 Py_buffer vsub;
2512 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2513 return NULL;
2514 if (maxsplit < 0)
2515 maxsplit = PY_SSIZE_T_MAX;
2517 if (subobj == Py_None)
2518 return rsplit_whitespace(s, len, maxsplit);
2520 if (_getbuffer(subobj, &vsub) < 0)
2521 return NULL;
2522 sub = vsub.buf;
2523 n = vsub.len;
2525 if (n == 0) {
2526 PyErr_SetString(PyExc_ValueError, "empty separator");
2527 PyBuffer_Release(&vsub);
2528 return NULL;
2530 else if (n == 1) {
2531 list = rsplit_char(s, len, sub[0], maxsplit);
2532 PyBuffer_Release(&vsub);
2533 return list;
2536 list = PyList_New(PREALLOC_SIZE(maxsplit));
2537 if (list == NULL) {
2538 PyBuffer_Release(&vsub);
2539 return NULL;
2542 j = len;
2544 while (maxsplit-- > 0) {
2545 pos = fastsearch(s, j, sub, n, FAST_RSEARCH);
2546 if (pos < 0)
2547 break;
2548 SPLIT_ADD(s, pos + n, j);
2549 j = pos;
2551 SPLIT_ADD(s, 0, j);
2552 FIX_PREALLOC_SIZE(list);
2553 if (PyList_Reverse(list) < 0)
2554 goto onError;
2555 PyBuffer_Release(&vsub);
2556 return list;
2558 onError:
2559 Py_DECREF(list);
2560 PyBuffer_Release(&vsub);
2561 return NULL;
2564 PyDoc_STRVAR(reverse__doc__,
2565 "B.reverse() -> None\n\
2567 Reverse the order of the values in B in place.");
2568 static PyObject *
2569 bytearray_reverse(PyByteArrayObject *self, PyObject *unused)
2571 char swap, *head, *tail;
2572 Py_ssize_t i, j, n = Py_SIZE(self);
2574 j = n / 2;
2575 head = self->ob_bytes;
2576 tail = head + n - 1;
2577 for (i = 0; i < j; i++) {
2578 swap = *head;
2579 *head++ = *tail;
2580 *tail-- = swap;
2583 Py_RETURN_NONE;
2586 PyDoc_STRVAR(insert__doc__,
2587 "B.insert(index, int) -> None\n\
2589 Insert a single item into the bytearray before the given index.");
2590 static PyObject *
2591 bytearray_insert(PyByteArrayObject *self, PyObject *args)
2593 PyObject *value;
2594 int ival;
2595 Py_ssize_t where, n = Py_SIZE(self);
2597 if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
2598 return NULL;
2600 if (n == PY_SSIZE_T_MAX) {
2601 PyErr_SetString(PyExc_OverflowError,
2602 "cannot add more objects to bytearray");
2603 return NULL;
2605 if (!_getbytevalue(value, &ival))
2606 return NULL;
2607 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2608 return NULL;
2610 if (where < 0) {
2611 where += n;
2612 if (where < 0)
2613 where = 0;
2615 if (where > n)
2616 where = n;
2617 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2618 self->ob_bytes[where] = ival;
2620 Py_RETURN_NONE;
2623 PyDoc_STRVAR(append__doc__,
2624 "B.append(int) -> None\n\
2626 Append a single item to the end of B.");
2627 static PyObject *
2628 bytearray_append(PyByteArrayObject *self, PyObject *arg)
2630 int value;
2631 Py_ssize_t n = Py_SIZE(self);
2633 if (! _getbytevalue(arg, &value))
2634 return NULL;
2635 if (n == PY_SSIZE_T_MAX) {
2636 PyErr_SetString(PyExc_OverflowError,
2637 "cannot add more objects to bytearray");
2638 return NULL;
2640 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2641 return NULL;
2643 self->ob_bytes[n] = value;
2645 Py_RETURN_NONE;
2648 PyDoc_STRVAR(extend__doc__,
2649 "B.extend(iterable int) -> None\n\
2651 Append all the elements from the iterator or sequence to the\n\
2652 end of B.");
2653 static PyObject *
2654 bytearray_extend(PyByteArrayObject *self, PyObject *arg)
2656 PyObject *it, *item, *bytearray_obj;
2657 Py_ssize_t buf_size = 0, len = 0;
2658 int value;
2659 char *buf;
2661 /* bytearray_setslice code only accepts something supporting PEP 3118. */
2662 if (PyObject_CheckBuffer(arg)) {
2663 if (bytearray_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2664 return NULL;
2666 Py_RETURN_NONE;
2669 it = PyObject_GetIter(arg);
2670 if (it == NULL)
2671 return NULL;
2673 /* Try to determine the length of the argument. 32 is abitrary. */
2674 buf_size = _PyObject_LengthHint(arg, 32);
2675 if (buf_size == -1) {
2676 Py_DECREF(it);
2677 return NULL;
2680 bytearray_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2681 if (bytearray_obj == NULL)
2682 return NULL;
2683 buf = PyByteArray_AS_STRING(bytearray_obj);
2685 while ((item = PyIter_Next(it)) != NULL) {
2686 if (! _getbytevalue(item, &value)) {
2687 Py_DECREF(item);
2688 Py_DECREF(it);
2689 Py_DECREF(bytearray_obj);
2690 return NULL;
2692 buf[len++] = value;
2693 Py_DECREF(item);
2695 if (len >= buf_size) {
2696 buf_size = len + (len >> 1) + 1;
2697 if (PyByteArray_Resize((PyObject *)bytearray_obj, buf_size) < 0) {
2698 Py_DECREF(it);
2699 Py_DECREF(bytearray_obj);
2700 return NULL;
2702 /* Recompute the `buf' pointer, since the resizing operation may
2703 have invalidated it. */
2704 buf = PyByteArray_AS_STRING(bytearray_obj);
2707 Py_DECREF(it);
2709 /* Resize down to exact size. */
2710 if (PyByteArray_Resize((PyObject *)bytearray_obj, len) < 0) {
2711 Py_DECREF(bytearray_obj);
2712 return NULL;
2715 if (bytearray_setslice(self, Py_SIZE(self), Py_SIZE(self), bytearray_obj) == -1)
2716 return NULL;
2717 Py_DECREF(bytearray_obj);
2719 Py_RETURN_NONE;
2722 PyDoc_STRVAR(pop__doc__,
2723 "B.pop([index]) -> int\n\
2725 Remove and return a single item from B. If no index\n\
2726 argument is given, will pop the last value.");
2727 static PyObject *
2728 bytearray_pop(PyByteArrayObject *self, PyObject *args)
2730 int value;
2731 Py_ssize_t where = -1, n = Py_SIZE(self);
2733 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2734 return NULL;
2736 if (n == 0) {
2737 PyErr_SetString(PyExc_OverflowError,
2738 "cannot pop an empty bytearray");
2739 return NULL;
2741 if (where < 0)
2742 where += Py_SIZE(self);
2743 if (where < 0 || where >= Py_SIZE(self)) {
2744 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2745 return NULL;
2747 if (!_canresize(self))
2748 return NULL;
2750 value = self->ob_bytes[where];
2751 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2752 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2753 return NULL;
2755 return PyInt_FromLong((unsigned char)value);
2758 PyDoc_STRVAR(remove__doc__,
2759 "B.remove(int) -> None\n\
2761 Remove the first occurance of a value in B.");
2762 static PyObject *
2763 bytearray_remove(PyByteArrayObject *self, PyObject *arg)
2765 int value;
2766 Py_ssize_t where, n = Py_SIZE(self);
2768 if (! _getbytevalue(arg, &value))
2769 return NULL;
2771 for (where = 0; where < n; where++) {
2772 if (self->ob_bytes[where] == value)
2773 break;
2775 if (where == n) {
2776 PyErr_SetString(PyExc_ValueError, "value not found in bytearray");
2777 return NULL;
2779 if (!_canresize(self))
2780 return NULL;
2782 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2783 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2784 return NULL;
2786 Py_RETURN_NONE;
2789 /* XXX These two helpers could be optimized if argsize == 1 */
2791 static Py_ssize_t
2792 lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2793 void *argptr, Py_ssize_t argsize)
2795 Py_ssize_t i = 0;
2796 while (i < mysize && memchr(argptr, myptr[i], argsize))
2797 i++;
2798 return i;
2801 static Py_ssize_t
2802 rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2803 void *argptr, Py_ssize_t argsize)
2805 Py_ssize_t i = mysize - 1;
2806 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2807 i--;
2808 return i + 1;
2811 PyDoc_STRVAR(strip__doc__,
2812 "B.strip([bytes]) -> bytearray\n\
2814 Strip leading and trailing bytes contained in the argument.\n\
2815 If the argument is omitted, strip ASCII whitespace.");
2816 static PyObject *
2817 bytearray_strip(PyByteArrayObject *self, PyObject *args)
2819 Py_ssize_t left, right, mysize, argsize;
2820 void *myptr, *argptr;
2821 PyObject *arg = Py_None;
2822 Py_buffer varg;
2823 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2824 return NULL;
2825 if (arg == Py_None) {
2826 argptr = "\t\n\r\f\v ";
2827 argsize = 6;
2829 else {
2830 if (_getbuffer(arg, &varg) < 0)
2831 return NULL;
2832 argptr = varg.buf;
2833 argsize = varg.len;
2835 myptr = self->ob_bytes;
2836 mysize = Py_SIZE(self);
2837 left = lstrip_helper(myptr, mysize, argptr, argsize);
2838 if (left == mysize)
2839 right = left;
2840 else
2841 right = rstrip_helper(myptr, mysize, argptr, argsize);
2842 if (arg != Py_None)
2843 PyBuffer_Release(&varg);
2844 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2847 PyDoc_STRVAR(lstrip__doc__,
2848 "B.lstrip([bytes]) -> bytearray\n\
2850 Strip leading bytes contained in the argument.\n\
2851 If the argument is omitted, strip leading ASCII whitespace.");
2852 static PyObject *
2853 bytearray_lstrip(PyByteArrayObject *self, PyObject *args)
2855 Py_ssize_t left, right, mysize, argsize;
2856 void *myptr, *argptr;
2857 PyObject *arg = Py_None;
2858 Py_buffer varg;
2859 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2860 return NULL;
2861 if (arg == Py_None) {
2862 argptr = "\t\n\r\f\v ";
2863 argsize = 6;
2865 else {
2866 if (_getbuffer(arg, &varg) < 0)
2867 return NULL;
2868 argptr = varg.buf;
2869 argsize = varg.len;
2871 myptr = self->ob_bytes;
2872 mysize = Py_SIZE(self);
2873 left = lstrip_helper(myptr, mysize, argptr, argsize);
2874 right = mysize;
2875 if (arg != Py_None)
2876 PyBuffer_Release(&varg);
2877 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2880 PyDoc_STRVAR(rstrip__doc__,
2881 "B.rstrip([bytes]) -> bytearray\n\
2883 Strip trailing bytes contained in the argument.\n\
2884 If the argument is omitted, strip trailing ASCII whitespace.");
2885 static PyObject *
2886 bytearray_rstrip(PyByteArrayObject *self, PyObject *args)
2888 Py_ssize_t left, right, mysize, argsize;
2889 void *myptr, *argptr;
2890 PyObject *arg = Py_None;
2891 Py_buffer varg;
2892 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2893 return NULL;
2894 if (arg == Py_None) {
2895 argptr = "\t\n\r\f\v ";
2896 argsize = 6;
2898 else {
2899 if (_getbuffer(arg, &varg) < 0)
2900 return NULL;
2901 argptr = varg.buf;
2902 argsize = varg.len;
2904 myptr = self->ob_bytes;
2905 mysize = Py_SIZE(self);
2906 left = 0;
2907 right = rstrip_helper(myptr, mysize, argptr, argsize);
2908 if (arg != Py_None)
2909 PyBuffer_Release(&varg);
2910 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2913 PyDoc_STRVAR(decode_doc,
2914 "B.decode([encoding[, errors]]) -> unicode object.\n\
2916 Decodes B using the codec registered for encoding. encoding defaults\n\
2917 to the default encoding. errors may be given to set a different error\n\
2918 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2919 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2920 as well as any other name registered with codecs.register_error that is\n\
2921 able to handle UnicodeDecodeErrors.");
2923 static PyObject *
2924 bytearray_decode(PyObject *self, PyObject *args, PyObject *kwargs)
2926 const char *encoding = NULL;
2927 const char *errors = NULL;
2928 static char *kwlist[] = {"encoding", "errors", 0};
2930 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2931 return NULL;
2932 if (encoding == NULL) {
2933 #ifdef Py_USING_UNICODE
2934 encoding = PyUnicode_GetDefaultEncoding();
2935 #else
2936 PyErr_SetString(PyExc_ValueError, "no encoding specified");
2937 return NULL;
2938 #endif
2940 return PyCodec_Decode(self, encoding, errors);
2943 PyDoc_STRVAR(alloc_doc,
2944 "B.__alloc__() -> int\n\
2946 Returns the number of bytes actually allocated.");
2948 static PyObject *
2949 bytearray_alloc(PyByteArrayObject *self)
2951 return PyInt_FromSsize_t(self->ob_alloc);
2954 PyDoc_STRVAR(join_doc,
2955 "B.join(iterable_of_bytes) -> bytes\n\
2957 Concatenates any number of bytearray objects, with B in between each pair.");
2959 static PyObject *
2960 bytearray_join(PyByteArrayObject *self, PyObject *it)
2962 PyObject *seq;
2963 Py_ssize_t mysize = Py_SIZE(self);
2964 Py_ssize_t i;
2965 Py_ssize_t n;
2966 PyObject **items;
2967 Py_ssize_t totalsize = 0;
2968 PyObject *result;
2969 char *dest;
2971 seq = PySequence_Fast(it, "can only join an iterable");
2972 if (seq == NULL)
2973 return NULL;
2974 n = PySequence_Fast_GET_SIZE(seq);
2975 items = PySequence_Fast_ITEMS(seq);
2977 /* Compute the total size, and check that they are all bytes */
2978 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2979 for (i = 0; i < n; i++) {
2980 PyObject *obj = items[i];
2981 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2982 PyErr_Format(PyExc_TypeError,
2983 "can only join an iterable of bytes "
2984 "(item %ld has type '%.100s')",
2985 /* XXX %ld isn't right on Win64 */
2986 (long)i, Py_TYPE(obj)->tp_name);
2987 goto error;
2989 if (i > 0)
2990 totalsize += mysize;
2991 totalsize += Py_SIZE(obj);
2992 if (totalsize < 0) {
2993 PyErr_NoMemory();
2994 goto error;
2998 /* Allocate the result, and copy the bytes */
2999 result = PyByteArray_FromStringAndSize(NULL, totalsize);
3000 if (result == NULL)
3001 goto error;
3002 dest = PyByteArray_AS_STRING(result);
3003 for (i = 0; i < n; i++) {
3004 PyObject *obj = items[i];
3005 Py_ssize_t size = Py_SIZE(obj);
3006 char *buf;
3007 if (PyByteArray_Check(obj))
3008 buf = PyByteArray_AS_STRING(obj);
3009 else
3010 buf = PyBytes_AS_STRING(obj);
3011 if (i) {
3012 memcpy(dest, self->ob_bytes, mysize);
3013 dest += mysize;
3015 memcpy(dest, buf, size);
3016 dest += size;
3019 /* Done */
3020 Py_DECREF(seq);
3021 return result;
3023 /* Error handling */
3024 error:
3025 Py_DECREF(seq);
3026 return NULL;
3029 PyDoc_STRVAR(fromhex_doc,
3030 "bytearray.fromhex(string) -> bytearray\n\
3032 Create a bytearray object from a string of hexadecimal numbers.\n\
3033 Spaces between two numbers are accepted.\n\
3034 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3036 static int
3037 hex_digit_to_int(char c)
3039 if (Py_ISDIGIT(c))
3040 return c - '0';
3041 else {
3042 if (Py_ISUPPER(c))
3043 c = Py_TOLOWER(c);
3044 if (c >= 'a' && c <= 'f')
3045 return c - 'a' + 10;
3047 return -1;
3050 static PyObject *
3051 bytearray_fromhex(PyObject *cls, PyObject *args)
3053 PyObject *newbytes;
3054 char *buf;
3055 char *hex;
3056 Py_ssize_t hexlen, byteslen, i, j;
3057 int top, bot;
3059 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &hexlen))
3060 return NULL;
3061 byteslen = hexlen/2; /* This overestimates if there are spaces */
3062 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
3063 if (!newbytes)
3064 return NULL;
3065 buf = PyByteArray_AS_STRING(newbytes);
3066 for (i = j = 0; i < hexlen; i += 2) {
3067 /* skip over spaces in the input */
3068 while (hex[i] == ' ')
3069 i++;
3070 if (i >= hexlen)
3071 break;
3072 top = hex_digit_to_int(hex[i]);
3073 bot = hex_digit_to_int(hex[i+1]);
3074 if (top == -1 || bot == -1) {
3075 PyErr_Format(PyExc_ValueError,
3076 "non-hexadecimal number found in "
3077 "fromhex() arg at position %zd", i);
3078 goto error;
3080 buf[j++] = (top << 4) + bot;
3082 if (PyByteArray_Resize(newbytes, j) < 0)
3083 goto error;
3084 return newbytes;
3086 error:
3087 Py_DECREF(newbytes);
3088 return NULL;
3091 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3093 static PyObject *
3094 bytearray_reduce(PyByteArrayObject *self)
3096 PyObject *latin1, *dict;
3097 if (self->ob_bytes)
3098 #ifdef Py_USING_UNICODE
3099 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3100 Py_SIZE(self), NULL);
3101 #else
3102 latin1 = PyString_FromStringAndSize(self->ob_bytes, Py_SIZE(self));
3103 #endif
3104 else
3105 #ifdef Py_USING_UNICODE
3106 latin1 = PyUnicode_FromString("");
3107 #else
3108 latin1 = PyString_FromString("");
3109 #endif
3111 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3112 if (dict == NULL) {
3113 PyErr_Clear();
3114 dict = Py_None;
3115 Py_INCREF(dict);
3118 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3121 PyDoc_STRVAR(sizeof_doc,
3122 "B.__sizeof__() -> int\n\
3124 Returns the size of B in memory, in bytes");
3125 static PyObject *
3126 bytearray_sizeof(PyByteArrayObject *self)
3128 Py_ssize_t res;
3130 res = sizeof(PyByteArrayObject) + self->ob_alloc * sizeof(char);
3131 return PyInt_FromSsize_t(res);
3134 static PySequenceMethods bytearray_as_sequence = {
3135 (lenfunc)bytearray_length, /* sq_length */
3136 (binaryfunc)PyByteArray_Concat, /* sq_concat */
3137 (ssizeargfunc)bytearray_repeat, /* sq_repeat */
3138 (ssizeargfunc)bytearray_getitem, /* sq_item */
3139 0, /* sq_slice */
3140 (ssizeobjargproc)bytearray_setitem, /* sq_ass_item */
3141 0, /* sq_ass_slice */
3142 (objobjproc)bytearray_contains, /* sq_contains */
3143 (binaryfunc)bytearray_iconcat, /* sq_inplace_concat */
3144 (ssizeargfunc)bytearray_irepeat, /* sq_inplace_repeat */
3147 static PyMappingMethods bytearray_as_mapping = {
3148 (lenfunc)bytearray_length,
3149 (binaryfunc)bytearray_subscript,
3150 (objobjargproc)bytearray_ass_subscript,
3153 static PyBufferProcs bytearray_as_buffer = {
3154 (readbufferproc)bytearray_buffer_getreadbuf,
3155 (writebufferproc)bytearray_buffer_getwritebuf,
3156 (segcountproc)bytearray_buffer_getsegcount,
3157 (charbufferproc)bytearray_buffer_getcharbuf,
3158 (getbufferproc)bytearray_getbuffer,
3159 (releasebufferproc)bytearray_releasebuffer,
3162 static PyMethodDef
3163 bytearray_methods[] = {
3164 {"__alloc__", (PyCFunction)bytearray_alloc, METH_NOARGS, alloc_doc},
3165 {"__reduce__", (PyCFunction)bytearray_reduce, METH_NOARGS, reduce_doc},
3166 {"__sizeof__", (PyCFunction)bytearray_sizeof, METH_NOARGS, sizeof_doc},
3167 {"append", (PyCFunction)bytearray_append, METH_O, append__doc__},
3168 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3169 _Py_capitalize__doc__},
3170 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3171 {"count", (PyCFunction)bytearray_count, METH_VARARGS, count__doc__},
3172 {"decode", (PyCFunction)bytearray_decode, METH_VARARGS | METH_KEYWORDS, decode_doc},
3173 {"endswith", (PyCFunction)bytearray_endswith, METH_VARARGS, endswith__doc__},
3174 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3175 expandtabs__doc__},
3176 {"extend", (PyCFunction)bytearray_extend, METH_O, extend__doc__},
3177 {"find", (PyCFunction)bytearray_find, METH_VARARGS, find__doc__},
3178 {"fromhex", (PyCFunction)bytearray_fromhex, METH_VARARGS|METH_CLASS,
3179 fromhex_doc},
3180 {"index", (PyCFunction)bytearray_index, METH_VARARGS, index__doc__},
3181 {"insert", (PyCFunction)bytearray_insert, METH_VARARGS, insert__doc__},
3182 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3183 _Py_isalnum__doc__},
3184 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3185 _Py_isalpha__doc__},
3186 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3187 _Py_isdigit__doc__},
3188 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3189 _Py_islower__doc__},
3190 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3191 _Py_isspace__doc__},
3192 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3193 _Py_istitle__doc__},
3194 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3195 _Py_isupper__doc__},
3196 {"join", (PyCFunction)bytearray_join, METH_O, join_doc},
3197 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3198 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
3199 {"lstrip", (PyCFunction)bytearray_lstrip, METH_VARARGS, lstrip__doc__},
3200 {"partition", (PyCFunction)bytearray_partition, METH_O, partition__doc__},
3201 {"pop", (PyCFunction)bytearray_pop, METH_VARARGS, pop__doc__},
3202 {"remove", (PyCFunction)bytearray_remove, METH_O, remove__doc__},
3203 {"replace", (PyCFunction)bytearray_replace, METH_VARARGS, replace__doc__},
3204 {"reverse", (PyCFunction)bytearray_reverse, METH_NOARGS, reverse__doc__},
3205 {"rfind", (PyCFunction)bytearray_rfind, METH_VARARGS, rfind__doc__},
3206 {"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, rindex__doc__},
3207 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3208 {"rpartition", (PyCFunction)bytearray_rpartition, METH_O, rpartition__doc__},
3209 {"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS, rsplit__doc__},
3210 {"rstrip", (PyCFunction)bytearray_rstrip, METH_VARARGS, rstrip__doc__},
3211 {"split", (PyCFunction)bytearray_split, METH_VARARGS, split__doc__},
3212 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3213 splitlines__doc__},
3214 {"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS ,
3215 startswith__doc__},
3216 {"strip", (PyCFunction)bytearray_strip, METH_VARARGS, strip__doc__},
3217 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3218 _Py_swapcase__doc__},
3219 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3220 {"translate", (PyCFunction)bytearray_translate, METH_VARARGS,
3221 translate__doc__},
3222 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3223 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
3224 {NULL}
3227 PyDoc_STRVAR(bytearray_doc,
3228 "bytearray(iterable_of_ints) -> bytearray.\n\
3229 bytearray(string, encoding[, errors]) -> bytearray.\n\
3230 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3231 bytearray(memory_view) -> bytearray.\n\
3233 Construct an mutable bytearray object from:\n\
3234 - an iterable yielding integers in range(256)\n\
3235 - a text string encoded using the specified encoding\n\
3236 - a bytes or a bytearray object\n\
3237 - any object implementing the buffer API.\n\
3239 bytearray(int) -> bytearray.\n\
3241 Construct a zero-initialized bytearray of the given length.");
3244 static PyObject *bytearray_iter(PyObject *seq);
3246 PyTypeObject PyByteArray_Type = {
3247 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3248 "bytearray",
3249 sizeof(PyByteArrayObject),
3251 (destructor)bytearray_dealloc, /* tp_dealloc */
3252 0, /* tp_print */
3253 0, /* tp_getattr */
3254 0, /* tp_setattr */
3255 0, /* tp_compare */
3256 (reprfunc)bytearray_repr, /* tp_repr */
3257 0, /* tp_as_number */
3258 &bytearray_as_sequence, /* tp_as_sequence */
3259 &bytearray_as_mapping, /* tp_as_mapping */
3260 0, /* tp_hash */
3261 0, /* tp_call */
3262 bytearray_str, /* tp_str */
3263 PyObject_GenericGetAttr, /* tp_getattro */
3264 0, /* tp_setattro */
3265 &bytearray_as_buffer, /* tp_as_buffer */
3266 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3267 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3268 bytearray_doc, /* tp_doc */
3269 0, /* tp_traverse */
3270 0, /* tp_clear */
3271 (richcmpfunc)bytearray_richcompare, /* tp_richcompare */
3272 0, /* tp_weaklistoffset */
3273 bytearray_iter, /* tp_iter */
3274 0, /* tp_iternext */
3275 bytearray_methods, /* tp_methods */
3276 0, /* tp_members */
3277 0, /* tp_getset */
3278 0, /* tp_base */
3279 0, /* tp_dict */
3280 0, /* tp_descr_get */
3281 0, /* tp_descr_set */
3282 0, /* tp_dictoffset */
3283 (initproc)bytearray_init, /* tp_init */
3284 PyType_GenericAlloc, /* tp_alloc */
3285 PyType_GenericNew, /* tp_new */
3286 PyObject_Del, /* tp_free */
3289 /*********************** Bytes Iterator ****************************/
3291 typedef struct {
3292 PyObject_HEAD
3293 Py_ssize_t it_index;
3294 PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
3295 } bytesiterobject;
3297 static void
3298 bytearrayiter_dealloc(bytesiterobject *it)
3300 _PyObject_GC_UNTRACK(it);
3301 Py_XDECREF(it->it_seq);
3302 PyObject_GC_Del(it);
3305 static int
3306 bytearrayiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
3308 Py_VISIT(it->it_seq);
3309 return 0;
3312 static PyObject *
3313 bytearrayiter_next(bytesiterobject *it)
3315 PyByteArrayObject *seq;
3316 PyObject *item;
3318 assert(it != NULL);
3319 seq = it->it_seq;
3320 if (seq == NULL)
3321 return NULL;
3322 assert(PyByteArray_Check(seq));
3324 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
3325 item = PyInt_FromLong(
3326 (unsigned char)seq->ob_bytes[it->it_index]);
3327 if (item != NULL)
3328 ++it->it_index;
3329 return item;
3332 Py_DECREF(seq);
3333 it->it_seq = NULL;
3334 return NULL;
3337 static PyObject *
3338 bytesarrayiter_length_hint(bytesiterobject *it)
3340 Py_ssize_t len = 0;
3341 if (it->it_seq)
3342 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3343 return PyInt_FromSsize_t(len);
3346 PyDoc_STRVAR(length_hint_doc,
3347 "Private method returning an estimate of len(list(it)).");
3349 static PyMethodDef bytearrayiter_methods[] = {
3350 {"__length_hint__", (PyCFunction)bytesarrayiter_length_hint, METH_NOARGS,
3351 length_hint_doc},
3352 {NULL, NULL} /* sentinel */
3355 PyTypeObject PyByteArrayIter_Type = {
3356 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3357 "bytearray_iterator", /* tp_name */
3358 sizeof(bytesiterobject), /* tp_basicsize */
3359 0, /* tp_itemsize */
3360 /* methods */
3361 (destructor)bytearrayiter_dealloc, /* tp_dealloc */
3362 0, /* tp_print */
3363 0, /* tp_getattr */
3364 0, /* tp_setattr */
3365 0, /* tp_compare */
3366 0, /* tp_repr */
3367 0, /* tp_as_number */
3368 0, /* tp_as_sequence */
3369 0, /* tp_as_mapping */
3370 0, /* tp_hash */
3371 0, /* tp_call */
3372 0, /* tp_str */
3373 PyObject_GenericGetAttr, /* tp_getattro */
3374 0, /* tp_setattro */
3375 0, /* tp_as_buffer */
3376 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3377 0, /* tp_doc */
3378 (traverseproc)bytearrayiter_traverse, /* tp_traverse */
3379 0, /* tp_clear */
3380 0, /* tp_richcompare */
3381 0, /* tp_weaklistoffset */
3382 PyObject_SelfIter, /* tp_iter */
3383 (iternextfunc)bytearrayiter_next, /* tp_iternext */
3384 bytearrayiter_methods, /* tp_methods */
3388 static PyObject *
3389 bytearray_iter(PyObject *seq)
3391 bytesiterobject *it;
3393 if (!PyByteArray_Check(seq)) {
3394 PyErr_BadInternalCall();
3395 return NULL;
3397 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3398 if (it == NULL)
3399 return NULL;
3400 it->it_index = 0;
3401 Py_INCREF(seq);
3402 it->it_seq = (PyByteArrayObject *)seq;
3403 _PyObject_GC_TRACK(it);
3404 return (PyObject *)it;