Issue #4677: add two list comprehension tests to pybench.
[python.git] / Objects / bytearrayobject.c
blobd75eb53e437c4027af008719e339851af62f2cbc
1 /* PyBytes (bytearray) implementation */
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
6 #include "bytes_methods.h"
8 static PyByteArrayObject *nullbytes = NULL;
10 void
11 PyByteArray_Fini(void)
13 Py_CLEAR(nullbytes);
16 int
17 PyByteArray_Init(void)
19 nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24 nullbytes->ob_exports = 0;
25 return 1;
28 /* end nullbytes support */
30 /* Helpers */
32 static int
33 _getbytevalue(PyObject* arg, int *value)
35 long face_value;
37 if (PyBytes_CheckExact(arg)) {
38 if (Py_SIZE(arg) != 1) {
39 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
40 return 0;
42 *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
43 return 1;
45 else if (PyInt_Check(arg) || PyLong_Check(arg)) {
46 face_value = PyLong_AsLong(arg);
48 else {
49 PyObject *index = PyNumber_Index(arg);
50 if (index == NULL) {
51 PyErr_Format(PyExc_TypeError,
52 "an integer or string of size 1 is required");
53 return 0;
55 face_value = PyLong_AsLong(index);
56 Py_DECREF(index);
59 if (face_value < 0 || face_value >= 256) {
60 /* this includes the OverflowError in case the long is too large */
61 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
62 return 0;
65 *value = face_value;
66 return 1;
69 static Py_ssize_t
70 bytes_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
72 if ( index != 0 ) {
73 PyErr_SetString(PyExc_SystemError,
74 "accessing non-existent bytes segment");
75 return -1;
77 *ptr = (void *)self->ob_bytes;
78 return Py_SIZE(self);
81 static Py_ssize_t
82 bytes_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
84 if ( index != 0 ) {
85 PyErr_SetString(PyExc_SystemError,
86 "accessing non-existent bytes segment");
87 return -1;
89 *ptr = (void *)self->ob_bytes;
90 return Py_SIZE(self);
93 static Py_ssize_t
94 bytes_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
96 if ( lenp )
97 *lenp = Py_SIZE(self);
98 return 1;
101 static Py_ssize_t
102 bytes_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
104 if ( index != 0 ) {
105 PyErr_SetString(PyExc_SystemError,
106 "accessing non-existent bytes segment");
107 return -1;
109 *ptr = self->ob_bytes;
110 return Py_SIZE(self);
113 static int
114 bytes_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
116 int ret;
117 void *ptr;
118 if (view == NULL) {
119 obj->ob_exports++;
120 return 0;
122 if (obj->ob_bytes == NULL)
123 ptr = "";
124 else
125 ptr = obj->ob_bytes;
126 ret = PyBuffer_FillInfo(view, (PyObject*)obj, ptr, Py_SIZE(obj), 0, flags);
127 if (ret >= 0) {
128 obj->ob_exports++;
130 return ret;
133 static void
134 bytes_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
136 obj->ob_exports--;
139 static Py_ssize_t
140 _getbuffer(PyObject *obj, Py_buffer *view)
142 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
144 if (buffer == NULL || buffer->bf_getbuffer == NULL)
146 PyErr_Format(PyExc_TypeError,
147 "Type %.100s doesn't support the buffer API",
148 Py_TYPE(obj)->tp_name);
149 return -1;
152 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
153 return -1;
154 return view->len;
157 static int
158 _canresize(PyByteArrayObject *self)
160 if (self->ob_exports > 0) {
161 PyErr_SetString(PyExc_BufferError,
162 "Existing exports of data: object cannot be re-sized");
163 return 0;
165 return 1;
168 /* Direct API functions */
170 PyObject *
171 PyByteArray_FromObject(PyObject *input)
173 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
174 input, NULL);
177 PyObject *
178 PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
180 PyByteArrayObject *new;
181 Py_ssize_t alloc;
183 if (size < 0) {
184 PyErr_SetString(PyExc_SystemError,
185 "Negative size passed to PyByteArray_FromStringAndSize");
186 return NULL;
189 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
190 if (new == NULL)
191 return NULL;
193 if (size == 0) {
194 new->ob_bytes = NULL;
195 alloc = 0;
197 else {
198 alloc = size + 1;
199 new->ob_bytes = PyMem_Malloc(alloc);
200 if (new->ob_bytes == NULL) {
201 Py_DECREF(new);
202 return PyErr_NoMemory();
204 if (bytes != NULL)
205 memcpy(new->ob_bytes, bytes, size);
206 new->ob_bytes[size] = '\0'; /* Trailing null byte */
208 Py_SIZE(new) = size;
209 new->ob_alloc = alloc;
210 new->ob_exports = 0;
212 return (PyObject *)new;
215 Py_ssize_t
216 PyByteArray_Size(PyObject *self)
218 assert(self != NULL);
219 assert(PyByteArray_Check(self));
221 return PyByteArray_GET_SIZE(self);
224 char *
225 PyByteArray_AsString(PyObject *self)
227 assert(self != NULL);
228 assert(PyByteArray_Check(self));
230 return PyByteArray_AS_STRING(self);
234 PyByteArray_Resize(PyObject *self, Py_ssize_t size)
236 void *sval;
237 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
239 assert(self != NULL);
240 assert(PyByteArray_Check(self));
241 assert(size >= 0);
243 if (size == Py_SIZE(self)) {
244 return 0;
246 if (!_canresize((PyByteArrayObject *)self)) {
247 return -1;
250 if (size < alloc / 2) {
251 /* Major downsize; resize down to exact size */
252 alloc = size + 1;
254 else if (size < alloc) {
255 /* Within allocated size; quick exit */
256 Py_SIZE(self) = size;
257 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
258 return 0;
260 else if (size <= alloc * 1.125) {
261 /* Moderate upsize; overallocate similar to list_resize() */
262 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
264 else {
265 /* Major upsize; resize up to exact size */
266 alloc = size + 1;
269 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
270 if (sval == NULL) {
271 PyErr_NoMemory();
272 return -1;
275 ((PyByteArrayObject *)self)->ob_bytes = sval;
276 Py_SIZE(self) = size;
277 ((PyByteArrayObject *)self)->ob_alloc = alloc;
278 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
280 return 0;
283 PyObject *
284 PyByteArray_Concat(PyObject *a, PyObject *b)
286 Py_ssize_t size;
287 Py_buffer va, vb;
288 PyByteArrayObject *result = NULL;
290 va.len = -1;
291 vb.len = -1;
292 if (_getbuffer(a, &va) < 0 ||
293 _getbuffer(b, &vb) < 0) {
294 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
295 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
296 goto done;
299 size = va.len + vb.len;
300 if (size < 0) {
301 return PyErr_NoMemory();
302 goto done;
305 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
306 if (result != NULL) {
307 memcpy(result->ob_bytes, va.buf, va.len);
308 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
311 done:
312 if (va.len != -1)
313 PyBuffer_Release(&va);
314 if (vb.len != -1)
315 PyBuffer_Release(&vb);
316 return (PyObject *)result;
319 /* Functions stuffed into the type object */
321 static Py_ssize_t
322 bytes_length(PyByteArrayObject *self)
324 return Py_SIZE(self);
327 static PyObject *
328 bytes_iconcat(PyByteArrayObject *self, PyObject *other)
330 Py_ssize_t mysize;
331 Py_ssize_t size;
332 Py_buffer vo;
334 if (_getbuffer(other, &vo) < 0) {
335 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
336 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
337 return NULL;
340 mysize = Py_SIZE(self);
341 size = mysize + vo.len;
342 if (size < 0) {
343 PyBuffer_Release(&vo);
344 return PyErr_NoMemory();
346 if (size < self->ob_alloc) {
347 Py_SIZE(self) = size;
348 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
350 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
351 PyBuffer_Release(&vo);
352 return NULL;
354 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
355 PyBuffer_Release(&vo);
356 Py_INCREF(self);
357 return (PyObject *)self;
360 static PyObject *
361 bytes_repeat(PyByteArrayObject *self, Py_ssize_t count)
363 PyByteArrayObject *result;
364 Py_ssize_t mysize;
365 Py_ssize_t size;
367 if (count < 0)
368 count = 0;
369 mysize = Py_SIZE(self);
370 size = mysize * count;
371 if (count != 0 && size / count != mysize)
372 return PyErr_NoMemory();
373 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
374 if (result != NULL && size != 0) {
375 if (mysize == 1)
376 memset(result->ob_bytes, self->ob_bytes[0], size);
377 else {
378 Py_ssize_t i;
379 for (i = 0; i < count; i++)
380 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
383 return (PyObject *)result;
386 static PyObject *
387 bytes_irepeat(PyByteArrayObject *self, Py_ssize_t count)
389 Py_ssize_t mysize;
390 Py_ssize_t size;
392 if (count < 0)
393 count = 0;
394 mysize = Py_SIZE(self);
395 size = mysize * count;
396 if (count != 0 && size / count != mysize)
397 return PyErr_NoMemory();
398 if (size < self->ob_alloc) {
399 Py_SIZE(self) = size;
400 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
402 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
403 return NULL;
405 if (mysize == 1)
406 memset(self->ob_bytes, self->ob_bytes[0], size);
407 else {
408 Py_ssize_t i;
409 for (i = 1; i < count; i++)
410 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
413 Py_INCREF(self);
414 return (PyObject *)self;
417 static PyObject *
418 bytes_getitem(PyByteArrayObject *self, Py_ssize_t i)
420 if (i < 0)
421 i += Py_SIZE(self);
422 if (i < 0 || i >= Py_SIZE(self)) {
423 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
424 return NULL;
426 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
429 static PyObject *
430 bytes_subscript(PyByteArrayObject *self, PyObject *index)
432 if (PyIndex_Check(index)) {
433 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
435 if (i == -1 && PyErr_Occurred())
436 return NULL;
438 if (i < 0)
439 i += PyByteArray_GET_SIZE(self);
441 if (i < 0 || i >= Py_SIZE(self)) {
442 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
443 return NULL;
445 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
447 else if (PySlice_Check(index)) {
448 Py_ssize_t start, stop, step, slicelength, cur, i;
449 if (PySlice_GetIndicesEx((PySliceObject *)index,
450 PyByteArray_GET_SIZE(self),
451 &start, &stop, &step, &slicelength) < 0) {
452 return NULL;
455 if (slicelength <= 0)
456 return PyByteArray_FromStringAndSize("", 0);
457 else if (step == 1) {
458 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
459 slicelength);
461 else {
462 char *source_buf = PyByteArray_AS_STRING(self);
463 char *result_buf = (char *)PyMem_Malloc(slicelength);
464 PyObject *result;
466 if (result_buf == NULL)
467 return PyErr_NoMemory();
469 for (cur = start, i = 0; i < slicelength;
470 cur += step, i++) {
471 result_buf[i] = source_buf[cur];
473 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
474 PyMem_Free(result_buf);
475 return result;
478 else {
479 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
480 return NULL;
/* Sequence slice assignment: self[lo:hi] = values, or del self[lo:hi] when
   values is NULL.  `values` must support the buffer API.
   Returns 0 on success and -1 with an exception set on failure.  Any buffer
   view acquired on `values` is released at the `finish` label. */
484 static int
485 bytes_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
486 PyObject *values)
488 Py_ssize_t avail, needed;
489 void *bytes;
490 Py_buffer vbytes;
491 int res = 0;
/* len == -1 marks a view that has not been acquired. */
493 vbytes.len = -1;
494 if (values == (PyObject *)self) {
495 /* Make a copy and call this function recursively */
496 int err;
497 values = PyByteArray_FromObject(values);
498 if (values == NULL)
499 return -1;
500 err = bytes_setslice(self, lo, hi, values);
501 Py_DECREF(values);
502 return err;
504 if (values == NULL) {
505 /* del b[lo:hi] */
506 bytes = NULL;
507 needed = 0;
509 else {
510 if (_getbuffer(values, &vbytes) < 0) {
511 PyErr_Format(PyExc_TypeError,
512 "can't set bytearray slice from %.100s",
513 Py_TYPE(values)->tp_name);
514 return -1;
516 needed = vbytes.len;
517 bytes = vbytes.buf;
/* Clamp lo and hi; a slice that ends up with negative width is collapsed
   to an empty slice at position 0. */
520 if (lo < 0)
521 lo = 0;
522 if (hi < lo)
523 hi = lo;
524 if (hi > Py_SIZE(self))
525 hi = Py_SIZE(self);
527 avail = hi - lo;
528 if (avail < 0)
529 lo = hi = avail = 0;
/* When the replacement has a different length, the tail after `hi` is
   moved: before the resize when shrinking, after the resize when growing. */
531 if (avail != needed) {
532 if (avail > needed) {
533 if (!_canresize(self)) {
534 res = -1;
535 goto finish;
538 0 lo hi old_size
539 | |<----avail----->|<-----tomove------>|
540 | |<-needed->|<-----tomove------>|
541 0 lo new_hi new_size
543 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
544 Py_SIZE(self) - hi);
546 /* XXX(nnorwitz): need to verify this can't overflow! */
547 if (PyByteArray_Resize((PyObject *)self,
548 Py_SIZE(self) + needed - avail) < 0) {
549 res = -1;
550 goto finish;
552 if (avail < needed) {
554 0 lo hi old_size
555 | |<-avail->|<-----tomove------>|
556 | |<----needed---->|<-----tomove------>|
557 0 lo new_hi new_size
559 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
560 Py_SIZE(self) - lo - needed);
/* Copy the replacement bytes into the gap. */
564 if (needed > 0)
565 memcpy(self->ob_bytes + lo, bytes, needed);
568 finish:
569 if (vbytes.len != -1)
570 PyBuffer_Release(&vbytes);
571 return res;
574 static int
575 bytes_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
577 int ival;
579 if (i < 0)
580 i += Py_SIZE(self);
582 if (i < 0 || i >= Py_SIZE(self)) {
583 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
584 return -1;
587 if (value == NULL)
588 return bytes_setslice(self, i, i+1, NULL);
590 if (!_getbytevalue(value, &ival))
591 return -1;
593 self->ob_bytes[i] = ival;
594 return 0;
/* Mapping subscript assignment: self[index] = values, or del self[index]
   when values is NULL.  `index` may be an index object or a slice.
   Returns 0 on success and -1 with an exception set on failure. */
597 static int
598 bytes_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
600 Py_ssize_t start, stop, step, slicelen, needed;
601 char *bytes;
603 if (PyIndex_Check(index)) {
604 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
606 if (i == -1 && PyErr_Occurred())
607 return -1;
609 if (i < 0)
610 i += PyByteArray_GET_SIZE(self);
612 if (i < 0 || i >= Py_SIZE(self)) {
613 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
614 return -1;
/* Deleting a single item is handled below as deletion of the one-element
   slice [i:i+1]; assigning a single item is completed right here. */
617 if (values == NULL) {
618 /* Fall through to slice assignment */
619 start = i;
620 stop = i + 1;
621 step = 1;
622 slicelen = 1;
624 else {
625 int ival;
626 if (!_getbytevalue(values, &ival))
627 return -1;
628 self->ob_bytes[i] = (char)ival;
629 return 0;
632 else if (PySlice_Check(index)) {
633 if (PySlice_GetIndicesEx((PySliceObject *)index,
634 PyByteArray_GET_SIZE(self),
635 &start, &stop, &step, &slicelen) < 0) {
636 return -1;
639 else {
640 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
641 return -1;
/* Obtain the replacement bytes.  Anything that is not a bytearray, and
   self itself, is first converted to a fresh bytearray. */
644 if (values == NULL) {
645 bytes = NULL;
646 needed = 0;
648 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
649 /* Make a copy and call this function recursively */
650 int err;
651 values = PyByteArray_FromObject(values);
652 if (values == NULL)
653 return -1;
654 err = bytes_ass_subscript(self, index, values);
655 Py_DECREF(values);
656 return err;
658 else {
659 assert(PyByteArray_Check(values));
660 bytes = ((PyByteArrayObject *)values)->ob_bytes;
661 needed = Py_SIZE(values);
663 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
664 if ((step < 0 && start < stop) ||
665 (step > 0 && start > stop))
666 stop = start;
/* Contiguous slice: the tail is moved before the resize when shrinking and
   after the resize when growing. */
667 if (step == 1) {
668 if (slicelen != needed) {
669 if (!_canresize(self))
670 return -1;
671 if (slicelen > needed) {
673 0 start stop old_size
674 | |<---slicelen--->|<-----tomove------>|
675 | |<-needed->|<-----tomove------>|
676 0 lo new_hi new_size
678 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
679 Py_SIZE(self) - stop);
681 if (PyByteArray_Resize((PyObject *)self,
682 Py_SIZE(self) + needed - slicelen) < 0)
683 return -1;
684 if (slicelen < needed) {
686 0 lo hi old_size
687 | |<-avail->|<-----tomove------>|
688 | |<----needed---->|<-----tomove------>|
689 0 lo new_hi new_size
691 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
692 Py_SIZE(self) - start - needed);
696 if (needed > 0)
697 memcpy(self->ob_bytes + start, bytes, needed);
699 return 0;
701 else {
/* Extended slice.  Note that needed == 0 selects deletion, so an empty
   replacement value takes this path as well as a real `del`. */
702 if (needed == 0) {
703 /* Delete slice */
704 Py_ssize_t cur, i;
706 if (!_canresize(self))
707 return -1;
/* Rewrite a negative step as the equivalent positive-step range. */
708 if (step < 0) {
709 stop = start + 1;
710 start = stop + step * (slicelen - 1) - 1;
711 step = -step;
/* Shift each run of kept bytes (those between two deleted positions) left
   by the number of bytes deleted so far. */
713 for (cur = start, i = 0;
714 i < slicelen; cur += step, i++) {
715 Py_ssize_t lim = step - 1;
717 if (cur + step >= PyByteArray_GET_SIZE(self))
718 lim = PyByteArray_GET_SIZE(self) - cur - 1;
720 memmove(self->ob_bytes + cur - i,
721 self->ob_bytes + cur + 1, lim);
723 /* Move the tail of the bytes, in one chunk */
724 cur = start + slicelen*step;
725 if (cur < PyByteArray_GET_SIZE(self)) {
726 memmove(self->ob_bytes + cur - slicelen,
727 self->ob_bytes + cur,
728 PyByteArray_GET_SIZE(self) - cur);
730 if (PyByteArray_Resize((PyObject *)self,
731 PyByteArray_GET_SIZE(self) - slicelen) < 0)
732 return -1;
734 return 0;
736 else {
737 /* Assign slice */
738 Py_ssize_t cur, i;
/* An extended slice can only be overwritten by a value of equal length. */
740 if (needed != slicelen) {
741 PyErr_Format(PyExc_ValueError,
742 "attempt to assign bytes of size %zd "
743 "to extended slice of size %zd",
744 needed, slicelen);
745 return -1;
747 for (cur = start, i = 0; i < slicelen; cur += step, i++)
748 self->ob_bytes[cur] = bytes[i];
749 return 0;
754 static int
755 bytes_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
757 static char *kwlist[] = {"source", "encoding", "errors", 0};
758 PyObject *arg = NULL;
759 const char *encoding = NULL;
760 const char *errors = NULL;
761 Py_ssize_t count;
762 PyObject *it;
763 PyObject *(*iternext)(PyObject *);
765 if (Py_SIZE(self) != 0) {
766 /* Empty previous contents (yes, do this first of all!) */
767 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
768 return -1;
771 /* Parse arguments */
772 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
773 &arg, &encoding, &errors))
774 return -1;
776 /* Make a quick exit if no first argument */
777 if (arg == NULL) {
778 if (encoding != NULL || errors != NULL) {
779 PyErr_SetString(PyExc_TypeError,
780 "encoding or errors without sequence argument");
781 return -1;
783 return 0;
786 if (PyBytes_Check(arg)) {
787 PyObject *new, *encoded;
788 if (encoding != NULL) {
789 encoded = PyCodec_Encode(arg, encoding, errors);
790 if (encoded == NULL)
791 return -1;
792 assert(PyBytes_Check(encoded));
794 else {
795 encoded = arg;
796 Py_INCREF(arg);
798 new = bytes_iconcat(self, arg);
799 Py_DECREF(encoded);
800 if (new == NULL)
801 return -1;
802 Py_DECREF(new);
803 return 0;
806 if (PyUnicode_Check(arg)) {
807 /* Encode via the codec registry */
808 PyObject *encoded, *new;
809 if (encoding == NULL) {
810 PyErr_SetString(PyExc_TypeError,
811 "unicode argument without an encoding");
812 return -1;
814 encoded = PyCodec_Encode(arg, encoding, errors);
815 if (encoded == NULL)
816 return -1;
817 assert(PyBytes_Check(encoded));
818 new = bytes_iconcat(self, encoded);
819 Py_DECREF(encoded);
820 if (new == NULL)
821 return -1;
822 Py_DECREF(new);
823 return 0;
826 /* If it's not unicode, there can't be encoding or errors */
827 if (encoding != NULL || errors != NULL) {
828 PyErr_SetString(PyExc_TypeError,
829 "encoding or errors without a string argument");
830 return -1;
833 /* Is it an int? */
834 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
835 if (count == -1 && PyErr_Occurred())
836 PyErr_Clear();
837 else {
838 if (count < 0) {
839 PyErr_SetString(PyExc_ValueError, "negative count");
840 return -1;
842 if (count > 0) {
843 if (PyByteArray_Resize((PyObject *)self, count))
844 return -1;
845 memset(self->ob_bytes, 0, count);
847 return 0;
850 /* Use the buffer API */
851 if (PyObject_CheckBuffer(arg)) {
852 Py_ssize_t size;
853 Py_buffer view;
854 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
855 return -1;
856 size = view.len;
857 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
858 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
859 goto fail;
860 PyBuffer_Release(&view);
861 return 0;
862 fail:
863 PyBuffer_Release(&view);
864 return -1;
867 /* XXX Optimize this if the arguments is a list, tuple */
869 /* Get the iterator */
870 it = PyObject_GetIter(arg);
871 if (it == NULL)
872 return -1;
873 iternext = *Py_TYPE(it)->tp_iternext;
875 /* Run the iterator to exhaustion */
876 for (;;) {
877 PyObject *item;
878 int rc, value;
880 /* Get the next item */
881 item = iternext(it);
882 if (item == NULL) {
883 if (PyErr_Occurred()) {
884 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
885 goto error;
886 PyErr_Clear();
888 break;
891 /* Interpret it as an int (__index__) */
892 rc = _getbytevalue(item, &value);
893 Py_DECREF(item);
894 if (!rc)
895 goto error;
897 /* Append the byte */
898 if (Py_SIZE(self) < self->ob_alloc)
899 Py_SIZE(self)++;
900 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
901 goto error;
902 self->ob_bytes[Py_SIZE(self)-1] = value;
905 /* Clean up and return success */
906 Py_DECREF(it);
907 return 0;
909 error:
910 /* Error handling when it != NULL */
911 Py_DECREF(it);
912 return -1;
915 /* Mostly copied from string_repr, but without the
916 "smart quote" functionality. */
/* Build the repr of a bytearray as a unicode object:
   bytearray(b + quote + escaped bytes + quote + ).
   A worst-case buffer of 14 + 4 * length characters is allocated, filled,
   and then resized to the length actually used.
   Returns a new reference, or NULL with an exception set (OverflowError when
   the worst-case size does not fit). */
917 static PyObject *
918 bytes_repr(PyByteArrayObject *self)
920 static const char *hexdigits = "0123456789abcdef";
921 const char *quote_prefix = "bytearray(b";
922 const char *quote_postfix = ")";
923 Py_ssize_t length = Py_SIZE(self);
924 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
925 size_t newsize = 14 + 4 * length;
926 PyObject *v;
/* The second test detects wraparound in the size computation above. */
927 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
928 PyErr_SetString(PyExc_OverflowError,
929 "bytearray object is too large to make repr");
930 return NULL;
932 v = PyUnicode_FromUnicode(NULL, newsize);
933 if (v == NULL) {
934 return NULL;
936 else {
937 register Py_ssize_t i;
938 register Py_UNICODE c;
939 register Py_UNICODE *p;
940 int quote;
942 /* Figure out which quote to use; single is preferred */
943 quote = '\'';
/* Scan the data: a single quote switches to double quotes, but the first
   double quote found forces single quotes and ends the scan. */
945 char *test, *start;
946 start = PyByteArray_AS_STRING(self);
947 for (test = start; test < start+length; ++test) {
948 if (*test == '"') {
949 quote = '\''; /* back to single */
950 goto decided;
952 else if (*test == '\'')
953 quote = '"';
955 decided:
/* Emit the prefix and the opening quote. */
959 p = PyUnicode_AS_UNICODE(v);
960 while (*quote_prefix)
961 *p++ = *quote_prefix++;
962 *p++ = quote;
/* Emit each byte: backslash-escape ' and \, use \t \n \r for those control
   characters, \xNN for NUL, other bytes below space and bytes >= 0x7f, and
   copy everything else unchanged. */
964 for (i = 0; i < length; i++) {
965 /* There's at least enough room for a hex escape
966 and a closing quote. */
967 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
968 c = self->ob_bytes[i];
969 if (c == '\'' || c == '\\')
970 *p++ = '\\', *p++ = c;
971 else if (c == '\t')
972 *p++ = '\\', *p++ = 't';
973 else if (c == '\n')
974 *p++ = '\\', *p++ = 'n';
975 else if (c == '\r')
976 *p++ = '\\', *p++ = 'r';
977 else if (c == 0)
978 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
979 else if (c < ' ' || c >= 0x7f) {
980 *p++ = '\\';
981 *p++ = 'x';
982 *p++ = hexdigits[(c & 0xf0) >> 4];
983 *p++ = hexdigits[c & 0xf];
985 else
986 *p++ = c;
/* Emit the closing quote and the postfix, then trim to the used length. */
988 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
989 *p++ = quote;
990 while (*quote_postfix) {
991 *p++ = *quote_postfix++;
993 *p = '\0';
994 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
995 Py_DECREF(v);
996 return NULL;
998 return v;
1002 static PyObject *
1003 bytes_str(PyObject *op)
1005 #if 0
1006 if (Py_BytesWarningFlag) {
1007 if (PyErr_WarnEx(PyExc_BytesWarning,
1008 "str() on a bytearray instance", 1))
1009 return NULL;
1011 return bytes_repr((PyByteArrayObject*)op);
1012 #endif
1013 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
1016 static PyObject *
1017 bytes_richcompare(PyObject *self, PyObject *other, int op)
1019 Py_ssize_t self_size, other_size;
1020 Py_buffer self_bytes, other_bytes;
1021 PyObject *res;
1022 Py_ssize_t minsize;
1023 int cmp;
1025 /* Bytes can be compared to anything that supports the (binary)
1026 buffer API. Except that a comparison with Unicode is always an
1027 error, even if the comparison is for equality. */
1028 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1029 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1030 if (Py_BytesWarningFlag && op == Py_EQ) {
1031 if (PyErr_WarnEx(PyExc_BytesWarning,
1032 "Comparsion between bytearray and string", 1))
1033 return NULL;
1036 Py_INCREF(Py_NotImplemented);
1037 return Py_NotImplemented;
1040 self_size = _getbuffer(self, &self_bytes);
1041 if (self_size < 0) {
1042 PyErr_Clear();
1043 Py_INCREF(Py_NotImplemented);
1044 return Py_NotImplemented;
1047 other_size = _getbuffer(other, &other_bytes);
1048 if (other_size < 0) {
1049 PyErr_Clear();
1050 PyBuffer_Release(&self_bytes);
1051 Py_INCREF(Py_NotImplemented);
1052 return Py_NotImplemented;
1055 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1056 /* Shortcut: if the lengths differ, the objects differ */
1057 cmp = (op == Py_NE);
1059 else {
1060 minsize = self_size;
1061 if (other_size < minsize)
1062 minsize = other_size;
1064 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1065 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1067 if (cmp == 0) {
1068 if (self_size < other_size)
1069 cmp = -1;
1070 else if (self_size > other_size)
1071 cmp = 1;
1074 switch (op) {
1075 case Py_LT: cmp = cmp < 0; break;
1076 case Py_LE: cmp = cmp <= 0; break;
1077 case Py_EQ: cmp = cmp == 0; break;
1078 case Py_NE: cmp = cmp != 0; break;
1079 case Py_GT: cmp = cmp > 0; break;
1080 case Py_GE: cmp = cmp >= 0; break;
1084 res = cmp ? Py_True : Py_False;
1085 PyBuffer_Release(&self_bytes);
1086 PyBuffer_Release(&other_bytes);
1087 Py_INCREF(res);
1088 return res;
1091 static void
1092 bytes_dealloc(PyByteArrayObject *self)
1094 if (self->ob_exports > 0) {
1095 PyErr_SetString(PyExc_SystemError,
1096 "deallocated bytearray object has exported buffers");
1097 PyErr_Print();
1099 if (self->ob_bytes != 0) {
1100 PyMem_Free(self->ob_bytes);
1102 Py_TYPE(self)->tp_free((PyObject *)self);
1106 /* -------------------------------------------------------------------- */
1107 /* Methods */
1109 #define STRINGLIB_CHAR char
1110 #define STRINGLIB_CMP memcmp
1111 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1112 #define STRINGLIB_STR PyByteArray_AS_STRING
1113 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1114 #define STRINGLIB_EMPTY nullbytes
1115 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1116 #define STRINGLIB_MUTABLE 1
1117 #define FROM_BYTEARRAY 1
1119 #include "stringlib/fastsearch.h"
1120 #include "stringlib/count.h"
1121 #include "stringlib/find.h"
1122 #include "stringlib/partition.h"
1123 #include "stringlib/ctype.h"
1124 #include "stringlib/transmogrify.h"
1127 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1128 were copied from the old char* style string object. */
1130 Py_LOCAL_INLINE(void)
1131 _adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1133 if (*end > len)
1134 *end = len;
1135 else if (*end < 0)
1136 *end += len;
1137 if (*end < 0)
1138 *end = 0;
1139 if (*start < 0)
1140 *start += len;
1141 if (*start < 0)
1142 *start = 0;
1146 Py_LOCAL_INLINE(Py_ssize_t)
1147 bytes_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1149 PyObject *subobj;
1150 Py_buffer subbuf;
1151 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1152 Py_ssize_t res;
1154 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1155 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1156 return -2;
1157 if (_getbuffer(subobj, &subbuf) < 0)
1158 return -2;
1159 if (dir > 0)
1160 res = stringlib_find_slice(
1161 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1162 subbuf.buf, subbuf.len, start, end);
1163 else
1164 res = stringlib_rfind_slice(
1165 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1166 subbuf.buf, subbuf.len, start, end);
1167 PyBuffer_Release(&subbuf);
1168 return res;
1171 PyDoc_STRVAR(find__doc__,
1172 "B.find(sub [,start [,end]]) -> int\n\
1174 Return the lowest index in B where subsection sub is found,\n\
1175 such that sub is contained within s[start,end]. Optional\n\
1176 arguments start and end are interpreted as in slice notation.\n\
1178 Return -1 on failure.");
1180 static PyObject *
1181 bytes_find(PyByteArrayObject *self, PyObject *args)
1183 Py_ssize_t result = bytes_find_internal(self, args, +1);
1184 if (result == -2)
1185 return NULL;
1186 return PyInt_FromSsize_t(result);
1189 PyDoc_STRVAR(count__doc__,
1190 "B.count(sub [,start [,end]]) -> int\n\
1192 Return the number of non-overlapping occurrences of subsection sub in\n\
1193 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1194 as in slice notation.");
1196 static PyObject *
1197 bytes_count(PyByteArrayObject *self, PyObject *args)
1199 PyObject *sub_obj;
1200 const char *str = PyByteArray_AS_STRING(self);
1201 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1202 Py_buffer vsub;
1203 PyObject *count_obj;
1205 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1206 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1207 return NULL;
1209 if (_getbuffer(sub_obj, &vsub) < 0)
1210 return NULL;
1212 _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
1214 count_obj = PyInt_FromSsize_t(
1215 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1217 PyBuffer_Release(&vsub);
1218 return count_obj;
1222 PyDoc_STRVAR(index__doc__,
1223 "B.index(sub [,start [,end]]) -> int\n\
1225 Like B.find() but raise ValueError when the subsection is not found.");
1227 static PyObject *
1228 bytes_index(PyByteArrayObject *self, PyObject *args)
1230 Py_ssize_t result = bytes_find_internal(self, args, +1);
1231 if (result == -2)
1232 return NULL;
1233 if (result == -1) {
1234 PyErr_SetString(PyExc_ValueError,
1235 "subsection not found");
1236 return NULL;
1238 return PyInt_FromSsize_t(result);
1242 PyDoc_STRVAR(rfind__doc__,
1243 "B.rfind(sub [,start [,end]]) -> int\n\
1245 Return the highest index in B where subsection sub is found,\n\
1246 such that sub is contained within s[start,end]. Optional\n\
1247 arguments start and end are interpreted as in slice notation.\n\
1249 Return -1 on failure.");
1251 static PyObject *
1252 bytes_rfind(PyByteArrayObject *self, PyObject *args)
1254 Py_ssize_t result = bytes_find_internal(self, args, -1);
1255 if (result == -2)
1256 return NULL;
1257 return PyInt_FromSsize_t(result);
1261 PyDoc_STRVAR(rindex__doc__,
1262 "B.rindex(sub [,start [,end]]) -> int\n\
1264 Like B.rfind() but raise ValueError when the subsection is not found.");
1266 static PyObject *
1267 bytes_rindex(PyByteArrayObject *self, PyObject *args)
1269 Py_ssize_t result = bytes_find_internal(self, args, -1);
1270 if (result == -2)
1271 return NULL;
1272 if (result == -1) {
1273 PyErr_SetString(PyExc_ValueError,
1274 "subsection not found");
1275 return NULL;
1277 return PyInt_FromSsize_t(result);
1281 static int
1282 bytes_contains(PyObject *self, PyObject *arg)
1284 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1285 if (ival == -1 && PyErr_Occurred()) {
1286 Py_buffer varg;
1287 int pos;
1288 PyErr_Clear();
1289 if (_getbuffer(arg, &varg) < 0)
1290 return -1;
1291 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1292 varg.buf, varg.len, 0);
1293 PyBuffer_Release(&varg);
1294 return pos >= 0;
1296 if (ival < 0 || ival >= 256) {
1297 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1298 return -1;
1301 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1305 /* Matches the end (direction >= 0) or start (direction < 0) of self
1306 * against substr, using the start and end arguments. Returns
1307 * -1 on error, 0 if not found and 1 if found.
1309 Py_LOCAL(int)
1310 _bytes_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1311 Py_ssize_t end, int direction)
1313 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1314 const char* str;
1315 Py_buffer vsubstr;
1316 int rv = 0;
1318 str = PyByteArray_AS_STRING(self);
1320 if (_getbuffer(substr, &vsubstr) < 0)
1321 return -1;
1323 _adjust_indices(&start, &end, len);
1325 if (direction < 0) {
1326 /* startswith */
1327 if (start+vsubstr.len > len) {
1328 goto done;
1330 } else {
1331 /* endswith */
1332 if (end-start < vsubstr.len || start > len) {
1333 goto done;
1336 if (end-vsubstr.len > start)
1337 start = end - vsubstr.len;
1339 if (end-start >= vsubstr.len)
1340 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1342 done:
1343 PyBuffer_Release(&vsubstr);
1344 return rv;
1348 PyDoc_STRVAR(startswith__doc__,
1349 "B.startswith(prefix [,start [,end]]) -> bool\n\
1351 Return True if B starts with the specified prefix, False otherwise.\n\
1352 With optional start, test B beginning at that position.\n\
1353 With optional end, stop comparing B at that position.\n\
1354 prefix can also be a tuple of strings to try.");
1356 static PyObject *
1357 bytes_startswith(PyByteArrayObject *self, PyObject *args)
1359 Py_ssize_t start = 0;
1360 Py_ssize_t end = PY_SSIZE_T_MAX;
1361 PyObject *subobj;
1362 int result;
1364 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1365 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1366 return NULL;
1367 if (PyTuple_Check(subobj)) {
1368 Py_ssize_t i;
1369 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1370 result = _bytes_tailmatch(self,
1371 PyTuple_GET_ITEM(subobj, i),
1372 start, end, -1);
1373 if (result == -1)
1374 return NULL;
1375 else if (result) {
1376 Py_RETURN_TRUE;
1379 Py_RETURN_FALSE;
1381 result = _bytes_tailmatch(self, subobj, start, end, -1);
1382 if (result == -1)
1383 return NULL;
1384 else
1385 return PyBool_FromLong(result);
1388 PyDoc_STRVAR(endswith__doc__,
1389 "B.endswith(suffix [,start [,end]]) -> bool\n\
1391 Return True if B ends with the specified suffix, False otherwise.\n\
1392 With optional start, test B beginning at that position.\n\
1393 With optional end, stop comparing B at that position.\n\
1394 suffix can also be a tuple of strings to try.");
1396 static PyObject *
1397 bytes_endswith(PyByteArrayObject *self, PyObject *args)
1399 Py_ssize_t start = 0;
1400 Py_ssize_t end = PY_SSIZE_T_MAX;
1401 PyObject *subobj;
1402 int result;
1404 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1405 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1406 return NULL;
1407 if (PyTuple_Check(subobj)) {
1408 Py_ssize_t i;
1409 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1410 result = _bytes_tailmatch(self,
1411 PyTuple_GET_ITEM(subobj, i),
1412 start, end, +1);
1413 if (result == -1)
1414 return NULL;
1415 else if (result) {
1416 Py_RETURN_TRUE;
1419 Py_RETURN_FALSE;
1421 result = _bytes_tailmatch(self, subobj, start, end, +1);
1422 if (result == -1)
1423 return NULL;
1424 else
1425 return PyBool_FromLong(result);
1429 PyDoc_STRVAR(translate__doc__,
1430 "B.translate(table[, deletechars]) -> bytearray\n\
1432 Return a copy of B, where all characters occurring in the\n\
1433 optional argument deletechars are removed, and the remaining\n\
1434 characters have been mapped through the given translation\n\
1435 table, which must be a bytes object of length 256.");
1437 static PyObject *
1438 bytes_translate(PyByteArrayObject *self, PyObject *args)
1440 register char *input, *output;
1441 register const char *table;
1442 register Py_ssize_t i, c;
1443 PyObject *input_obj = (PyObject*)self;
1444 const char *output_start;
1445 Py_ssize_t inlen;
1446 PyObject *result;
1447 int trans_table[256];
1448 PyObject *tableobj, *delobj = NULL;
1449 Py_buffer vtable, vdel;
1451 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1452 &tableobj, &delobj))
1453 return NULL;
1455 if (_getbuffer(tableobj, &vtable) < 0)
1456 return NULL;
1458 if (vtable.len != 256) {
1459 PyErr_SetString(PyExc_ValueError,
1460 "translation table must be 256 characters long");
1461 result = NULL;
1462 goto done;
1465 if (delobj != NULL) {
1466 if (_getbuffer(delobj, &vdel) < 0) {
1467 result = NULL;
1468 goto done;
1471 else {
1472 vdel.buf = NULL;
1473 vdel.len = 0;
1476 table = (const char *)vtable.buf;
1477 inlen = PyByteArray_GET_SIZE(input_obj);
1478 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1479 if (result == NULL)
1480 goto done;
1481 output_start = output = PyByteArray_AsString(result);
1482 input = PyByteArray_AS_STRING(input_obj);
1484 if (vdel.len == 0) {
1485 /* If no deletions are required, use faster code */
1486 for (i = inlen; --i >= 0; ) {
1487 c = Py_CHARMASK(*input++);
1488 *output++ = table[c];
1490 goto done;
1493 for (i = 0; i < 256; i++)
1494 trans_table[i] = Py_CHARMASK(table[i]);
1496 for (i = 0; i < vdel.len; i++)
1497 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1499 for (i = inlen; --i >= 0; ) {
1500 c = Py_CHARMASK(*input++);
1501 if (trans_table[c] != -1)
1502 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1503 continue;
1505 /* Fix the size of the resulting string */
1506 if (inlen > 0)
1507 PyByteArray_Resize(result, output - output_start);
1509 done:
1510 PyBuffer_Release(&vtable);
1511 if (delobj != NULL)
1512 PyBuffer_Release(&vdel);
1513 return result;
/* Search directions accepted by findstring() and countstring() */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate byte c in the first target_len bytes of target (memchr wrapper);
   evaluates to a char * or NULL. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* Don't call if length < 2 */
/* Compares the first and last bytes before falling back to memcmp for the
   middle.  Arguments are parenthesized so expressions can be passed
   safely; note that each argument is still evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                  \
    ((target)[(offset)] == (pattern)[0] &&                                \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&            \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1532 /* Bytes ops must return a string, create a copy */
1533 Py_LOCAL(PyByteArrayObject *)
1534 return_self(PyByteArrayObject *self)
1536 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1537 PyByteArray_AS_STRING(self),
1538 PyByteArray_GET_SIZE(self));
1541 Py_LOCAL_INLINE(Py_ssize_t)
1542 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1544 Py_ssize_t count=0;
1545 const char *start=target;
1546 const char *end=target+target_len;
1548 while ( (start=findchar(start, end-start, c)) != NULL ) {
1549 count++;
1550 if (count >= maxcount)
1551 break;
1552 start += 1;
1554 return count;
1557 Py_LOCAL(Py_ssize_t)
1558 findstring(const char *target, Py_ssize_t target_len,
1559 const char *pattern, Py_ssize_t pattern_len,
1560 Py_ssize_t start,
1561 Py_ssize_t end,
1562 int direction)
1564 if (start < 0) {
1565 start += target_len;
1566 if (start < 0)
1567 start = 0;
1569 if (end > target_len) {
1570 end = target_len;
1571 } else if (end < 0) {
1572 end += target_len;
1573 if (end < 0)
1574 end = 0;
1577 /* zero-length substrings always match at the first attempt */
1578 if (pattern_len == 0)
1579 return (direction > 0) ? start : end;
1581 end -= pattern_len;
1583 if (direction < 0) {
1584 for (; end >= start; end--)
1585 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1586 return end;
1587 } else {
1588 for (; start <= end; start++)
1589 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1590 return start;
1592 return -1;
1595 Py_LOCAL_INLINE(Py_ssize_t)
1596 countstring(const char *target, Py_ssize_t target_len,
1597 const char *pattern, Py_ssize_t pattern_len,
1598 Py_ssize_t start,
1599 Py_ssize_t end,
1600 int direction, Py_ssize_t maxcount)
1602 Py_ssize_t count=0;
1604 if (start < 0) {
1605 start += target_len;
1606 if (start < 0)
1607 start = 0;
1609 if (end > target_len) {
1610 end = target_len;
1611 } else if (end < 0) {
1612 end += target_len;
1613 if (end < 0)
1614 end = 0;
1617 /* zero-length substrings match everywhere */
1618 if (pattern_len == 0 || maxcount == 0) {
1619 if (target_len+1 < maxcount)
1620 return target_len+1;
1621 return maxcount;
1624 end -= pattern_len;
1625 if (direction < 0) {
1626 for (; (end >= start); end--)
1627 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1628 count++;
1629 if (--maxcount <= 0) break;
1630 end -= pattern_len-1;
1632 } else {
1633 for (; (start <= end); start++)
1634 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1635 count++;
1636 if (--maxcount <= 0)
1637 break;
1638 start += pattern_len-1;
1641 return count;
1645 /* Algorithms for different cases of string replacement */
1647 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1648 Py_LOCAL(PyByteArrayObject *)
1649 replace_interleave(PyByteArrayObject *self,
1650 const char *to_s, Py_ssize_t to_len,
1651 Py_ssize_t maxcount)
1653 char *self_s, *result_s;
1654 Py_ssize_t self_len, result_len;
1655 Py_ssize_t count, i, product;
1656 PyByteArrayObject *result;
1658 self_len = PyByteArray_GET_SIZE(self);
1660 /* 1 at the end plus 1 after every character */
1661 count = self_len+1;
1662 if (maxcount < count)
1663 count = maxcount;
1665 /* Check for overflow */
1666 /* result_len = count * to_len + self_len; */
1667 product = count * to_len;
1668 if (product / to_len != count) {
1669 PyErr_SetString(PyExc_OverflowError,
1670 "replace string is too long");
1671 return NULL;
1673 result_len = product + self_len;
1674 if (result_len < 0) {
1675 PyErr_SetString(PyExc_OverflowError,
1676 "replace string is too long");
1677 return NULL;
1680 if (! (result = (PyByteArrayObject *)
1681 PyByteArray_FromStringAndSize(NULL, result_len)) )
1682 return NULL;
1684 self_s = PyByteArray_AS_STRING(self);
1685 result_s = PyByteArray_AS_STRING(result);
1687 /* TODO: special case single character, which doesn't need memcpy */
1689 /* Lay the first one down (guaranteed this will occur) */
1690 Py_MEMCPY(result_s, to_s, to_len);
1691 result_s += to_len;
1692 count -= 1;
1694 for (i=0; i<count; i++) {
1695 *result_s++ = *self_s++;
1696 Py_MEMCPY(result_s, to_s, to_len);
1697 result_s += to_len;
1700 /* Copy the rest of the original string */
1701 Py_MEMCPY(result_s, self_s, self_len-i);
1703 return result;
1706 /* Special case for deleting a single character */
1707 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1708 Py_LOCAL(PyByteArrayObject *)
1709 replace_delete_single_character(PyByteArrayObject *self,
1710 char from_c, Py_ssize_t maxcount)
1712 char *self_s, *result_s;
1713 char *start, *next, *end;
1714 Py_ssize_t self_len, result_len;
1715 Py_ssize_t count;
1716 PyByteArrayObject *result;
1718 self_len = PyByteArray_GET_SIZE(self);
1719 self_s = PyByteArray_AS_STRING(self);
1721 count = countchar(self_s, self_len, from_c, maxcount);
1722 if (count == 0) {
1723 return return_self(self);
1726 result_len = self_len - count; /* from_len == 1 */
1727 assert(result_len>=0);
1729 if ( (result = (PyByteArrayObject *)
1730 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1731 return NULL;
1732 result_s = PyByteArray_AS_STRING(result);
1734 start = self_s;
1735 end = self_s + self_len;
1736 while (count-- > 0) {
1737 next = findchar(start, end-start, from_c);
1738 if (next == NULL)
1739 break;
1740 Py_MEMCPY(result_s, start, next-start);
1741 result_s += (next-start);
1742 start = next+1;
1744 Py_MEMCPY(result_s, start, end-start);
1746 return result;
1749 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1751 Py_LOCAL(PyByteArrayObject *)
1752 replace_delete_substring(PyByteArrayObject *self,
1753 const char *from_s, Py_ssize_t from_len,
1754 Py_ssize_t maxcount)
1756 char *self_s, *result_s;
1757 char *start, *next, *end;
1758 Py_ssize_t self_len, result_len;
1759 Py_ssize_t count, offset;
1760 PyByteArrayObject *result;
1762 self_len = PyByteArray_GET_SIZE(self);
1763 self_s = PyByteArray_AS_STRING(self);
1765 count = countstring(self_s, self_len,
1766 from_s, from_len,
1767 0, self_len, 1,
1768 maxcount);
1770 if (count == 0) {
1771 /* no matches */
1772 return return_self(self);
1775 result_len = self_len - (count * from_len);
1776 assert (result_len>=0);
1778 if ( (result = (PyByteArrayObject *)
1779 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1780 return NULL;
1782 result_s = PyByteArray_AS_STRING(result);
1784 start = self_s;
1785 end = self_s + self_len;
1786 while (count-- > 0) {
1787 offset = findstring(start, end-start,
1788 from_s, from_len,
1789 0, end-start, FORWARD);
1790 if (offset == -1)
1791 break;
1792 next = start + offset;
1794 Py_MEMCPY(result_s, start, next-start);
1796 result_s += (next-start);
1797 start = next+from_len;
1799 Py_MEMCPY(result_s, start, end-start);
1800 return result;
1803 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1804 Py_LOCAL(PyByteArrayObject *)
1805 replace_single_character_in_place(PyByteArrayObject *self,
1806 char from_c, char to_c,
1807 Py_ssize_t maxcount)
1809 char *self_s, *result_s, *start, *end, *next;
1810 Py_ssize_t self_len;
1811 PyByteArrayObject *result;
1813 /* The result string will be the same size */
1814 self_s = PyByteArray_AS_STRING(self);
1815 self_len = PyByteArray_GET_SIZE(self);
1817 next = findchar(self_s, self_len, from_c);
1819 if (next == NULL) {
1820 /* No matches; return the original bytes */
1821 return return_self(self);
1824 /* Need to make a new bytes */
1825 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1826 if (result == NULL)
1827 return NULL;
1828 result_s = PyByteArray_AS_STRING(result);
1829 Py_MEMCPY(result_s, self_s, self_len);
1831 /* change everything in-place, starting with this one */
1832 start = result_s + (next-self_s);
1833 *start = to_c;
1834 start++;
1835 end = result_s + self_len;
1837 while (--maxcount > 0) {
1838 next = findchar(start, end-start, from_c);
1839 if (next == NULL)
1840 break;
1841 *next = to_c;
1842 start = next+1;
1845 return result;
1848 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1849 Py_LOCAL(PyByteArrayObject *)
1850 replace_substring_in_place(PyByteArrayObject *self,
1851 const char *from_s, Py_ssize_t from_len,
1852 const char *to_s, Py_ssize_t to_len,
1853 Py_ssize_t maxcount)
1855 char *result_s, *start, *end;
1856 char *self_s;
1857 Py_ssize_t self_len, offset;
1858 PyByteArrayObject *result;
1860 /* The result bytes will be the same size */
1862 self_s = PyByteArray_AS_STRING(self);
1863 self_len = PyByteArray_GET_SIZE(self);
1865 offset = findstring(self_s, self_len,
1866 from_s, from_len,
1867 0, self_len, FORWARD);
1868 if (offset == -1) {
1869 /* No matches; return the original bytes */
1870 return return_self(self);
1873 /* Need to make a new bytes */
1874 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1875 if (result == NULL)
1876 return NULL;
1877 result_s = PyByteArray_AS_STRING(result);
1878 Py_MEMCPY(result_s, self_s, self_len);
1880 /* change everything in-place, starting with this one */
1881 start = result_s + offset;
1882 Py_MEMCPY(start, to_s, from_len);
1883 start += from_len;
1884 end = result_s + self_len;
1886 while ( --maxcount > 0) {
1887 offset = findstring(start, end-start,
1888 from_s, from_len,
1889 0, end-start, FORWARD);
1890 if (offset==-1)
1891 break;
1892 Py_MEMCPY(start+offset, to_s, from_len);
1893 start += offset+from_len;
1896 return result;
1899 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1900 Py_LOCAL(PyByteArrayObject *)
1901 replace_single_character(PyByteArrayObject *self,
1902 char from_c,
1903 const char *to_s, Py_ssize_t to_len,
1904 Py_ssize_t maxcount)
1906 char *self_s, *result_s;
1907 char *start, *next, *end;
1908 Py_ssize_t self_len, result_len;
1909 Py_ssize_t count, product;
1910 PyByteArrayObject *result;
1912 self_s = PyByteArray_AS_STRING(self);
1913 self_len = PyByteArray_GET_SIZE(self);
1915 count = countchar(self_s, self_len, from_c, maxcount);
1916 if (count == 0) {
1917 /* no matches, return unchanged */
1918 return return_self(self);
1921 /* use the difference between current and new, hence the "-1" */
1922 /* result_len = self_len + count * (to_len-1) */
1923 product = count * (to_len-1);
1924 if (product / (to_len-1) != count) {
1925 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1926 return NULL;
1928 result_len = self_len + product;
1929 if (result_len < 0) {
1930 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1931 return NULL;
1934 if ( (result = (PyByteArrayObject *)
1935 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1936 return NULL;
1937 result_s = PyByteArray_AS_STRING(result);
1939 start = self_s;
1940 end = self_s + self_len;
1941 while (count-- > 0) {
1942 next = findchar(start, end-start, from_c);
1943 if (next == NULL)
1944 break;
1946 if (next == start) {
1947 /* replace with the 'to' */
1948 Py_MEMCPY(result_s, to_s, to_len);
1949 result_s += to_len;
1950 start += 1;
1951 } else {
1952 /* copy the unchanged old then the 'to' */
1953 Py_MEMCPY(result_s, start, next-start);
1954 result_s += (next-start);
1955 Py_MEMCPY(result_s, to_s, to_len);
1956 result_s += to_len;
1957 start = next+1;
1960 /* Copy the remainder of the remaining bytes */
1961 Py_MEMCPY(result_s, start, end-start);
1963 return result;
1966 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1967 Py_LOCAL(PyByteArrayObject *)
1968 replace_substring(PyByteArrayObject *self,
1969 const char *from_s, Py_ssize_t from_len,
1970 const char *to_s, Py_ssize_t to_len,
1971 Py_ssize_t maxcount)
1973 char *self_s, *result_s;
1974 char *start, *next, *end;
1975 Py_ssize_t self_len, result_len;
1976 Py_ssize_t count, offset, product;
1977 PyByteArrayObject *result;
1979 self_s = PyByteArray_AS_STRING(self);
1980 self_len = PyByteArray_GET_SIZE(self);
1982 count = countstring(self_s, self_len,
1983 from_s, from_len,
1984 0, self_len, FORWARD, maxcount);
1985 if (count == 0) {
1986 /* no matches, return unchanged */
1987 return return_self(self);
1990 /* Check for overflow */
1991 /* result_len = self_len + count * (to_len-from_len) */
1992 product = count * (to_len-from_len);
1993 if (product / (to_len-from_len) != count) {
1994 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1995 return NULL;
1997 result_len = self_len + product;
1998 if (result_len < 0) {
1999 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2000 return NULL;
2003 if ( (result = (PyByteArrayObject *)
2004 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
2005 return NULL;
2006 result_s = PyByteArray_AS_STRING(result);
2008 start = self_s;
2009 end = self_s + self_len;
2010 while (count-- > 0) {
2011 offset = findstring(start, end-start,
2012 from_s, from_len,
2013 0, end-start, FORWARD);
2014 if (offset == -1)
2015 break;
2016 next = start+offset;
2017 if (next == start) {
2018 /* replace with the 'to' */
2019 Py_MEMCPY(result_s, to_s, to_len);
2020 result_s += to_len;
2021 start += from_len;
2022 } else {
2023 /* copy the unchanged old then the 'to' */
2024 Py_MEMCPY(result_s, start, next-start);
2025 result_s += (next-start);
2026 Py_MEMCPY(result_s, to_s, to_len);
2027 result_s += to_len;
2028 start = next+from_len;
2031 /* Copy the remainder of the remaining bytes */
2032 Py_MEMCPY(result_s, start, end-start);
2034 return result;
2038 Py_LOCAL(PyByteArrayObject *)
2039 replace(PyByteArrayObject *self,
2040 const char *from_s, Py_ssize_t from_len,
2041 const char *to_s, Py_ssize_t to_len,
2042 Py_ssize_t maxcount)
2044 if (maxcount < 0) {
2045 maxcount = PY_SSIZE_T_MAX;
2046 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
2047 /* nothing to do; return the original bytes */
2048 return return_self(self);
2051 if (maxcount == 0 ||
2052 (from_len == 0 && to_len == 0)) {
2053 /* nothing to do; return the original bytes */
2054 return return_self(self);
2057 /* Handle zero-length special cases */
2059 if (from_len == 0) {
2060 /* insert the 'to' bytes everywhere. */
2061 /* >>> "Python".replace("", ".") */
2062 /* '.P.y.t.h.o.n.' */
2063 return replace_interleave(self, to_s, to_len, maxcount);
2066 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2067 /* point for an empty self bytes to generate a non-empty bytes */
2068 /* Special case so the remaining code always gets a non-empty bytes */
2069 if (PyByteArray_GET_SIZE(self) == 0) {
2070 return return_self(self);
2073 if (to_len == 0) {
2074 /* delete all occurances of 'from' bytes */
2075 if (from_len == 1) {
2076 return replace_delete_single_character(
2077 self, from_s[0], maxcount);
2078 } else {
2079 return replace_delete_substring(self, from_s, from_len, maxcount);
2083 /* Handle special case where both bytes have the same length */
2085 if (from_len == to_len) {
2086 if (from_len == 1) {
2087 return replace_single_character_in_place(
2088 self,
2089 from_s[0],
2090 to_s[0],
2091 maxcount);
2092 } else {
2093 return replace_substring_in_place(
2094 self, from_s, from_len, to_s, to_len, maxcount);
2098 /* Otherwise use the more generic algorithms */
2099 if (from_len == 1) {
2100 return replace_single_character(self, from_s[0],
2101 to_s, to_len, maxcount);
2102 } else {
2103 /* len('from')>=2, len('to')>=1 */
2104 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2109 PyDoc_STRVAR(replace__doc__,
2110 "B.replace(old, new[, count]) -> bytes\n\
2112 Return a copy of B with all occurrences of subsection\n\
2113 old replaced by new. If the optional argument count is\n\
2114 given, only the first count occurrences are replaced.");
2116 static PyObject *
2117 bytes_replace(PyByteArrayObject *self, PyObject *args)
2119 Py_ssize_t count = -1;
2120 PyObject *from, *to, *res;
2121 Py_buffer vfrom, vto;
2123 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2124 return NULL;
2126 if (_getbuffer(from, &vfrom) < 0)
2127 return NULL;
2128 if (_getbuffer(to, &vto) < 0) {
2129 PyBuffer_Release(&vfrom);
2130 return NULL;
2133 res = (PyObject *)replace((PyByteArrayObject *) self,
2134 vfrom.buf, vfrom.len,
2135 vto.buf, vto.len, count);
2137 PyBuffer_Release(&vfrom);
2138 PyBuffer_Release(&vto);
2139 return res;
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesized so that an expression can be passed. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* The SPLIT_* macros below expect the enclosing function to provide the
   locals `str` and `list`, and an `onError` label.  SPLIT_ADD additionally
   expects a `count` local. */

/* Build a bytearray from data[left:right] and append it to `list`. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyByteArray_FromStringAndSize((data) + (left),       \
                                     (right) - (left));     \
    if (str == NULL)                                        \
        goto onError;                                   \
    if (PyList_Append(list, str)) {                         \
        Py_DECREF(str);                                 \
        goto onError;                                   \
    }                                                       \
    else                                                    \
        Py_DECREF(str);

/* Build a bytearray from data[left:right] and store it as item `count` of
   `list`: directly into the slot while count < MAX_PREALLOC, via
   PyList_Append otherwise.  Increments `count`. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyByteArray_FromStringAndSize((data) + (left),       \
                                     (right) - (left));     \
    if (str == NULL)                                        \
        goto onError;                                   \
    if (count < MAX_PREALLOC) {                             \
        PyList_SET_ITEM(list, count, str);              \
    } else {                                                \
        if (PyList_Append(list, str)) {                 \
            Py_DECREF(str);                         \
            goto onError;                           \
        }                                               \
        else                                            \
            Py_DECREF(str);                         \
    }                                                       \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE((list)) = count
2189 Py_LOCAL_INLINE(PyObject *)
2190 split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2192 register Py_ssize_t i, j, count = 0;
2193 PyObject *str;
2194 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2196 if (list == NULL)
2197 return NULL;
2199 i = j = 0;
2200 while ((j < len) && (maxcount-- > 0)) {
2201 for(; j < len; j++) {
2202 /* I found that using memchr makes no difference */
2203 if (s[j] == ch) {
2204 SPLIT_ADD(s, i, j);
2205 i = j = j + 1;
2206 break;
2210 if (i <= len) {
2211 SPLIT_ADD(s, i, len);
2213 FIX_PREALLOC_SIZE(list);
2214 return list;
2216 onError:
2217 Py_DECREF(list);
2218 return NULL;
2222 Py_LOCAL_INLINE(PyObject *)
2223 split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2225 register Py_ssize_t i, j, count = 0;
2226 PyObject *str;
2227 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2229 if (list == NULL)
2230 return NULL;
2232 for (i = j = 0; i < len; ) {
2233 /* find a token */
2234 while (i < len && ISSPACE(s[i]))
2235 i++;
2236 j = i;
2237 while (i < len && !ISSPACE(s[i]))
2238 i++;
2239 if (j < i) {
2240 if (maxcount-- <= 0)
2241 break;
2242 SPLIT_ADD(s, j, i);
2243 while (i < len && ISSPACE(s[i]))
2244 i++;
2245 j = i;
2248 if (j < len) {
2249 SPLIT_ADD(s, j, len);
2251 FIX_PREALLOC_SIZE(list);
2252 return list;
2254 onError:
2255 Py_DECREF(list);
2256 return NULL;
2259 PyDoc_STRVAR(split__doc__,
2260 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2262 Return a list of the sections in B, using sep as the delimiter.\n\
2263 If sep is not given, B is split on ASCII whitespace characters\n\
2264 (space, tab, return, newline, formfeed, vertical tab).\n\
2265 If maxsplit is given, at most maxsplit splits are done.");
2267 static PyObject *
2268 bytes_split(PyByteArrayObject *self, PyObject *args)
2270 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2271 Py_ssize_t maxsplit = -1, count = 0;
2272 const char *s = PyByteArray_AS_STRING(self), *sub;
2273 PyObject *list, *str, *subobj = Py_None;
2274 Py_buffer vsub;
2275 #ifdef USE_FAST
2276 Py_ssize_t pos;
2277 #endif
2279 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2280 return NULL;
2281 if (maxsplit < 0)
2282 maxsplit = PY_SSIZE_T_MAX;
2284 if (subobj == Py_None)
2285 return split_whitespace(s, len, maxsplit);
2287 if (_getbuffer(subobj, &vsub) < 0)
2288 return NULL;
2289 sub = vsub.buf;
2290 n = vsub.len;
2292 if (n == 0) {
2293 PyErr_SetString(PyExc_ValueError, "empty separator");
2294 PyBuffer_Release(&vsub);
2295 return NULL;
2297 if (n == 1) {
2298 list = split_char(s, len, sub[0], maxsplit);
2299 PyBuffer_Release(&vsub);
2300 return list;
2303 list = PyList_New(PREALLOC_SIZE(maxsplit));
2304 if (list == NULL) {
2305 PyBuffer_Release(&vsub);
2306 return NULL;
2309 #ifdef USE_FAST
2310 i = j = 0;
2311 while (maxsplit-- > 0) {
2312 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2313 if (pos < 0)
2314 break;
2315 j = i+pos;
2316 SPLIT_ADD(s, i, j);
2317 i = j + n;
2319 #else
2320 i = j = 0;
2321 while ((j+n <= len) && (maxsplit-- > 0)) {
2322 for (; j+n <= len; j++) {
2323 if (Py_STRING_MATCH(s, j, sub, n)) {
2324 SPLIT_ADD(s, i, j);
2325 i = j = j + n;
2326 break;
2330 #endif
2331 SPLIT_ADD(s, i, len);
2332 FIX_PREALLOC_SIZE(list);
2333 PyBuffer_Release(&vsub);
2334 return list;
2336 onError:
2337 Py_DECREF(list);
2338 PyBuffer_Release(&vsub);
2339 return NULL;
2342 /* stringlib's partition shares nullbytes in some cases.
2343 undo this, we don't want the nullbytes to be shared. */
2344 static PyObject *
2345 make_nullbytes_unique(PyObject *result)
2347 if (result != NULL) {
2348 int i;
2349 assert(PyTuple_Check(result));
2350 assert(PyTuple_GET_SIZE(result) == 3);
2351 for (i = 0; i < 3; i++) {
2352 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2353 PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
2354 if (new == NULL) {
2355 Py_DECREF(result);
2356 result = NULL;
2357 break;
2359 Py_DECREF(nullbytes);
2360 PyTuple_SET_ITEM(result, i, new);
2364 return result;
2367 PyDoc_STRVAR(partition__doc__,
2368 "B.partition(sep) -> (head, sep, tail)\n\
2370 Searches for the separator sep in B, and returns the part before it,\n\
2371 the separator itself, and the part after it. If the separator is not\n\
2372 found, returns B and two empty bytearray objects.");
2374 static PyObject *
2375 bytes_partition(PyByteArrayObject *self, PyObject *sep_obj)
2377 PyObject *bytesep, *result;
2379 bytesep = PyByteArray_FromObject(sep_obj);
2380 if (! bytesep)
2381 return NULL;
2383 result = stringlib_partition(
2384 (PyObject*) self,
2385 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2386 bytesep,
2387 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2390 Py_DECREF(bytesep);
2391 return make_nullbytes_unique(result);
2394 PyDoc_STRVAR(rpartition__doc__,
2395 "B.rpartition(sep) -> (tail, sep, head)\n\
2397 Searches for the separator sep in B, starting at the end of B,\n\
2398 and returns the part before it, the separator itself, and the\n\
2399 part after it. If the separator is not found, returns two empty\n\
2400 bytearray objects and B.");
2402 static PyObject *
2403 bytes_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2405 PyObject *bytesep, *result;
2407 bytesep = PyByteArray_FromObject(sep_obj);
2408 if (! bytesep)
2409 return NULL;
2411 result = stringlib_rpartition(
2412 (PyObject*) self,
2413 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2414 bytesep,
2415 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2418 Py_DECREF(bytesep);
2419 return make_nullbytes_unique(result);
2422 Py_LOCAL_INLINE(PyObject *)
2423 rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2425 register Py_ssize_t i, j, count=0;
2426 PyObject *str;
2427 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2429 if (list == NULL)
2430 return NULL;
2432 i = j = len - 1;
2433 while ((i >= 0) && (maxcount-- > 0)) {
2434 for (; i >= 0; i--) {
2435 if (s[i] == ch) {
2436 SPLIT_ADD(s, i + 1, j + 1);
2437 j = i = i - 1;
2438 break;
2442 if (j >= -1) {
2443 SPLIT_ADD(s, 0, j + 1);
2445 FIX_PREALLOC_SIZE(list);
2446 if (PyList_Reverse(list) < 0)
2447 goto onError;
2449 return list;
2451 onError:
2452 Py_DECREF(list);
2453 return NULL;
2456 Py_LOCAL_INLINE(PyObject *)
2457 rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2459 register Py_ssize_t i, j, count = 0;
2460 PyObject *str;
2461 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2463 if (list == NULL)
2464 return NULL;
2466 for (i = j = len - 1; i >= 0; ) {
2467 /* find a token */
2468 while (i >= 0 && ISSPACE(s[i]))
2469 i--;
2470 j = i;
2471 while (i >= 0 && !ISSPACE(s[i]))
2472 i--;
2473 if (j > i) {
2474 if (maxcount-- <= 0)
2475 break;
2476 SPLIT_ADD(s, i + 1, j + 1);
2477 while (i >= 0 && ISSPACE(s[i]))
2478 i--;
2479 j = i;
2482 if (j >= 0) {
2483 SPLIT_ADD(s, 0, j + 1);
2485 FIX_PREALLOC_SIZE(list);
2486 if (PyList_Reverse(list) < 0)
2487 goto onError;
2489 return list;
2491 onError:
2492 Py_DECREF(list);
2493 return NULL;
2496 PyDoc_STRVAR(rsplit__doc__,
2497 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2499 Return a list of the sections in B, using sep as the delimiter,\n\
2500 starting at the end of B and working to the front.\n\
2501 If sep is not given, B is split on ASCII whitespace characters\n\
2502 (space, tab, return, newline, formfeed, vertical tab).\n\
2503 If maxsplit is given, at most maxsplit splits are done.");
2505 static PyObject *
2506 bytes_rsplit(PyByteArrayObject *self, PyObject *args)
2508 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2509 Py_ssize_t maxsplit = -1, count = 0;
2510 const char *s = PyByteArray_AS_STRING(self), *sub;
2511 PyObject *list, *str, *subobj = Py_None;
2512 Py_buffer vsub;
2514 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2515 return NULL;
2516 if (maxsplit < 0)
2517 maxsplit = PY_SSIZE_T_MAX;
2519 if (subobj == Py_None)
2520 return rsplit_whitespace(s, len, maxsplit);
2522 if (_getbuffer(subobj, &vsub) < 0)
2523 return NULL;
2524 sub = vsub.buf;
2525 n = vsub.len;
2527 if (n == 0) {
2528 PyErr_SetString(PyExc_ValueError, "empty separator");
2529 PyBuffer_Release(&vsub);
2530 return NULL;
2532 else if (n == 1) {
2533 list = rsplit_char(s, len, sub[0], maxsplit);
2534 PyBuffer_Release(&vsub);
2535 return list;
2538 list = PyList_New(PREALLOC_SIZE(maxsplit));
2539 if (list == NULL) {
2540 PyBuffer_Release(&vsub);
2541 return NULL;
2544 j = len;
2545 i = j - n;
2547 while ( (i >= 0) && (maxsplit-- > 0) ) {
2548 for (; i>=0; i--) {
2549 if (Py_STRING_MATCH(s, i, sub, n)) {
2550 SPLIT_ADD(s, i + n, j);
2551 j = i;
2552 i -= n;
2553 break;
2557 SPLIT_ADD(s, 0, j);
2558 FIX_PREALLOC_SIZE(list);
2559 if (PyList_Reverse(list) < 0)
2560 goto onError;
2561 PyBuffer_Release(&vsub);
2562 return list;
2564 onError:
2565 Py_DECREF(list);
2566 PyBuffer_Release(&vsub);
2567 return NULL;
2570 PyDoc_STRVAR(reverse__doc__,
2571 "B.reverse() -> None\n\
2573 Reverse the order of the values in B in place.");
2574 static PyObject *
2575 bytes_reverse(PyByteArrayObject *self, PyObject *unused)
2577 char swap, *head, *tail;
2578 Py_ssize_t i, j, n = Py_SIZE(self);
2580 j = n / 2;
2581 head = self->ob_bytes;
2582 tail = head + n - 1;
2583 for (i = 0; i < j; i++) {
2584 swap = *head;
2585 *head++ = *tail;
2586 *tail-- = swap;
2589 Py_RETURN_NONE;
2592 PyDoc_STRVAR(insert__doc__,
2593 "B.insert(index, int) -> None\n\
2595 Insert a single item into the bytearray before the given index.");
2596 static PyObject *
2597 bytes_insert(PyByteArrayObject *self, PyObject *args)
2599 PyObject *value;
2600 int ival;
2601 Py_ssize_t where, n = Py_SIZE(self);
2603 if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
2604 return NULL;
2606 if (n == PY_SSIZE_T_MAX) {
2607 PyErr_SetString(PyExc_OverflowError,
2608 "cannot add more objects to bytes");
2609 return NULL;
2611 if (!_getbytevalue(value, &ival))
2612 return NULL;
2613 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2614 return NULL;
2616 if (where < 0) {
2617 where += n;
2618 if (where < 0)
2619 where = 0;
2621 if (where > n)
2622 where = n;
2623 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2624 self->ob_bytes[where] = ival;
2626 Py_RETURN_NONE;
2629 PyDoc_STRVAR(append__doc__,
2630 "B.append(int) -> None\n\
2632 Append a single item to the end of B.");
2633 static PyObject *
2634 bytes_append(PyByteArrayObject *self, PyObject *arg)
2636 int value;
2637 Py_ssize_t n = Py_SIZE(self);
2639 if (! _getbytevalue(arg, &value))
2640 return NULL;
2641 if (n == PY_SSIZE_T_MAX) {
2642 PyErr_SetString(PyExc_OverflowError,
2643 "cannot add more objects to bytes");
2644 return NULL;
2646 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2647 return NULL;
2649 self->ob_bytes[n] = value;
2651 Py_RETURN_NONE;
2654 PyDoc_STRVAR(extend__doc__,
2655 "B.extend(iterable int) -> None\n\
2657 Append all the elements from the iterator or sequence to the\n\
2658 end of B.");
2659 static PyObject *
2660 bytes_extend(PyByteArrayObject *self, PyObject *arg)
2662 PyObject *it, *item, *bytes_obj;
2663 Py_ssize_t buf_size = 0, len = 0;
2664 int value;
2665 char *buf;
2667 /* bytes_setslice code only accepts something supporting PEP 3118. */
2668 if (PyObject_CheckBuffer(arg)) {
2669 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2670 return NULL;
2672 Py_RETURN_NONE;
2675 it = PyObject_GetIter(arg);
2676 if (it == NULL)
2677 return NULL;
2679 /* Try to determine the length of the argument. 32 is abitrary. */
2680 buf_size = _PyObject_LengthHint(arg, 32);
2682 bytes_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2683 if (bytes_obj == NULL)
2684 return NULL;
2685 buf = PyByteArray_AS_STRING(bytes_obj);
2687 while ((item = PyIter_Next(it)) != NULL) {
2688 if (! _getbytevalue(item, &value)) {
2689 Py_DECREF(item);
2690 Py_DECREF(it);
2691 Py_DECREF(bytes_obj);
2692 return NULL;
2694 buf[len++] = value;
2695 Py_DECREF(item);
2697 if (len >= buf_size) {
2698 buf_size = len + (len >> 1) + 1;
2699 if (PyByteArray_Resize((PyObject *)bytes_obj, buf_size) < 0) {
2700 Py_DECREF(it);
2701 Py_DECREF(bytes_obj);
2702 return NULL;
2704 /* Recompute the `buf' pointer, since the resizing operation may
2705 have invalidated it. */
2706 buf = PyByteArray_AS_STRING(bytes_obj);
2709 Py_DECREF(it);
2711 /* Resize down to exact size. */
2712 if (PyByteArray_Resize((PyObject *)bytes_obj, len) < 0) {
2713 Py_DECREF(bytes_obj);
2714 return NULL;
2717 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), bytes_obj) == -1)
2718 return NULL;
2719 Py_DECREF(bytes_obj);
2721 Py_RETURN_NONE;
2724 PyDoc_STRVAR(pop__doc__,
2725 "B.pop([index]) -> int\n\
2727 Remove and return a single item from B. If no index\n\
2728 argument is given, will pop the last value.");
2729 static PyObject *
2730 bytes_pop(PyByteArrayObject *self, PyObject *args)
2732 int value;
2733 Py_ssize_t where = -1, n = Py_SIZE(self);
2735 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2736 return NULL;
2738 if (n == 0) {
2739 PyErr_SetString(PyExc_OverflowError,
2740 "cannot pop an empty bytes");
2741 return NULL;
2743 if (where < 0)
2744 where += Py_SIZE(self);
2745 if (where < 0 || where >= Py_SIZE(self)) {
2746 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2747 return NULL;
2749 if (!_canresize(self))
2750 return NULL;
2752 value = self->ob_bytes[where];
2753 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2754 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2755 return NULL;
2757 return PyInt_FromLong(value);
2760 PyDoc_STRVAR(remove__doc__,
2761 "B.remove(int) -> None\n\
2763 Remove the first occurance of a value in B.");
2764 static PyObject *
2765 bytes_remove(PyByteArrayObject *self, PyObject *arg)
2767 int value;
2768 Py_ssize_t where, n = Py_SIZE(self);
2770 if (! _getbytevalue(arg, &value))
2771 return NULL;
2773 for (where = 0; where < n; where++) {
2774 if (self->ob_bytes[where] == value)
2775 break;
2777 if (where == n) {
2778 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2779 return NULL;
2781 if (!_canresize(self))
2782 return NULL;
2784 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2785 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2786 return NULL;
2788 Py_RETURN_NONE;
2791 /* XXX These two helpers could be optimized if argsize == 1 */
2793 static Py_ssize_t
2794 lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2795 void *argptr, Py_ssize_t argsize)
2797 Py_ssize_t i = 0;
2798 while (i < mysize && memchr(argptr, myptr[i], argsize))
2799 i++;
2800 return i;
2803 static Py_ssize_t
2804 rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2805 void *argptr, Py_ssize_t argsize)
2807 Py_ssize_t i = mysize - 1;
2808 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2809 i--;
2810 return i + 1;
2813 PyDoc_STRVAR(strip__doc__,
2814 "B.strip([bytes]) -> bytearray\n\
2816 Strip leading and trailing bytes contained in the argument.\n\
2817 If the argument is omitted, strip ASCII whitespace.");
2818 static PyObject *
2819 bytes_strip(PyByteArrayObject *self, PyObject *args)
2821 Py_ssize_t left, right, mysize, argsize;
2822 void *myptr, *argptr;
2823 PyObject *arg = Py_None;
2824 Py_buffer varg;
2825 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2826 return NULL;
2827 if (arg == Py_None) {
2828 argptr = "\t\n\r\f\v ";
2829 argsize = 6;
2831 else {
2832 if (_getbuffer(arg, &varg) < 0)
2833 return NULL;
2834 argptr = varg.buf;
2835 argsize = varg.len;
2837 myptr = self->ob_bytes;
2838 mysize = Py_SIZE(self);
2839 left = lstrip_helper(myptr, mysize, argptr, argsize);
2840 if (left == mysize)
2841 right = left;
2842 else
2843 right = rstrip_helper(myptr, mysize, argptr, argsize);
2844 if (arg != Py_None)
2845 PyBuffer_Release(&varg);
2846 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2849 PyDoc_STRVAR(lstrip__doc__,
2850 "B.lstrip([bytes]) -> bytearray\n\
2852 Strip leading bytes contained in the argument.\n\
2853 If the argument is omitted, strip leading ASCII whitespace.");
2854 static PyObject *
2855 bytes_lstrip(PyByteArrayObject *self, PyObject *args)
2857 Py_ssize_t left, right, mysize, argsize;
2858 void *myptr, *argptr;
2859 PyObject *arg = Py_None;
2860 Py_buffer varg;
2861 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2862 return NULL;
2863 if (arg == Py_None) {
2864 argptr = "\t\n\r\f\v ";
2865 argsize = 6;
2867 else {
2868 if (_getbuffer(arg, &varg) < 0)
2869 return NULL;
2870 argptr = varg.buf;
2871 argsize = varg.len;
2873 myptr = self->ob_bytes;
2874 mysize = Py_SIZE(self);
2875 left = lstrip_helper(myptr, mysize, argptr, argsize);
2876 right = mysize;
2877 if (arg != Py_None)
2878 PyBuffer_Release(&varg);
2879 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2882 PyDoc_STRVAR(rstrip__doc__,
2883 "B.rstrip([bytes]) -> bytearray\n\
2885 Strip trailing bytes contained in the argument.\n\
2886 If the argument is omitted, strip trailing ASCII whitespace.");
2887 static PyObject *
2888 bytes_rstrip(PyByteArrayObject *self, PyObject *args)
2890 Py_ssize_t left, right, mysize, argsize;
2891 void *myptr, *argptr;
2892 PyObject *arg = Py_None;
2893 Py_buffer varg;
2894 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2895 return NULL;
2896 if (arg == Py_None) {
2897 argptr = "\t\n\r\f\v ";
2898 argsize = 6;
2900 else {
2901 if (_getbuffer(arg, &varg) < 0)
2902 return NULL;
2903 argptr = varg.buf;
2904 argsize = varg.len;
2906 myptr = self->ob_bytes;
2907 mysize = Py_SIZE(self);
2908 left = 0;
2909 right = rstrip_helper(myptr, mysize, argptr, argsize);
2910 if (arg != Py_None)
2911 PyBuffer_Release(&varg);
2912 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2915 PyDoc_STRVAR(decode_doc,
2916 "B.decode([encoding[, errors]]) -> unicode object.\n\
2918 Decodes B using the codec registered for encoding. encoding defaults\n\
2919 to the default encoding. errors may be given to set a different error\n\
2920 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2921 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2922 as well as any other name registered with codecs.register_error that is\n\
2923 able to handle UnicodeDecodeErrors.");
2925 static PyObject *
2926 bytes_decode(PyObject *self, PyObject *args)
2928 const char *encoding = NULL;
2929 const char *errors = NULL;
2931 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2932 return NULL;
2933 if (encoding == NULL)
2934 encoding = PyUnicode_GetDefaultEncoding();
2935 return PyCodec_Decode(self, encoding, errors);
2938 PyDoc_STRVAR(alloc_doc,
2939 "B.__alloc__() -> int\n\
2941 Returns the number of bytes actually allocated.");
2943 static PyObject *
2944 bytes_alloc(PyByteArrayObject *self)
2946 return PyInt_FromSsize_t(self->ob_alloc);
2949 PyDoc_STRVAR(join_doc,
2950 "B.join(iterable_of_bytes) -> bytes\n\
2952 Concatenates any number of bytearray objects, with B in between each pair.");
2954 static PyObject *
2955 bytes_join(PyByteArrayObject *self, PyObject *it)
2957 PyObject *seq;
2958 Py_ssize_t mysize = Py_SIZE(self);
2959 Py_ssize_t i;
2960 Py_ssize_t n;
2961 PyObject **items;
2962 Py_ssize_t totalsize = 0;
2963 PyObject *result;
2964 char *dest;
2966 seq = PySequence_Fast(it, "can only join an iterable");
2967 if (seq == NULL)
2968 return NULL;
2969 n = PySequence_Fast_GET_SIZE(seq);
2970 items = PySequence_Fast_ITEMS(seq);
2972 /* Compute the total size, and check that they are all bytes */
2973 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2974 for (i = 0; i < n; i++) {
2975 PyObject *obj = items[i];
2976 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2977 PyErr_Format(PyExc_TypeError,
2978 "can only join an iterable of bytes "
2979 "(item %ld has type '%.100s')",
2980 /* XXX %ld isn't right on Win64 */
2981 (long)i, Py_TYPE(obj)->tp_name);
2982 goto error;
2984 if (i > 0)
2985 totalsize += mysize;
2986 totalsize += Py_SIZE(obj);
2987 if (totalsize < 0) {
2988 PyErr_NoMemory();
2989 goto error;
2993 /* Allocate the result, and copy the bytes */
2994 result = PyByteArray_FromStringAndSize(NULL, totalsize);
2995 if (result == NULL)
2996 goto error;
2997 dest = PyByteArray_AS_STRING(result);
2998 for (i = 0; i < n; i++) {
2999 PyObject *obj = items[i];
3000 Py_ssize_t size = Py_SIZE(obj);
3001 char *buf;
3002 if (PyByteArray_Check(obj))
3003 buf = PyByteArray_AS_STRING(obj);
3004 else
3005 buf = PyBytes_AS_STRING(obj);
3006 if (i) {
3007 memcpy(dest, self->ob_bytes, mysize);
3008 dest += mysize;
3010 memcpy(dest, buf, size);
3011 dest += size;
3014 /* Done */
3015 Py_DECREF(seq);
3016 return result;
3018 /* Error handling */
3019 error:
3020 Py_DECREF(seq);
3021 return NULL;
3024 PyDoc_STRVAR(fromhex_doc,
3025 "bytearray.fromhex(string) -> bytearray\n\
3027 Create a bytearray object from a string of hexadecimal numbers.\n\
3028 Spaces between two numbers are accepted.\n\
3029 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3031 static int
3032 hex_digit_to_int(Py_UNICODE c)
3034 if (c >= 128)
3035 return -1;
3036 if (ISDIGIT(c))
3037 return c - '0';
3038 else {
3039 if (ISUPPER(c))
3040 c = TOLOWER(c);
3041 if (c >= 'a' && c <= 'f')
3042 return c - 'a' + 10;
3044 return -1;
3047 static PyObject *
3048 bytes_fromhex(PyObject *cls, PyObject *args)
3050 PyObject *newbytes, *hexobj;
3051 char *buf;
3052 Py_UNICODE *hex;
3053 Py_ssize_t hexlen, byteslen, i, j;
3054 int top, bot;
3056 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
3057 return NULL;
3058 assert(PyUnicode_Check(hexobj));
3059 hexlen = PyUnicode_GET_SIZE(hexobj);
3060 hex = PyUnicode_AS_UNICODE(hexobj);
3061 byteslen = hexlen/2; /* This overestimates if there are spaces */
3062 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
3063 if (!newbytes)
3064 return NULL;
3065 buf = PyByteArray_AS_STRING(newbytes);
3066 for (i = j = 0; i < hexlen; i += 2) {
3067 /* skip over spaces in the input */
3068 while (hex[i] == ' ')
3069 i++;
3070 if (i >= hexlen)
3071 break;
3072 top = hex_digit_to_int(hex[i]);
3073 bot = hex_digit_to_int(hex[i+1]);
3074 if (top == -1 || bot == -1) {
3075 PyErr_Format(PyExc_ValueError,
3076 "non-hexadecimal number found in "
3077 "fromhex() arg at position %zd", i);
3078 goto error;
3080 buf[j++] = (top << 4) + bot;
3082 if (PyByteArray_Resize(newbytes, j) < 0)
3083 goto error;
3084 return newbytes;
3086 error:
3087 Py_DECREF(newbytes);
3088 return NULL;
3091 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3093 static PyObject *
3094 bytes_reduce(PyByteArrayObject *self)
3096 PyObject *latin1, *dict;
3097 if (self->ob_bytes)
3098 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3099 Py_SIZE(self), NULL);
3100 else
3101 latin1 = PyUnicode_FromString("");
3103 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3104 if (dict == NULL) {
3105 PyErr_Clear();
3106 dict = Py_None;
3107 Py_INCREF(dict);
3110 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3113 PyDoc_STRVAR(sizeof_doc,
3114 "B.__sizeof__() -> int\n\
3116 Returns the size of B in memory, in bytes");
3117 static PyObject *
3118 bytes_sizeof(PyByteArrayObject *self)
3120 Py_ssize_t res;
3122 res = sizeof(PyByteArrayObject) + self->ob_alloc * sizeof(char);
3123 return PyInt_FromSsize_t(res);
3126 static PySequenceMethods bytes_as_sequence = {
3127 (lenfunc)bytes_length, /* sq_length */
3128 (binaryfunc)PyByteArray_Concat, /* sq_concat */
3129 (ssizeargfunc)bytes_repeat, /* sq_repeat */
3130 (ssizeargfunc)bytes_getitem, /* sq_item */
3131 0, /* sq_slice */
3132 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
3133 0, /* sq_ass_slice */
3134 (objobjproc)bytes_contains, /* sq_contains */
3135 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
3136 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
3139 static PyMappingMethods bytes_as_mapping = {
3140 (lenfunc)bytes_length,
3141 (binaryfunc)bytes_subscript,
3142 (objobjargproc)bytes_ass_subscript,
3145 static PyBufferProcs bytes_as_buffer = {
3146 (readbufferproc)bytes_buffer_getreadbuf,
3147 (writebufferproc)bytes_buffer_getwritebuf,
3148 (segcountproc)bytes_buffer_getsegcount,
3149 (charbufferproc)bytes_buffer_getcharbuf,
3150 (getbufferproc)bytes_getbuffer,
3151 (releasebufferproc)bytes_releasebuffer,
3154 static PyMethodDef
3155 bytes_methods[] = {
3156 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
3157 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
3158 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, sizeof_doc},
3159 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
3160 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3161 _Py_capitalize__doc__},
3162 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3163 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
3164 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
3165 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
3166 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3167 expandtabs__doc__},
3168 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
3169 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
3170 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
3171 fromhex_doc},
3172 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3173 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
3174 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3175 _Py_isalnum__doc__},
3176 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3177 _Py_isalpha__doc__},
3178 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3179 _Py_isdigit__doc__},
3180 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3181 _Py_islower__doc__},
3182 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3183 _Py_isspace__doc__},
3184 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3185 _Py_istitle__doc__},
3186 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3187 _Py_isupper__doc__},
3188 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
3189 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3190 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
3191 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
3192 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
3193 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
3194 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
3195 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
3196 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
3197 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3198 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3199 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3200 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
3201 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
3202 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
3203 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
3204 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3205 splitlines__doc__},
3206 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
3207 startswith__doc__},
3208 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
3209 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3210 _Py_swapcase__doc__},
3211 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3212 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
3213 translate__doc__},
3214 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3215 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
3216 {NULL}
3219 PyDoc_STRVAR(bytes_doc,
3220 "bytearray(iterable_of_ints) -> bytearray.\n\
3221 bytearray(string, encoding[, errors]) -> bytearray.\n\
3222 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3223 bytearray(memory_view) -> bytearray.\n\
3225 Construct an mutable bytearray object from:\n\
3226 - an iterable yielding integers in range(256)\n\
3227 - a text string encoded using the specified encoding\n\
3228 - a bytes or a bytearray object\n\
3229 - any object implementing the buffer API.\n\
3231 bytearray(int) -> bytearray.\n\
3233 Construct a zero-initialized bytearray of the given length.");
3236 static PyObject *bytes_iter(PyObject *seq);
3238 PyTypeObject PyByteArray_Type = {
3239 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3240 "bytearray",
3241 sizeof(PyByteArrayObject),
3243 (destructor)bytes_dealloc, /* tp_dealloc */
3244 0, /* tp_print */
3245 0, /* tp_getattr */
3246 0, /* tp_setattr */
3247 0, /* tp_compare */
3248 (reprfunc)bytes_repr, /* tp_repr */
3249 0, /* tp_as_number */
3250 &bytes_as_sequence, /* tp_as_sequence */
3251 &bytes_as_mapping, /* tp_as_mapping */
3252 0, /* tp_hash */
3253 0, /* tp_call */
3254 bytes_str, /* tp_str */
3255 PyObject_GenericGetAttr, /* tp_getattro */
3256 0, /* tp_setattro */
3257 &bytes_as_buffer, /* tp_as_buffer */
3258 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3259 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3260 bytes_doc, /* tp_doc */
3261 0, /* tp_traverse */
3262 0, /* tp_clear */
3263 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3264 0, /* tp_weaklistoffset */
3265 bytes_iter, /* tp_iter */
3266 0, /* tp_iternext */
3267 bytes_methods, /* tp_methods */
3268 0, /* tp_members */
3269 0, /* tp_getset */
3270 0, /* tp_base */
3271 0, /* tp_dict */
3272 0, /* tp_descr_get */
3273 0, /* tp_descr_set */
3274 0, /* tp_dictoffset */
3275 (initproc)bytes_init, /* tp_init */
3276 PyType_GenericAlloc, /* tp_alloc */
3277 PyType_GenericNew, /* tp_new */
3278 PyObject_Del, /* tp_free */
3281 /*********************** Bytes Iterator ****************************/
3283 typedef struct {
3284 PyObject_HEAD
3285 Py_ssize_t it_index;
3286 PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
3287 } bytesiterobject;
3289 static void
3290 bytesiter_dealloc(bytesiterobject *it)
3292 _PyObject_GC_UNTRACK(it);
3293 Py_XDECREF(it->it_seq);
3294 PyObject_GC_Del(it);
3297 static int
3298 bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
3300 Py_VISIT(it->it_seq);
3301 return 0;
3304 static PyObject *
3305 bytesiter_next(bytesiterobject *it)
3307 PyByteArrayObject *seq;
3308 PyObject *item;
3310 assert(it != NULL);
3311 seq = it->it_seq;
3312 if (seq == NULL)
3313 return NULL;
3314 assert(PyByteArray_Check(seq));
3316 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
3317 item = PyInt_FromLong(
3318 (unsigned char)seq->ob_bytes[it->it_index]);
3319 if (item != NULL)
3320 ++it->it_index;
3321 return item;
3324 Py_DECREF(seq);
3325 it->it_seq = NULL;
3326 return NULL;
3329 static PyObject *
3330 bytesiter_length_hint(bytesiterobject *it)
3332 Py_ssize_t len = 0;
3333 if (it->it_seq)
3334 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3335 return PyInt_FromSsize_t(len);
3338 PyDoc_STRVAR(length_hint_doc,
3339 "Private method returning an estimate of len(list(it)).");
3341 static PyMethodDef bytesiter_methods[] = {
3342 {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
3343 length_hint_doc},
3344 {NULL, NULL} /* sentinel */
3347 PyTypeObject PyByteArrayIter_Type = {
3348 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3349 "bytearray_iterator", /* tp_name */
3350 sizeof(bytesiterobject), /* tp_basicsize */
3351 0, /* tp_itemsize */
3352 /* methods */
3353 (destructor)bytesiter_dealloc, /* tp_dealloc */
3354 0, /* tp_print */
3355 0, /* tp_getattr */
3356 0, /* tp_setattr */
3357 0, /* tp_compare */
3358 0, /* tp_repr */
3359 0, /* tp_as_number */
3360 0, /* tp_as_sequence */
3361 0, /* tp_as_mapping */
3362 0, /* tp_hash */
3363 0, /* tp_call */
3364 0, /* tp_str */
3365 PyObject_GenericGetAttr, /* tp_getattro */
3366 0, /* tp_setattro */
3367 0, /* tp_as_buffer */
3368 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3369 0, /* tp_doc */
3370 (traverseproc)bytesiter_traverse, /* tp_traverse */
3371 0, /* tp_clear */
3372 0, /* tp_richcompare */
3373 0, /* tp_weaklistoffset */
3374 PyObject_SelfIter, /* tp_iter */
3375 (iternextfunc)bytesiter_next, /* tp_iternext */
3376 bytesiter_methods, /* tp_methods */
3380 static PyObject *
3381 bytes_iter(PyObject *seq)
3383 bytesiterobject *it;
3385 if (!PyByteArray_Check(seq)) {
3386 PyErr_BadInternalCall();
3387 return NULL;
3389 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3390 if (it == NULL)
3391 return NULL;
3392 it->it_index = 0;
3393 Py_INCREF(seq);
3394 it->it_seq = (PyByteArrayObject *)seq;
3395 _PyObject_GC_TRACK(it);
3396 return (PyObject *)it;