The _lsprof module could crash the interpreter if it was given an external
[python.git] / Objects / bytearrayobject.c
blobc4fc37f070e304cc16cfa9be3bc5a952154fdd8a
1 /* PyBytes (bytearray) implementation */
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
6 #include "bytes_methods.h"
8 static PyByteArrayObject *nullbytes = NULL;
10 void
11 PyByteArray_Fini(void)
13 Py_CLEAR(nullbytes);
16 int
17 PyByteArray_Init(void)
19 nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24 nullbytes->ob_exports = 0;
25 return 1;
28 /* end nullbytes support */
30 /* Helpers */
32 static int
33 _getbytevalue(PyObject* arg, int *value)
35 long face_value;
37 if (PyBytes_CheckExact(arg)) {
38 if (Py_SIZE(arg) != 1) {
39 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
40 return 0;
42 *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
43 return 1;
45 else if (PyInt_Check(arg) || PyLong_Check(arg)) {
46 face_value = PyLong_AsLong(arg);
48 else {
49 PyObject *index = PyNumber_Index(arg);
50 if (index == NULL) {
51 PyErr_Format(PyExc_TypeError,
52 "an integer or string of size 1 is required");
53 return 0;
55 face_value = PyLong_AsLong(index);
56 Py_DECREF(index);
59 if (face_value < 0 || face_value >= 256) {
60 /* this includes the OverflowError in case the long is too large */
61 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
62 return 0;
65 *value = face_value;
66 return 1;
69 static Py_ssize_t
70 bytes_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
72 if ( index != 0 ) {
73 PyErr_SetString(PyExc_SystemError,
74 "accessing non-existent bytes segment");
75 return -1;
77 *ptr = (void *)self->ob_bytes;
78 return Py_SIZE(self);
81 static Py_ssize_t
82 bytes_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
84 if ( index != 0 ) {
85 PyErr_SetString(PyExc_SystemError,
86 "accessing non-existent bytes segment");
87 return -1;
89 *ptr = (void *)self->ob_bytes;
90 return Py_SIZE(self);
93 static Py_ssize_t
94 bytes_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
96 if ( lenp )
97 *lenp = Py_SIZE(self);
98 return 1;
101 static Py_ssize_t
102 bytes_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
104 if ( index != 0 ) {
105 PyErr_SetString(PyExc_SystemError,
106 "accessing non-existent bytes segment");
107 return -1;
109 *ptr = self->ob_bytes;
110 return Py_SIZE(self);
113 static int
114 bytes_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
116 int ret;
117 void *ptr;
118 if (view == NULL) {
119 obj->ob_exports++;
120 return 0;
122 if (obj->ob_bytes == NULL)
123 ptr = "";
124 else
125 ptr = obj->ob_bytes;
126 ret = PyBuffer_FillInfo(view, (PyObject*)obj, ptr, Py_SIZE(obj), 0, flags);
127 if (ret >= 0) {
128 obj->ob_exports++;
130 return ret;
133 static void
134 bytes_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
136 obj->ob_exports--;
139 static Py_ssize_t
140 _getbuffer(PyObject *obj, Py_buffer *view)
142 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
144 if (buffer == NULL || buffer->bf_getbuffer == NULL)
146 PyErr_Format(PyExc_TypeError,
147 "Type %.100s doesn't support the buffer API",
148 Py_TYPE(obj)->tp_name);
149 return -1;
152 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
153 return -1;
154 return view->len;
157 /* Direct API functions */
159 PyObject *
160 PyByteArray_FromObject(PyObject *input)
162 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
163 input, NULL);
166 PyObject *
167 PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
169 PyByteArrayObject *new;
170 Py_ssize_t alloc;
172 if (size < 0) {
173 PyErr_SetString(PyExc_SystemError,
174 "Negative size passed to PyByteArray_FromStringAndSize");
175 return NULL;
178 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
179 if (new == NULL)
180 return NULL;
182 if (size == 0) {
183 new->ob_bytes = NULL;
184 alloc = 0;
186 else {
187 alloc = size + 1;
188 new->ob_bytes = PyMem_Malloc(alloc);
189 if (new->ob_bytes == NULL) {
190 Py_DECREF(new);
191 return PyErr_NoMemory();
193 if (bytes != NULL)
194 memcpy(new->ob_bytes, bytes, size);
195 new->ob_bytes[size] = '\0'; /* Trailing null byte */
197 Py_SIZE(new) = size;
198 new->ob_alloc = alloc;
199 new->ob_exports = 0;
201 return (PyObject *)new;
204 Py_ssize_t
205 PyByteArray_Size(PyObject *self)
207 assert(self != NULL);
208 assert(PyByteArray_Check(self));
210 return PyByteArray_GET_SIZE(self);
213 char *
214 PyByteArray_AsString(PyObject *self)
216 assert(self != NULL);
217 assert(PyByteArray_Check(self));
219 return PyByteArray_AS_STRING(self);
223 PyByteArray_Resize(PyObject *self, Py_ssize_t size)
225 void *sval;
226 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
228 assert(self != NULL);
229 assert(PyByteArray_Check(self));
230 assert(size >= 0);
232 if (size < alloc / 2) {
233 /* Major downsize; resize down to exact size */
234 alloc = size + 1;
236 else if (size < alloc) {
237 /* Within allocated size; quick exit */
238 Py_SIZE(self) = size;
239 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
240 return 0;
242 else if (size <= alloc * 1.125) {
243 /* Moderate upsize; overallocate similar to list_resize() */
244 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
246 else {
247 /* Major upsize; resize up to exact size */
248 alloc = size + 1;
251 if (((PyByteArrayObject *)self)->ob_exports > 0) {
253 fprintf(stderr, "%d: %s", ((PyByteArrayObject *)self)->ob_exports,
254 ((PyByteArrayObject *)self)->ob_bytes);
256 PyErr_SetString(PyExc_BufferError,
257 "Existing exports of data: object cannot be re-sized");
258 return -1;
261 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
262 if (sval == NULL) {
263 PyErr_NoMemory();
264 return -1;
267 ((PyByteArrayObject *)self)->ob_bytes = sval;
268 Py_SIZE(self) = size;
269 ((PyByteArrayObject *)self)->ob_alloc = alloc;
270 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
272 return 0;
275 PyObject *
276 PyByteArray_Concat(PyObject *a, PyObject *b)
278 Py_ssize_t size;
279 Py_buffer va, vb;
280 PyByteArrayObject *result = NULL;
282 va.len = -1;
283 vb.len = -1;
284 if (_getbuffer(a, &va) < 0 ||
285 _getbuffer(b, &vb) < 0) {
286 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
287 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
288 goto done;
291 size = va.len + vb.len;
292 if (size < 0) {
293 return PyErr_NoMemory();
294 goto done;
297 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
298 if (result != NULL) {
299 memcpy(result->ob_bytes, va.buf, va.len);
300 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
303 done:
304 if (va.len != -1)
305 PyBuffer_Release(&va);
306 if (vb.len != -1)
307 PyBuffer_Release(&vb);
308 return (PyObject *)result;
311 /* Functions stuffed into the type object */
313 static Py_ssize_t
314 bytes_length(PyByteArrayObject *self)
316 return Py_SIZE(self);
319 static PyObject *
320 bytes_iconcat(PyByteArrayObject *self, PyObject *other)
322 Py_ssize_t mysize;
323 Py_ssize_t size;
324 Py_buffer vo;
326 if (_getbuffer(other, &vo) < 0) {
327 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
328 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
329 return NULL;
332 mysize = Py_SIZE(self);
333 size = mysize + vo.len;
334 if (size < 0) {
335 PyBuffer_Release(&vo);
336 return PyErr_NoMemory();
338 if (size < self->ob_alloc) {
339 Py_SIZE(self) = size;
340 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
342 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
343 PyBuffer_Release(&vo);
344 return NULL;
346 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
347 PyBuffer_Release(&vo);
348 Py_INCREF(self);
349 return (PyObject *)self;
352 static PyObject *
353 bytes_repeat(PyByteArrayObject *self, Py_ssize_t count)
355 PyByteArrayObject *result;
356 Py_ssize_t mysize;
357 Py_ssize_t size;
359 if (count < 0)
360 count = 0;
361 mysize = Py_SIZE(self);
362 size = mysize * count;
363 if (count != 0 && size / count != mysize)
364 return PyErr_NoMemory();
365 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
366 if (result != NULL && size != 0) {
367 if (mysize == 1)
368 memset(result->ob_bytes, self->ob_bytes[0], size);
369 else {
370 Py_ssize_t i;
371 for (i = 0; i < count; i++)
372 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
375 return (PyObject *)result;
378 static PyObject *
379 bytes_irepeat(PyByteArrayObject *self, Py_ssize_t count)
381 Py_ssize_t mysize;
382 Py_ssize_t size;
384 if (count < 0)
385 count = 0;
386 mysize = Py_SIZE(self);
387 size = mysize * count;
388 if (count != 0 && size / count != mysize)
389 return PyErr_NoMemory();
390 if (size < self->ob_alloc) {
391 Py_SIZE(self) = size;
392 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
394 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
395 return NULL;
397 if (mysize == 1)
398 memset(self->ob_bytes, self->ob_bytes[0], size);
399 else {
400 Py_ssize_t i;
401 for (i = 1; i < count; i++)
402 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
405 Py_INCREF(self);
406 return (PyObject *)self;
409 static PyObject *
410 bytes_getitem(PyByteArrayObject *self, Py_ssize_t i)
412 if (i < 0)
413 i += Py_SIZE(self);
414 if (i < 0 || i >= Py_SIZE(self)) {
415 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
416 return NULL;
418 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
421 static PyObject *
422 bytes_subscript(PyByteArrayObject *self, PyObject *index)
424 if (PyIndex_Check(index)) {
425 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
427 if (i == -1 && PyErr_Occurred())
428 return NULL;
430 if (i < 0)
431 i += PyByteArray_GET_SIZE(self);
433 if (i < 0 || i >= Py_SIZE(self)) {
434 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
435 return NULL;
437 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
439 else if (PySlice_Check(index)) {
440 Py_ssize_t start, stop, step, slicelength, cur, i;
441 if (PySlice_GetIndicesEx((PySliceObject *)index,
442 PyByteArray_GET_SIZE(self),
443 &start, &stop, &step, &slicelength) < 0) {
444 return NULL;
447 if (slicelength <= 0)
448 return PyByteArray_FromStringAndSize("", 0);
449 else if (step == 1) {
450 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
451 slicelength);
453 else {
454 char *source_buf = PyByteArray_AS_STRING(self);
455 char *result_buf = (char *)PyMem_Malloc(slicelength);
456 PyObject *result;
458 if (result_buf == NULL)
459 return PyErr_NoMemory();
461 for (cur = start, i = 0; i < slicelength;
462 cur += step, i++) {
463 result_buf[i] = source_buf[cur];
465 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
466 PyMem_Free(result_buf);
467 return result;
470 else {
471 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
472 return NULL;
/* Replace self[lo:hi] with the bytes of `values`, or delete that range
   when `values` is NULL.  `values` is read through _getbuffer().
   Returns 0 on success and -1 with an exception set on failure. */
476 static int
477 bytes_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
478 PyObject *values)
480 Py_ssize_t avail, needed;
481 void *bytes;
482 Py_buffer vbytes;
483 int res = 0;
/* len == -1 marks "no buffer acquired" for the cleanup at `finish`. */
485 vbytes.len = -1;
486 if (values == (PyObject *)self) {
487 /* Make a copy and call this function recursively */
488 int err;
489 values = PyByteArray_FromObject(values);
490 if (values == NULL)
491 return -1;
492 err = bytes_setslice(self, lo, hi, values);
493 Py_DECREF(values);
494 return err;
496 if (values == NULL) {
497 /* del b[lo:hi] */
498 bytes = NULL;
499 needed = 0;
501 else {
502 if (_getbuffer(values, &vbytes) < 0) {
503 PyErr_Format(PyExc_TypeError,
504 "can't set bytearray slice from %.100s",
505 Py_TYPE(values)->tp_name);
506 return -1;
508 needed = vbytes.len;
509 bytes = vbytes.buf;
/* Clamp lo/hi so that 0 <= lo <= hi <= current size. */
512 if (lo < 0)
513 lo = 0;
514 if (hi < lo)
515 hi = lo;
516 if (hi > Py_SIZE(self))
517 hi = Py_SIZE(self);
519 avail = hi - lo;
520 if (avail < 0)
521 lo = hi = avail = 0;
523 if (avail != needed) {
/* Shrinking: move the tail down BEFORE resizing, while the old bytes
   are still inside the object's current length. */
524 if (avail > needed) {
526 0 lo hi old_size
527 | |<----avail----->|<-----tomove------>|
528 | |<-needed->|<-----tomove------>|
529 0 lo new_hi new_size
531 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
532 Py_SIZE(self) - hi);
534 /* XXX(nnorwitz): need to verify this can't overflow! */
535 if (PyByteArray_Resize((PyObject *)self,
536 Py_SIZE(self) + needed - avail) < 0) {
537 res = -1;
538 goto finish;
/* Growing: the resize above has already happened, so Py_SIZE(self) is
   the new size and the tail can be moved up into the new space. */
540 if (avail < needed) {
542 0 lo hi old_size
543 | |<-avail->|<-----tomove------>|
544 | |<----needed---->|<-----tomove------>|
545 0 lo new_hi new_size
547 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
548 Py_SIZE(self) - lo - needed);
552 if (needed > 0)
553 memcpy(self->ob_bytes + lo, bytes, needed);
556 finish:
557 if (vbytes.len != -1)
558 PyBuffer_Release(&vbytes);
559 return res;
562 static int
563 bytes_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
565 int ival;
567 if (i < 0)
568 i += Py_SIZE(self);
570 if (i < 0 || i >= Py_SIZE(self)) {
571 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
572 return -1;
575 if (value == NULL)
576 return bytes_setslice(self, i, i+1, NULL);
578 if (!_getbytevalue(value, &ival))
579 return -1;
581 self->ob_bytes[i] = ival;
582 return 0;
/* mp_ass_subscript: implements self[index] = values and del self[index]
   (values == NULL).  `index` may be an index object or a slice; anything
   else raises TypeError.  Returns 0 on success and -1 with an exception
   set on failure. */
585 static int
586 bytes_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
588 Py_ssize_t start, stop, step, slicelen, needed;
589 char *bytes;
591 if (PyIndex_Check(index)) {
592 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
594 if (i == -1 && PyErr_Occurred())
595 return -1;
597 if (i < 0)
598 i += PyByteArray_GET_SIZE(self);
600 if (i < 0 || i >= Py_SIZE(self)) {
601 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
602 return -1;
/* Deleting one item is expressed as deleting the slice [i:i+1]. */
605 if (values == NULL) {
606 /* Fall through to slice assignment */
607 start = i;
608 stop = i + 1;
609 step = 1;
610 slicelen = 1;
612 else {
613 int ival;
614 if (!_getbytevalue(values, &ival))
615 return -1;
616 self->ob_bytes[i] = (char)ival;
617 return 0;
620 else if (PySlice_Check(index)) {
621 if (PySlice_GetIndicesEx((PySliceObject *)index,
622 PyByteArray_GET_SIZE(self),
623 &start, &stop, &step, &slicelen) < 0) {
624 return -1;
627 else {
628 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
629 return -1;
632 if (values == NULL) {
633 bytes = NULL;
634 needed = 0;
/* Anything that is not a distinct bytearray is first converted to a
   temporary bytearray, so the code below can read ob_bytes directly. */
636 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
637 /* Make a copy an call this function recursively */
638 int err;
639 values = PyByteArray_FromObject(values);
640 if (values == NULL)
641 return -1;
642 err = bytes_ass_subscript(self, index, values);
643 Py_DECREF(values);
644 return err;
646 else {
647 assert(PyByteArray_Check(values));
648 bytes = ((PyByteArrayObject *)values)->ob_bytes;
649 needed = Py_SIZE(values);
651 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
652 if ((step < 0 && start < stop) ||
653 (step > 0 && start > stop))
654 stop = start;
/* step == 1: contiguous replacement, which may change the length. */
655 if (step == 1) {
656 if (slicelen != needed) {
/* Shrinking: move the tail down before the resize. */
657 if (slicelen > needed) {
659 0 start stop old_size
660 | |<---slicelen--->|<-----tomove------>|
661 | |<-needed->|<-----tomove------>|
662 0 lo new_hi new_size
664 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
665 Py_SIZE(self) - stop);
667 if (PyByteArray_Resize((PyObject *)self,
668 Py_SIZE(self) + needed - slicelen) < 0)
669 return -1;
/* Growing: the resize has happened, so Py_SIZE(self) is the new size. */
670 if (slicelen < needed) {
672 0 lo hi old_size
673 | |<-avail->|<-----tomove------>|
674 | |<----needed---->|<-----tomove------>|
675 0 lo new_hi new_size
677 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
678 Py_SIZE(self) - start - needed);
682 if (needed > 0)
683 memcpy(self->ob_bytes + start, bytes, needed);
685 return 0;
/* step != 1: extended slice.  Either delete the selected bytes
   (needed == 0) or overwrite them one-for-one. */
687 else {
688 if (needed == 0) {
689 /* Delete slice */
690 Py_ssize_t cur, i;
/* Normalise a negative step into the equivalent ascending walk. */
692 if (step < 0) {
693 stop = start + 1;
694 start = stop + step * (slicelen - 1) - 1;
695 step = -step;
/* For each deleted position `cur`, slide the bytes that follow it (up to
   the next deleted position, or to the end of the data) left by the number
   of bytes deleted so far plus one. */
697 for (cur = start, i = 0;
698 i < slicelen; cur += step, i++) {
699 Py_ssize_t lim = step - 1;
701 if (cur + step >= PyByteArray_GET_SIZE(self))
702 lim = PyByteArray_GET_SIZE(self) - cur - 1;
704 memmove(self->ob_bytes + cur - i,
705 self->ob_bytes + cur + 1, lim);
707 /* Move the tail of the bytes, in one chunk */
708 cur = start + slicelen*step;
709 if (cur < PyByteArray_GET_SIZE(self)) {
710 memmove(self->ob_bytes + cur - slicelen,
711 self->ob_bytes + cur,
712 PyByteArray_GET_SIZE(self) - cur);
714 if (PyByteArray_Resize((PyObject *)self,
715 PyByteArray_GET_SIZE(self) - slicelen) < 0)
716 return -1;
718 return 0;
720 else {
721 /* Assign slice */
722 Py_ssize_t cur, i;
/* An extended slice cannot change the length, so the sizes must match. */
724 if (needed != slicelen) {
725 PyErr_Format(PyExc_ValueError,
726 "attempt to assign bytes of size %zd "
727 "to extended slice of size %zd",
728 needed, slicelen);
729 return -1;
731 for (cur = start, i = 0; i < slicelen; cur += step, i++)
732 self->ob_bytes[cur] = bytes[i];
733 return 0;
738 static int
739 bytes_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
741 static char *kwlist[] = {"source", "encoding", "errors", 0};
742 PyObject *arg = NULL;
743 const char *encoding = NULL;
744 const char *errors = NULL;
745 Py_ssize_t count;
746 PyObject *it;
747 PyObject *(*iternext)(PyObject *);
749 if (Py_SIZE(self) != 0) {
750 /* Empty previous contents (yes, do this first of all!) */
751 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
752 return -1;
755 /* Parse arguments */
756 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
757 &arg, &encoding, &errors))
758 return -1;
760 /* Make a quick exit if no first argument */
761 if (arg == NULL) {
762 if (encoding != NULL || errors != NULL) {
763 PyErr_SetString(PyExc_TypeError,
764 "encoding or errors without sequence argument");
765 return -1;
767 return 0;
770 if (PyBytes_Check(arg)) {
771 PyObject *new, *encoded;
772 if (encoding != NULL) {
773 encoded = PyCodec_Encode(arg, encoding, errors);
774 if (encoded == NULL)
775 return -1;
776 assert(PyBytes_Check(encoded));
778 else {
779 encoded = arg;
780 Py_INCREF(arg);
782 new = bytes_iconcat(self, arg);
783 Py_DECREF(encoded);
784 if (new == NULL)
785 return -1;
786 Py_DECREF(new);
787 return 0;
790 if (PyUnicode_Check(arg)) {
791 /* Encode via the codec registry */
792 PyObject *encoded, *new;
793 if (encoding == NULL) {
794 PyErr_SetString(PyExc_TypeError,
795 "unicode argument without an encoding");
796 return -1;
798 encoded = PyCodec_Encode(arg, encoding, errors);
799 if (encoded == NULL)
800 return -1;
801 assert(PyBytes_Check(encoded));
802 new = bytes_iconcat(self, encoded);
803 Py_DECREF(encoded);
804 if (new == NULL)
805 return -1;
806 Py_DECREF(new);
807 return 0;
810 /* If it's not unicode, there can't be encoding or errors */
811 if (encoding != NULL || errors != NULL) {
812 PyErr_SetString(PyExc_TypeError,
813 "encoding or errors without a string argument");
814 return -1;
817 /* Is it an int? */
818 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
819 if (count == -1 && PyErr_Occurred())
820 PyErr_Clear();
821 else {
822 if (count < 0) {
823 PyErr_SetString(PyExc_ValueError, "negative count");
824 return -1;
826 if (count > 0) {
827 if (PyByteArray_Resize((PyObject *)self, count))
828 return -1;
829 memset(self->ob_bytes, 0, count);
831 return 0;
834 /* Use the buffer API */
835 if (PyObject_CheckBuffer(arg)) {
836 Py_ssize_t size;
837 Py_buffer view;
838 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
839 return -1;
840 size = view.len;
841 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
842 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
843 goto fail;
844 PyBuffer_Release(&view);
845 return 0;
846 fail:
847 PyBuffer_Release(&view);
848 return -1;
851 /* XXX Optimize this if the arguments is a list, tuple */
853 /* Get the iterator */
854 it = PyObject_GetIter(arg);
855 if (it == NULL)
856 return -1;
857 iternext = *Py_TYPE(it)->tp_iternext;
859 /* Run the iterator to exhaustion */
860 for (;;) {
861 PyObject *item;
862 int rc, value;
864 /* Get the next item */
865 item = iternext(it);
866 if (item == NULL) {
867 if (PyErr_Occurred()) {
868 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
869 goto error;
870 PyErr_Clear();
872 break;
875 /* Interpret it as an int (__index__) */
876 rc = _getbytevalue(item, &value);
877 Py_DECREF(item);
878 if (!rc)
879 goto error;
881 /* Append the byte */
882 if (Py_SIZE(self) < self->ob_alloc)
883 Py_SIZE(self)++;
884 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
885 goto error;
886 self->ob_bytes[Py_SIZE(self)-1] = value;
889 /* Clean up and return success */
890 Py_DECREF(it);
891 return 0;
893 error:
894 /* Error handling when it != NULL */
895 Py_DECREF(it);
896 return -1;
899 /* Mostly copied from string_repr, but without the
900 "smart quote" functionality. */
/* Build the repr of a bytearray as a unicode object of the form
   bytearray(b'...').  Returns a new reference, or NULL with an exception
   set (OverflowError when the worst-case output size does not fit). */
901 static PyObject *
902 bytes_repr(PyByteArrayObject *self)
904 static const char *hexdigits = "0123456789abcdef";
905 const char *quote_prefix = "bytearray(b";
906 const char *quote_postfix = ")";
907 Py_ssize_t length = Py_SIZE(self);
/* Worst case: every byte becomes a 4-character \xHH escape. */
908 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
909 size_t newsize = 14 + 4 * length;
910 PyObject *v;
911 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
912 PyErr_SetString(PyExc_OverflowError,
913 "bytearray object is too large to make repr");
914 return NULL;
/* Allocate for the worst case; the object is resized to the actual
   length once the output has been written. */
916 v = PyUnicode_FromUnicode(NULL, newsize);
917 if (v == NULL) {
918 return NULL;
920 else {
921 register Py_ssize_t i;
922 register Py_UNICODE c;
923 register Py_UNICODE *p;
924 int quote;
926 /* Figure out which quote to use; single is preferred */
927 quote = '\'';
/* A double quote is chosen only if the data contains a single quote and
   no double quote; seeing any double quote settles on single at once. */
929 char *test, *start;
930 start = PyByteArray_AS_STRING(self);
931 for (test = start; test < start+length; ++test) {
932 if (*test == '"') {
933 quote = '\''; /* back to single */
934 goto decided;
936 else if (*test == '\'')
937 quote = '"';
939 decided:
943 p = PyUnicode_AS_UNICODE(v);
944 while (*quote_prefix)
945 *p++ = *quote_prefix++;
946 *p++ = quote;
948 for (i = 0; i < length; i++) {
949 /* There's at least enough room for a hex escape
950 and a closing quote. */
951 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
952 c = self->ob_bytes[i];
/* Note: the test below is against a literal single quote, not against
   `quote`, so a single quote is backslash-escaped whichever delimiter
   was chosen above. */
953 if (c == '\'' || c == '\\')
954 *p++ = '\\', *p++ = c;
955 else if (c == '\t')
956 *p++ = '\\', *p++ = 't';
957 else if (c == '\n')
958 *p++ = '\\', *p++ = 'n';
959 else if (c == '\r')
960 *p++ = '\\', *p++ = 'r';
961 else if (c == 0)
962 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
/* Remaining control characters and everything from 0x7f upward are
   written as \xHH, using the low 8 bits of c. */
963 else if (c < ' ' || c >= 0x7f) {
964 *p++ = '\\';
965 *p++ = 'x';
966 *p++ = hexdigits[(c & 0xf0) >> 4];
967 *p++ = hexdigits[c & 0xf];
969 else
970 *p++ = c;
972 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
973 *p++ = quote;
974 while (*quote_postfix) {
975 *p++ = *quote_postfix++;
977 *p = '\0';
/* Shrink the result to the number of characters actually written. */
978 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
979 Py_DECREF(v);
980 return NULL;
982 return v;
986 static PyObject *
987 bytes_str(PyObject *op)
989 #if 0
990 if (Py_BytesWarningFlag) {
991 if (PyErr_WarnEx(PyExc_BytesWarning,
992 "str() on a bytearray instance", 1))
993 return NULL;
995 return bytes_repr((PyByteArrayObject*)op);
996 #endif
997 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
1000 static PyObject *
1001 bytes_richcompare(PyObject *self, PyObject *other, int op)
1003 Py_ssize_t self_size, other_size;
1004 Py_buffer self_bytes, other_bytes;
1005 PyObject *res;
1006 Py_ssize_t minsize;
1007 int cmp;
1009 /* Bytes can be compared to anything that supports the (binary)
1010 buffer API. Except that a comparison with Unicode is always an
1011 error, even if the comparison is for equality. */
1012 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1013 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1014 if (Py_BytesWarningFlag && op == Py_EQ) {
1015 if (PyErr_WarnEx(PyExc_BytesWarning,
1016 "Comparsion between bytearray and string", 1))
1017 return NULL;
1020 Py_INCREF(Py_NotImplemented);
1021 return Py_NotImplemented;
1024 self_size = _getbuffer(self, &self_bytes);
1025 if (self_size < 0) {
1026 PyErr_Clear();
1027 Py_INCREF(Py_NotImplemented);
1028 return Py_NotImplemented;
1031 other_size = _getbuffer(other, &other_bytes);
1032 if (other_size < 0) {
1033 PyErr_Clear();
1034 PyBuffer_Release(&self_bytes);
1035 Py_INCREF(Py_NotImplemented);
1036 return Py_NotImplemented;
1039 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1040 /* Shortcut: if the lengths differ, the objects differ */
1041 cmp = (op == Py_NE);
1043 else {
1044 minsize = self_size;
1045 if (other_size < minsize)
1046 minsize = other_size;
1048 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1049 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1051 if (cmp == 0) {
1052 if (self_size < other_size)
1053 cmp = -1;
1054 else if (self_size > other_size)
1055 cmp = 1;
1058 switch (op) {
1059 case Py_LT: cmp = cmp < 0; break;
1060 case Py_LE: cmp = cmp <= 0; break;
1061 case Py_EQ: cmp = cmp == 0; break;
1062 case Py_NE: cmp = cmp != 0; break;
1063 case Py_GT: cmp = cmp > 0; break;
1064 case Py_GE: cmp = cmp >= 0; break;
1068 res = cmp ? Py_True : Py_False;
1069 PyBuffer_Release(&self_bytes);
1070 PyBuffer_Release(&other_bytes);
1071 Py_INCREF(res);
1072 return res;
1075 static void
1076 bytes_dealloc(PyByteArrayObject *self)
1078 if (self->ob_exports > 0) {
1079 PyErr_SetString(PyExc_SystemError,
1080 "deallocated bytearray object has exported buffers");
1081 PyErr_Print();
1083 if (self->ob_bytes != 0) {
1084 PyMem_Free(self->ob_bytes);
1086 Py_TYPE(self)->tp_free((PyObject *)self);
1090 /* -------------------------------------------------------------------- */
1091 /* Methods */
1093 #define STRINGLIB_CHAR char
1094 #define STRINGLIB_CMP memcmp
1095 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1096 #define STRINGLIB_STR PyByteArray_AS_STRING
1097 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1098 #define STRINGLIB_EMPTY nullbytes
1099 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1100 #define STRINGLIB_MUTABLE 1
1101 #define FROM_BYTEARRAY 1
1103 #include "stringlib/fastsearch.h"
1104 #include "stringlib/count.h"
1105 #include "stringlib/find.h"
1106 #include "stringlib/partition.h"
1107 #include "stringlib/ctype.h"
1108 #include "stringlib/transmogrify.h"
1111 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1112 were copied from the old char* style string object. */
1114 Py_LOCAL_INLINE(void)
1115 _adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1117 if (*end > len)
1118 *end = len;
1119 else if (*end < 0)
1120 *end += len;
1121 if (*end < 0)
1122 *end = 0;
1123 if (*start < 0)
1124 *start += len;
1125 if (*start < 0)
1126 *start = 0;
1130 Py_LOCAL_INLINE(Py_ssize_t)
1131 bytes_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1133 PyObject *subobj;
1134 Py_buffer subbuf;
1135 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1136 Py_ssize_t res;
1138 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1139 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1140 return -2;
1141 if (_getbuffer(subobj, &subbuf) < 0)
1142 return -2;
1143 if (dir > 0)
1144 res = stringlib_find_slice(
1145 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1146 subbuf.buf, subbuf.len, start, end);
1147 else
1148 res = stringlib_rfind_slice(
1149 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1150 subbuf.buf, subbuf.len, start, end);
1151 PyBuffer_Release(&subbuf);
1152 return res;
1155 PyDoc_STRVAR(find__doc__,
1156 "B.find(sub [,start [,end]]) -> int\n\
1158 Return the lowest index in B where subsection sub is found,\n\
1159 such that sub is contained within s[start,end]. Optional\n\
1160 arguments start and end are interpreted as in slice notation.\n\
1162 Return -1 on failure.");
1164 static PyObject *
1165 bytes_find(PyByteArrayObject *self, PyObject *args)
1167 Py_ssize_t result = bytes_find_internal(self, args, +1);
1168 if (result == -2)
1169 return NULL;
1170 return PyInt_FromSsize_t(result);
1173 PyDoc_STRVAR(count__doc__,
1174 "B.count(sub [,start [,end]]) -> int\n\
1176 Return the number of non-overlapping occurrences of subsection sub in\n\
1177 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1178 as in slice notation.");
1180 static PyObject *
1181 bytes_count(PyByteArrayObject *self, PyObject *args)
1183 PyObject *sub_obj;
1184 const char *str = PyByteArray_AS_STRING(self);
1185 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1186 Py_buffer vsub;
1187 PyObject *count_obj;
1189 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1190 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1191 return NULL;
1193 if (_getbuffer(sub_obj, &vsub) < 0)
1194 return NULL;
1196 _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
1198 count_obj = PyInt_FromSsize_t(
1199 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1201 PyBuffer_Release(&vsub);
1202 return count_obj;
1206 PyDoc_STRVAR(index__doc__,
1207 "B.index(sub [,start [,end]]) -> int\n\
1209 Like B.find() but raise ValueError when the subsection is not found.");
1211 static PyObject *
1212 bytes_index(PyByteArrayObject *self, PyObject *args)
1214 Py_ssize_t result = bytes_find_internal(self, args, +1);
1215 if (result == -2)
1216 return NULL;
1217 if (result == -1) {
1218 PyErr_SetString(PyExc_ValueError,
1219 "subsection not found");
1220 return NULL;
1222 return PyInt_FromSsize_t(result);
1226 PyDoc_STRVAR(rfind__doc__,
1227 "B.rfind(sub [,start [,end]]) -> int\n\
1229 Return the highest index in B where subsection sub is found,\n\
1230 such that sub is contained within s[start,end]. Optional\n\
1231 arguments start and end are interpreted as in slice notation.\n\
1233 Return -1 on failure.");
1235 static PyObject *
1236 bytes_rfind(PyByteArrayObject *self, PyObject *args)
1238 Py_ssize_t result = bytes_find_internal(self, args, -1);
1239 if (result == -2)
1240 return NULL;
1241 return PyInt_FromSsize_t(result);
1245 PyDoc_STRVAR(rindex__doc__,
1246 "B.rindex(sub [,start [,end]]) -> int\n\
1248 Like B.rfind() but raise ValueError when the subsection is not found.");
1250 static PyObject *
1251 bytes_rindex(PyByteArrayObject *self, PyObject *args)
1253 Py_ssize_t result = bytes_find_internal(self, args, -1);
1254 if (result == -2)
1255 return NULL;
1256 if (result == -1) {
1257 PyErr_SetString(PyExc_ValueError,
1258 "subsection not found");
1259 return NULL;
1261 return PyInt_FromSsize_t(result);
1265 static int
1266 bytes_contains(PyObject *self, PyObject *arg)
1268 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1269 if (ival == -1 && PyErr_Occurred()) {
1270 Py_buffer varg;
1271 int pos;
1272 PyErr_Clear();
1273 if (_getbuffer(arg, &varg) < 0)
1274 return -1;
1275 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1276 varg.buf, varg.len, 0);
1277 PyBuffer_Release(&varg);
1278 return pos >= 0;
1280 if (ival < 0 || ival >= 256) {
1281 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1282 return -1;
1285 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1289 /* Matches the end (direction >= 0) or start (direction < 0) of self
1290 * against substr, using the start and end arguments. Returns
1291 * -1 on error, 0 if not found and 1 if found.
1293 Py_LOCAL(int)
1294 _bytes_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1295 Py_ssize_t end, int direction)
1297 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1298 const char* str;
1299 Py_buffer vsubstr;
1300 int rv = 0;
1302 str = PyByteArray_AS_STRING(self);
1304 if (_getbuffer(substr, &vsubstr) < 0)
1305 return -1;
1307 _adjust_indices(&start, &end, len);
1309 if (direction < 0) {
1310 /* startswith */
1311 if (start+vsubstr.len > len) {
1312 goto done;
1314 } else {
1315 /* endswith */
1316 if (end-start < vsubstr.len || start > len) {
1317 goto done;
1320 if (end-vsubstr.len > start)
1321 start = end - vsubstr.len;
1323 if (end-start >= vsubstr.len)
1324 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1326 done:
1327 PyBuffer_Release(&vsubstr);
1328 return rv;
1332 PyDoc_STRVAR(startswith__doc__,
1333 "B.startswith(prefix [,start [,end]]) -> bool\n\
1335 Return True if B starts with the specified prefix, False otherwise.\n\
1336 With optional start, test B beginning at that position.\n\
1337 With optional end, stop comparing B at that position.\n\
1338 prefix can also be a tuple of strings to try.");
1340 static PyObject *
1341 bytes_startswith(PyByteArrayObject *self, PyObject *args)
1343 Py_ssize_t start = 0;
1344 Py_ssize_t end = PY_SSIZE_T_MAX;
1345 PyObject *subobj;
1346 int result;
1348 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1349 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1350 return NULL;
1351 if (PyTuple_Check(subobj)) {
1352 Py_ssize_t i;
1353 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1354 result = _bytes_tailmatch(self,
1355 PyTuple_GET_ITEM(subobj, i),
1356 start, end, -1);
1357 if (result == -1)
1358 return NULL;
1359 else if (result) {
1360 Py_RETURN_TRUE;
1363 Py_RETURN_FALSE;
1365 result = _bytes_tailmatch(self, subobj, start, end, -1);
1366 if (result == -1)
1367 return NULL;
1368 else
1369 return PyBool_FromLong(result);
1372 PyDoc_STRVAR(endswith__doc__,
1373 "B.endswith(suffix [,start [,end]]) -> bool\n\
1375 Return True if B ends with the specified suffix, False otherwise.\n\
1376 With optional start, test B beginning at that position.\n\
1377 With optional end, stop comparing B at that position.\n\
1378 suffix can also be a tuple of strings to try.");
1380 static PyObject *
1381 bytes_endswith(PyByteArrayObject *self, PyObject *args)
1383 Py_ssize_t start = 0;
1384 Py_ssize_t end = PY_SSIZE_T_MAX;
1385 PyObject *subobj;
1386 int result;
1388 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1389 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1390 return NULL;
1391 if (PyTuple_Check(subobj)) {
1392 Py_ssize_t i;
1393 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1394 result = _bytes_tailmatch(self,
1395 PyTuple_GET_ITEM(subobj, i),
1396 start, end, +1);
1397 if (result == -1)
1398 return NULL;
1399 else if (result) {
1400 Py_RETURN_TRUE;
1403 Py_RETURN_FALSE;
1405 result = _bytes_tailmatch(self, subobj, start, end, +1);
1406 if (result == -1)
1407 return NULL;
1408 else
1409 return PyBool_FromLong(result);
1413 PyDoc_STRVAR(translate__doc__,
1414 "B.translate(table[, deletechars]) -> bytearray\n\
1416 Return a copy of B, where all characters occurring in the\n\
1417 optional argument deletechars are removed, and the remaining\n\
1418 characters have been mapped through the given translation\n\
1419 table, which must be a bytes object of length 256.");
1421 static PyObject *
1422 bytes_translate(PyByteArrayObject *self, PyObject *args)
1424 register char *input, *output;
1425 register const char *table;
1426 register Py_ssize_t i, c, changed = 0;
1427 PyObject *input_obj = (PyObject*)self;
1428 const char *output_start;
1429 Py_ssize_t inlen;
1430 PyObject *result;
1431 int trans_table[256];
1432 PyObject *tableobj, *delobj = NULL;
1433 Py_buffer vtable, vdel;
1435 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1436 &tableobj, &delobj))
1437 return NULL;
1439 if (_getbuffer(tableobj, &vtable) < 0)
1440 return NULL;
1442 if (vtable.len != 256) {
1443 PyErr_SetString(PyExc_ValueError,
1444 "translation table must be 256 characters long");
1445 result = NULL;
1446 goto done;
1449 if (delobj != NULL) {
1450 if (_getbuffer(delobj, &vdel) < 0) {
1451 result = NULL;
1452 goto done;
1455 else {
1456 vdel.buf = NULL;
1457 vdel.len = 0;
1460 table = (const char *)vtable.buf;
1461 inlen = PyByteArray_GET_SIZE(input_obj);
1462 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1463 if (result == NULL)
1464 goto done;
1465 output_start = output = PyByteArray_AsString(result);
1466 input = PyByteArray_AS_STRING(input_obj);
1468 if (vdel.len == 0) {
1469 /* If no deletions are required, use faster code */
1470 for (i = inlen; --i >= 0; ) {
1471 c = Py_CHARMASK(*input++);
1472 if (Py_CHARMASK((*output++ = table[c])) != c)
1473 changed = 1;
1475 if (changed || !PyByteArray_CheckExact(input_obj))
1476 goto done;
1477 Py_DECREF(result);
1478 Py_INCREF(input_obj);
1479 result = input_obj;
1480 goto done;
1483 for (i = 0; i < 256; i++)
1484 trans_table[i] = Py_CHARMASK(table[i]);
1486 for (i = 0; i < vdel.len; i++)
1487 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1489 for (i = inlen; --i >= 0; ) {
1490 c = Py_CHARMASK(*input++);
1491 if (trans_table[c] != -1)
1492 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1493 continue;
1494 changed = 1;
1496 if (!changed && PyByteArray_CheckExact(input_obj)) {
1497 Py_DECREF(result);
1498 Py_INCREF(input_obj);
1499 result = input_obj;
1500 goto done;
1502 /* Fix the size of the resulting string */
1503 if (inlen > 0)
1504 PyByteArray_Resize(result, output - output_start);
1506 done:
1507 PyBuffer_Release(&vtable);
1508 if (delobj != NULL)
1509 PyBuffer_Release(&vdel);
1510 return result;
/* Search directions accepted by findstring()/countstring().
   REVERSE is parenthesised so it stays a single operand in any
   expression. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c in target[0:target_len],
   or NULL if there is none. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))

/* True if pattern[0:length] occurs in target at offset.  The first and
   last bytes are compared before falling back to memcmp on the middle.
   Don't call if length < 2: the memcmp size would be negative. */
#define Py_STRING_MATCH(target, offset, pattern, length)        \
  ((target)[(offset)] == (pattern)[0] &&                        \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&    \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1529 /* Bytes ops must return a string. */
1530 /* If the object is subclass of bytes, create a copy */
1531 Py_LOCAL(PyByteArrayObject *)
1532 return_self(PyByteArrayObject *self)
1534 if (PyByteArray_CheckExact(self)) {
1535 Py_INCREF(self);
1536 return (PyByteArrayObject *)self;
1538 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1539 PyByteArray_AS_STRING(self),
1540 PyByteArray_GET_SIZE(self));
1543 Py_LOCAL_INLINE(Py_ssize_t)
1544 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1546 Py_ssize_t count=0;
1547 const char *start=target;
1548 const char *end=target+target_len;
1550 while ( (start=findchar(start, end-start, c)) != NULL ) {
1551 count++;
1552 if (count >= maxcount)
1553 break;
1554 start += 1;
1556 return count;
1559 Py_LOCAL(Py_ssize_t)
1560 findstring(const char *target, Py_ssize_t target_len,
1561 const char *pattern, Py_ssize_t pattern_len,
1562 Py_ssize_t start,
1563 Py_ssize_t end,
1564 int direction)
1566 if (start < 0) {
1567 start += target_len;
1568 if (start < 0)
1569 start = 0;
1571 if (end > target_len) {
1572 end = target_len;
1573 } else if (end < 0) {
1574 end += target_len;
1575 if (end < 0)
1576 end = 0;
1579 /* zero-length substrings always match at the first attempt */
1580 if (pattern_len == 0)
1581 return (direction > 0) ? start : end;
1583 end -= pattern_len;
1585 if (direction < 0) {
1586 for (; end >= start; end--)
1587 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1588 return end;
1589 } else {
1590 for (; start <= end; start++)
1591 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1592 return start;
1594 return -1;
1597 Py_LOCAL_INLINE(Py_ssize_t)
1598 countstring(const char *target, Py_ssize_t target_len,
1599 const char *pattern, Py_ssize_t pattern_len,
1600 Py_ssize_t start,
1601 Py_ssize_t end,
1602 int direction, Py_ssize_t maxcount)
1604 Py_ssize_t count=0;
1606 if (start < 0) {
1607 start += target_len;
1608 if (start < 0)
1609 start = 0;
1611 if (end > target_len) {
1612 end = target_len;
1613 } else if (end < 0) {
1614 end += target_len;
1615 if (end < 0)
1616 end = 0;
1619 /* zero-length substrings match everywhere */
1620 if (pattern_len == 0 || maxcount == 0) {
1621 if (target_len+1 < maxcount)
1622 return target_len+1;
1623 return maxcount;
1626 end -= pattern_len;
1627 if (direction < 0) {
1628 for (; (end >= start); end--)
1629 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1630 count++;
1631 if (--maxcount <= 0) break;
1632 end -= pattern_len-1;
1634 } else {
1635 for (; (start <= end); start++)
1636 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1637 count++;
1638 if (--maxcount <= 0)
1639 break;
1640 start += pattern_len-1;
1643 return count;
1647 /* Algorithms for different cases of string replacement */
1649 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1650 Py_LOCAL(PyByteArrayObject *)
1651 replace_interleave(PyByteArrayObject *self,
1652 const char *to_s, Py_ssize_t to_len,
1653 Py_ssize_t maxcount)
1655 char *self_s, *result_s;
1656 Py_ssize_t self_len, result_len;
1657 Py_ssize_t count, i, product;
1658 PyByteArrayObject *result;
1660 self_len = PyByteArray_GET_SIZE(self);
1662 /* 1 at the end plus 1 after every character */
1663 count = self_len+1;
1664 if (maxcount < count)
1665 count = maxcount;
1667 /* Check for overflow */
1668 /* result_len = count * to_len + self_len; */
1669 product = count * to_len;
1670 if (product / to_len != count) {
1671 PyErr_SetString(PyExc_OverflowError,
1672 "replace string is too long");
1673 return NULL;
1675 result_len = product + self_len;
1676 if (result_len < 0) {
1677 PyErr_SetString(PyExc_OverflowError,
1678 "replace string is too long");
1679 return NULL;
1682 if (! (result = (PyByteArrayObject *)
1683 PyByteArray_FromStringAndSize(NULL, result_len)) )
1684 return NULL;
1686 self_s = PyByteArray_AS_STRING(self);
1687 result_s = PyByteArray_AS_STRING(result);
1689 /* TODO: special case single character, which doesn't need memcpy */
1691 /* Lay the first one down (guaranteed this will occur) */
1692 Py_MEMCPY(result_s, to_s, to_len);
1693 result_s += to_len;
1694 count -= 1;
1696 for (i=0; i<count; i++) {
1697 *result_s++ = *self_s++;
1698 Py_MEMCPY(result_s, to_s, to_len);
1699 result_s += to_len;
1702 /* Copy the rest of the original string */
1703 Py_MEMCPY(result_s, self_s, self_len-i);
1705 return result;
1708 /* Special case for deleting a single character */
1709 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1710 Py_LOCAL(PyByteArrayObject *)
1711 replace_delete_single_character(PyByteArrayObject *self,
1712 char from_c, Py_ssize_t maxcount)
1714 char *self_s, *result_s;
1715 char *start, *next, *end;
1716 Py_ssize_t self_len, result_len;
1717 Py_ssize_t count;
1718 PyByteArrayObject *result;
1720 self_len = PyByteArray_GET_SIZE(self);
1721 self_s = PyByteArray_AS_STRING(self);
1723 count = countchar(self_s, self_len, from_c, maxcount);
1724 if (count == 0) {
1725 return return_self(self);
1728 result_len = self_len - count; /* from_len == 1 */
1729 assert(result_len>=0);
1731 if ( (result = (PyByteArrayObject *)
1732 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1733 return NULL;
1734 result_s = PyByteArray_AS_STRING(result);
1736 start = self_s;
1737 end = self_s + self_len;
1738 while (count-- > 0) {
1739 next = findchar(start, end-start, from_c);
1740 if (next == NULL)
1741 break;
1742 Py_MEMCPY(result_s, start, next-start);
1743 result_s += (next-start);
1744 start = next+1;
1746 Py_MEMCPY(result_s, start, end-start);
1748 return result;
1751 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1753 Py_LOCAL(PyByteArrayObject *)
1754 replace_delete_substring(PyByteArrayObject *self,
1755 const char *from_s, Py_ssize_t from_len,
1756 Py_ssize_t maxcount)
1758 char *self_s, *result_s;
1759 char *start, *next, *end;
1760 Py_ssize_t self_len, result_len;
1761 Py_ssize_t count, offset;
1762 PyByteArrayObject *result;
1764 self_len = PyByteArray_GET_SIZE(self);
1765 self_s = PyByteArray_AS_STRING(self);
1767 count = countstring(self_s, self_len,
1768 from_s, from_len,
1769 0, self_len, 1,
1770 maxcount);
1772 if (count == 0) {
1773 /* no matches */
1774 return return_self(self);
1777 result_len = self_len - (count * from_len);
1778 assert (result_len>=0);
1780 if ( (result = (PyByteArrayObject *)
1781 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1782 return NULL;
1784 result_s = PyByteArray_AS_STRING(result);
1786 start = self_s;
1787 end = self_s + self_len;
1788 while (count-- > 0) {
1789 offset = findstring(start, end-start,
1790 from_s, from_len,
1791 0, end-start, FORWARD);
1792 if (offset == -1)
1793 break;
1794 next = start + offset;
1796 Py_MEMCPY(result_s, start, next-start);
1798 result_s += (next-start);
1799 start = next+from_len;
1801 Py_MEMCPY(result_s, start, end-start);
1802 return result;
1805 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1806 Py_LOCAL(PyByteArrayObject *)
1807 replace_single_character_in_place(PyByteArrayObject *self,
1808 char from_c, char to_c,
1809 Py_ssize_t maxcount)
1811 char *self_s, *result_s, *start, *end, *next;
1812 Py_ssize_t self_len;
1813 PyByteArrayObject *result;
1815 /* The result string will be the same size */
1816 self_s = PyByteArray_AS_STRING(self);
1817 self_len = PyByteArray_GET_SIZE(self);
1819 next = findchar(self_s, self_len, from_c);
1821 if (next == NULL) {
1822 /* No matches; return the original bytes */
1823 return return_self(self);
1826 /* Need to make a new bytes */
1827 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1828 if (result == NULL)
1829 return NULL;
1830 result_s = PyByteArray_AS_STRING(result);
1831 Py_MEMCPY(result_s, self_s, self_len);
1833 /* change everything in-place, starting with this one */
1834 start = result_s + (next-self_s);
1835 *start = to_c;
1836 start++;
1837 end = result_s + self_len;
1839 while (--maxcount > 0) {
1840 next = findchar(start, end-start, from_c);
1841 if (next == NULL)
1842 break;
1843 *next = to_c;
1844 start = next+1;
1847 return result;
1850 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1851 Py_LOCAL(PyByteArrayObject *)
1852 replace_substring_in_place(PyByteArrayObject *self,
1853 const char *from_s, Py_ssize_t from_len,
1854 const char *to_s, Py_ssize_t to_len,
1855 Py_ssize_t maxcount)
1857 char *result_s, *start, *end;
1858 char *self_s;
1859 Py_ssize_t self_len, offset;
1860 PyByteArrayObject *result;
1862 /* The result bytes will be the same size */
1864 self_s = PyByteArray_AS_STRING(self);
1865 self_len = PyByteArray_GET_SIZE(self);
1867 offset = findstring(self_s, self_len,
1868 from_s, from_len,
1869 0, self_len, FORWARD);
1870 if (offset == -1) {
1871 /* No matches; return the original bytes */
1872 return return_self(self);
1875 /* Need to make a new bytes */
1876 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1877 if (result == NULL)
1878 return NULL;
1879 result_s = PyByteArray_AS_STRING(result);
1880 Py_MEMCPY(result_s, self_s, self_len);
1882 /* change everything in-place, starting with this one */
1883 start = result_s + offset;
1884 Py_MEMCPY(start, to_s, from_len);
1885 start += from_len;
1886 end = result_s + self_len;
1888 while ( --maxcount > 0) {
1889 offset = findstring(start, end-start,
1890 from_s, from_len,
1891 0, end-start, FORWARD);
1892 if (offset==-1)
1893 break;
1894 Py_MEMCPY(start+offset, to_s, from_len);
1895 start += offset+from_len;
1898 return result;
1901 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1902 Py_LOCAL(PyByteArrayObject *)
1903 replace_single_character(PyByteArrayObject *self,
1904 char from_c,
1905 const char *to_s, Py_ssize_t to_len,
1906 Py_ssize_t maxcount)
1908 char *self_s, *result_s;
1909 char *start, *next, *end;
1910 Py_ssize_t self_len, result_len;
1911 Py_ssize_t count, product;
1912 PyByteArrayObject *result;
1914 self_s = PyByteArray_AS_STRING(self);
1915 self_len = PyByteArray_GET_SIZE(self);
1917 count = countchar(self_s, self_len, from_c, maxcount);
1918 if (count == 0) {
1919 /* no matches, return unchanged */
1920 return return_self(self);
1923 /* use the difference between current and new, hence the "-1" */
1924 /* result_len = self_len + count * (to_len-1) */
1925 product = count * (to_len-1);
1926 if (product / (to_len-1) != count) {
1927 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1928 return NULL;
1930 result_len = self_len + product;
1931 if (result_len < 0) {
1932 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1933 return NULL;
1936 if ( (result = (PyByteArrayObject *)
1937 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1938 return NULL;
1939 result_s = PyByteArray_AS_STRING(result);
1941 start = self_s;
1942 end = self_s + self_len;
1943 while (count-- > 0) {
1944 next = findchar(start, end-start, from_c);
1945 if (next == NULL)
1946 break;
1948 if (next == start) {
1949 /* replace with the 'to' */
1950 Py_MEMCPY(result_s, to_s, to_len);
1951 result_s += to_len;
1952 start += 1;
1953 } else {
1954 /* copy the unchanged old then the 'to' */
1955 Py_MEMCPY(result_s, start, next-start);
1956 result_s += (next-start);
1957 Py_MEMCPY(result_s, to_s, to_len);
1958 result_s += to_len;
1959 start = next+1;
1962 /* Copy the remainder of the remaining bytes */
1963 Py_MEMCPY(result_s, start, end-start);
1965 return result;
1968 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1969 Py_LOCAL(PyByteArrayObject *)
1970 replace_substring(PyByteArrayObject *self,
1971 const char *from_s, Py_ssize_t from_len,
1972 const char *to_s, Py_ssize_t to_len,
1973 Py_ssize_t maxcount)
1975 char *self_s, *result_s;
1976 char *start, *next, *end;
1977 Py_ssize_t self_len, result_len;
1978 Py_ssize_t count, offset, product;
1979 PyByteArrayObject *result;
1981 self_s = PyByteArray_AS_STRING(self);
1982 self_len = PyByteArray_GET_SIZE(self);
1984 count = countstring(self_s, self_len,
1985 from_s, from_len,
1986 0, self_len, FORWARD, maxcount);
1987 if (count == 0) {
1988 /* no matches, return unchanged */
1989 return return_self(self);
1992 /* Check for overflow */
1993 /* result_len = self_len + count * (to_len-from_len) */
1994 product = count * (to_len-from_len);
1995 if (product / (to_len-from_len) != count) {
1996 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1997 return NULL;
1999 result_len = self_len + product;
2000 if (result_len < 0) {
2001 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2002 return NULL;
2005 if ( (result = (PyByteArrayObject *)
2006 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
2007 return NULL;
2008 result_s = PyByteArray_AS_STRING(result);
2010 start = self_s;
2011 end = self_s + self_len;
2012 while (count-- > 0) {
2013 offset = findstring(start, end-start,
2014 from_s, from_len,
2015 0, end-start, FORWARD);
2016 if (offset == -1)
2017 break;
2018 next = start+offset;
2019 if (next == start) {
2020 /* replace with the 'to' */
2021 Py_MEMCPY(result_s, to_s, to_len);
2022 result_s += to_len;
2023 start += from_len;
2024 } else {
2025 /* copy the unchanged old then the 'to' */
2026 Py_MEMCPY(result_s, start, next-start);
2027 result_s += (next-start);
2028 Py_MEMCPY(result_s, to_s, to_len);
2029 result_s += to_len;
2030 start = next+from_len;
2033 /* Copy the remainder of the remaining bytes */
2034 Py_MEMCPY(result_s, start, end-start);
2036 return result;
2040 Py_LOCAL(PyByteArrayObject *)
2041 replace(PyByteArrayObject *self,
2042 const char *from_s, Py_ssize_t from_len,
2043 const char *to_s, Py_ssize_t to_len,
2044 Py_ssize_t maxcount)
2046 if (maxcount < 0) {
2047 maxcount = PY_SSIZE_T_MAX;
2048 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
2049 /* nothing to do; return the original bytes */
2050 return return_self(self);
2053 if (maxcount == 0 ||
2054 (from_len == 0 && to_len == 0)) {
2055 /* nothing to do; return the original bytes */
2056 return return_self(self);
2059 /* Handle zero-length special cases */
2061 if (from_len == 0) {
2062 /* insert the 'to' bytes everywhere. */
2063 /* >>> "Python".replace("", ".") */
2064 /* '.P.y.t.h.o.n.' */
2065 return replace_interleave(self, to_s, to_len, maxcount);
2068 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2069 /* point for an empty self bytes to generate a non-empty bytes */
2070 /* Special case so the remaining code always gets a non-empty bytes */
2071 if (PyByteArray_GET_SIZE(self) == 0) {
2072 return return_self(self);
2075 if (to_len == 0) {
2076 /* delete all occurances of 'from' bytes */
2077 if (from_len == 1) {
2078 return replace_delete_single_character(
2079 self, from_s[0], maxcount);
2080 } else {
2081 return replace_delete_substring(self, from_s, from_len, maxcount);
2085 /* Handle special case where both bytes have the same length */
2087 if (from_len == to_len) {
2088 if (from_len == 1) {
2089 return replace_single_character_in_place(
2090 self,
2091 from_s[0],
2092 to_s[0],
2093 maxcount);
2094 } else {
2095 return replace_substring_in_place(
2096 self, from_s, from_len, to_s, to_len, maxcount);
2100 /* Otherwise use the more generic algorithms */
2101 if (from_len == 1) {
2102 return replace_single_character(self, from_s[0],
2103 to_s, to_len, maxcount);
2104 } else {
2105 /* len('from')>=2, len('to')>=1 */
2106 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2111 PyDoc_STRVAR(replace__doc__,
2112 "B.replace(old, new[, count]) -> bytes\n\
2114 Return a copy of B with all occurrences of subsection\n\
2115 old replaced by new. If the optional argument count is\n\
2116 given, only the first count occurrences are replaced.");
2118 static PyObject *
2119 bytes_replace(PyByteArrayObject *self, PyObject *args)
2121 Py_ssize_t count = -1;
2122 PyObject *from, *to, *res;
2123 Py_buffer vfrom, vto;
2125 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2126 return NULL;
2128 if (_getbuffer(from, &vfrom) < 0)
2129 return NULL;
2130 if (_getbuffer(to, &vto) < 0) {
2131 PyBuffer_Release(&vfrom);
2132 return NULL;
2135 res = (PyObject *)replace((PyByteArrayObject *) self,
2136 vfrom.buf, vfrom.len,
2137 vto.buf, vto.len, count);
2139 PyBuffer_Release(&vfrom);
2140 PyBuffer_Release(&vto);
2141 return res;
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to maxsplit cuts.
   5 splits gives 6 elements.  The argument is parenthesised so that any
   expression can be passed safely. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* The SPLIT_* helpers below rely on the enclosing function providing the
   locals `str`, `list` and (for SPLIT_ADD) `count`, plus an `onError:`
   label.  They are wrapped in do { } while (0) so that each behaves as a
   single statement; call sites must end with a semicolon. */

/* Append data[left:right] as a new bytearray to `list`. */
#define SPLIT_APPEND(data, left, right)                         \
    do {                                                        \
        str = PyByteArray_FromStringAndSize((data) + (left),    \
                                            (right) - (left));  \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    } while (0)

/* Store data[left:right] as element number `count` of `list`: the first
   MAX_PREALLOC elements go into preallocated slots, later ones are
   appended.  Increments `count`. */
#define SPLIT_ADD(data, left, right)                            \
    do {                                                        \
        str = PyByteArray_FromStringAndSize((data) + (left),    \
                                            (right) - (left));  \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (count < MAX_PREALLOC) {                             \
            PyList_SET_ITEM(list, count, str);                  \
        } else {                                                \
            if (PyList_Append(list, str)) {                     \
                Py_DECREF(str);                                 \
                goto onError;                                   \
            }                                                   \
            else                                                \
                Py_DECREF(str);                                 \
        }                                                       \
        count++;                                                \
    } while (0)

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
2191 Py_LOCAL_INLINE(PyObject *)
2192 split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2194 register Py_ssize_t i, j, count = 0;
2195 PyObject *str;
2196 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2198 if (list == NULL)
2199 return NULL;
2201 i = j = 0;
2202 while ((j < len) && (maxcount-- > 0)) {
2203 for(; j < len; j++) {
2204 /* I found that using memchr makes no difference */
2205 if (s[j] == ch) {
2206 SPLIT_ADD(s, i, j);
2207 i = j = j + 1;
2208 break;
2212 if (i <= len) {
2213 SPLIT_ADD(s, i, len);
2215 FIX_PREALLOC_SIZE(list);
2216 return list;
2218 onError:
2219 Py_DECREF(list);
2220 return NULL;
2224 Py_LOCAL_INLINE(PyObject *)
2225 split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2227 register Py_ssize_t i, j, count = 0;
2228 PyObject *str;
2229 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2231 if (list == NULL)
2232 return NULL;
2234 for (i = j = 0; i < len; ) {
2235 /* find a token */
2236 while (i < len && ISSPACE(s[i]))
2237 i++;
2238 j = i;
2239 while (i < len && !ISSPACE(s[i]))
2240 i++;
2241 if (j < i) {
2242 if (maxcount-- <= 0)
2243 break;
2244 SPLIT_ADD(s, j, i);
2245 while (i < len && ISSPACE(s[i]))
2246 i++;
2247 j = i;
2250 if (j < len) {
2251 SPLIT_ADD(s, j, len);
2253 FIX_PREALLOC_SIZE(list);
2254 return list;
2256 onError:
2257 Py_DECREF(list);
2258 return NULL;
2261 PyDoc_STRVAR(split__doc__,
2262 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2264 Return a list of the sections in B, using sep as the delimiter.\n\
2265 If sep is not given, B is split on ASCII whitespace characters\n\
2266 (space, tab, return, newline, formfeed, vertical tab).\n\
2267 If maxsplit is given, at most maxsplit splits are done.");
2269 static PyObject *
2270 bytes_split(PyByteArrayObject *self, PyObject *args)
2272 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2273 Py_ssize_t maxsplit = -1, count = 0;
2274 const char *s = PyByteArray_AS_STRING(self), *sub;
2275 PyObject *list, *str, *subobj = Py_None;
2276 Py_buffer vsub;
2277 #ifdef USE_FAST
2278 Py_ssize_t pos;
2279 #endif
2281 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2282 return NULL;
2283 if (maxsplit < 0)
2284 maxsplit = PY_SSIZE_T_MAX;
2286 if (subobj == Py_None)
2287 return split_whitespace(s, len, maxsplit);
2289 if (_getbuffer(subobj, &vsub) < 0)
2290 return NULL;
2291 sub = vsub.buf;
2292 n = vsub.len;
2294 if (n == 0) {
2295 PyErr_SetString(PyExc_ValueError, "empty separator");
2296 PyBuffer_Release(&vsub);
2297 return NULL;
2299 if (n == 1) {
2300 list = split_char(s, len, sub[0], maxsplit);
2301 PyBuffer_Release(&vsub);
2302 return list;
2305 list = PyList_New(PREALLOC_SIZE(maxsplit));
2306 if (list == NULL) {
2307 PyBuffer_Release(&vsub);
2308 return NULL;
2311 #ifdef USE_FAST
2312 i = j = 0;
2313 while (maxsplit-- > 0) {
2314 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2315 if (pos < 0)
2316 break;
2317 j = i+pos;
2318 SPLIT_ADD(s, i, j);
2319 i = j + n;
2321 #else
2322 i = j = 0;
2323 while ((j+n <= len) && (maxsplit-- > 0)) {
2324 for (; j+n <= len; j++) {
2325 if (Py_STRING_MATCH(s, j, sub, n)) {
2326 SPLIT_ADD(s, i, j);
2327 i = j = j + n;
2328 break;
2332 #endif
2333 SPLIT_ADD(s, i, len);
2334 FIX_PREALLOC_SIZE(list);
2335 PyBuffer_Release(&vsub);
2336 return list;
2338 onError:
2339 Py_DECREF(list);
2340 PyBuffer_Release(&vsub);
2341 return NULL;
2344 /* stringlib's partition shares nullbytes in some cases.
2345 undo this, we don't want the nullbytes to be shared. */
2346 static PyObject *
2347 make_nullbytes_unique(PyObject *result)
2349 if (result != NULL) {
2350 int i;
2351 assert(PyTuple_Check(result));
2352 assert(PyTuple_GET_SIZE(result) == 3);
2353 for (i = 0; i < 3; i++) {
2354 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2355 PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
2356 if (new == NULL) {
2357 Py_DECREF(result);
2358 result = NULL;
2359 break;
2361 Py_DECREF(nullbytes);
2362 PyTuple_SET_ITEM(result, i, new);
2366 return result;
2369 PyDoc_STRVAR(partition__doc__,
2370 "B.partition(sep) -> (head, sep, tail)\n\
2372 Searches for the separator sep in B, and returns the part before it,\n\
2373 the separator itself, and the part after it. If the separator is not\n\
2374 found, returns B and two empty bytearray objects.");
2376 static PyObject *
2377 bytes_partition(PyByteArrayObject *self, PyObject *sep_obj)
2379 PyObject *bytesep, *result;
2381 bytesep = PyByteArray_FromObject(sep_obj);
2382 if (! bytesep)
2383 return NULL;
2385 result = stringlib_partition(
2386 (PyObject*) self,
2387 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2388 bytesep,
2389 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2392 Py_DECREF(bytesep);
2393 return make_nullbytes_unique(result);
2396 PyDoc_STRVAR(rpartition__doc__,
2397 "B.rpartition(sep) -> (tail, sep, head)\n\
2399 Searches for the separator sep in B, starting at the end of B,\n\
2400 and returns the part before it, the separator itself, and the\n\
2401 part after it. If the separator is not found, returns two empty\n\
2402 bytearray objects and B.");
2404 static PyObject *
2405 bytes_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2407 PyObject *bytesep, *result;
2409 bytesep = PyByteArray_FromObject(sep_obj);
2410 if (! bytesep)
2411 return NULL;
2413 result = stringlib_rpartition(
2414 (PyObject*) self,
2415 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2416 bytesep,
2417 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2420 Py_DECREF(bytesep);
2421 return make_nullbytes_unique(result);
2424 Py_LOCAL_INLINE(PyObject *)
2425 rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2427 register Py_ssize_t i, j, count=0;
2428 PyObject *str;
2429 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2431 if (list == NULL)
2432 return NULL;
2434 i = j = len - 1;
2435 while ((i >= 0) && (maxcount-- > 0)) {
2436 for (; i >= 0; i--) {
2437 if (s[i] == ch) {
2438 SPLIT_ADD(s, i + 1, j + 1);
2439 j = i = i - 1;
2440 break;
2444 if (j >= -1) {
2445 SPLIT_ADD(s, 0, j + 1);
2447 FIX_PREALLOC_SIZE(list);
2448 if (PyList_Reverse(list) < 0)
2449 goto onError;
2451 return list;
2453 onError:
2454 Py_DECREF(list);
2455 return NULL;
2458 Py_LOCAL_INLINE(PyObject *)
2459 rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2461 register Py_ssize_t i, j, count = 0;
2462 PyObject *str;
2463 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2465 if (list == NULL)
2466 return NULL;
2468 for (i = j = len - 1; i >= 0; ) {
2469 /* find a token */
2470 while (i >= 0 && ISSPACE(s[i]))
2471 i--;
2472 j = i;
2473 while (i >= 0 && !ISSPACE(s[i]))
2474 i--;
2475 if (j > i) {
2476 if (maxcount-- <= 0)
2477 break;
2478 SPLIT_ADD(s, i + 1, j + 1);
2479 while (i >= 0 && ISSPACE(s[i]))
2480 i--;
2481 j = i;
2484 if (j >= 0) {
2485 SPLIT_ADD(s, 0, j + 1);
2487 FIX_PREALLOC_SIZE(list);
2488 if (PyList_Reverse(list) < 0)
2489 goto onError;
2491 return list;
2493 onError:
2494 Py_DECREF(list);
2495 return NULL;
2498 PyDoc_STRVAR(rsplit__doc__,
2499 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2501 Return a list of the sections in B, using sep as the delimiter,\n\
2502 starting at the end of B and working to the front.\n\
2503 If sep is not given, B is split on ASCII whitespace characters\n\
2504 (space, tab, return, newline, formfeed, vertical tab).\n\
2505 If maxsplit is given, at most maxsplit splits are done.");
2507 static PyObject *
2508 bytes_rsplit(PyByteArrayObject *self, PyObject *args)
2510 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2511 Py_ssize_t maxsplit = -1, count = 0;
2512 const char *s = PyByteArray_AS_STRING(self), *sub;
2513 PyObject *list, *str, *subobj = Py_None;
2514 Py_buffer vsub;
2516 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2517 return NULL;
2518 if (maxsplit < 0)
2519 maxsplit = PY_SSIZE_T_MAX;
2521 if (subobj == Py_None)
2522 return rsplit_whitespace(s, len, maxsplit);
2524 if (_getbuffer(subobj, &vsub) < 0)
2525 return NULL;
2526 sub = vsub.buf;
2527 n = vsub.len;
2529 if (n == 0) {
2530 PyErr_SetString(PyExc_ValueError, "empty separator");
2531 PyBuffer_Release(&vsub);
2532 return NULL;
2534 else if (n == 1) {
2535 list = rsplit_char(s, len, sub[0], maxsplit);
2536 PyBuffer_Release(&vsub);
2537 return list;
2540 list = PyList_New(PREALLOC_SIZE(maxsplit));
2541 if (list == NULL) {
2542 PyBuffer_Release(&vsub);
2543 return NULL;
2546 j = len;
2547 i = j - n;
2549 while ( (i >= 0) && (maxsplit-- > 0) ) {
2550 for (; i>=0; i--) {
2551 if (Py_STRING_MATCH(s, i, sub, n)) {
2552 SPLIT_ADD(s, i + n, j);
2553 j = i;
2554 i -= n;
2555 break;
2559 SPLIT_ADD(s, 0, j);
2560 FIX_PREALLOC_SIZE(list);
2561 if (PyList_Reverse(list) < 0)
2562 goto onError;
2563 PyBuffer_Release(&vsub);
2564 return list;
2566 onError:
2567 Py_DECREF(list);
2568 PyBuffer_Release(&vsub);
2569 return NULL;
2572 PyDoc_STRVAR(reverse__doc__,
2573 "B.reverse() -> None\n\
2575 Reverse the order of the values in B in place.");
2576 static PyObject *
2577 bytes_reverse(PyByteArrayObject *self, PyObject *unused)
2579 char swap, *head, *tail;
2580 Py_ssize_t i, j, n = Py_SIZE(self);
2582 j = n / 2;
2583 head = self->ob_bytes;
2584 tail = head + n - 1;
2585 for (i = 0; i < j; i++) {
2586 swap = *head;
2587 *head++ = *tail;
2588 *tail-- = swap;
2591 Py_RETURN_NONE;
2594 PyDoc_STRVAR(insert__doc__,
2595 "B.insert(index, int) -> None\n\
2597 Insert a single item into the bytearray before the given index.");
2598 static PyObject *
2599 bytes_insert(PyByteArrayObject *self, PyObject *args)
2601 PyObject *value;
2602 int ival;
2603 Py_ssize_t where, n = Py_SIZE(self);
2605 if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
2606 return NULL;
2608 if (n == PY_SSIZE_T_MAX) {
2609 PyErr_SetString(PyExc_OverflowError,
2610 "cannot add more objects to bytes");
2611 return NULL;
2613 if (!_getbytevalue(value, &ival))
2614 return NULL;
2615 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2616 return NULL;
2618 if (where < 0) {
2619 where += n;
2620 if (where < 0)
2621 where = 0;
2623 if (where > n)
2624 where = n;
2625 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2626 self->ob_bytes[where] = ival;
2628 Py_RETURN_NONE;
2631 PyDoc_STRVAR(append__doc__,
2632 "B.append(int) -> None\n\
2634 Append a single item to the end of B.");
2635 static PyObject *
2636 bytes_append(PyByteArrayObject *self, PyObject *arg)
2638 int value;
2639 Py_ssize_t n = Py_SIZE(self);
2641 if (! _getbytevalue(arg, &value))
2642 return NULL;
2643 if (n == PY_SSIZE_T_MAX) {
2644 PyErr_SetString(PyExc_OverflowError,
2645 "cannot add more objects to bytes");
2646 return NULL;
2648 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2649 return NULL;
2651 self->ob_bytes[n] = value;
2653 Py_RETURN_NONE;
2656 PyDoc_STRVAR(extend__doc__,
2657 "B.extend(iterable int) -> None\n\
2659 Append all the elements from the iterator or sequence to the\n\
2660 end of B.");
2661 static PyObject *
2662 bytes_extend(PyByteArrayObject *self, PyObject *arg)
2664 PyObject *it, *item, *bytes_obj;
2665 Py_ssize_t buf_size = 0, len = 0;
2666 int value;
2667 char *buf;
2669 /* bytes_setslice code only accepts something supporting PEP 3118. */
2670 if (PyObject_CheckBuffer(arg)) {
2671 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2672 return NULL;
2674 Py_RETURN_NONE;
2677 it = PyObject_GetIter(arg);
2678 if (it == NULL)
2679 return NULL;
2681 /* Try to determine the length of the argument. 32 is abitrary. */
2682 buf_size = _PyObject_LengthHint(arg, 32);
2684 bytes_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2685 if (bytes_obj == NULL)
2686 return NULL;
2687 buf = PyByteArray_AS_STRING(bytes_obj);
2689 while ((item = PyIter_Next(it)) != NULL) {
2690 if (! _getbytevalue(item, &value)) {
2691 Py_DECREF(item);
2692 Py_DECREF(it);
2693 Py_DECREF(bytes_obj);
2694 return NULL;
2696 buf[len++] = value;
2697 Py_DECREF(item);
2699 if (len >= buf_size) {
2700 buf_size = len + (len >> 1) + 1;
2701 if (PyByteArray_Resize((PyObject *)bytes_obj, buf_size) < 0) {
2702 Py_DECREF(it);
2703 Py_DECREF(bytes_obj);
2704 return NULL;
2706 /* Recompute the `buf' pointer, since the resizing operation may
2707 have invalidated it. */
2708 buf = PyByteArray_AS_STRING(bytes_obj);
2711 Py_DECREF(it);
2713 /* Resize down to exact size. */
2714 if (PyByteArray_Resize((PyObject *)bytes_obj, len) < 0) {
2715 Py_DECREF(bytes_obj);
2716 return NULL;
2719 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), bytes_obj) == -1)
2720 return NULL;
2721 Py_DECREF(bytes_obj);
2723 Py_RETURN_NONE;
2726 PyDoc_STRVAR(pop__doc__,
2727 "B.pop([index]) -> int\n\
2729 Remove and return a single item from B. If no index\n\
2730 argument is given, will pop the last value.");
2731 static PyObject *
2732 bytes_pop(PyByteArrayObject *self, PyObject *args)
2734 int value;
2735 Py_ssize_t where = -1, n = Py_SIZE(self);
2737 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2738 return NULL;
2740 if (n == 0) {
2741 PyErr_SetString(PyExc_OverflowError,
2742 "cannot pop an empty bytes");
2743 return NULL;
2745 if (where < 0)
2746 where += Py_SIZE(self);
2747 if (where < 0 || where >= Py_SIZE(self)) {
2748 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2749 return NULL;
2752 value = self->ob_bytes[where];
2753 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2754 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2755 return NULL;
2757 return PyInt_FromLong(value);
2760 PyDoc_STRVAR(remove__doc__,
2761 "B.remove(int) -> None\n\
2763 Remove the first occurance of a value in B.");
2764 static PyObject *
2765 bytes_remove(PyByteArrayObject *self, PyObject *arg)
2767 int value;
2768 Py_ssize_t where, n = Py_SIZE(self);
2770 if (! _getbytevalue(arg, &value))
2771 return NULL;
2773 for (where = 0; where < n; where++) {
2774 if (self->ob_bytes[where] == value)
2775 break;
2777 if (where == n) {
2778 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2779 return NULL;
2782 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2783 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2784 return NULL;
2786 Py_RETURN_NONE;
2789 /* XXX These two helpers could be optimized if argsize == 1 */
2791 static Py_ssize_t
2792 lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2793 void *argptr, Py_ssize_t argsize)
2795 Py_ssize_t i = 0;
2796 while (i < mysize && memchr(argptr, myptr[i], argsize))
2797 i++;
2798 return i;
2801 static Py_ssize_t
2802 rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2803 void *argptr, Py_ssize_t argsize)
2805 Py_ssize_t i = mysize - 1;
2806 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2807 i--;
2808 return i + 1;
2811 PyDoc_STRVAR(strip__doc__,
2812 "B.strip([bytes]) -> bytearray\n\
2814 Strip leading and trailing bytes contained in the argument.\n\
2815 If the argument is omitted, strip ASCII whitespace.");
2816 static PyObject *
2817 bytes_strip(PyByteArrayObject *self, PyObject *args)
2819 Py_ssize_t left, right, mysize, argsize;
2820 void *myptr, *argptr;
2821 PyObject *arg = Py_None;
2822 Py_buffer varg;
2823 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2824 return NULL;
2825 if (arg == Py_None) {
2826 argptr = "\t\n\r\f\v ";
2827 argsize = 6;
2829 else {
2830 if (_getbuffer(arg, &varg) < 0)
2831 return NULL;
2832 argptr = varg.buf;
2833 argsize = varg.len;
2835 myptr = self->ob_bytes;
2836 mysize = Py_SIZE(self);
2837 left = lstrip_helper(myptr, mysize, argptr, argsize);
2838 if (left == mysize)
2839 right = left;
2840 else
2841 right = rstrip_helper(myptr, mysize, argptr, argsize);
2842 if (arg != Py_None)
2843 PyBuffer_Release(&varg);
2844 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2847 PyDoc_STRVAR(lstrip__doc__,
2848 "B.lstrip([bytes]) -> bytearray\n\
2850 Strip leading bytes contained in the argument.\n\
2851 If the argument is omitted, strip leading ASCII whitespace.");
2852 static PyObject *
2853 bytes_lstrip(PyByteArrayObject *self, PyObject *args)
2855 Py_ssize_t left, right, mysize, argsize;
2856 void *myptr, *argptr;
2857 PyObject *arg = Py_None;
2858 Py_buffer varg;
2859 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2860 return NULL;
2861 if (arg == Py_None) {
2862 argptr = "\t\n\r\f\v ";
2863 argsize = 6;
2865 else {
2866 if (_getbuffer(arg, &varg) < 0)
2867 return NULL;
2868 argptr = varg.buf;
2869 argsize = varg.len;
2871 myptr = self->ob_bytes;
2872 mysize = Py_SIZE(self);
2873 left = lstrip_helper(myptr, mysize, argptr, argsize);
2874 right = mysize;
2875 if (arg != Py_None)
2876 PyBuffer_Release(&varg);
2877 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2880 PyDoc_STRVAR(rstrip__doc__,
2881 "B.rstrip([bytes]) -> bytearray\n\
2883 Strip trailing bytes contained in the argument.\n\
2884 If the argument is omitted, strip trailing ASCII whitespace.");
2885 static PyObject *
2886 bytes_rstrip(PyByteArrayObject *self, PyObject *args)
2888 Py_ssize_t left, right, mysize, argsize;
2889 void *myptr, *argptr;
2890 PyObject *arg = Py_None;
2891 Py_buffer varg;
2892 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2893 return NULL;
2894 if (arg == Py_None) {
2895 argptr = "\t\n\r\f\v ";
2896 argsize = 6;
2898 else {
2899 if (_getbuffer(arg, &varg) < 0)
2900 return NULL;
2901 argptr = varg.buf;
2902 argsize = varg.len;
2904 myptr = self->ob_bytes;
2905 mysize = Py_SIZE(self);
2906 left = 0;
2907 right = rstrip_helper(myptr, mysize, argptr, argsize);
2908 if (arg != Py_None)
2909 PyBuffer_Release(&varg);
2910 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2913 PyDoc_STRVAR(decode_doc,
2914 "B.decode([encoding[, errors]]) -> unicode object.\n\
2916 Decodes B using the codec registered for encoding. encoding defaults\n\
2917 to the default encoding. errors may be given to set a different error\n\
2918 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2919 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2920 as well as any other name registered with codecs.register_error that is\n\
2921 able to handle UnicodeDecodeErrors.");
2923 static PyObject *
2924 bytes_decode(PyObject *self, PyObject *args)
2926 const char *encoding = NULL;
2927 const char *errors = NULL;
2929 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2930 return NULL;
2931 if (encoding == NULL)
2932 encoding = PyUnicode_GetDefaultEncoding();
2933 return PyCodec_Decode(self, encoding, errors);
2936 PyDoc_STRVAR(alloc_doc,
2937 "B.__alloc__() -> int\n\
2939 Returns the number of bytes actually allocated.");
2941 static PyObject *
2942 bytes_alloc(PyByteArrayObject *self)
2944 return PyInt_FromSsize_t(self->ob_alloc);
2947 PyDoc_STRVAR(join_doc,
2948 "B.join(iterable_of_bytes) -> bytes\n\
2950 Concatenates any number of bytearray objects, with B in between each pair.");
2952 static PyObject *
2953 bytes_join(PyByteArrayObject *self, PyObject *it)
2955 PyObject *seq;
2956 Py_ssize_t mysize = Py_SIZE(self);
2957 Py_ssize_t i;
2958 Py_ssize_t n;
2959 PyObject **items;
2960 Py_ssize_t totalsize = 0;
2961 PyObject *result;
2962 char *dest;
2964 seq = PySequence_Fast(it, "can only join an iterable");
2965 if (seq == NULL)
2966 return NULL;
2967 n = PySequence_Fast_GET_SIZE(seq);
2968 items = PySequence_Fast_ITEMS(seq);
2970 /* Compute the total size, and check that they are all bytes */
2971 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2972 for (i = 0; i < n; i++) {
2973 PyObject *obj = items[i];
2974 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2975 PyErr_Format(PyExc_TypeError,
2976 "can only join an iterable of bytes "
2977 "(item %ld has type '%.100s')",
2978 /* XXX %ld isn't right on Win64 */
2979 (long)i, Py_TYPE(obj)->tp_name);
2980 goto error;
2982 if (i > 0)
2983 totalsize += mysize;
2984 totalsize += Py_SIZE(obj);
2985 if (totalsize < 0) {
2986 PyErr_NoMemory();
2987 goto error;
2991 /* Allocate the result, and copy the bytes */
2992 result = PyByteArray_FromStringAndSize(NULL, totalsize);
2993 if (result == NULL)
2994 goto error;
2995 dest = PyByteArray_AS_STRING(result);
2996 for (i = 0; i < n; i++) {
2997 PyObject *obj = items[i];
2998 Py_ssize_t size = Py_SIZE(obj);
2999 char *buf;
3000 if (PyByteArray_Check(obj))
3001 buf = PyByteArray_AS_STRING(obj);
3002 else
3003 buf = PyBytes_AS_STRING(obj);
3004 if (i) {
3005 memcpy(dest, self->ob_bytes, mysize);
3006 dest += mysize;
3008 memcpy(dest, buf, size);
3009 dest += size;
3012 /* Done */
3013 Py_DECREF(seq);
3014 return result;
3016 /* Error handling */
3017 error:
3018 Py_DECREF(seq);
3019 return NULL;
3022 PyDoc_STRVAR(fromhex_doc,
3023 "bytearray.fromhex(string) -> bytearray\n\
3025 Create a bytearray object from a string of hexadecimal numbers.\n\
3026 Spaces between two numbers are accepted.\n\
3027 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3029 static int
3030 hex_digit_to_int(Py_UNICODE c)
3032 if (c >= 128)
3033 return -1;
3034 if (ISDIGIT(c))
3035 return c - '0';
3036 else {
3037 if (ISUPPER(c))
3038 c = TOLOWER(c);
3039 if (c >= 'a' && c <= 'f')
3040 return c - 'a' + 10;
3042 return -1;
3045 static PyObject *
3046 bytes_fromhex(PyObject *cls, PyObject *args)
3048 PyObject *newbytes, *hexobj;
3049 char *buf;
3050 Py_UNICODE *hex;
3051 Py_ssize_t hexlen, byteslen, i, j;
3052 int top, bot;
3054 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
3055 return NULL;
3056 assert(PyUnicode_Check(hexobj));
3057 hexlen = PyUnicode_GET_SIZE(hexobj);
3058 hex = PyUnicode_AS_UNICODE(hexobj);
3059 byteslen = hexlen/2; /* This overestimates if there are spaces */
3060 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
3061 if (!newbytes)
3062 return NULL;
3063 buf = PyByteArray_AS_STRING(newbytes);
3064 for (i = j = 0; i < hexlen; i += 2) {
3065 /* skip over spaces in the input */
3066 while (hex[i] == ' ')
3067 i++;
3068 if (i >= hexlen)
3069 break;
3070 top = hex_digit_to_int(hex[i]);
3071 bot = hex_digit_to_int(hex[i+1]);
3072 if (top == -1 || bot == -1) {
3073 PyErr_Format(PyExc_ValueError,
3074 "non-hexadecimal number found in "
3075 "fromhex() arg at position %zd", i);
3076 goto error;
3078 buf[j++] = (top << 4) + bot;
3080 if (PyByteArray_Resize(newbytes, j) < 0)
3081 goto error;
3082 return newbytes;
3084 error:
3085 Py_DECREF(newbytes);
3086 return NULL;
3089 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3091 static PyObject *
3092 bytes_reduce(PyByteArrayObject *self)
3094 PyObject *latin1, *dict;
3095 if (self->ob_bytes)
3096 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3097 Py_SIZE(self), NULL);
3098 else
3099 latin1 = PyUnicode_FromString("");
3101 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3102 if (dict == NULL) {
3103 PyErr_Clear();
3104 dict = Py_None;
3105 Py_INCREF(dict);
3108 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3111 PyDoc_STRVAR(sizeof_doc,
3112 "B.__sizeof__() -> int\n\
3114 Returns the size of B in memory, in bytes");
3115 static PyObject *
3116 bytes_sizeof(PyByteArrayObject *self)
3118 Py_ssize_t res;
3120 res = sizeof(PyByteArrayObject) + self->ob_alloc * sizeof(char);
3121 return PyInt_FromSsize_t(res);
3124 static PySequenceMethods bytes_as_sequence = {
3125 (lenfunc)bytes_length, /* sq_length */
3126 (binaryfunc)PyByteArray_Concat, /* sq_concat */
3127 (ssizeargfunc)bytes_repeat, /* sq_repeat */
3128 (ssizeargfunc)bytes_getitem, /* sq_item */
3129 0, /* sq_slice */
3130 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
3131 0, /* sq_ass_slice */
3132 (objobjproc)bytes_contains, /* sq_contains */
3133 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
3134 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
3137 static PyMappingMethods bytes_as_mapping = {
3138 (lenfunc)bytes_length,
3139 (binaryfunc)bytes_subscript,
3140 (objobjargproc)bytes_ass_subscript,
3143 static PyBufferProcs bytes_as_buffer = {
3144 (readbufferproc)bytes_buffer_getreadbuf,
3145 (writebufferproc)bytes_buffer_getwritebuf,
3146 (segcountproc)bytes_buffer_getsegcount,
3147 (charbufferproc)bytes_buffer_getcharbuf,
3148 (getbufferproc)bytes_getbuffer,
3149 (releasebufferproc)bytes_releasebuffer,
3152 static PyMethodDef
3153 bytes_methods[] = {
3154 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
3155 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
3156 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, sizeof_doc},
3157 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
3158 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3159 _Py_capitalize__doc__},
3160 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3161 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
3162 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
3163 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
3164 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3165 expandtabs__doc__},
3166 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
3167 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
3168 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
3169 fromhex_doc},
3170 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3171 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
3172 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3173 _Py_isalnum__doc__},
3174 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3175 _Py_isalpha__doc__},
3176 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3177 _Py_isdigit__doc__},
3178 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3179 _Py_islower__doc__},
3180 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3181 _Py_isspace__doc__},
3182 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3183 _Py_istitle__doc__},
3184 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3185 _Py_isupper__doc__},
3186 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
3187 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3188 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
3189 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
3190 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
3191 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
3192 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
3193 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
3194 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
3195 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3196 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3197 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3198 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
3199 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
3200 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
3201 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
3202 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3203 splitlines__doc__},
3204 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
3205 startswith__doc__},
3206 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
3207 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3208 _Py_swapcase__doc__},
3209 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3210 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
3211 translate__doc__},
3212 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3213 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
3214 {NULL}
3217 PyDoc_STRVAR(bytes_doc,
3218 "bytearray(iterable_of_ints) -> bytearray.\n\
3219 bytearray(string, encoding[, errors]) -> bytearray.\n\
3220 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3221 bytearray(memory_view) -> bytearray.\n\
3223 Construct an mutable bytearray object from:\n\
3224 - an iterable yielding integers in range(256)\n\
3225 - a text string encoded using the specified encoding\n\
3226 - a bytes or a bytearray object\n\
3227 - any object implementing the buffer API.\n\
3229 bytearray(int) -> bytearray.\n\
3231 Construct a zero-initialized bytearray of the given length.");
3234 static PyObject *bytes_iter(PyObject *seq);
3236 PyTypeObject PyByteArray_Type = {
3237 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3238 "bytearray",
3239 sizeof(PyByteArrayObject),
3241 (destructor)bytes_dealloc, /* tp_dealloc */
3242 0, /* tp_print */
3243 0, /* tp_getattr */
3244 0, /* tp_setattr */
3245 0, /* tp_compare */
3246 (reprfunc)bytes_repr, /* tp_repr */
3247 0, /* tp_as_number */
3248 &bytes_as_sequence, /* tp_as_sequence */
3249 &bytes_as_mapping, /* tp_as_mapping */
3250 0, /* tp_hash */
3251 0, /* tp_call */
3252 bytes_str, /* tp_str */
3253 PyObject_GenericGetAttr, /* tp_getattro */
3254 0, /* tp_setattro */
3255 &bytes_as_buffer, /* tp_as_buffer */
3256 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3257 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3258 bytes_doc, /* tp_doc */
3259 0, /* tp_traverse */
3260 0, /* tp_clear */
3261 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3262 0, /* tp_weaklistoffset */
3263 bytes_iter, /* tp_iter */
3264 0, /* tp_iternext */
3265 bytes_methods, /* tp_methods */
3266 0, /* tp_members */
3267 0, /* tp_getset */
3268 0, /* tp_base */
3269 0, /* tp_dict */
3270 0, /* tp_descr_get */
3271 0, /* tp_descr_set */
3272 0, /* tp_dictoffset */
3273 (initproc)bytes_init, /* tp_init */
3274 PyType_GenericAlloc, /* tp_alloc */
3275 PyType_GenericNew, /* tp_new */
3276 PyObject_Del, /* tp_free */
3279 /*********************** Bytes Iterator ****************************/
3281 typedef struct {
3282 PyObject_HEAD
3283 Py_ssize_t it_index;
3284 PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
3285 } bytesiterobject;
3287 static void
3288 bytesiter_dealloc(bytesiterobject *it)
3290 _PyObject_GC_UNTRACK(it);
3291 Py_XDECREF(it->it_seq);
3292 PyObject_GC_Del(it);
3295 static int
3296 bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
3298 Py_VISIT(it->it_seq);
3299 return 0;
3302 static PyObject *
3303 bytesiter_next(bytesiterobject *it)
3305 PyByteArrayObject *seq;
3306 PyObject *item;
3308 assert(it != NULL);
3309 seq = it->it_seq;
3310 if (seq == NULL)
3311 return NULL;
3312 assert(PyByteArray_Check(seq));
3314 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
3315 item = PyInt_FromLong(
3316 (unsigned char)seq->ob_bytes[it->it_index]);
3317 if (item != NULL)
3318 ++it->it_index;
3319 return item;
3322 Py_DECREF(seq);
3323 it->it_seq = NULL;
3324 return NULL;
3327 static PyObject *
3328 bytesiter_length_hint(bytesiterobject *it)
3330 Py_ssize_t len = 0;
3331 if (it->it_seq)
3332 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3333 return PyInt_FromSsize_t(len);
3336 PyDoc_STRVAR(length_hint_doc,
3337 "Private method returning an estimate of len(list(it)).");
3339 static PyMethodDef bytesiter_methods[] = {
3340 {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
3341 length_hint_doc},
3342 {NULL, NULL} /* sentinel */
3345 PyTypeObject PyByteArrayIter_Type = {
3346 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3347 "bytearray_iterator", /* tp_name */
3348 sizeof(bytesiterobject), /* tp_basicsize */
3349 0, /* tp_itemsize */
3350 /* methods */
3351 (destructor)bytesiter_dealloc, /* tp_dealloc */
3352 0, /* tp_print */
3353 0, /* tp_getattr */
3354 0, /* tp_setattr */
3355 0, /* tp_compare */
3356 0, /* tp_repr */
3357 0, /* tp_as_number */
3358 0, /* tp_as_sequence */
3359 0, /* tp_as_mapping */
3360 0, /* tp_hash */
3361 0, /* tp_call */
3362 0, /* tp_str */
3363 PyObject_GenericGetAttr, /* tp_getattro */
3364 0, /* tp_setattro */
3365 0, /* tp_as_buffer */
3366 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3367 0, /* tp_doc */
3368 (traverseproc)bytesiter_traverse, /* tp_traverse */
3369 0, /* tp_clear */
3370 0, /* tp_richcompare */
3371 0, /* tp_weaklistoffset */
3372 PyObject_SelfIter, /* tp_iter */
3373 (iternextfunc)bytesiter_next, /* tp_iternext */
3374 bytesiter_methods, /* tp_methods */
3378 static PyObject *
3379 bytes_iter(PyObject *seq)
3381 bytesiterobject *it;
3383 if (!PyByteArray_Check(seq)) {
3384 PyErr_BadInternalCall();
3385 return NULL;
3387 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3388 if (it == NULL)
3389 return NULL;
3390 it->it_index = 0;
3391 Py_INCREF(seq);
3392 it->it_seq = (PyByteArrayObject *)seq;
3393 _PyObject_GC_TRACK(it);
3394 return (PyObject *)it;