Remove use of tuple unpacking and dict.has_key() so as to silence
[python.git] / Objects / bytearrayobject.c
blobbc02106e786a7d41b091d3889c47921a52733283
1 /* PyBytes (bytearray) implementation */
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
6 #include "bytes_methods.h"
8 static PyByteArrayObject *nullbytes = NULL;
10 void
11 PyByteArray_Fini(void)
13 Py_CLEAR(nullbytes);
16 int
17 PyByteArray_Init(void)
19 nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24 nullbytes->ob_exports = 0;
25 return 1;
28 /* end nullbytes support */
30 /* Helpers */
32 static int
33 _getbytevalue(PyObject* arg, int *value)
35 long face_value;
37 if (PyBytes_CheckExact(arg)) {
38 if (Py_SIZE(arg) != 1) {
39 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
40 return 0;
42 *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
43 return 1;
45 else if (PyInt_Check(arg) || PyLong_Check(arg)) {
46 face_value = PyLong_AsLong(arg);
48 else {
49 PyObject *index = PyNumber_Index(arg);
50 if (index == NULL) {
51 PyErr_Format(PyExc_TypeError,
52 "an integer or string of size 1 is required");
53 return 0;
55 face_value = PyLong_AsLong(index);
56 Py_DECREF(index);
59 if (face_value < 0 || face_value >= 256) {
60 /* this includes the OverflowError in case the long is too large */
61 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
62 return 0;
65 *value = face_value;
66 return 1;
69 static Py_ssize_t
70 bytes_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
72 if ( index != 0 ) {
73 PyErr_SetString(PyExc_SystemError,
74 "accessing non-existent bytes segment");
75 return -1;
77 *ptr = (void *)self->ob_bytes;
78 return Py_SIZE(self);
81 static Py_ssize_t
82 bytes_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
84 if ( index != 0 ) {
85 PyErr_SetString(PyExc_SystemError,
86 "accessing non-existent bytes segment");
87 return -1;
89 *ptr = (void *)self->ob_bytes;
90 return Py_SIZE(self);
93 static Py_ssize_t
94 bytes_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
96 if ( lenp )
97 *lenp = Py_SIZE(self);
98 return 1;
101 static Py_ssize_t
102 bytes_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
104 if ( index != 0 ) {
105 PyErr_SetString(PyExc_SystemError,
106 "accessing non-existent bytes segment");
107 return -1;
109 *ptr = self->ob_bytes;
110 return Py_SIZE(self);
113 static int
114 bytes_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
116 int ret;
117 void *ptr;
118 if (view == NULL) {
119 obj->ob_exports++;
120 return 0;
122 if (obj->ob_bytes == NULL)
123 ptr = "";
124 else
125 ptr = obj->ob_bytes;
126 ret = PyBuffer_FillInfo(view, ptr, Py_SIZE(obj), 0, flags);
127 if (ret >= 0) {
128 obj->ob_exports++;
130 return ret;
133 static void
134 bytes_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
136 obj->ob_exports--;
139 static Py_ssize_t
140 _getbuffer(PyObject *obj, Py_buffer *view)
142 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
144 if (buffer == NULL || buffer->bf_getbuffer == NULL)
146 PyErr_Format(PyExc_TypeError,
147 "Type %.100s doesn't support the buffer API",
148 Py_TYPE(obj)->tp_name);
149 return -1;
152 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
153 return -1;
154 return view->len;
157 /* Direct API functions */
159 PyObject *
160 PyByteArray_FromObject(PyObject *input)
162 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
163 input, NULL);
166 PyObject *
167 PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
169 PyByteArrayObject *new;
170 Py_ssize_t alloc;
172 if (size < 0) {
173 PyErr_SetString(PyExc_SystemError,
174 "Negative size passed to PyByteArray_FromStringAndSize");
175 return NULL;
178 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
179 if (new == NULL)
180 return NULL;
182 if (size == 0) {
183 new->ob_bytes = NULL;
184 alloc = 0;
186 else {
187 alloc = size + 1;
188 new->ob_bytes = PyMem_Malloc(alloc);
189 if (new->ob_bytes == NULL) {
190 Py_DECREF(new);
191 return PyErr_NoMemory();
193 if (bytes != NULL)
194 memcpy(new->ob_bytes, bytes, size);
195 new->ob_bytes[size] = '\0'; /* Trailing null byte */
197 Py_SIZE(new) = size;
198 new->ob_alloc = alloc;
199 new->ob_exports = 0;
201 return (PyObject *)new;
204 Py_ssize_t
205 PyByteArray_Size(PyObject *self)
207 assert(self != NULL);
208 assert(PyByteArray_Check(self));
210 return PyByteArray_GET_SIZE(self);
213 char *
214 PyByteArray_AsString(PyObject *self)
216 assert(self != NULL);
217 assert(PyByteArray_Check(self));
219 return PyByteArray_AS_STRING(self);
223 PyByteArray_Resize(PyObject *self, Py_ssize_t size)
225 void *sval;
226 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
228 assert(self != NULL);
229 assert(PyByteArray_Check(self));
230 assert(size >= 0);
232 if (size < alloc / 2) {
233 /* Major downsize; resize down to exact size */
234 alloc = size + 1;
236 else if (size < alloc) {
237 /* Within allocated size; quick exit */
238 Py_SIZE(self) = size;
239 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
240 return 0;
242 else if (size <= alloc * 1.125) {
243 /* Moderate upsize; overallocate similar to list_resize() */
244 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
246 else {
247 /* Major upsize; resize up to exact size */
248 alloc = size + 1;
251 if (((PyByteArrayObject *)self)->ob_exports > 0) {
253 fprintf(stderr, "%d: %s", ((PyByteArrayObject *)self)->ob_exports,
254 ((PyByteArrayObject *)self)->ob_bytes);
256 PyErr_SetString(PyExc_BufferError,
257 "Existing exports of data: object cannot be re-sized");
258 return -1;
261 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
262 if (sval == NULL) {
263 PyErr_NoMemory();
264 return -1;
267 ((PyByteArrayObject *)self)->ob_bytes = sval;
268 Py_SIZE(self) = size;
269 ((PyByteArrayObject *)self)->ob_alloc = alloc;
270 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
272 return 0;
275 PyObject *
276 PyByteArray_Concat(PyObject *a, PyObject *b)
278 Py_ssize_t size;
279 Py_buffer va, vb;
280 PyByteArrayObject *result = NULL;
282 va.len = -1;
283 vb.len = -1;
284 if (_getbuffer(a, &va) < 0 ||
285 _getbuffer(b, &vb) < 0) {
286 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
287 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
288 goto done;
291 size = va.len + vb.len;
292 if (size < 0) {
293 return PyErr_NoMemory();
294 goto done;
297 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
298 if (result != NULL) {
299 memcpy(result->ob_bytes, va.buf, va.len);
300 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
303 done:
304 if (va.len != -1)
305 PyObject_ReleaseBuffer(a, &va);
306 if (vb.len != -1)
307 PyObject_ReleaseBuffer(b, &vb);
308 return (PyObject *)result;
311 /* Functions stuffed into the type object */
313 static Py_ssize_t
314 bytes_length(PyByteArrayObject *self)
316 return Py_SIZE(self);
319 static PyObject *
320 bytes_iconcat(PyByteArrayObject *self, PyObject *other)
322 Py_ssize_t mysize;
323 Py_ssize_t size;
324 Py_buffer vo;
326 if (_getbuffer(other, &vo) < 0) {
327 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
328 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
329 return NULL;
332 mysize = Py_SIZE(self);
333 size = mysize + vo.len;
334 if (size < 0) {
335 PyObject_ReleaseBuffer(other, &vo);
336 return PyErr_NoMemory();
338 if (size < self->ob_alloc) {
339 Py_SIZE(self) = size;
340 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
342 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
343 PyObject_ReleaseBuffer(other, &vo);
344 return NULL;
346 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
347 PyObject_ReleaseBuffer(other, &vo);
348 Py_INCREF(self);
349 return (PyObject *)self;
352 static PyObject *
353 bytes_repeat(PyByteArrayObject *self, Py_ssize_t count)
355 PyByteArrayObject *result;
356 Py_ssize_t mysize;
357 Py_ssize_t size;
359 if (count < 0)
360 count = 0;
361 mysize = Py_SIZE(self);
362 size = mysize * count;
363 if (count != 0 && size / count != mysize)
364 return PyErr_NoMemory();
365 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
366 if (result != NULL && size != 0) {
367 if (mysize == 1)
368 memset(result->ob_bytes, self->ob_bytes[0], size);
369 else {
370 Py_ssize_t i;
371 for (i = 0; i < count; i++)
372 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
375 return (PyObject *)result;
378 static PyObject *
379 bytes_irepeat(PyByteArrayObject *self, Py_ssize_t count)
381 Py_ssize_t mysize;
382 Py_ssize_t size;
384 if (count < 0)
385 count = 0;
386 mysize = Py_SIZE(self);
387 size = mysize * count;
388 if (count != 0 && size / count != mysize)
389 return PyErr_NoMemory();
390 if (size < self->ob_alloc) {
391 Py_SIZE(self) = size;
392 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
394 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
395 return NULL;
397 if (mysize == 1)
398 memset(self->ob_bytes, self->ob_bytes[0], size);
399 else {
400 Py_ssize_t i;
401 for (i = 1; i < count; i++)
402 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
405 Py_INCREF(self);
406 return (PyObject *)self;
409 static PyObject *
410 bytes_getitem(PyByteArrayObject *self, Py_ssize_t i)
412 if (i < 0)
413 i += Py_SIZE(self);
414 if (i < 0 || i >= Py_SIZE(self)) {
415 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
416 return NULL;
418 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
421 static PyObject *
422 bytes_subscript(PyByteArrayObject *self, PyObject *index)
424 if (PyIndex_Check(index)) {
425 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
427 if (i == -1 && PyErr_Occurred())
428 return NULL;
430 if (i < 0)
431 i += PyByteArray_GET_SIZE(self);
433 if (i < 0 || i >= Py_SIZE(self)) {
434 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
435 return NULL;
437 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
439 else if (PySlice_Check(index)) {
440 Py_ssize_t start, stop, step, slicelength, cur, i;
441 if (PySlice_GetIndicesEx((PySliceObject *)index,
442 PyByteArray_GET_SIZE(self),
443 &start, &stop, &step, &slicelength) < 0) {
444 return NULL;
447 if (slicelength <= 0)
448 return PyByteArray_FromStringAndSize("", 0);
449 else if (step == 1) {
450 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
451 slicelength);
453 else {
454 char *source_buf = PyByteArray_AS_STRING(self);
455 char *result_buf = (char *)PyMem_Malloc(slicelength);
456 PyObject *result;
458 if (result_buf == NULL)
459 return PyErr_NoMemory();
461 for (cur = start, i = 0; i < slicelength;
462 cur += step, i++) {
463 result_buf[i] = source_buf[cur];
465 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
466 PyMem_Free(result_buf);
467 return result;
470 else {
471 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
472 return NULL;
476 static int
477 bytes_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
478 PyObject *values)
480 Py_ssize_t avail, needed;
481 void *bytes;
482 Py_buffer vbytes;
483 int res = 0;
485 vbytes.len = -1;
486 if (values == (PyObject *)self) {
487 /* Make a copy and call this function recursively */
488 int err;
489 values = PyByteArray_FromObject(values);
490 if (values == NULL)
491 return -1;
492 err = bytes_setslice(self, lo, hi, values);
493 Py_DECREF(values);
494 return err;
496 if (values == NULL) {
497 /* del b[lo:hi] */
498 bytes = NULL;
499 needed = 0;
501 else {
502 if (_getbuffer(values, &vbytes) < 0) {
503 PyErr_Format(PyExc_TypeError,
504 "can't set bytearray slice from %.100s",
505 Py_TYPE(values)->tp_name);
506 return -1;
508 needed = vbytes.len;
509 bytes = vbytes.buf;
512 if (lo < 0)
513 lo = 0;
514 if (hi < lo)
515 hi = lo;
516 if (hi > Py_SIZE(self))
517 hi = Py_SIZE(self);
519 avail = hi - lo;
520 if (avail < 0)
521 lo = hi = avail = 0;
523 if (avail != needed) {
524 if (avail > needed) {
526 0 lo hi old_size
527 | |<----avail----->|<-----tomove------>|
528 | |<-needed->|<-----tomove------>|
529 0 lo new_hi new_size
531 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
532 Py_SIZE(self) - hi);
534 /* XXX(nnorwitz): need to verify this can't overflow! */
535 if (PyByteArray_Resize((PyObject *)self,
536 Py_SIZE(self) + needed - avail) < 0) {
537 res = -1;
538 goto finish;
540 if (avail < needed) {
542 0 lo hi old_size
543 | |<-avail->|<-----tomove------>|
544 | |<----needed---->|<-----tomove------>|
545 0 lo new_hi new_size
547 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
548 Py_SIZE(self) - lo - needed);
552 if (needed > 0)
553 memcpy(self->ob_bytes + lo, bytes, needed);
556 finish:
557 if (vbytes.len != -1)
558 PyObject_ReleaseBuffer(values, &vbytes);
559 return res;
562 static int
563 bytes_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
565 int ival;
567 if (i < 0)
568 i += Py_SIZE(self);
570 if (i < 0 || i >= Py_SIZE(self)) {
571 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
572 return -1;
575 if (value == NULL)
576 return bytes_setslice(self, i, i+1, NULL);
578 if (!_getbytevalue(value, &ival))
579 return -1;
581 self->ob_bytes[i] = ival;
582 return 0;
585 static int
586 bytes_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
588 Py_ssize_t start, stop, step, slicelen, needed;
589 char *bytes;
591 if (PyIndex_Check(index)) {
592 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
594 if (i == -1 && PyErr_Occurred())
595 return -1;
597 if (i < 0)
598 i += PyByteArray_GET_SIZE(self);
600 if (i < 0 || i >= Py_SIZE(self)) {
601 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
602 return -1;
605 if (values == NULL) {
606 /* Fall through to slice assignment */
607 start = i;
608 stop = i + 1;
609 step = 1;
610 slicelen = 1;
612 else {
613 int ival;
614 if (!_getbytevalue(values, &ival))
615 return -1;
616 self->ob_bytes[i] = (char)ival;
617 return 0;
620 else if (PySlice_Check(index)) {
621 if (PySlice_GetIndicesEx((PySliceObject *)index,
622 PyByteArray_GET_SIZE(self),
623 &start, &stop, &step, &slicelen) < 0) {
624 return -1;
627 else {
628 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
629 return -1;
632 if (values == NULL) {
633 bytes = NULL;
634 needed = 0;
636 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
637 /* Make a copy an call this function recursively */
638 int err;
639 values = PyByteArray_FromObject(values);
640 if (values == NULL)
641 return -1;
642 err = bytes_ass_subscript(self, index, values);
643 Py_DECREF(values);
644 return err;
646 else {
647 assert(PyByteArray_Check(values));
648 bytes = ((PyByteArrayObject *)values)->ob_bytes;
649 needed = Py_SIZE(values);
651 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
652 if ((step < 0 && start < stop) ||
653 (step > 0 && start > stop))
654 stop = start;
655 if (step == 1) {
656 if (slicelen != needed) {
657 if (slicelen > needed) {
659 0 start stop old_size
660 | |<---slicelen--->|<-----tomove------>|
661 | |<-needed->|<-----tomove------>|
662 0 lo new_hi new_size
664 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
665 Py_SIZE(self) - stop);
667 if (PyByteArray_Resize((PyObject *)self,
668 Py_SIZE(self) + needed - slicelen) < 0)
669 return -1;
670 if (slicelen < needed) {
672 0 lo hi old_size
673 | |<-avail->|<-----tomove------>|
674 | |<----needed---->|<-----tomove------>|
675 0 lo new_hi new_size
677 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
678 Py_SIZE(self) - start - needed);
682 if (needed > 0)
683 memcpy(self->ob_bytes + start, bytes, needed);
685 return 0;
687 else {
688 if (needed == 0) {
689 /* Delete slice */
690 Py_ssize_t cur, i;
692 if (step < 0) {
693 stop = start + 1;
694 start = stop + step * (slicelen - 1) - 1;
695 step = -step;
697 for (cur = start, i = 0;
698 i < slicelen; cur += step, i++) {
699 Py_ssize_t lim = step - 1;
701 if (cur + step >= PyByteArray_GET_SIZE(self))
702 lim = PyByteArray_GET_SIZE(self) - cur - 1;
704 memmove(self->ob_bytes + cur - i,
705 self->ob_bytes + cur + 1, lim);
707 /* Move the tail of the bytes, in one chunk */
708 cur = start + slicelen*step;
709 if (cur < PyByteArray_GET_SIZE(self)) {
710 memmove(self->ob_bytes + cur - slicelen,
711 self->ob_bytes + cur,
712 PyByteArray_GET_SIZE(self) - cur);
714 if (PyByteArray_Resize((PyObject *)self,
715 PyByteArray_GET_SIZE(self) - slicelen) < 0)
716 return -1;
718 return 0;
720 else {
721 /* Assign slice */
722 Py_ssize_t cur, i;
724 if (needed != slicelen) {
725 PyErr_Format(PyExc_ValueError,
726 "attempt to assign bytes of size %zd "
727 "to extended slice of size %zd",
728 needed, slicelen);
729 return -1;
731 for (cur = start, i = 0; i < slicelen; cur += step, i++)
732 self->ob_bytes[cur] = bytes[i];
733 return 0;
738 static int
739 bytes_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
741 static char *kwlist[] = {"source", "encoding", "errors", 0};
742 PyObject *arg = NULL;
743 const char *encoding = NULL;
744 const char *errors = NULL;
745 Py_ssize_t count;
746 PyObject *it;
747 PyObject *(*iternext)(PyObject *);
749 if (Py_SIZE(self) != 0) {
750 /* Empty previous contents (yes, do this first of all!) */
751 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
752 return -1;
755 /* Parse arguments */
756 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
757 &arg, &encoding, &errors))
758 return -1;
760 /* Make a quick exit if no first argument */
761 if (arg == NULL) {
762 if (encoding != NULL || errors != NULL) {
763 PyErr_SetString(PyExc_TypeError,
764 "encoding or errors without sequence argument");
765 return -1;
767 return 0;
770 if (PyBytes_Check(arg)) {
771 PyObject *new, *encoded;
772 if (encoding != NULL) {
773 encoded = PyCodec_Encode(arg, encoding, errors);
774 if (encoded == NULL)
775 return -1;
776 assert(PyBytes_Check(encoded));
778 else {
779 encoded = arg;
780 Py_INCREF(arg);
782 new = bytes_iconcat(self, arg);
783 Py_DECREF(encoded);
784 if (new == NULL)
785 return -1;
786 Py_DECREF(new);
787 return 0;
790 if (PyUnicode_Check(arg)) {
791 /* Encode via the codec registry */
792 PyObject *encoded, *new;
793 if (encoding == NULL) {
794 PyErr_SetString(PyExc_TypeError,
795 "unicode argument without an encoding");
796 return -1;
798 encoded = PyCodec_Encode(arg, encoding, errors);
799 if (encoded == NULL)
800 return -1;
801 assert(PyBytes_Check(encoded));
802 new = bytes_iconcat(self, encoded);
803 Py_DECREF(encoded);
804 if (new == NULL)
805 return -1;
806 Py_DECREF(new);
807 return 0;
810 /* If it's not unicode, there can't be encoding or errors */
811 if (encoding != NULL || errors != NULL) {
812 PyErr_SetString(PyExc_TypeError,
813 "encoding or errors without a string argument");
814 return -1;
817 /* Is it an int? */
818 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
819 if (count == -1 && PyErr_Occurred())
820 PyErr_Clear();
821 else {
822 if (count < 0) {
823 PyErr_SetString(PyExc_ValueError, "negative count");
824 return -1;
826 if (count > 0) {
827 if (PyByteArray_Resize((PyObject *)self, count))
828 return -1;
829 memset(self->ob_bytes, 0, count);
831 return 0;
834 /* Use the buffer API */
835 if (PyObject_CheckBuffer(arg)) {
836 Py_ssize_t size;
837 Py_buffer view;
838 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
839 return -1;
840 size = view.len;
841 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
842 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
843 goto fail;
844 PyObject_ReleaseBuffer(arg, &view);
845 return 0;
846 fail:
847 PyObject_ReleaseBuffer(arg, &view);
848 return -1;
851 /* XXX Optimize this if the arguments is a list, tuple */
853 /* Get the iterator */
854 it = PyObject_GetIter(arg);
855 if (it == NULL)
856 return -1;
857 iternext = *Py_TYPE(it)->tp_iternext;
859 /* Run the iterator to exhaustion */
860 for (;;) {
861 PyObject *item;
862 int rc, value;
864 /* Get the next item */
865 item = iternext(it);
866 if (item == NULL) {
867 if (PyErr_Occurred()) {
868 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
869 goto error;
870 PyErr_Clear();
872 break;
875 /* Interpret it as an int (__index__) */
876 rc = _getbytevalue(item, &value);
877 Py_DECREF(item);
878 if (!rc)
879 goto error;
881 /* Append the byte */
882 if (Py_SIZE(self) < self->ob_alloc)
883 Py_SIZE(self)++;
884 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
885 goto error;
886 self->ob_bytes[Py_SIZE(self)-1] = value;
889 /* Clean up and return success */
890 Py_DECREF(it);
891 return 0;
893 error:
894 /* Error handling when it != NULL */
895 Py_DECREF(it);
896 return -1;
899 /* Mostly copied from string_repr, but without the
900 "smart quote" functionality. */
901 static PyObject *
902 bytes_repr(PyByteArrayObject *self)
904 static const char *hexdigits = "0123456789abcdef";
905 const char *quote_prefix = "bytearray(b";
906 const char *quote_postfix = ")";
907 Py_ssize_t length = Py_SIZE(self);
908 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
909 size_t newsize = 14 + 4 * length;
910 PyObject *v;
911 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
912 PyErr_SetString(PyExc_OverflowError,
913 "bytearray object is too large to make repr");
914 return NULL;
916 v = PyUnicode_FromUnicode(NULL, newsize);
917 if (v == NULL) {
918 return NULL;
920 else {
921 register Py_ssize_t i;
922 register Py_UNICODE c;
923 register Py_UNICODE *p;
924 int quote;
926 /* Figure out which quote to use; single is preferred */
927 quote = '\'';
929 char *test, *start;
930 start = PyByteArray_AS_STRING(self);
931 for (test = start; test < start+length; ++test) {
932 if (*test == '"') {
933 quote = '\''; /* back to single */
934 goto decided;
936 else if (*test == '\'')
937 quote = '"';
939 decided:
943 p = PyUnicode_AS_UNICODE(v);
944 while (*quote_prefix)
945 *p++ = *quote_prefix++;
946 *p++ = quote;
948 for (i = 0; i < length; i++) {
949 /* There's at least enough room for a hex escape
950 and a closing quote. */
951 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
952 c = self->ob_bytes[i];
953 if (c == '\'' || c == '\\')
954 *p++ = '\\', *p++ = c;
955 else if (c == '\t')
956 *p++ = '\\', *p++ = 't';
957 else if (c == '\n')
958 *p++ = '\\', *p++ = 'n';
959 else if (c == '\r')
960 *p++ = '\\', *p++ = 'r';
961 else if (c == 0)
962 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
963 else if (c < ' ' || c >= 0x7f) {
964 *p++ = '\\';
965 *p++ = 'x';
966 *p++ = hexdigits[(c & 0xf0) >> 4];
967 *p++ = hexdigits[c & 0xf];
969 else
970 *p++ = c;
972 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
973 *p++ = quote;
974 while (*quote_postfix) {
975 *p++ = *quote_postfix++;
977 *p = '\0';
978 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
979 Py_DECREF(v);
980 return NULL;
982 return v;
986 static PyObject *
987 bytes_str(PyObject *op)
989 #if 0
990 if (Py_BytesWarningFlag) {
991 if (PyErr_WarnEx(PyExc_BytesWarning,
992 "str() on a bytearray instance", 1))
993 return NULL;
995 return bytes_repr((PyByteArrayObject*)op);
996 #endif
997 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
1000 static PyObject *
1001 bytes_richcompare(PyObject *self, PyObject *other, int op)
1003 Py_ssize_t self_size, other_size;
1004 Py_buffer self_bytes, other_bytes;
1005 PyObject *res;
1006 Py_ssize_t minsize;
1007 int cmp;
1009 /* Bytes can be compared to anything that supports the (binary)
1010 buffer API. Except that a comparison with Unicode is always an
1011 error, even if the comparison is for equality. */
1012 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1013 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1014 if (Py_BytesWarningFlag && op == Py_EQ) {
1015 if (PyErr_WarnEx(PyExc_BytesWarning,
1016 "Comparsion between bytearray and string", 1))
1017 return NULL;
1020 Py_INCREF(Py_NotImplemented);
1021 return Py_NotImplemented;
1024 self_size = _getbuffer(self, &self_bytes);
1025 if (self_size < 0) {
1026 PyErr_Clear();
1027 Py_INCREF(Py_NotImplemented);
1028 return Py_NotImplemented;
1031 other_size = _getbuffer(other, &other_bytes);
1032 if (other_size < 0) {
1033 PyErr_Clear();
1034 PyObject_ReleaseBuffer(self, &self_bytes);
1035 Py_INCREF(Py_NotImplemented);
1036 return Py_NotImplemented;
1039 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1040 /* Shortcut: if the lengths differ, the objects differ */
1041 cmp = (op == Py_NE);
1043 else {
1044 minsize = self_size;
1045 if (other_size < minsize)
1046 minsize = other_size;
1048 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1049 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1051 if (cmp == 0) {
1052 if (self_size < other_size)
1053 cmp = -1;
1054 else if (self_size > other_size)
1055 cmp = 1;
1058 switch (op) {
1059 case Py_LT: cmp = cmp < 0; break;
1060 case Py_LE: cmp = cmp <= 0; break;
1061 case Py_EQ: cmp = cmp == 0; break;
1062 case Py_NE: cmp = cmp != 0; break;
1063 case Py_GT: cmp = cmp > 0; break;
1064 case Py_GE: cmp = cmp >= 0; break;
1068 res = cmp ? Py_True : Py_False;
1069 PyObject_ReleaseBuffer(self, &self_bytes);
1070 PyObject_ReleaseBuffer(other, &other_bytes);
1071 Py_INCREF(res);
1072 return res;
1075 static void
1076 bytes_dealloc(PyByteArrayObject *self)
1078 if (self->ob_bytes != 0) {
1079 PyMem_Free(self->ob_bytes);
1081 Py_TYPE(self)->tp_free((PyObject *)self);
1085 /* -------------------------------------------------------------------- */
1086 /* Methods */
1088 #define STRINGLIB_CHAR char
1089 #define STRINGLIB_CMP memcmp
1090 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1091 #define STRINGLIB_STR PyByteArray_AS_STRING
1092 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1093 #define STRINGLIB_EMPTY nullbytes
1094 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1095 #define STRINGLIB_MUTABLE 1
1097 #include "stringlib/fastsearch.h"
1098 #include "stringlib/count.h"
1099 #include "stringlib/find.h"
1100 #include "stringlib/partition.h"
1101 #include "stringlib/ctype.h"
1102 #include "stringlib/transmogrify.h"
1105 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1106 were copied from the old char* style string object. */
1108 Py_LOCAL_INLINE(void)
1109 _adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1111 if (*end > len)
1112 *end = len;
1113 else if (*end < 0)
1114 *end += len;
1115 if (*end < 0)
1116 *end = 0;
1117 if (*start < 0)
1118 *start += len;
1119 if (*start < 0)
1120 *start = 0;
1124 Py_LOCAL_INLINE(Py_ssize_t)
1125 bytes_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1127 PyObject *subobj;
1128 Py_buffer subbuf;
1129 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1130 Py_ssize_t res;
1132 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1133 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1134 return -2;
1135 if (_getbuffer(subobj, &subbuf) < 0)
1136 return -2;
1137 if (dir > 0)
1138 res = stringlib_find_slice(
1139 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1140 subbuf.buf, subbuf.len, start, end);
1141 else
1142 res = stringlib_rfind_slice(
1143 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1144 subbuf.buf, subbuf.len, start, end);
1145 PyObject_ReleaseBuffer(subobj, &subbuf);
1146 return res;
1149 PyDoc_STRVAR(find__doc__,
1150 "B.find(sub [,start [,end]]) -> int\n\
1152 Return the lowest index in B where subsection sub is found,\n\
1153 such that sub is contained within s[start,end]. Optional\n\
1154 arguments start and end are interpreted as in slice notation.\n\
1156 Return -1 on failure.");
1158 static PyObject *
1159 bytes_find(PyByteArrayObject *self, PyObject *args)
1161 Py_ssize_t result = bytes_find_internal(self, args, +1);
1162 if (result == -2)
1163 return NULL;
1164 return PyInt_FromSsize_t(result);
1167 PyDoc_STRVAR(count__doc__,
1168 "B.count(sub [,start [,end]]) -> int\n\
1170 Return the number of non-overlapping occurrences of subsection sub in\n\
1171 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1172 as in slice notation.");
1174 static PyObject *
1175 bytes_count(PyByteArrayObject *self, PyObject *args)
1177 PyObject *sub_obj;
1178 const char *str = PyByteArray_AS_STRING(self);
1179 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1180 Py_buffer vsub;
1181 PyObject *count_obj;
1183 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1184 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1185 return NULL;
1187 if (_getbuffer(sub_obj, &vsub) < 0)
1188 return NULL;
1190 _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
1192 count_obj = PyInt_FromSsize_t(
1193 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1195 PyObject_ReleaseBuffer(sub_obj, &vsub);
1196 return count_obj;
1200 PyDoc_STRVAR(index__doc__,
1201 "B.index(sub [,start [,end]]) -> int\n\
1203 Like B.find() but raise ValueError when the subsection is not found.");
1205 static PyObject *
1206 bytes_index(PyByteArrayObject *self, PyObject *args)
1208 Py_ssize_t result = bytes_find_internal(self, args, +1);
1209 if (result == -2)
1210 return NULL;
1211 if (result == -1) {
1212 PyErr_SetString(PyExc_ValueError,
1213 "subsection not found");
1214 return NULL;
1216 return PyInt_FromSsize_t(result);
1220 PyDoc_STRVAR(rfind__doc__,
1221 "B.rfind(sub [,start [,end]]) -> int\n\
1223 Return the highest index in B where subsection sub is found,\n\
1224 such that sub is contained within s[start,end]. Optional\n\
1225 arguments start and end are interpreted as in slice notation.\n\
1227 Return -1 on failure.");
1229 static PyObject *
1230 bytes_rfind(PyByteArrayObject *self, PyObject *args)
1232 Py_ssize_t result = bytes_find_internal(self, args, -1);
1233 if (result == -2)
1234 return NULL;
1235 return PyInt_FromSsize_t(result);
1239 PyDoc_STRVAR(rindex__doc__,
1240 "B.rindex(sub [,start [,end]]) -> int\n\
1242 Like B.rfind() but raise ValueError when the subsection is not found.");
1244 static PyObject *
1245 bytes_rindex(PyByteArrayObject *self, PyObject *args)
1247 Py_ssize_t result = bytes_find_internal(self, args, -1);
1248 if (result == -2)
1249 return NULL;
1250 if (result == -1) {
1251 PyErr_SetString(PyExc_ValueError,
1252 "subsection not found");
1253 return NULL;
1255 return PyInt_FromSsize_t(result);
1259 static int
1260 bytes_contains(PyObject *self, PyObject *arg)
1262 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1263 if (ival == -1 && PyErr_Occurred()) {
1264 Py_buffer varg;
1265 int pos;
1266 PyErr_Clear();
1267 if (_getbuffer(arg, &varg) < 0)
1268 return -1;
1269 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1270 varg.buf, varg.len, 0);
1271 PyObject_ReleaseBuffer(arg, &varg);
1272 return pos >= 0;
1274 if (ival < 0 || ival >= 256) {
1275 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1276 return -1;
1279 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1283 /* Matches the end (direction >= 0) or start (direction < 0) of self
1284 * against substr, using the start and end arguments. Returns
1285 * -1 on error, 0 if not found and 1 if found.
1287 Py_LOCAL(int)
1288 _bytes_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1289 Py_ssize_t end, int direction)
1291 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1292 const char* str;
1293 Py_buffer vsubstr;
1294 int rv = 0;
1296 str = PyByteArray_AS_STRING(self);
1298 if (_getbuffer(substr, &vsubstr) < 0)
1299 return -1;
1301 _adjust_indices(&start, &end, len);
1303 if (direction < 0) {
1304 /* startswith */
1305 if (start+vsubstr.len > len) {
1306 goto done;
1308 } else {
1309 /* endswith */
1310 if (end-start < vsubstr.len || start > len) {
1311 goto done;
1314 if (end-vsubstr.len > start)
1315 start = end - vsubstr.len;
1317 if (end-start >= vsubstr.len)
1318 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1320 done:
1321 PyObject_ReleaseBuffer(substr, &vsubstr);
1322 return rv;
1326 PyDoc_STRVAR(startswith__doc__,
1327 "B.startswith(prefix [,start [,end]]) -> bool\n\
1329 Return True if B starts with the specified prefix, False otherwise.\n\
1330 With optional start, test B beginning at that position.\n\
1331 With optional end, stop comparing B at that position.\n\
1332 prefix can also be a tuple of strings to try.");
1334 static PyObject *
1335 bytes_startswith(PyByteArrayObject *self, PyObject *args)
1337 Py_ssize_t start = 0;
1338 Py_ssize_t end = PY_SSIZE_T_MAX;
1339 PyObject *subobj;
1340 int result;
1342 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1343 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1344 return NULL;
1345 if (PyTuple_Check(subobj)) {
1346 Py_ssize_t i;
1347 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1348 result = _bytes_tailmatch(self,
1349 PyTuple_GET_ITEM(subobj, i),
1350 start, end, -1);
1351 if (result == -1)
1352 return NULL;
1353 else if (result) {
1354 Py_RETURN_TRUE;
1357 Py_RETURN_FALSE;
1359 result = _bytes_tailmatch(self, subobj, start, end, -1);
1360 if (result == -1)
1361 return NULL;
1362 else
1363 return PyBool_FromLong(result);
1366 PyDoc_STRVAR(endswith__doc__,
1367 "B.endswith(suffix [,start [,end]]) -> bool\n\
1369 Return True if B ends with the specified suffix, False otherwise.\n\
1370 With optional start, test B beginning at that position.\n\
1371 With optional end, stop comparing B at that position.\n\
1372 suffix can also be a tuple of strings to try.");
1374 static PyObject *
1375 bytes_endswith(PyByteArrayObject *self, PyObject *args)
1377 Py_ssize_t start = 0;
1378 Py_ssize_t end = PY_SSIZE_T_MAX;
1379 PyObject *subobj;
1380 int result;
1382 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1383 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1384 return NULL;
1385 if (PyTuple_Check(subobj)) {
1386 Py_ssize_t i;
1387 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1388 result = _bytes_tailmatch(self,
1389 PyTuple_GET_ITEM(subobj, i),
1390 start, end, +1);
1391 if (result == -1)
1392 return NULL;
1393 else if (result) {
1394 Py_RETURN_TRUE;
1397 Py_RETURN_FALSE;
1399 result = _bytes_tailmatch(self, subobj, start, end, +1);
1400 if (result == -1)
1401 return NULL;
1402 else
1403 return PyBool_FromLong(result);
1407 PyDoc_STRVAR(translate__doc__,
1408 "B.translate(table[, deletechars]) -> bytearray\n\
1410 Return a copy of B, where all characters occurring in the\n\
1411 optional argument deletechars are removed, and the remaining\n\
1412 characters have been mapped through the given translation\n\
1413 table, which must be a bytes object of length 256.");
1415 static PyObject *
1416 bytes_translate(PyByteArrayObject *self, PyObject *args)
1418 register char *input, *output;
1419 register const char *table;
1420 register Py_ssize_t i, c, changed = 0;
1421 PyObject *input_obj = (PyObject*)self;
1422 const char *output_start;
1423 Py_ssize_t inlen;
1424 PyObject *result;
1425 int trans_table[256];
1426 PyObject *tableobj, *delobj = NULL;
1427 Py_buffer vtable, vdel;
1429 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1430 &tableobj, &delobj))
1431 return NULL;
1433 if (_getbuffer(tableobj, &vtable) < 0)
1434 return NULL;
1436 if (vtable.len != 256) {
1437 PyErr_SetString(PyExc_ValueError,
1438 "translation table must be 256 characters long");
1439 result = NULL;
1440 goto done;
1443 if (delobj != NULL) {
1444 if (_getbuffer(delobj, &vdel) < 0) {
1445 result = NULL;
1446 goto done;
1449 else {
1450 vdel.buf = NULL;
1451 vdel.len = 0;
1454 table = (const char *)vtable.buf;
1455 inlen = PyByteArray_GET_SIZE(input_obj);
1456 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1457 if (result == NULL)
1458 goto done;
1459 output_start = output = PyByteArray_AsString(result);
1460 input = PyByteArray_AS_STRING(input_obj);
1462 if (vdel.len == 0) {
1463 /* If no deletions are required, use faster code */
1464 for (i = inlen; --i >= 0; ) {
1465 c = Py_CHARMASK(*input++);
1466 if (Py_CHARMASK((*output++ = table[c])) != c)
1467 changed = 1;
1469 if (changed || !PyByteArray_CheckExact(input_obj))
1470 goto done;
1471 Py_DECREF(result);
1472 Py_INCREF(input_obj);
1473 result = input_obj;
1474 goto done;
1477 for (i = 0; i < 256; i++)
1478 trans_table[i] = Py_CHARMASK(table[i]);
1480 for (i = 0; i < vdel.len; i++)
1481 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1483 for (i = inlen; --i >= 0; ) {
1484 c = Py_CHARMASK(*input++);
1485 if (trans_table[c] != -1)
1486 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1487 continue;
1488 changed = 1;
1490 if (!changed && PyByteArray_CheckExact(input_obj)) {
1491 Py_DECREF(result);
1492 Py_INCREF(input_obj);
1493 result = input_obj;
1494 goto done;
1496 /* Fix the size of the resulting string */
1497 if (inlen > 0)
1498 PyByteArray_Resize(result, output - output_start);
1500 done:
1501 PyObject_ReleaseBuffer(tableobj, &vtable);
1502 if (delobj != NULL)
1503 PyObject_ReleaseBuffer(delobj, &vdel);
1504 return result;
1508 #define FORWARD 1
1509 #define REVERSE -1
1511 /* find and count characters and substrings */
1513 #define findchar(target, target_len, c) \
1514 ((char *)memchr((const void *)(target), c, target_len))
1516 /* Don't call if length < 2 */
1517 #define Py_STRING_MATCH(target, offset, pattern, length) \
1518 (target[offset] == pattern[0] && \
1519 target[offset+length-1] == pattern[length-1] && \
1520 !memcmp(target+offset+1, pattern+1, length-2) )
1523 /* Bytes ops must return a string. */
1524 /* If the object is subclass of bytes, create a copy */
1525 Py_LOCAL(PyByteArrayObject *)
1526 return_self(PyByteArrayObject *self)
1528 if (PyByteArray_CheckExact(self)) {
1529 Py_INCREF(self);
1530 return (PyByteArrayObject *)self;
1532 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1533 PyByteArray_AS_STRING(self),
1534 PyByteArray_GET_SIZE(self));
1537 Py_LOCAL_INLINE(Py_ssize_t)
1538 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1540 Py_ssize_t count=0;
1541 const char *start=target;
1542 const char *end=target+target_len;
1544 while ( (start=findchar(start, end-start, c)) != NULL ) {
1545 count++;
1546 if (count >= maxcount)
1547 break;
1548 start += 1;
1550 return count;
1553 Py_LOCAL(Py_ssize_t)
1554 findstring(const char *target, Py_ssize_t target_len,
1555 const char *pattern, Py_ssize_t pattern_len,
1556 Py_ssize_t start,
1557 Py_ssize_t end,
1558 int direction)
1560 if (start < 0) {
1561 start += target_len;
1562 if (start < 0)
1563 start = 0;
1565 if (end > target_len) {
1566 end = target_len;
1567 } else if (end < 0) {
1568 end += target_len;
1569 if (end < 0)
1570 end = 0;
1573 /* zero-length substrings always match at the first attempt */
1574 if (pattern_len == 0)
1575 return (direction > 0) ? start : end;
1577 end -= pattern_len;
1579 if (direction < 0) {
1580 for (; end >= start; end--)
1581 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1582 return end;
1583 } else {
1584 for (; start <= end; start++)
1585 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1586 return start;
1588 return -1;
1591 Py_LOCAL_INLINE(Py_ssize_t)
1592 countstring(const char *target, Py_ssize_t target_len,
1593 const char *pattern, Py_ssize_t pattern_len,
1594 Py_ssize_t start,
1595 Py_ssize_t end,
1596 int direction, Py_ssize_t maxcount)
1598 Py_ssize_t count=0;
1600 if (start < 0) {
1601 start += target_len;
1602 if (start < 0)
1603 start = 0;
1605 if (end > target_len) {
1606 end = target_len;
1607 } else if (end < 0) {
1608 end += target_len;
1609 if (end < 0)
1610 end = 0;
1613 /* zero-length substrings match everywhere */
1614 if (pattern_len == 0 || maxcount == 0) {
1615 if (target_len+1 < maxcount)
1616 return target_len+1;
1617 return maxcount;
1620 end -= pattern_len;
1621 if (direction < 0) {
1622 for (; (end >= start); end--)
1623 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1624 count++;
1625 if (--maxcount <= 0) break;
1626 end -= pattern_len-1;
1628 } else {
1629 for (; (start <= end); start++)
1630 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1631 count++;
1632 if (--maxcount <= 0)
1633 break;
1634 start += pattern_len-1;
1637 return count;
1641 /* Algorithms for different cases of string replacement */
1643 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1644 Py_LOCAL(PyByteArrayObject *)
1645 replace_interleave(PyByteArrayObject *self,
1646 const char *to_s, Py_ssize_t to_len,
1647 Py_ssize_t maxcount)
1649 char *self_s, *result_s;
1650 Py_ssize_t self_len, result_len;
1651 Py_ssize_t count, i, product;
1652 PyByteArrayObject *result;
1654 self_len = PyByteArray_GET_SIZE(self);
1656 /* 1 at the end plus 1 after every character */
1657 count = self_len+1;
1658 if (maxcount < count)
1659 count = maxcount;
1661 /* Check for overflow */
1662 /* result_len = count * to_len + self_len; */
1663 product = count * to_len;
1664 if (product / to_len != count) {
1665 PyErr_SetString(PyExc_OverflowError,
1666 "replace string is too long");
1667 return NULL;
1669 result_len = product + self_len;
1670 if (result_len < 0) {
1671 PyErr_SetString(PyExc_OverflowError,
1672 "replace string is too long");
1673 return NULL;
1676 if (! (result = (PyByteArrayObject *)
1677 PyByteArray_FromStringAndSize(NULL, result_len)) )
1678 return NULL;
1680 self_s = PyByteArray_AS_STRING(self);
1681 result_s = PyByteArray_AS_STRING(result);
1683 /* TODO: special case single character, which doesn't need memcpy */
1685 /* Lay the first one down (guaranteed this will occur) */
1686 Py_MEMCPY(result_s, to_s, to_len);
1687 result_s += to_len;
1688 count -= 1;
1690 for (i=0; i<count; i++) {
1691 *result_s++ = *self_s++;
1692 Py_MEMCPY(result_s, to_s, to_len);
1693 result_s += to_len;
1696 /* Copy the rest of the original string */
1697 Py_MEMCPY(result_s, self_s, self_len-i);
1699 return result;
1702 /* Special case for deleting a single character */
1703 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1704 Py_LOCAL(PyByteArrayObject *)
1705 replace_delete_single_character(PyByteArrayObject *self,
1706 char from_c, Py_ssize_t maxcount)
1708 char *self_s, *result_s;
1709 char *start, *next, *end;
1710 Py_ssize_t self_len, result_len;
1711 Py_ssize_t count;
1712 PyByteArrayObject *result;
1714 self_len = PyByteArray_GET_SIZE(self);
1715 self_s = PyByteArray_AS_STRING(self);
1717 count = countchar(self_s, self_len, from_c, maxcount);
1718 if (count == 0) {
1719 return return_self(self);
1722 result_len = self_len - count; /* from_len == 1 */
1723 assert(result_len>=0);
1725 if ( (result = (PyByteArrayObject *)
1726 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1727 return NULL;
1728 result_s = PyByteArray_AS_STRING(result);
1730 start = self_s;
1731 end = self_s + self_len;
1732 while (count-- > 0) {
1733 next = findchar(start, end-start, from_c);
1734 if (next == NULL)
1735 break;
1736 Py_MEMCPY(result_s, start, next-start);
1737 result_s += (next-start);
1738 start = next+1;
1740 Py_MEMCPY(result_s, start, end-start);
1742 return result;
1745 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1747 Py_LOCAL(PyByteArrayObject *)
1748 replace_delete_substring(PyByteArrayObject *self,
1749 const char *from_s, Py_ssize_t from_len,
1750 Py_ssize_t maxcount)
1752 char *self_s, *result_s;
1753 char *start, *next, *end;
1754 Py_ssize_t self_len, result_len;
1755 Py_ssize_t count, offset;
1756 PyByteArrayObject *result;
1758 self_len = PyByteArray_GET_SIZE(self);
1759 self_s = PyByteArray_AS_STRING(self);
1761 count = countstring(self_s, self_len,
1762 from_s, from_len,
1763 0, self_len, 1,
1764 maxcount);
1766 if (count == 0) {
1767 /* no matches */
1768 return return_self(self);
1771 result_len = self_len - (count * from_len);
1772 assert (result_len>=0);
1774 if ( (result = (PyByteArrayObject *)
1775 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1776 return NULL;
1778 result_s = PyByteArray_AS_STRING(result);
1780 start = self_s;
1781 end = self_s + self_len;
1782 while (count-- > 0) {
1783 offset = findstring(start, end-start,
1784 from_s, from_len,
1785 0, end-start, FORWARD);
1786 if (offset == -1)
1787 break;
1788 next = start + offset;
1790 Py_MEMCPY(result_s, start, next-start);
1792 result_s += (next-start);
1793 start = next+from_len;
1795 Py_MEMCPY(result_s, start, end-start);
1796 return result;
1799 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1800 Py_LOCAL(PyByteArrayObject *)
1801 replace_single_character_in_place(PyByteArrayObject *self,
1802 char from_c, char to_c,
1803 Py_ssize_t maxcount)
1805 char *self_s, *result_s, *start, *end, *next;
1806 Py_ssize_t self_len;
1807 PyByteArrayObject *result;
1809 /* The result string will be the same size */
1810 self_s = PyByteArray_AS_STRING(self);
1811 self_len = PyByteArray_GET_SIZE(self);
1813 next = findchar(self_s, self_len, from_c);
1815 if (next == NULL) {
1816 /* No matches; return the original bytes */
1817 return return_self(self);
1820 /* Need to make a new bytes */
1821 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1822 if (result == NULL)
1823 return NULL;
1824 result_s = PyByteArray_AS_STRING(result);
1825 Py_MEMCPY(result_s, self_s, self_len);
1827 /* change everything in-place, starting with this one */
1828 start = result_s + (next-self_s);
1829 *start = to_c;
1830 start++;
1831 end = result_s + self_len;
1833 while (--maxcount > 0) {
1834 next = findchar(start, end-start, from_c);
1835 if (next == NULL)
1836 break;
1837 *next = to_c;
1838 start = next+1;
1841 return result;
1844 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1845 Py_LOCAL(PyByteArrayObject *)
1846 replace_substring_in_place(PyByteArrayObject *self,
1847 const char *from_s, Py_ssize_t from_len,
1848 const char *to_s, Py_ssize_t to_len,
1849 Py_ssize_t maxcount)
1851 char *result_s, *start, *end;
1852 char *self_s;
1853 Py_ssize_t self_len, offset;
1854 PyByteArrayObject *result;
1856 /* The result bytes will be the same size */
1858 self_s = PyByteArray_AS_STRING(self);
1859 self_len = PyByteArray_GET_SIZE(self);
1861 offset = findstring(self_s, self_len,
1862 from_s, from_len,
1863 0, self_len, FORWARD);
1864 if (offset == -1) {
1865 /* No matches; return the original bytes */
1866 return return_self(self);
1869 /* Need to make a new bytes */
1870 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1871 if (result == NULL)
1872 return NULL;
1873 result_s = PyByteArray_AS_STRING(result);
1874 Py_MEMCPY(result_s, self_s, self_len);
1876 /* change everything in-place, starting with this one */
1877 start = result_s + offset;
1878 Py_MEMCPY(start, to_s, from_len);
1879 start += from_len;
1880 end = result_s + self_len;
1882 while ( --maxcount > 0) {
1883 offset = findstring(start, end-start,
1884 from_s, from_len,
1885 0, end-start, FORWARD);
1886 if (offset==-1)
1887 break;
1888 Py_MEMCPY(start+offset, to_s, from_len);
1889 start += offset+from_len;
1892 return result;
1895 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1896 Py_LOCAL(PyByteArrayObject *)
1897 replace_single_character(PyByteArrayObject *self,
1898 char from_c,
1899 const char *to_s, Py_ssize_t to_len,
1900 Py_ssize_t maxcount)
1902 char *self_s, *result_s;
1903 char *start, *next, *end;
1904 Py_ssize_t self_len, result_len;
1905 Py_ssize_t count, product;
1906 PyByteArrayObject *result;
1908 self_s = PyByteArray_AS_STRING(self);
1909 self_len = PyByteArray_GET_SIZE(self);
1911 count = countchar(self_s, self_len, from_c, maxcount);
1912 if (count == 0) {
1913 /* no matches, return unchanged */
1914 return return_self(self);
1917 /* use the difference between current and new, hence the "-1" */
1918 /* result_len = self_len + count * (to_len-1) */
1919 product = count * (to_len-1);
1920 if (product / (to_len-1) != count) {
1921 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1922 return NULL;
1924 result_len = self_len + product;
1925 if (result_len < 0) {
1926 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1927 return NULL;
1930 if ( (result = (PyByteArrayObject *)
1931 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1932 return NULL;
1933 result_s = PyByteArray_AS_STRING(result);
1935 start = self_s;
1936 end = self_s + self_len;
1937 while (count-- > 0) {
1938 next = findchar(start, end-start, from_c);
1939 if (next == NULL)
1940 break;
1942 if (next == start) {
1943 /* replace with the 'to' */
1944 Py_MEMCPY(result_s, to_s, to_len);
1945 result_s += to_len;
1946 start += 1;
1947 } else {
1948 /* copy the unchanged old then the 'to' */
1949 Py_MEMCPY(result_s, start, next-start);
1950 result_s += (next-start);
1951 Py_MEMCPY(result_s, to_s, to_len);
1952 result_s += to_len;
1953 start = next+1;
1956 /* Copy the remainder of the remaining bytes */
1957 Py_MEMCPY(result_s, start, end-start);
1959 return result;
1962 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1963 Py_LOCAL(PyByteArrayObject *)
1964 replace_substring(PyByteArrayObject *self,
1965 const char *from_s, Py_ssize_t from_len,
1966 const char *to_s, Py_ssize_t to_len,
1967 Py_ssize_t maxcount)
1969 char *self_s, *result_s;
1970 char *start, *next, *end;
1971 Py_ssize_t self_len, result_len;
1972 Py_ssize_t count, offset, product;
1973 PyByteArrayObject *result;
1975 self_s = PyByteArray_AS_STRING(self);
1976 self_len = PyByteArray_GET_SIZE(self);
1978 count = countstring(self_s, self_len,
1979 from_s, from_len,
1980 0, self_len, FORWARD, maxcount);
1981 if (count == 0) {
1982 /* no matches, return unchanged */
1983 return return_self(self);
1986 /* Check for overflow */
1987 /* result_len = self_len + count * (to_len-from_len) */
1988 product = count * (to_len-from_len);
1989 if (product / (to_len-from_len) != count) {
1990 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1991 return NULL;
1993 result_len = self_len + product;
1994 if (result_len < 0) {
1995 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1996 return NULL;
1999 if ( (result = (PyByteArrayObject *)
2000 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
2001 return NULL;
2002 result_s = PyByteArray_AS_STRING(result);
2004 start = self_s;
2005 end = self_s + self_len;
2006 while (count-- > 0) {
2007 offset = findstring(start, end-start,
2008 from_s, from_len,
2009 0, end-start, FORWARD);
2010 if (offset == -1)
2011 break;
2012 next = start+offset;
2013 if (next == start) {
2014 /* replace with the 'to' */
2015 Py_MEMCPY(result_s, to_s, to_len);
2016 result_s += to_len;
2017 start += from_len;
2018 } else {
2019 /* copy the unchanged old then the 'to' */
2020 Py_MEMCPY(result_s, start, next-start);
2021 result_s += (next-start);
2022 Py_MEMCPY(result_s, to_s, to_len);
2023 result_s += to_len;
2024 start = next+from_len;
2027 /* Copy the remainder of the remaining bytes */
2028 Py_MEMCPY(result_s, start, end-start);
2030 return result;
2034 Py_LOCAL(PyByteArrayObject *)
2035 replace(PyByteArrayObject *self,
2036 const char *from_s, Py_ssize_t from_len,
2037 const char *to_s, Py_ssize_t to_len,
2038 Py_ssize_t maxcount)
2040 if (maxcount < 0) {
2041 maxcount = PY_SSIZE_T_MAX;
2042 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
2043 /* nothing to do; return the original bytes */
2044 return return_self(self);
2047 if (maxcount == 0 ||
2048 (from_len == 0 && to_len == 0)) {
2049 /* nothing to do; return the original bytes */
2050 return return_self(self);
2053 /* Handle zero-length special cases */
2055 if (from_len == 0) {
2056 /* insert the 'to' bytes everywhere. */
2057 /* >>> "Python".replace("", ".") */
2058 /* '.P.y.t.h.o.n.' */
2059 return replace_interleave(self, to_s, to_len, maxcount);
2062 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2063 /* point for an empty self bytes to generate a non-empty bytes */
2064 /* Special case so the remaining code always gets a non-empty bytes */
2065 if (PyByteArray_GET_SIZE(self) == 0) {
2066 return return_self(self);
2069 if (to_len == 0) {
2070 /* delete all occurances of 'from' bytes */
2071 if (from_len == 1) {
2072 return replace_delete_single_character(
2073 self, from_s[0], maxcount);
2074 } else {
2075 return replace_delete_substring(self, from_s, from_len, maxcount);
2079 /* Handle special case where both bytes have the same length */
2081 if (from_len == to_len) {
2082 if (from_len == 1) {
2083 return replace_single_character_in_place(
2084 self,
2085 from_s[0],
2086 to_s[0],
2087 maxcount);
2088 } else {
2089 return replace_substring_in_place(
2090 self, from_s, from_len, to_s, to_len, maxcount);
2094 /* Otherwise use the more generic algorithms */
2095 if (from_len == 1) {
2096 return replace_single_character(self, from_s[0],
2097 to_s, to_len, maxcount);
2098 } else {
2099 /* len('from')>=2, len('to')>=1 */
2100 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2105 PyDoc_STRVAR(replace__doc__,
2106 "B.replace(old, new[, count]) -> bytes\n\
2108 Return a copy of B with all occurrences of subsection\n\
2109 old replaced by new. If the optional argument count is\n\
2110 given, only the first count occurrences are replaced.");
2112 static PyObject *
2113 bytes_replace(PyByteArrayObject *self, PyObject *args)
2115 Py_ssize_t count = -1;
2116 PyObject *from, *to, *res;
2117 Py_buffer vfrom, vto;
2119 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2120 return NULL;
2122 if (_getbuffer(from, &vfrom) < 0)
2123 return NULL;
2124 if (_getbuffer(to, &vto) < 0) {
2125 PyObject_ReleaseBuffer(from, &vfrom);
2126 return NULL;
2129 res = (PyObject *)replace((PyByteArrayObject *) self,
2130 vfrom.buf, vfrom.len,
2131 vto.buf, vto.len, count);
2133 PyObject_ReleaseBuffer(from, &vfrom);
2134 PyObject_ReleaseBuffer(to, &vto);
2135 return res;
2139 /* Overallocate the initial list to reduce the number of reallocs for small
2140 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
2141 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
2142 text (roughly 11 words per line) and field delimited data (usually 1-10
2143 fields). For large strings the split algorithms are bandwidth limited
2144 so increasing the preallocation likely will not improve things.*/
2146 #define MAX_PREALLOC 12
2148 /* 5 splits gives 6 elements */
2149 #define PREALLOC_SIZE(maxsplit) \
2150 (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
2152 #define SPLIT_APPEND(data, left, right) \
2153 str = PyByteArray_FromStringAndSize((data) + (left), \
2154 (right) - (left)); \
2155 if (str == NULL) \
2156 goto onError; \
2157 if (PyList_Append(list, str)) { \
2158 Py_DECREF(str); \
2159 goto onError; \
2161 else \
2162 Py_DECREF(str);
2164 #define SPLIT_ADD(data, left, right) { \
2165 str = PyByteArray_FromStringAndSize((data) + (left), \
2166 (right) - (left)); \
2167 if (str == NULL) \
2168 goto onError; \
2169 if (count < MAX_PREALLOC) { \
2170 PyList_SET_ITEM(list, count, str); \
2171 } else { \
2172 if (PyList_Append(list, str)) { \
2173 Py_DECREF(str); \
2174 goto onError; \
2176 else \
2177 Py_DECREF(str); \
2179 count++; }
2181 /* Always force the list to the expected size. */
2182 #define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2185 Py_LOCAL_INLINE(PyObject *)
2186 split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2188 register Py_ssize_t i, j, count = 0;
2189 PyObject *str;
2190 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2192 if (list == NULL)
2193 return NULL;
2195 i = j = 0;
2196 while ((j < len) && (maxcount-- > 0)) {
2197 for(; j < len; j++) {
2198 /* I found that using memchr makes no difference */
2199 if (s[j] == ch) {
2200 SPLIT_ADD(s, i, j);
2201 i = j = j + 1;
2202 break;
2206 if (i <= len) {
2207 SPLIT_ADD(s, i, len);
2209 FIX_PREALLOC_SIZE(list);
2210 return list;
2212 onError:
2213 Py_DECREF(list);
2214 return NULL;
2218 Py_LOCAL_INLINE(PyObject *)
2219 split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2221 register Py_ssize_t i, j, count = 0;
2222 PyObject *str;
2223 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2225 if (list == NULL)
2226 return NULL;
2228 for (i = j = 0; i < len; ) {
2229 /* find a token */
2230 while (i < len && ISSPACE(s[i]))
2231 i++;
2232 j = i;
2233 while (i < len && !ISSPACE(s[i]))
2234 i++;
2235 if (j < i) {
2236 if (maxcount-- <= 0)
2237 break;
2238 SPLIT_ADD(s, j, i);
2239 while (i < len && ISSPACE(s[i]))
2240 i++;
2241 j = i;
2244 if (j < len) {
2245 SPLIT_ADD(s, j, len);
2247 FIX_PREALLOC_SIZE(list);
2248 return list;
2250 onError:
2251 Py_DECREF(list);
2252 return NULL;
2255 PyDoc_STRVAR(split__doc__,
2256 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2258 Return a list of the sections in B, using sep as the delimiter.\n\
2259 If sep is not given, B is split on ASCII whitespace characters\n\
2260 (space, tab, return, newline, formfeed, vertical tab).\n\
2261 If maxsplit is given, at most maxsplit splits are done.");
2263 static PyObject *
2264 bytes_split(PyByteArrayObject *self, PyObject *args)
2266 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2267 Py_ssize_t maxsplit = -1, count = 0;
2268 const char *s = PyByteArray_AS_STRING(self), *sub;
2269 PyObject *list, *str, *subobj = Py_None;
2270 Py_buffer vsub;
2271 #ifdef USE_FAST
2272 Py_ssize_t pos;
2273 #endif
2275 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2276 return NULL;
2277 if (maxsplit < 0)
2278 maxsplit = PY_SSIZE_T_MAX;
2280 if (subobj == Py_None)
2281 return split_whitespace(s, len, maxsplit);
2283 if (_getbuffer(subobj, &vsub) < 0)
2284 return NULL;
2285 sub = vsub.buf;
2286 n = vsub.len;
2288 if (n == 0) {
2289 PyErr_SetString(PyExc_ValueError, "empty separator");
2290 PyObject_ReleaseBuffer(subobj, &vsub);
2291 return NULL;
2293 if (n == 1)
2294 return split_char(s, len, sub[0], maxsplit);
2296 list = PyList_New(PREALLOC_SIZE(maxsplit));
2297 if (list == NULL) {
2298 PyObject_ReleaseBuffer(subobj, &vsub);
2299 return NULL;
2302 #ifdef USE_FAST
2303 i = j = 0;
2304 while (maxsplit-- > 0) {
2305 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2306 if (pos < 0)
2307 break;
2308 j = i+pos;
2309 SPLIT_ADD(s, i, j);
2310 i = j + n;
2312 #else
2313 i = j = 0;
2314 while ((j+n <= len) && (maxsplit-- > 0)) {
2315 for (; j+n <= len; j++) {
2316 if (Py_STRING_MATCH(s, j, sub, n)) {
2317 SPLIT_ADD(s, i, j);
2318 i = j = j + n;
2319 break;
2323 #endif
2324 SPLIT_ADD(s, i, len);
2325 FIX_PREALLOC_SIZE(list);
2326 PyObject_ReleaseBuffer(subobj, &vsub);
2327 return list;
2329 onError:
2330 Py_DECREF(list);
2331 PyObject_ReleaseBuffer(subobj, &vsub);
2332 return NULL;
2335 /* stringlib's partition shares nullbytes in some cases.
2336 undo this, we don't want the nullbytes to be shared. */
2337 static PyObject *
2338 make_nullbytes_unique(PyObject *result)
2340 if (result != NULL) {
2341 int i;
2342 assert(PyTuple_Check(result));
2343 assert(PyTuple_GET_SIZE(result) == 3);
2344 for (i = 0; i < 3; i++) {
2345 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2346 PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
2347 if (new == NULL) {
2348 Py_DECREF(result);
2349 result = NULL;
2350 break;
2352 Py_DECREF(nullbytes);
2353 PyTuple_SET_ITEM(result, i, new);
2357 return result;
2360 PyDoc_STRVAR(partition__doc__,
2361 "B.partition(sep) -> (head, sep, tail)\n\
2363 Searches for the separator sep in B, and returns the part before it,\n\
2364 the separator itself, and the part after it. If the separator is not\n\
2365 found, returns B and two empty bytearray objects.");
2367 static PyObject *
2368 bytes_partition(PyByteArrayObject *self, PyObject *sep_obj)
2370 PyObject *bytesep, *result;
2372 bytesep = PyByteArray_FromObject(sep_obj);
2373 if (! bytesep)
2374 return NULL;
2376 result = stringlib_partition(
2377 (PyObject*) self,
2378 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2379 bytesep,
2380 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2383 Py_DECREF(bytesep);
2384 return make_nullbytes_unique(result);
2387 PyDoc_STRVAR(rpartition__doc__,
2388 "B.rpartition(sep) -> (tail, sep, head)\n\
2390 Searches for the separator sep in B, starting at the end of B,\n\
2391 and returns the part before it, the separator itself, and the\n\
2392 part after it. If the separator is not found, returns two empty\n\
2393 bytearray objects and B.");
2395 static PyObject *
2396 bytes_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2398 PyObject *bytesep, *result;
2400 bytesep = PyByteArray_FromObject(sep_obj);
2401 if (! bytesep)
2402 return NULL;
2404 result = stringlib_rpartition(
2405 (PyObject*) self,
2406 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2407 bytesep,
2408 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2411 Py_DECREF(bytesep);
2412 return make_nullbytes_unique(result);
2415 Py_LOCAL_INLINE(PyObject *)
2416 rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2418 register Py_ssize_t i, j, count=0;
2419 PyObject *str;
2420 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2422 if (list == NULL)
2423 return NULL;
2425 i = j = len - 1;
2426 while ((i >= 0) && (maxcount-- > 0)) {
2427 for (; i >= 0; i--) {
2428 if (s[i] == ch) {
2429 SPLIT_ADD(s, i + 1, j + 1);
2430 j = i = i - 1;
2431 break;
2435 if (j >= -1) {
2436 SPLIT_ADD(s, 0, j + 1);
2438 FIX_PREALLOC_SIZE(list);
2439 if (PyList_Reverse(list) < 0)
2440 goto onError;
2442 return list;
2444 onError:
2445 Py_DECREF(list);
2446 return NULL;
2449 Py_LOCAL_INLINE(PyObject *)
2450 rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2452 register Py_ssize_t i, j, count = 0;
2453 PyObject *str;
2454 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2456 if (list == NULL)
2457 return NULL;
2459 for (i = j = len - 1; i >= 0; ) {
2460 /* find a token */
2461 while (i >= 0 && ISSPACE(s[i]))
2462 i--;
2463 j = i;
2464 while (i >= 0 && !ISSPACE(s[i]))
2465 i--;
2466 if (j > i) {
2467 if (maxcount-- <= 0)
2468 break;
2469 SPLIT_ADD(s, i + 1, j + 1);
2470 while (i >= 0 && ISSPACE(s[i]))
2471 i--;
2472 j = i;
2475 if (j >= 0) {
2476 SPLIT_ADD(s, 0, j + 1);
2478 FIX_PREALLOC_SIZE(list);
2479 if (PyList_Reverse(list) < 0)
2480 goto onError;
2482 return list;
2484 onError:
2485 Py_DECREF(list);
2486 return NULL;
2489 PyDoc_STRVAR(rsplit__doc__,
2490 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2492 Return a list of the sections in B, using sep as the delimiter,\n\
2493 starting at the end of B and working to the front.\n\
2494 If sep is not given, B is split on ASCII whitespace characters\n\
2495 (space, tab, return, newline, formfeed, vertical tab).\n\
2496 If maxsplit is given, at most maxsplit splits are done.");
2498 static PyObject *
2499 bytes_rsplit(PyByteArrayObject *self, PyObject *args)
2501 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2502 Py_ssize_t maxsplit = -1, count = 0;
2503 const char *s = PyByteArray_AS_STRING(self), *sub;
2504 PyObject *list, *str, *subobj = Py_None;
2505 Py_buffer vsub;
2507 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2508 return NULL;
2509 if (maxsplit < 0)
2510 maxsplit = PY_SSIZE_T_MAX;
2512 if (subobj == Py_None)
2513 return rsplit_whitespace(s, len, maxsplit);
2515 if (_getbuffer(subobj, &vsub) < 0)
2516 return NULL;
2517 sub = vsub.buf;
2518 n = vsub.len;
2520 if (n == 0) {
2521 PyErr_SetString(PyExc_ValueError, "empty separator");
2522 PyObject_ReleaseBuffer(subobj, &vsub);
2523 return NULL;
2525 else if (n == 1)
2526 return rsplit_char(s, len, sub[0], maxsplit);
2528 list = PyList_New(PREALLOC_SIZE(maxsplit));
2529 if (list == NULL) {
2530 PyObject_ReleaseBuffer(subobj, &vsub);
2531 return NULL;
2534 j = len;
2535 i = j - n;
2537 while ( (i >= 0) && (maxsplit-- > 0) ) {
2538 for (; i>=0; i--) {
2539 if (Py_STRING_MATCH(s, i, sub, n)) {
2540 SPLIT_ADD(s, i + n, j);
2541 j = i;
2542 i -= n;
2543 break;
2547 SPLIT_ADD(s, 0, j);
2548 FIX_PREALLOC_SIZE(list);
2549 if (PyList_Reverse(list) < 0)
2550 goto onError;
2551 PyObject_ReleaseBuffer(subobj, &vsub);
2552 return list;
2554 onError:
2555 Py_DECREF(list);
2556 PyObject_ReleaseBuffer(subobj, &vsub);
2557 return NULL;
2560 PyDoc_STRVAR(reverse__doc__,
2561 "B.reverse() -> None\n\
2563 Reverse the order of the values in B in place.");
2564 static PyObject *
2565 bytes_reverse(PyByteArrayObject *self, PyObject *unused)
2567 char swap, *head, *tail;
2568 Py_ssize_t i, j, n = Py_SIZE(self);
2570 j = n / 2;
2571 head = self->ob_bytes;
2572 tail = head + n - 1;
2573 for (i = 0; i < j; i++) {
2574 swap = *head;
2575 *head++ = *tail;
2576 *tail-- = swap;
2579 Py_RETURN_NONE;
2582 PyDoc_STRVAR(insert__doc__,
2583 "B.insert(index, int) -> None\n\
2585 Insert a single item into the bytearray before the given index.");
2586 static PyObject *
2587 bytes_insert(PyByteArrayObject *self, PyObject *args)
2589 PyObject *value;
2590 int ival;
2591 Py_ssize_t where, n = Py_SIZE(self);
2593 if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
2594 return NULL;
2596 if (n == PY_SSIZE_T_MAX) {
2597 PyErr_SetString(PyExc_OverflowError,
2598 "cannot add more objects to bytes");
2599 return NULL;
2601 if (!_getbytevalue(value, &ival))
2602 return NULL;
2603 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2604 return NULL;
2606 if (where < 0) {
2607 where += n;
2608 if (where < 0)
2609 where = 0;
2611 if (where > n)
2612 where = n;
2613 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2614 self->ob_bytes[where] = ival;
2616 Py_RETURN_NONE;
2619 PyDoc_STRVAR(append__doc__,
2620 "B.append(int) -> None\n\
2622 Append a single item to the end of B.");
2623 static PyObject *
2624 bytes_append(PyByteArrayObject *self, PyObject *arg)
2626 int value;
2627 Py_ssize_t n = Py_SIZE(self);
2629 if (! _getbytevalue(arg, &value))
2630 return NULL;
2631 if (n == PY_SSIZE_T_MAX) {
2632 PyErr_SetString(PyExc_OverflowError,
2633 "cannot add more objects to bytes");
2634 return NULL;
2636 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2637 return NULL;
2639 self->ob_bytes[n] = value;
2641 Py_RETURN_NONE;
2644 PyDoc_STRVAR(extend__doc__,
2645 "B.extend(iterable int) -> None\n\
2647 Append all the elements from the iterator or sequence to the\n\
2648 end of B.");
2649 static PyObject *
2650 bytes_extend(PyByteArrayObject *self, PyObject *arg)
2652 PyObject *it, *item, *bytes_obj;
2653 Py_ssize_t buf_size = 0, len = 0;
2654 int value;
2655 char *buf;
2657 /* bytes_setslice code only accepts something supporting PEP 3118. */
2658 if (PyObject_CheckBuffer(arg)) {
2659 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2660 return NULL;
2662 Py_RETURN_NONE;
2665 it = PyObject_GetIter(arg);
2666 if (it == NULL)
2667 return NULL;
2669 /* Try to determine the length of the argument. 32 is abitrary. */
2670 buf_size = _PyObject_LengthHint(arg, 32);
2672 bytes_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2673 if (bytes_obj == NULL)
2674 return NULL;
2675 buf = PyByteArray_AS_STRING(bytes_obj);
2677 while ((item = PyIter_Next(it)) != NULL) {
2678 if (! _getbytevalue(item, &value)) {
2679 Py_DECREF(item);
2680 Py_DECREF(it);
2681 Py_DECREF(bytes_obj);
2682 return NULL;
2684 buf[len++] = value;
2685 Py_DECREF(item);
2687 if (len >= buf_size) {
2688 buf_size = len + (len >> 1) + 1;
2689 if (PyByteArray_Resize((PyObject *)bytes_obj, buf_size) < 0) {
2690 Py_DECREF(it);
2691 Py_DECREF(bytes_obj);
2692 return NULL;
2694 /* Recompute the `buf' pointer, since the resizing operation may
2695 have invalidated it. */
2696 buf = PyByteArray_AS_STRING(bytes_obj);
2699 Py_DECREF(it);
2701 /* Resize down to exact size. */
2702 if (PyByteArray_Resize((PyObject *)bytes_obj, len) < 0) {
2703 Py_DECREF(bytes_obj);
2704 return NULL;
2707 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), bytes_obj) == -1)
2708 return NULL;
2709 Py_DECREF(bytes_obj);
2711 Py_RETURN_NONE;
2714 PyDoc_STRVAR(pop__doc__,
2715 "B.pop([index]) -> int\n\
2717 Remove and return a single item from B. If no index\n\
2718 argument is given, will pop the last value.");
2719 static PyObject *
2720 bytes_pop(PyByteArrayObject *self, PyObject *args)
2722 int value;
2723 Py_ssize_t where = -1, n = Py_SIZE(self);
2725 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2726 return NULL;
2728 if (n == 0) {
2729 PyErr_SetString(PyExc_OverflowError,
2730 "cannot pop an empty bytes");
2731 return NULL;
2733 if (where < 0)
2734 where += Py_SIZE(self);
2735 if (where < 0 || where >= Py_SIZE(self)) {
2736 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2737 return NULL;
2740 value = self->ob_bytes[where];
2741 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2742 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2743 return NULL;
2745 return PyInt_FromLong(value);
2748 PyDoc_STRVAR(remove__doc__,
2749 "B.remove(int) -> None\n\
2751 Remove the first occurance of a value in B.");
2752 static PyObject *
2753 bytes_remove(PyByteArrayObject *self, PyObject *arg)
2755 int value;
2756 Py_ssize_t where, n = Py_SIZE(self);
2758 if (! _getbytevalue(arg, &value))
2759 return NULL;
2761 for (where = 0; where < n; where++) {
2762 if (self->ob_bytes[where] == value)
2763 break;
2765 if (where == n) {
2766 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2767 return NULL;
2770 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2771 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2772 return NULL;
2774 Py_RETURN_NONE;
2777 /* XXX These two helpers could be optimized if argsize == 1 */
2779 static Py_ssize_t
2780 lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2781 void *argptr, Py_ssize_t argsize)
2783 Py_ssize_t i = 0;
2784 while (i < mysize && memchr(argptr, myptr[i], argsize))
2785 i++;
2786 return i;
2789 static Py_ssize_t
2790 rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2791 void *argptr, Py_ssize_t argsize)
2793 Py_ssize_t i = mysize - 1;
2794 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2795 i--;
2796 return i + 1;
2799 PyDoc_STRVAR(strip__doc__,
2800 "B.strip([bytes]) -> bytearray\n\
2802 Strip leading and trailing bytes contained in the argument.\n\
2803 If the argument is omitted, strip ASCII whitespace.");
2804 static PyObject *
2805 bytes_strip(PyByteArrayObject *self, PyObject *args)
2807 Py_ssize_t left, right, mysize, argsize;
2808 void *myptr, *argptr;
2809 PyObject *arg = Py_None;
2810 Py_buffer varg;
2811 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2812 return NULL;
2813 if (arg == Py_None) {
2814 argptr = "\t\n\r\f\v ";
2815 argsize = 6;
2817 else {
2818 if (_getbuffer(arg, &varg) < 0)
2819 return NULL;
2820 argptr = varg.buf;
2821 argsize = varg.len;
2823 myptr = self->ob_bytes;
2824 mysize = Py_SIZE(self);
2825 left = lstrip_helper(myptr, mysize, argptr, argsize);
2826 if (left == mysize)
2827 right = left;
2828 else
2829 right = rstrip_helper(myptr, mysize, argptr, argsize);
2830 if (arg != Py_None)
2831 PyObject_ReleaseBuffer(arg, &varg);
2832 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2835 PyDoc_STRVAR(lstrip__doc__,
2836 "B.lstrip([bytes]) -> bytearray\n\
2838 Strip leading bytes contained in the argument.\n\
2839 If the argument is omitted, strip leading ASCII whitespace.");
2840 static PyObject *
2841 bytes_lstrip(PyByteArrayObject *self, PyObject *args)
2843 Py_ssize_t left, right, mysize, argsize;
2844 void *myptr, *argptr;
2845 PyObject *arg = Py_None;
2846 Py_buffer varg;
2847 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2848 return NULL;
2849 if (arg == Py_None) {
2850 argptr = "\t\n\r\f\v ";
2851 argsize = 6;
2853 else {
2854 if (_getbuffer(arg, &varg) < 0)
2855 return NULL;
2856 argptr = varg.buf;
2857 argsize = varg.len;
2859 myptr = self->ob_bytes;
2860 mysize = Py_SIZE(self);
2861 left = lstrip_helper(myptr, mysize, argptr, argsize);
2862 right = mysize;
2863 if (arg != Py_None)
2864 PyObject_ReleaseBuffer(arg, &varg);
2865 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2868 PyDoc_STRVAR(rstrip__doc__,
2869 "B.rstrip([bytes]) -> bytearray\n\
2871 Strip trailing bytes contained in the argument.\n\
2872 If the argument is omitted, strip trailing ASCII whitespace.");
2873 static PyObject *
2874 bytes_rstrip(PyByteArrayObject *self, PyObject *args)
2876 Py_ssize_t left, right, mysize, argsize;
2877 void *myptr, *argptr;
2878 PyObject *arg = Py_None;
2879 Py_buffer varg;
2880 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2881 return NULL;
2882 if (arg == Py_None) {
2883 argptr = "\t\n\r\f\v ";
2884 argsize = 6;
2886 else {
2887 if (_getbuffer(arg, &varg) < 0)
2888 return NULL;
2889 argptr = varg.buf;
2890 argsize = varg.len;
2892 myptr = self->ob_bytes;
2893 mysize = Py_SIZE(self);
2894 left = 0;
2895 right = rstrip_helper(myptr, mysize, argptr, argsize);
2896 if (arg != Py_None)
2897 PyObject_ReleaseBuffer(arg, &varg);
2898 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2901 PyDoc_STRVAR(decode_doc,
2902 "B.decode([encoding[, errors]]) -> unicode object.\n\
2904 Decodes B using the codec registered for encoding. encoding defaults\n\
2905 to the default encoding. errors may be given to set a different error\n\
2906 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2907 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2908 as well as any other name registered with codecs.register_error that is\n\
2909 able to handle UnicodeDecodeErrors.");
2911 static PyObject *
2912 bytes_decode(PyObject *self, PyObject *args)
2914 const char *encoding = NULL;
2915 const char *errors = NULL;
2917 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2918 return NULL;
2919 if (encoding == NULL)
2920 encoding = PyUnicode_GetDefaultEncoding();
2921 return PyCodec_Decode(self, encoding, errors);
2924 PyDoc_STRVAR(alloc_doc,
2925 "B.__alloc__() -> int\n\
2927 Returns the number of bytes actually allocated.");
2929 static PyObject *
2930 bytes_alloc(PyByteArrayObject *self)
2932 return PyInt_FromSsize_t(self->ob_alloc);
2935 PyDoc_STRVAR(join_doc,
2936 "B.join(iterable_of_bytes) -> bytes\n\
2938 Concatenates any number of bytearray objects, with B in between each pair.");
2940 static PyObject *
2941 bytes_join(PyByteArrayObject *self, PyObject *it)
2943 PyObject *seq;
2944 Py_ssize_t mysize = Py_SIZE(self);
2945 Py_ssize_t i;
2946 Py_ssize_t n;
2947 PyObject **items;
2948 Py_ssize_t totalsize = 0;
2949 PyObject *result;
2950 char *dest;
2952 seq = PySequence_Fast(it, "can only join an iterable");
2953 if (seq == NULL)
2954 return NULL;
2955 n = PySequence_Fast_GET_SIZE(seq);
2956 items = PySequence_Fast_ITEMS(seq);
2958 /* Compute the total size, and check that they are all bytes */
2959 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2960 for (i = 0; i < n; i++) {
2961 PyObject *obj = items[i];
2962 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2963 PyErr_Format(PyExc_TypeError,
2964 "can only join an iterable of bytes "
2965 "(item %ld has type '%.100s')",
2966 /* XXX %ld isn't right on Win64 */
2967 (long)i, Py_TYPE(obj)->tp_name);
2968 goto error;
2970 if (i > 0)
2971 totalsize += mysize;
2972 totalsize += Py_SIZE(obj);
2973 if (totalsize < 0) {
2974 PyErr_NoMemory();
2975 goto error;
2979 /* Allocate the result, and copy the bytes */
2980 result = PyByteArray_FromStringAndSize(NULL, totalsize);
2981 if (result == NULL)
2982 goto error;
2983 dest = PyByteArray_AS_STRING(result);
2984 for (i = 0; i < n; i++) {
2985 PyObject *obj = items[i];
2986 Py_ssize_t size = Py_SIZE(obj);
2987 char *buf;
2988 if (PyByteArray_Check(obj))
2989 buf = PyByteArray_AS_STRING(obj);
2990 else
2991 buf = PyBytes_AS_STRING(obj);
2992 if (i) {
2993 memcpy(dest, self->ob_bytes, mysize);
2994 dest += mysize;
2996 memcpy(dest, buf, size);
2997 dest += size;
3000 /* Done */
3001 Py_DECREF(seq);
3002 return result;
3004 /* Error handling */
3005 error:
3006 Py_DECREF(seq);
3007 return NULL;
3010 PyDoc_STRVAR(fromhex_doc,
3011 "bytearray.fromhex(string) -> bytearray\n\
3013 Create a bytearray object from a string of hexadecimal numbers.\n\
3014 Spaces between two numbers are accepted.\n\
3015 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3017 static int
3018 hex_digit_to_int(Py_UNICODE c)
3020 if (c >= 128)
3021 return -1;
3022 if (ISDIGIT(c))
3023 return c - '0';
3024 else {
3025 if (ISUPPER(c))
3026 c = TOLOWER(c);
3027 if (c >= 'a' && c <= 'f')
3028 return c - 'a' + 10;
3030 return -1;
3033 static PyObject *
3034 bytes_fromhex(PyObject *cls, PyObject *args)
3036 PyObject *newbytes, *hexobj;
3037 char *buf;
3038 Py_UNICODE *hex;
3039 Py_ssize_t hexlen, byteslen, i, j;
3040 int top, bot;
3042 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
3043 return NULL;
3044 assert(PyUnicode_Check(hexobj));
3045 hexlen = PyUnicode_GET_SIZE(hexobj);
3046 hex = PyUnicode_AS_UNICODE(hexobj);
3047 byteslen = hexlen/2; /* This overestimates if there are spaces */
3048 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
3049 if (!newbytes)
3050 return NULL;
3051 buf = PyByteArray_AS_STRING(newbytes);
3052 for (i = j = 0; i < hexlen; i += 2) {
3053 /* skip over spaces in the input */
3054 while (hex[i] == ' ')
3055 i++;
3056 if (i >= hexlen)
3057 break;
3058 top = hex_digit_to_int(hex[i]);
3059 bot = hex_digit_to_int(hex[i+1]);
3060 if (top == -1 || bot == -1) {
3061 PyErr_Format(PyExc_ValueError,
3062 "non-hexadecimal number found in "
3063 "fromhex() arg at position %zd", i);
3064 goto error;
3066 buf[j++] = (top << 4) + bot;
3068 if (PyByteArray_Resize(newbytes, j) < 0)
3069 goto error;
3070 return newbytes;
3072 error:
3073 Py_DECREF(newbytes);
3074 return NULL;
3077 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3079 static PyObject *
3080 bytes_reduce(PyByteArrayObject *self)
3082 PyObject *latin1, *dict;
3083 if (self->ob_bytes)
3084 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3085 Py_SIZE(self), NULL);
3086 else
3087 latin1 = PyUnicode_FromString("");
3089 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3090 if (dict == NULL) {
3091 PyErr_Clear();
3092 dict = Py_None;
3093 Py_INCREF(dict);
3096 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3099 PyDoc_STRVAR(sizeof_doc,
3100 "B.__sizeof__() -> int\n\
3102 Returns the size of B in memory, in bytes");
3103 static PyObject *
3104 bytes_sizeof(PyByteArrayObject *self)
3106 Py_ssize_t res;
3108 res = sizeof(PyByteArrayObject) + self->ob_alloc * sizeof(char);
3109 return PyInt_FromSsize_t(res);
3112 static PySequenceMethods bytes_as_sequence = {
3113 (lenfunc)bytes_length, /* sq_length */
3114 (binaryfunc)PyByteArray_Concat, /* sq_concat */
3115 (ssizeargfunc)bytes_repeat, /* sq_repeat */
3116 (ssizeargfunc)bytes_getitem, /* sq_item */
3117 0, /* sq_slice */
3118 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
3119 0, /* sq_ass_slice */
3120 (objobjproc)bytes_contains, /* sq_contains */
3121 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
3122 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
3125 static PyMappingMethods bytes_as_mapping = {
3126 (lenfunc)bytes_length,
3127 (binaryfunc)bytes_subscript,
3128 (objobjargproc)bytes_ass_subscript,
3131 static PyBufferProcs bytes_as_buffer = {
3132 (readbufferproc)bytes_buffer_getreadbuf,
3133 (writebufferproc)bytes_buffer_getwritebuf,
3134 (segcountproc)bytes_buffer_getsegcount,
3135 (charbufferproc)bytes_buffer_getcharbuf,
3136 (getbufferproc)bytes_getbuffer,
3137 (releasebufferproc)bytes_releasebuffer,
3140 static PyMethodDef
3141 bytes_methods[] = {
3142 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
3143 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
3144 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, sizeof_doc},
3145 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
3146 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3147 _Py_capitalize__doc__},
3148 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3149 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
3150 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
3151 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
3152 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3153 expandtabs__doc__},
3154 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
3155 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
3156 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
3157 fromhex_doc},
3158 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3159 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
3160 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3161 _Py_isalnum__doc__},
3162 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3163 _Py_isalpha__doc__},
3164 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3165 _Py_isdigit__doc__},
3166 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3167 _Py_islower__doc__},
3168 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3169 _Py_isspace__doc__},
3170 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3171 _Py_istitle__doc__},
3172 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3173 _Py_isupper__doc__},
3174 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
3175 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3176 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
3177 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
3178 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
3179 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
3180 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
3181 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
3182 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
3183 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3184 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3185 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3186 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
3187 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
3188 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
3189 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
3190 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3191 splitlines__doc__},
3192 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
3193 startswith__doc__},
3194 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
3195 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3196 _Py_swapcase__doc__},
3197 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3198 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
3199 translate__doc__},
3200 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3201 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
3202 {NULL}
3205 PyDoc_STRVAR(bytes_doc,
3206 "bytearray(iterable_of_ints) -> bytearray.\n\
3207 bytearray(string, encoding[, errors]) -> bytearray.\n\
3208 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3209 bytearray(memory_view) -> bytearray.\n\
3211 Construct an mutable bytearray object from:\n\
3212 - an iterable yielding integers in range(256)\n\
3213 - a text string encoded using the specified encoding\n\
3214 - a bytes or a bytearray object\n\
3215 - any object implementing the buffer API.\n\
3217 bytearray(int) -> bytearray.\n\
3219 Construct a zero-initialized bytearray of the given length.");
3222 static PyObject *bytes_iter(PyObject *seq);
3224 PyTypeObject PyByteArray_Type = {
3225 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3226 "bytearray",
3227 sizeof(PyByteArrayObject),
3229 (destructor)bytes_dealloc, /* tp_dealloc */
3230 0, /* tp_print */
3231 0, /* tp_getattr */
3232 0, /* tp_setattr */
3233 0, /* tp_compare */
3234 (reprfunc)bytes_repr, /* tp_repr */
3235 0, /* tp_as_number */
3236 &bytes_as_sequence, /* tp_as_sequence */
3237 &bytes_as_mapping, /* tp_as_mapping */
3238 0, /* tp_hash */
3239 0, /* tp_call */
3240 bytes_str, /* tp_str */
3241 PyObject_GenericGetAttr, /* tp_getattro */
3242 0, /* tp_setattro */
3243 &bytes_as_buffer, /* tp_as_buffer */
3244 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3245 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3246 bytes_doc, /* tp_doc */
3247 0, /* tp_traverse */
3248 0, /* tp_clear */
3249 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3250 0, /* tp_weaklistoffset */
3251 bytes_iter, /* tp_iter */
3252 0, /* tp_iternext */
3253 bytes_methods, /* tp_methods */
3254 0, /* tp_members */
3255 0, /* tp_getset */
3256 0, /* tp_base */
3257 0, /* tp_dict */
3258 0, /* tp_descr_get */
3259 0, /* tp_descr_set */
3260 0, /* tp_dictoffset */
3261 (initproc)bytes_init, /* tp_init */
3262 PyType_GenericAlloc, /* tp_alloc */
3263 PyType_GenericNew, /* tp_new */
3264 PyObject_Del, /* tp_free */
3267 /*********************** Bytes Iterator ****************************/
3269 typedef struct {
3270 PyObject_HEAD
3271 Py_ssize_t it_index;
3272 PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
3273 } bytesiterobject;
3275 static void
3276 bytesiter_dealloc(bytesiterobject *it)
3278 _PyObject_GC_UNTRACK(it);
3279 Py_XDECREF(it->it_seq);
3280 PyObject_GC_Del(it);
3283 static int
3284 bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
3286 Py_VISIT(it->it_seq);
3287 return 0;
3290 static PyObject *
3291 bytesiter_next(bytesiterobject *it)
3293 PyByteArrayObject *seq;
3294 PyObject *item;
3296 assert(it != NULL);
3297 seq = it->it_seq;
3298 if (seq == NULL)
3299 return NULL;
3300 assert(PyByteArray_Check(seq));
3302 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
3303 item = PyInt_FromLong(
3304 (unsigned char)seq->ob_bytes[it->it_index]);
3305 if (item != NULL)
3306 ++it->it_index;
3307 return item;
3310 Py_DECREF(seq);
3311 it->it_seq = NULL;
3312 return NULL;
3315 static PyObject *
3316 bytesiter_length_hint(bytesiterobject *it)
3318 Py_ssize_t len = 0;
3319 if (it->it_seq)
3320 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3321 return PyInt_FromSsize_t(len);
3324 PyDoc_STRVAR(length_hint_doc,
3325 "Private method returning an estimate of len(list(it)).");
3327 static PyMethodDef bytesiter_methods[] = {
3328 {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
3329 length_hint_doc},
3330 {NULL, NULL} /* sentinel */
3333 PyTypeObject PyByteArrayIter_Type = {
3334 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3335 "bytearray_iterator", /* tp_name */
3336 sizeof(bytesiterobject), /* tp_basicsize */
3337 0, /* tp_itemsize */
3338 /* methods */
3339 (destructor)bytesiter_dealloc, /* tp_dealloc */
3340 0, /* tp_print */
3341 0, /* tp_getattr */
3342 0, /* tp_setattr */
3343 0, /* tp_compare */
3344 0, /* tp_repr */
3345 0, /* tp_as_number */
3346 0, /* tp_as_sequence */
3347 0, /* tp_as_mapping */
3348 0, /* tp_hash */
3349 0, /* tp_call */
3350 0, /* tp_str */
3351 PyObject_GenericGetAttr, /* tp_getattro */
3352 0, /* tp_setattro */
3353 0, /* tp_as_buffer */
3354 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3355 0, /* tp_doc */
3356 (traverseproc)bytesiter_traverse, /* tp_traverse */
3357 0, /* tp_clear */
3358 0, /* tp_richcompare */
3359 0, /* tp_weaklistoffset */
3360 PyObject_SelfIter, /* tp_iter */
3361 (iternextfunc)bytesiter_next, /* tp_iternext */
3362 bytesiter_methods, /* tp_methods */
3366 static PyObject *
3367 bytes_iter(PyObject *seq)
3369 bytesiterobject *it;
3371 if (!PyByteArray_Check(seq)) {
3372 PyErr_BadInternalCall();
3373 return NULL;
3375 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3376 if (it == NULL)
3377 return NULL;
3378 it->it_index = 0;
3379 Py_INCREF(seq);
3380 it->it_seq = (PyByteArrayObject *)seq;
3381 _PyObject_GC_TRACK(it);
3382 return (PyObject *)it;