Updated with fix for #3126.
[python.git] / Objects / bytearrayobject.c
blob6e5df19e689386256d63bef59691a7cf989cce40
1 /* PyBytes (bytearray) implementation */
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
6 #include "bytes_methods.h"
8 static PyByteArrayObject *nullbytes = NULL;
10 void
11 PyByteArray_Fini(void)
13 Py_CLEAR(nullbytes);
16 int
17 PyByteArray_Init(void)
19 nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24 nullbytes->ob_exports = 0;
25 return 1;
28 /* end nullbytes support */
30 /* Helpers */
32 static int
33 _getbytevalue(PyObject* arg, int *value)
35 long face_value;
37 if (PyInt_Check(arg)) {
38 face_value = PyInt_AsLong(arg);
39 if (face_value < 0 || face_value >= 256) {
40 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
41 return 0;
44 else if (PyBytes_CheckExact(arg)) {
45 if (Py_SIZE(arg) != 1) {
46 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
47 return 0;
49 face_value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
51 else {
52 PyErr_Format(PyExc_TypeError, "an integer or string of size 1 is required");
53 return 0;
56 *value = face_value;
57 return 1;
60 static Py_ssize_t
61 bytes_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
63 if ( index != 0 ) {
64 PyErr_SetString(PyExc_SystemError,
65 "accessing non-existent bytes segment");
66 return -1;
68 *ptr = (void *)self->ob_bytes;
69 return Py_SIZE(self);
72 static Py_ssize_t
73 bytes_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
75 if ( index != 0 ) {
76 PyErr_SetString(PyExc_SystemError,
77 "accessing non-existent bytes segment");
78 return -1;
80 *ptr = (void *)self->ob_bytes;
81 return Py_SIZE(self);
84 static Py_ssize_t
85 bytes_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
87 if ( lenp )
88 *lenp = Py_SIZE(self);
89 return 1;
92 static Py_ssize_t
93 bytes_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
95 if ( index != 0 ) {
96 PyErr_SetString(PyExc_SystemError,
97 "accessing non-existent bytes segment");
98 return -1;
100 *ptr = self->ob_bytes;
101 return Py_SIZE(self);
104 static int
105 bytes_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
107 int ret;
108 void *ptr;
109 if (view == NULL) {
110 obj->ob_exports++;
111 return 0;
113 if (obj->ob_bytes == NULL)
114 ptr = "";
115 else
116 ptr = obj->ob_bytes;
117 ret = PyBuffer_FillInfo(view, ptr, Py_SIZE(obj), 0, flags);
118 if (ret >= 0) {
119 obj->ob_exports++;
121 return ret;
124 static void
125 bytes_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
127 obj->ob_exports--;
130 static Py_ssize_t
131 _getbuffer(PyObject *obj, Py_buffer *view)
133 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
135 if (buffer == NULL || buffer->bf_getbuffer == NULL)
137 PyErr_Format(PyExc_TypeError,
138 "Type %.100s doesn't support the buffer API",
139 Py_TYPE(obj)->tp_name);
140 return -1;
143 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
144 return -1;
145 return view->len;
148 /* Direct API functions */
150 PyObject *
151 PyByteArray_FromObject(PyObject *input)
153 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
154 input, NULL);
157 PyObject *
158 PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
160 PyByteArrayObject *new;
161 Py_ssize_t alloc;
163 if (size < 0) {
164 PyErr_SetString(PyExc_SystemError,
165 "Negative size passed to PyByteArray_FromStringAndSize");
166 return NULL;
169 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
170 if (new == NULL)
171 return NULL;
173 if (size == 0) {
174 new->ob_bytes = NULL;
175 alloc = 0;
177 else {
178 alloc = size + 1;
179 new->ob_bytes = PyMem_Malloc(alloc);
180 if (new->ob_bytes == NULL) {
181 Py_DECREF(new);
182 return PyErr_NoMemory();
184 if (bytes != NULL)
185 memcpy(new->ob_bytes, bytes, size);
186 new->ob_bytes[size] = '\0'; /* Trailing null byte */
188 Py_SIZE(new) = size;
189 new->ob_alloc = alloc;
190 new->ob_exports = 0;
192 return (PyObject *)new;
195 Py_ssize_t
196 PyByteArray_Size(PyObject *self)
198 assert(self != NULL);
199 assert(PyByteArray_Check(self));
201 return PyByteArray_GET_SIZE(self);
204 char *
205 PyByteArray_AsString(PyObject *self)
207 assert(self != NULL);
208 assert(PyByteArray_Check(self));
210 return PyByteArray_AS_STRING(self);
214 PyByteArray_Resize(PyObject *self, Py_ssize_t size)
216 void *sval;
217 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
219 assert(self != NULL);
220 assert(PyByteArray_Check(self));
221 assert(size >= 0);
223 if (size < alloc / 2) {
224 /* Major downsize; resize down to exact size */
225 alloc = size + 1;
227 else if (size < alloc) {
228 /* Within allocated size; quick exit */
229 Py_SIZE(self) = size;
230 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
231 return 0;
233 else if (size <= alloc * 1.125) {
234 /* Moderate upsize; overallocate similar to list_resize() */
235 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
237 else {
238 /* Major upsize; resize up to exact size */
239 alloc = size + 1;
242 if (((PyByteArrayObject *)self)->ob_exports > 0) {
244 fprintf(stderr, "%d: %s", ((PyByteArrayObject *)self)->ob_exports,
245 ((PyByteArrayObject *)self)->ob_bytes);
247 PyErr_SetString(PyExc_BufferError,
248 "Existing exports of data: object cannot be re-sized");
249 return -1;
252 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
253 if (sval == NULL) {
254 PyErr_NoMemory();
255 return -1;
258 ((PyByteArrayObject *)self)->ob_bytes = sval;
259 Py_SIZE(self) = size;
260 ((PyByteArrayObject *)self)->ob_alloc = alloc;
261 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
263 return 0;
266 PyObject *
267 PyByteArray_Concat(PyObject *a, PyObject *b)
269 Py_ssize_t size;
270 Py_buffer va, vb;
271 PyByteArrayObject *result = NULL;
273 va.len = -1;
274 vb.len = -1;
275 if (_getbuffer(a, &va) < 0 ||
276 _getbuffer(b, &vb) < 0) {
277 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
278 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
279 goto done;
282 size = va.len + vb.len;
283 if (size < 0) {
284 return PyErr_NoMemory();
285 goto done;
288 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
289 if (result != NULL) {
290 memcpy(result->ob_bytes, va.buf, va.len);
291 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
294 done:
295 if (va.len != -1)
296 PyObject_ReleaseBuffer(a, &va);
297 if (vb.len != -1)
298 PyObject_ReleaseBuffer(b, &vb);
299 return (PyObject *)result;
302 /* Functions stuffed into the type object */
304 static Py_ssize_t
305 bytes_length(PyByteArrayObject *self)
307 return Py_SIZE(self);
310 static PyObject *
311 bytes_iconcat(PyByteArrayObject *self, PyObject *other)
313 Py_ssize_t mysize;
314 Py_ssize_t size;
315 Py_buffer vo;
317 if (_getbuffer(other, &vo) < 0) {
318 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
319 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
320 return NULL;
323 mysize = Py_SIZE(self);
324 size = mysize + vo.len;
325 if (size < 0) {
326 PyObject_ReleaseBuffer(other, &vo);
327 return PyErr_NoMemory();
329 if (size < self->ob_alloc) {
330 Py_SIZE(self) = size;
331 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
333 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
334 PyObject_ReleaseBuffer(other, &vo);
335 return NULL;
337 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
338 PyObject_ReleaseBuffer(other, &vo);
339 Py_INCREF(self);
340 return (PyObject *)self;
343 static PyObject *
344 bytes_repeat(PyByteArrayObject *self, Py_ssize_t count)
346 PyByteArrayObject *result;
347 Py_ssize_t mysize;
348 Py_ssize_t size;
350 if (count < 0)
351 count = 0;
352 mysize = Py_SIZE(self);
353 size = mysize * count;
354 if (count != 0 && size / count != mysize)
355 return PyErr_NoMemory();
356 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
357 if (result != NULL && size != 0) {
358 if (mysize == 1)
359 memset(result->ob_bytes, self->ob_bytes[0], size);
360 else {
361 Py_ssize_t i;
362 for (i = 0; i < count; i++)
363 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
366 return (PyObject *)result;
369 static PyObject *
370 bytes_irepeat(PyByteArrayObject *self, Py_ssize_t count)
372 Py_ssize_t mysize;
373 Py_ssize_t size;
375 if (count < 0)
376 count = 0;
377 mysize = Py_SIZE(self);
378 size = mysize * count;
379 if (count != 0 && size / count != mysize)
380 return PyErr_NoMemory();
381 if (size < self->ob_alloc) {
382 Py_SIZE(self) = size;
383 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
385 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
386 return NULL;
388 if (mysize == 1)
389 memset(self->ob_bytes, self->ob_bytes[0], size);
390 else {
391 Py_ssize_t i;
392 for (i = 1; i < count; i++)
393 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
396 Py_INCREF(self);
397 return (PyObject *)self;
400 static PyObject *
401 bytes_getitem(PyByteArrayObject *self, Py_ssize_t i)
403 if (i < 0)
404 i += Py_SIZE(self);
405 if (i < 0 || i >= Py_SIZE(self)) {
406 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
407 return NULL;
409 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
412 static PyObject *
413 bytes_subscript(PyByteArrayObject *self, PyObject *item)
415 if (PyIndex_Check(item)) {
416 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
418 if (i == -1 && PyErr_Occurred())
419 return NULL;
421 if (i < 0)
422 i += PyByteArray_GET_SIZE(self);
424 if (i < 0 || i >= Py_SIZE(self)) {
425 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
426 return NULL;
428 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
430 else if (PySlice_Check(item)) {
431 Py_ssize_t start, stop, step, slicelength, cur, i;
432 if (PySlice_GetIndicesEx((PySliceObject *)item,
433 PyByteArray_GET_SIZE(self),
434 &start, &stop, &step, &slicelength) < 0) {
435 return NULL;
438 if (slicelength <= 0)
439 return PyByteArray_FromStringAndSize("", 0);
440 else if (step == 1) {
441 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
442 slicelength);
444 else {
445 char *source_buf = PyByteArray_AS_STRING(self);
446 char *result_buf = (char *)PyMem_Malloc(slicelength);
447 PyObject *result;
449 if (result_buf == NULL)
450 return PyErr_NoMemory();
452 for (cur = start, i = 0; i < slicelength;
453 cur += step, i++) {
454 result_buf[i] = source_buf[cur];
456 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
457 PyMem_Free(result_buf);
458 return result;
461 else {
462 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
463 return NULL;
467 static int
468 bytes_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
469 PyObject *values)
471 Py_ssize_t avail, needed;
472 void *bytes;
473 Py_buffer vbytes;
474 int res = 0;
476 vbytes.len = -1;
477 if (values == (PyObject *)self) {
478 /* Make a copy and call this function recursively */
479 int err;
480 values = PyByteArray_FromObject(values);
481 if (values == NULL)
482 return -1;
483 err = bytes_setslice(self, lo, hi, values);
484 Py_DECREF(values);
485 return err;
487 if (values == NULL) {
488 /* del b[lo:hi] */
489 bytes = NULL;
490 needed = 0;
492 else {
493 if (_getbuffer(values, &vbytes) < 0) {
494 PyErr_Format(PyExc_TypeError,
495 "can't set bytes slice from %.100s",
496 Py_TYPE(values)->tp_name);
497 return -1;
499 needed = vbytes.len;
500 bytes = vbytes.buf;
503 if (lo < 0)
504 lo = 0;
505 if (hi < lo)
506 hi = lo;
507 if (hi > Py_SIZE(self))
508 hi = Py_SIZE(self);
510 avail = hi - lo;
511 if (avail < 0)
512 lo = hi = avail = 0;
514 if (avail != needed) {
515 if (avail > needed) {
517 0 lo hi old_size
518 | |<----avail----->|<-----tomove------>|
519 | |<-needed->|<-----tomove------>|
520 0 lo new_hi new_size
522 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
523 Py_SIZE(self) - hi);
525 /* XXX(nnorwitz): need to verify this can't overflow! */
526 if (PyByteArray_Resize((PyObject *)self,
527 Py_SIZE(self) + needed - avail) < 0) {
528 res = -1;
529 goto finish;
531 if (avail < needed) {
533 0 lo hi old_size
534 | |<-avail->|<-----tomove------>|
535 | |<----needed---->|<-----tomove------>|
536 0 lo new_hi new_size
538 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
539 Py_SIZE(self) - lo - needed);
543 if (needed > 0)
544 memcpy(self->ob_bytes + lo, bytes, needed);
547 finish:
548 if (vbytes.len != -1)
549 PyObject_ReleaseBuffer(values, &vbytes);
550 return res;
553 static int
554 bytes_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
556 int ival;
558 if (i < 0)
559 i += Py_SIZE(self);
561 if (i < 0 || i >= Py_SIZE(self)) {
562 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
563 return -1;
566 if (value == NULL)
567 return bytes_setslice(self, i, i+1, NULL);
569 if (!_getbytevalue(value, &ival))
570 return -1;
572 self->ob_bytes[i] = ival;
573 return 0;
576 static int
577 bytes_ass_subscript(PyByteArrayObject *self, PyObject *item, PyObject *values)
579 Py_ssize_t start, stop, step, slicelen, needed;
580 char *bytes;
582 if (PyIndex_Check(item)) {
583 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
585 if (i == -1 && PyErr_Occurred())
586 return -1;
588 if (i < 0)
589 i += PyByteArray_GET_SIZE(self);
591 if (i < 0 || i >= Py_SIZE(self)) {
592 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
593 return -1;
596 if (values == NULL) {
597 /* Fall through to slice assignment */
598 start = i;
599 stop = i + 1;
600 step = 1;
601 slicelen = 1;
603 else {
604 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
605 if (ival == -1 && PyErr_Occurred()) {
606 int int_value;
607 /* Also accept str of size 1 in 2.x */
608 PyErr_Clear();
609 if (!_getbytevalue(values, &int_value))
610 return -1;
611 ival = (int) int_value;
612 } else if (ival < 0 || ival >= 256) {
613 PyErr_SetString(PyExc_ValueError,
614 "byte must be in range(0, 256)");
615 return -1;
617 self->ob_bytes[i] = (char)ival;
618 return 0;
621 else if (PySlice_Check(item)) {
622 if (PySlice_GetIndicesEx((PySliceObject *)item,
623 PyByteArray_GET_SIZE(self),
624 &start, &stop, &step, &slicelen) < 0) {
625 return -1;
628 else {
629 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
630 return -1;
633 if (values == NULL) {
634 bytes = NULL;
635 needed = 0;
637 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
638 /* Make a copy an call this function recursively */
639 int err;
640 values = PyByteArray_FromObject(values);
641 if (values == NULL)
642 return -1;
643 err = bytes_ass_subscript(self, item, values);
644 Py_DECREF(values);
645 return err;
647 else {
648 assert(PyByteArray_Check(values));
649 bytes = ((PyByteArrayObject *)values)->ob_bytes;
650 needed = Py_SIZE(values);
652 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
653 if ((step < 0 && start < stop) ||
654 (step > 0 && start > stop))
655 stop = start;
656 if (step == 1) {
657 if (slicelen != needed) {
658 if (slicelen > needed) {
660 0 start stop old_size
661 | |<---slicelen--->|<-----tomove------>|
662 | |<-needed->|<-----tomove------>|
663 0 lo new_hi new_size
665 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
666 Py_SIZE(self) - stop);
668 if (PyByteArray_Resize((PyObject *)self,
669 Py_SIZE(self) + needed - slicelen) < 0)
670 return -1;
671 if (slicelen < needed) {
673 0 lo hi old_size
674 | |<-avail->|<-----tomove------>|
675 | |<----needed---->|<-----tomove------>|
676 0 lo new_hi new_size
678 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
679 Py_SIZE(self) - start - needed);
683 if (needed > 0)
684 memcpy(self->ob_bytes + start, bytes, needed);
686 return 0;
688 else {
689 if (needed == 0) {
690 /* Delete slice */
691 Py_ssize_t cur, i;
693 if (step < 0) {
694 stop = start + 1;
695 start = stop + step * (slicelen - 1) - 1;
696 step = -step;
698 for (cur = start, i = 0;
699 i < slicelen; cur += step, i++) {
700 Py_ssize_t lim = step - 1;
702 if (cur + step >= PyByteArray_GET_SIZE(self))
703 lim = PyByteArray_GET_SIZE(self) - cur - 1;
705 memmove(self->ob_bytes + cur - i,
706 self->ob_bytes + cur + 1, lim);
708 /* Move the tail of the bytes, in one chunk */
709 cur = start + slicelen*step;
710 if (cur < PyByteArray_GET_SIZE(self)) {
711 memmove(self->ob_bytes + cur - slicelen,
712 self->ob_bytes + cur,
713 PyByteArray_GET_SIZE(self) - cur);
715 if (PyByteArray_Resize((PyObject *)self,
716 PyByteArray_GET_SIZE(self) - slicelen) < 0)
717 return -1;
719 return 0;
721 else {
722 /* Assign slice */
723 Py_ssize_t cur, i;
725 if (needed != slicelen) {
726 PyErr_Format(PyExc_ValueError,
727 "attempt to assign bytes of size %zd "
728 "to extended slice of size %zd",
729 needed, slicelen);
730 return -1;
732 for (cur = start, i = 0; i < slicelen; cur += step, i++)
733 self->ob_bytes[cur] = bytes[i];
734 return 0;
739 static int
740 bytes_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
742 static char *kwlist[] = {"source", "encoding", "errors", 0};
743 PyObject *arg = NULL;
744 const char *encoding = NULL;
745 const char *errors = NULL;
746 Py_ssize_t count;
747 PyObject *it;
748 PyObject *(*iternext)(PyObject *);
750 if (Py_SIZE(self) != 0) {
751 /* Empty previous contents (yes, do this first of all!) */
752 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
753 return -1;
756 /* Parse arguments */
757 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
758 &arg, &encoding, &errors))
759 return -1;
761 /* Make a quick exit if no first argument */
762 if (arg == NULL) {
763 if (encoding != NULL || errors != NULL) {
764 PyErr_SetString(PyExc_TypeError,
765 "encoding or errors without sequence argument");
766 return -1;
768 return 0;
771 if (PyBytes_Check(arg)) {
772 PyObject *new, *encoded;
773 if (encoding != NULL) {
774 encoded = PyCodec_Encode(arg, encoding, errors);
775 if (encoded == NULL)
776 return -1;
777 assert(PyBytes_Check(encoded));
779 else {
780 encoded = arg;
781 Py_INCREF(arg);
783 new = bytes_iconcat(self, arg);
784 Py_DECREF(encoded);
785 if (new == NULL)
786 return -1;
787 Py_DECREF(new);
788 return 0;
791 if (PyUnicode_Check(arg)) {
792 /* Encode via the codec registry */
793 PyObject *encoded, *new;
794 if (encoding == NULL) {
795 PyErr_SetString(PyExc_TypeError,
796 "unicode argument without an encoding");
797 return -1;
799 encoded = PyCodec_Encode(arg, encoding, errors);
800 if (encoded == NULL)
801 return -1;
802 assert(PyBytes_Check(encoded));
803 new = bytes_iconcat(self, encoded);
804 Py_DECREF(encoded);
805 if (new == NULL)
806 return -1;
807 Py_DECREF(new);
808 return 0;
811 /* If it's not unicode, there can't be encoding or errors */
812 if (encoding != NULL || errors != NULL) {
813 PyErr_SetString(PyExc_TypeError,
814 "encoding or errors without a string argument");
815 return -1;
818 /* Is it an int? */
819 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
820 if (count == -1 && PyErr_Occurred())
821 PyErr_Clear();
822 else {
823 if (count < 0) {
824 PyErr_SetString(PyExc_ValueError, "negative count");
825 return -1;
827 if (count > 0) {
828 if (PyByteArray_Resize((PyObject *)self, count))
829 return -1;
830 memset(self->ob_bytes, 0, count);
832 return 0;
835 /* Use the buffer API */
836 if (PyObject_CheckBuffer(arg)) {
837 Py_ssize_t size;
838 Py_buffer view;
839 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
840 return -1;
841 size = view.len;
842 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
843 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
844 goto fail;
845 PyObject_ReleaseBuffer(arg, &view);
846 return 0;
847 fail:
848 PyObject_ReleaseBuffer(arg, &view);
849 return -1;
852 /* XXX Optimize this if the arguments is a list, tuple */
854 /* Get the iterator */
855 it = PyObject_GetIter(arg);
856 if (it == NULL)
857 return -1;
858 iternext = *Py_TYPE(it)->tp_iternext;
860 /* Run the iterator to exhaustion */
861 for (;;) {
862 PyObject *item;
863 Py_ssize_t value;
865 /* Get the next item */
866 item = iternext(it);
867 if (item == NULL) {
868 if (PyErr_Occurred()) {
869 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
870 goto error;
871 PyErr_Clear();
873 break;
876 /* Interpret it as an int (__index__) */
877 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
878 Py_DECREF(item);
879 if (value == -1 && PyErr_Occurred())
880 goto error;
882 /* Range check */
883 if (value < 0 || value >= 256) {
884 PyErr_SetString(PyExc_ValueError,
885 "bytes must be in range(0, 256)");
886 goto error;
889 /* Append the byte */
890 if (Py_SIZE(self) < self->ob_alloc)
891 Py_SIZE(self)++;
892 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
893 goto error;
894 self->ob_bytes[Py_SIZE(self)-1] = value;
897 /* Clean up and return success */
898 Py_DECREF(it);
899 return 0;
901 error:
902 /* Error handling when it != NULL */
903 Py_DECREF(it);
904 return -1;
907 /* Mostly copied from string_repr, but without the
908 "smart quote" functionality. */
909 static PyObject *
910 bytes_repr(PyByteArrayObject *self)
912 static const char *hexdigits = "0123456789abcdef";
913 const char *quote_prefix = "bytearray(b";
914 const char *quote_postfix = ")";
915 Py_ssize_t length = Py_SIZE(self);
916 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
917 size_t newsize = 14 + 4 * length;
918 PyObject *v;
919 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
920 PyErr_SetString(PyExc_OverflowError,
921 "bytearray object is too large to make repr");
922 return NULL;
924 v = PyUnicode_FromUnicode(NULL, newsize);
925 if (v == NULL) {
926 return NULL;
928 else {
929 register Py_ssize_t i;
930 register Py_UNICODE c;
931 register Py_UNICODE *p;
932 int quote;
934 /* Figure out which quote to use; single is preferred */
935 quote = '\'';
937 char *test, *start;
938 start = PyByteArray_AS_STRING(self);
939 for (test = start; test < start+length; ++test) {
940 if (*test == '"') {
941 quote = '\''; /* back to single */
942 goto decided;
944 else if (*test == '\'')
945 quote = '"';
947 decided:
951 p = PyUnicode_AS_UNICODE(v);
952 while (*quote_prefix)
953 *p++ = *quote_prefix++;
954 *p++ = quote;
956 for (i = 0; i < length; i++) {
957 /* There's at least enough room for a hex escape
958 and a closing quote. */
959 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
960 c = self->ob_bytes[i];
961 if (c == '\'' || c == '\\')
962 *p++ = '\\', *p++ = c;
963 else if (c == '\t')
964 *p++ = '\\', *p++ = 't';
965 else if (c == '\n')
966 *p++ = '\\', *p++ = 'n';
967 else if (c == '\r')
968 *p++ = '\\', *p++ = 'r';
969 else if (c == 0)
970 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
971 else if (c < ' ' || c >= 0x7f) {
972 *p++ = '\\';
973 *p++ = 'x';
974 *p++ = hexdigits[(c & 0xf0) >> 4];
975 *p++ = hexdigits[c & 0xf];
977 else
978 *p++ = c;
980 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
981 *p++ = quote;
982 while (*quote_postfix) {
983 *p++ = *quote_postfix++;
985 *p = '\0';
986 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
987 Py_DECREF(v);
988 return NULL;
990 return v;
994 static PyObject *
995 bytes_str(PyObject *op)
997 #if 0
998 if (Py_BytesWarningFlag) {
999 if (PyErr_WarnEx(PyExc_BytesWarning,
1000 "str() on a bytearray instance", 1))
1001 return NULL;
1003 return bytes_repr((PyByteArrayObject*)op);
1004 #endif
1005 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
1008 static PyObject *
1009 bytes_richcompare(PyObject *self, PyObject *other, int op)
1011 Py_ssize_t self_size, other_size;
1012 Py_buffer self_bytes, other_bytes;
1013 PyObject *res;
1014 Py_ssize_t minsize;
1015 int cmp;
1017 /* Bytes can be compared to anything that supports the (binary)
1018 buffer API. Except that a comparison with Unicode is always an
1019 error, even if the comparison is for equality. */
1020 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1021 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1022 if (Py_BytesWarningFlag && op == Py_EQ) {
1023 if (PyErr_WarnEx(PyExc_BytesWarning,
1024 "Comparsion between bytearray and string", 1))
1025 return NULL;
1028 Py_INCREF(Py_NotImplemented);
1029 return Py_NotImplemented;
1032 self_size = _getbuffer(self, &self_bytes);
1033 if (self_size < 0) {
1034 PyErr_Clear();
1035 Py_INCREF(Py_NotImplemented);
1036 return Py_NotImplemented;
1039 other_size = _getbuffer(other, &other_bytes);
1040 if (other_size < 0) {
1041 PyErr_Clear();
1042 PyObject_ReleaseBuffer(self, &self_bytes);
1043 Py_INCREF(Py_NotImplemented);
1044 return Py_NotImplemented;
1047 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1048 /* Shortcut: if the lengths differ, the objects differ */
1049 cmp = (op == Py_NE);
1051 else {
1052 minsize = self_size;
1053 if (other_size < minsize)
1054 minsize = other_size;
1056 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1057 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1059 if (cmp == 0) {
1060 if (self_size < other_size)
1061 cmp = -1;
1062 else if (self_size > other_size)
1063 cmp = 1;
1066 switch (op) {
1067 case Py_LT: cmp = cmp < 0; break;
1068 case Py_LE: cmp = cmp <= 0; break;
1069 case Py_EQ: cmp = cmp == 0; break;
1070 case Py_NE: cmp = cmp != 0; break;
1071 case Py_GT: cmp = cmp > 0; break;
1072 case Py_GE: cmp = cmp >= 0; break;
1076 res = cmp ? Py_True : Py_False;
1077 PyObject_ReleaseBuffer(self, &self_bytes);
1078 PyObject_ReleaseBuffer(other, &other_bytes);
1079 Py_INCREF(res);
1080 return res;
1083 static void
1084 bytes_dealloc(PyByteArrayObject *self)
1086 if (self->ob_bytes != 0) {
1087 PyMem_Free(self->ob_bytes);
1089 Py_TYPE(self)->tp_free((PyObject *)self);
1093 /* -------------------------------------------------------------------- */
1094 /* Methods */
1096 #define STRINGLIB_CHAR char
1097 #define STRINGLIB_CMP memcmp
1098 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1099 #define STRINGLIB_STR PyByteArray_AS_STRING
1100 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1101 #define STRINGLIB_EMPTY nullbytes
1102 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1103 #define STRINGLIB_MUTABLE 1
1105 #include "stringlib/fastsearch.h"
1106 #include "stringlib/count.h"
1107 #include "stringlib/find.h"
1108 #include "stringlib/partition.h"
1109 #include "stringlib/ctype.h"
1110 #include "stringlib/transmogrify.h"
1113 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1114 were copied from the old char* style string object. */
1116 Py_LOCAL_INLINE(void)
1117 _adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1119 if (*end > len)
1120 *end = len;
1121 else if (*end < 0)
1122 *end += len;
1123 if (*end < 0)
1124 *end = 0;
1125 if (*start < 0)
1126 *start += len;
1127 if (*start < 0)
1128 *start = 0;
1132 Py_LOCAL_INLINE(Py_ssize_t)
1133 bytes_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1135 PyObject *subobj;
1136 Py_buffer subbuf;
1137 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1138 Py_ssize_t res;
1140 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1141 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1142 return -2;
1143 if (_getbuffer(subobj, &subbuf) < 0)
1144 return -2;
1145 if (dir > 0)
1146 res = stringlib_find_slice(
1147 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1148 subbuf.buf, subbuf.len, start, end);
1149 else
1150 res = stringlib_rfind_slice(
1151 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1152 subbuf.buf, subbuf.len, start, end);
1153 PyObject_ReleaseBuffer(subobj, &subbuf);
1154 return res;
1157 PyDoc_STRVAR(find__doc__,
1158 "B.find(sub [,start [,end]]) -> int\n\
1160 Return the lowest index in B where subsection sub is found,\n\
1161 such that sub is contained within s[start,end]. Optional\n\
1162 arguments start and end are interpreted as in slice notation.\n\
1164 Return -1 on failure.");
1166 static PyObject *
1167 bytes_find(PyByteArrayObject *self, PyObject *args)
1169 Py_ssize_t result = bytes_find_internal(self, args, +1);
1170 if (result == -2)
1171 return NULL;
1172 return PyInt_FromSsize_t(result);
1175 PyDoc_STRVAR(count__doc__,
1176 "B.count(sub [,start [,end]]) -> int\n\
1178 Return the number of non-overlapping occurrences of subsection sub in\n\
1179 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1180 as in slice notation.");
1182 static PyObject *
1183 bytes_count(PyByteArrayObject *self, PyObject *args)
1185 PyObject *sub_obj;
1186 const char *str = PyByteArray_AS_STRING(self);
1187 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1188 Py_buffer vsub;
1189 PyObject *count_obj;
1191 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1192 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1193 return NULL;
1195 if (_getbuffer(sub_obj, &vsub) < 0)
1196 return NULL;
1198 _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
1200 count_obj = PyInt_FromSsize_t(
1201 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1203 PyObject_ReleaseBuffer(sub_obj, &vsub);
1204 return count_obj;
1208 PyDoc_STRVAR(index__doc__,
1209 "B.index(sub [,start [,end]]) -> int\n\
1211 Like B.find() but raise ValueError when the subsection is not found.");
1213 static PyObject *
1214 bytes_index(PyByteArrayObject *self, PyObject *args)
1216 Py_ssize_t result = bytes_find_internal(self, args, +1);
1217 if (result == -2)
1218 return NULL;
1219 if (result == -1) {
1220 PyErr_SetString(PyExc_ValueError,
1221 "subsection not found");
1222 return NULL;
1224 return PyInt_FromSsize_t(result);
1228 PyDoc_STRVAR(rfind__doc__,
1229 "B.rfind(sub [,start [,end]]) -> int\n\
1231 Return the highest index in B where subsection sub is found,\n\
1232 such that sub is contained within s[start,end]. Optional\n\
1233 arguments start and end are interpreted as in slice notation.\n\
1235 Return -1 on failure.");
1237 static PyObject *
1238 bytes_rfind(PyByteArrayObject *self, PyObject *args)
1240 Py_ssize_t result = bytes_find_internal(self, args, -1);
1241 if (result == -2)
1242 return NULL;
1243 return PyInt_FromSsize_t(result);
1247 PyDoc_STRVAR(rindex__doc__,
1248 "B.rindex(sub [,start [,end]]) -> int\n\
1250 Like B.rfind() but raise ValueError when the subsection is not found.");
1252 static PyObject *
1253 bytes_rindex(PyByteArrayObject *self, PyObject *args)
1255 Py_ssize_t result = bytes_find_internal(self, args, -1);
1256 if (result == -2)
1257 return NULL;
1258 if (result == -1) {
1259 PyErr_SetString(PyExc_ValueError,
1260 "subsection not found");
1261 return NULL;
1263 return PyInt_FromSsize_t(result);
1267 static int
1268 bytes_contains(PyObject *self, PyObject *arg)
1270 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1271 if (ival == -1 && PyErr_Occurred()) {
1272 Py_buffer varg;
1273 int pos;
1274 PyErr_Clear();
1275 if (_getbuffer(arg, &varg) < 0)
1276 return -1;
1277 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1278 varg.buf, varg.len, 0);
1279 PyObject_ReleaseBuffer(arg, &varg);
1280 return pos >= 0;
1282 if (ival < 0 || ival >= 256) {
1283 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1284 return -1;
1287 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1291 /* Matches the end (direction >= 0) or start (direction < 0) of self
1292 * against substr, using the start and end arguments. Returns
1293 * -1 on error, 0 if not found and 1 if found.
1295 Py_LOCAL(int)
1296 _bytes_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1297 Py_ssize_t end, int direction)
1299 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1300 const char* str;
1301 Py_buffer vsubstr;
1302 int rv = 0;
1304 str = PyByteArray_AS_STRING(self);
1306 if (_getbuffer(substr, &vsubstr) < 0)
1307 return -1;
1309 _adjust_indices(&start, &end, len);
1311 if (direction < 0) {
1312 /* startswith */
1313 if (start+vsubstr.len > len) {
1314 goto done;
1316 } else {
1317 /* endswith */
1318 if (end-start < vsubstr.len || start > len) {
1319 goto done;
1322 if (end-vsubstr.len > start)
1323 start = end - vsubstr.len;
1325 if (end-start >= vsubstr.len)
1326 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1328 done:
1329 PyObject_ReleaseBuffer(substr, &vsubstr);
1330 return rv;
1334 PyDoc_STRVAR(startswith__doc__,
1335 "B.startswith(prefix [,start [,end]]) -> bool\n\
1337 Return True if B starts with the specified prefix, False otherwise.\n\
1338 With optional start, test B beginning at that position.\n\
1339 With optional end, stop comparing B at that position.\n\
1340 prefix can also be a tuple of strings to try.");
1342 static PyObject *
1343 bytes_startswith(PyByteArrayObject *self, PyObject *args)
1345 Py_ssize_t start = 0;
1346 Py_ssize_t end = PY_SSIZE_T_MAX;
1347 PyObject *subobj;
1348 int result;
1350 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1351 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1352 return NULL;
1353 if (PyTuple_Check(subobj)) {
1354 Py_ssize_t i;
1355 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1356 result = _bytes_tailmatch(self,
1357 PyTuple_GET_ITEM(subobj, i),
1358 start, end, -1);
1359 if (result == -1)
1360 return NULL;
1361 else if (result) {
1362 Py_RETURN_TRUE;
1365 Py_RETURN_FALSE;
1367 result = _bytes_tailmatch(self, subobj, start, end, -1);
1368 if (result == -1)
1369 return NULL;
1370 else
1371 return PyBool_FromLong(result);
1374 PyDoc_STRVAR(endswith__doc__,
1375 "B.endswith(suffix [,start [,end]]) -> bool\n\
1377 Return True if B ends with the specified suffix, False otherwise.\n\
1378 With optional start, test B beginning at that position.\n\
1379 With optional end, stop comparing B at that position.\n\
1380 suffix can also be a tuple of strings to try.");
1382 static PyObject *
1383 bytes_endswith(PyByteArrayObject *self, PyObject *args)
1385 Py_ssize_t start = 0;
1386 Py_ssize_t end = PY_SSIZE_T_MAX;
1387 PyObject *subobj;
1388 int result;
1390 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1391 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1392 return NULL;
1393 if (PyTuple_Check(subobj)) {
1394 Py_ssize_t i;
1395 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1396 result = _bytes_tailmatch(self,
1397 PyTuple_GET_ITEM(subobj, i),
1398 start, end, +1);
1399 if (result == -1)
1400 return NULL;
1401 else if (result) {
1402 Py_RETURN_TRUE;
1405 Py_RETURN_FALSE;
1407 result = _bytes_tailmatch(self, subobj, start, end, +1);
1408 if (result == -1)
1409 return NULL;
1410 else
1411 return PyBool_FromLong(result);
1415 PyDoc_STRVAR(translate__doc__,
1416 "B.translate(table[, deletechars]) -> bytearray\n\
1418 Return a copy of B, where all characters occurring in the\n\
1419 optional argument deletechars are removed, and the remaining\n\
1420 characters have been mapped through the given translation\n\
1421 table, which must be a bytes object of length 256.");
1423 static PyObject *
1424 bytes_translate(PyByteArrayObject *self, PyObject *args)
1426 register char *input, *output;
1427 register const char *table;
1428 register Py_ssize_t i, c, changed = 0;
1429 PyObject *input_obj = (PyObject*)self;
1430 const char *output_start;
1431 Py_ssize_t inlen;
1432 PyObject *result;
1433 int trans_table[256];
1434 PyObject *tableobj, *delobj = NULL;
1435 Py_buffer vtable, vdel;
1437 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1438 &tableobj, &delobj))
1439 return NULL;
1441 if (_getbuffer(tableobj, &vtable) < 0)
1442 return NULL;
1444 if (vtable.len != 256) {
1445 PyErr_SetString(PyExc_ValueError,
1446 "translation table must be 256 characters long");
1447 result = NULL;
1448 goto done;
1451 if (delobj != NULL) {
1452 if (_getbuffer(delobj, &vdel) < 0) {
1453 result = NULL;
1454 goto done;
1457 else {
1458 vdel.buf = NULL;
1459 vdel.len = 0;
1462 table = (const char *)vtable.buf;
1463 inlen = PyByteArray_GET_SIZE(input_obj);
1464 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1465 if (result == NULL)
1466 goto done;
1467 output_start = output = PyByteArray_AsString(result);
1468 input = PyByteArray_AS_STRING(input_obj);
1470 if (vdel.len == 0) {
1471 /* If no deletions are required, use faster code */
1472 for (i = inlen; --i >= 0; ) {
1473 c = Py_CHARMASK(*input++);
1474 if (Py_CHARMASK((*output++ = table[c])) != c)
1475 changed = 1;
1477 if (changed || !PyByteArray_CheckExact(input_obj))
1478 goto done;
1479 Py_DECREF(result);
1480 Py_INCREF(input_obj);
1481 result = input_obj;
1482 goto done;
1485 for (i = 0; i < 256; i++)
1486 trans_table[i] = Py_CHARMASK(table[i]);
1488 for (i = 0; i < vdel.len; i++)
1489 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1491 for (i = inlen; --i >= 0; ) {
1492 c = Py_CHARMASK(*input++);
1493 if (trans_table[c] != -1)
1494 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1495 continue;
1496 changed = 1;
1498 if (!changed && PyByteArray_CheckExact(input_obj)) {
1499 Py_DECREF(result);
1500 Py_INCREF(input_obj);
1501 result = input_obj;
1502 goto done;
1504 /* Fix the size of the resulting string */
1505 if (inlen > 0)
1506 PyByteArray_Resize(result, output - output_start);
1508 done:
1509 PyObject_ReleaseBuffer(tableobj, &vtable);
1510 if (delobj != NULL)
1511 PyObject_ReleaseBuffer(delobj, &vdel);
1512 return result;
/* Search directions accepted by findstring() / countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first byte equal to c in target[0:target_len], or NULL. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* Non-zero when pattern[0:length] occurs at target[offset].
   The first and last bytes are compared before falling back to memcmp.
   Don't call if length < 2 (the memcmp size would become negative).
   Arguments are evaluated more than once: pass side-effect-free
   expressions only. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1531 /* Bytes ops must return a string. */
1532 /* If the object is subclass of bytes, create a copy */
1533 Py_LOCAL(PyByteArrayObject *)
1534 return_self(PyByteArrayObject *self)
1536 if (PyByteArray_CheckExact(self)) {
1537 Py_INCREF(self);
1538 return (PyByteArrayObject *)self;
1540 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1541 PyByteArray_AS_STRING(self),
1542 PyByteArray_GET_SIZE(self));
1545 Py_LOCAL_INLINE(Py_ssize_t)
1546 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1548 Py_ssize_t count=0;
1549 const char *start=target;
1550 const char *end=target+target_len;
1552 while ( (start=findchar(start, end-start, c)) != NULL ) {
1553 count++;
1554 if (count >= maxcount)
1555 break;
1556 start += 1;
1558 return count;
1561 Py_LOCAL(Py_ssize_t)
1562 findstring(const char *target, Py_ssize_t target_len,
1563 const char *pattern, Py_ssize_t pattern_len,
1564 Py_ssize_t start,
1565 Py_ssize_t end,
1566 int direction)
1568 if (start < 0) {
1569 start += target_len;
1570 if (start < 0)
1571 start = 0;
1573 if (end > target_len) {
1574 end = target_len;
1575 } else if (end < 0) {
1576 end += target_len;
1577 if (end < 0)
1578 end = 0;
1581 /* zero-length substrings always match at the first attempt */
1582 if (pattern_len == 0)
1583 return (direction > 0) ? start : end;
1585 end -= pattern_len;
1587 if (direction < 0) {
1588 for (; end >= start; end--)
1589 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1590 return end;
1591 } else {
1592 for (; start <= end; start++)
1593 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1594 return start;
1596 return -1;
1599 Py_LOCAL_INLINE(Py_ssize_t)
1600 countstring(const char *target, Py_ssize_t target_len,
1601 const char *pattern, Py_ssize_t pattern_len,
1602 Py_ssize_t start,
1603 Py_ssize_t end,
1604 int direction, Py_ssize_t maxcount)
1606 Py_ssize_t count=0;
1608 if (start < 0) {
1609 start += target_len;
1610 if (start < 0)
1611 start = 0;
1613 if (end > target_len) {
1614 end = target_len;
1615 } else if (end < 0) {
1616 end += target_len;
1617 if (end < 0)
1618 end = 0;
1621 /* zero-length substrings match everywhere */
1622 if (pattern_len == 0 || maxcount == 0) {
1623 if (target_len+1 < maxcount)
1624 return target_len+1;
1625 return maxcount;
1628 end -= pattern_len;
1629 if (direction < 0) {
1630 for (; (end >= start); end--)
1631 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1632 count++;
1633 if (--maxcount <= 0) break;
1634 end -= pattern_len-1;
1636 } else {
1637 for (; (start <= end); start++)
1638 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1639 count++;
1640 if (--maxcount <= 0)
1641 break;
1642 start += pattern_len-1;
1645 return count;
1649 /* Algorithms for different cases of string replacement */
1651 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1652 Py_LOCAL(PyByteArrayObject *)
1653 replace_interleave(PyByteArrayObject *self,
1654 const char *to_s, Py_ssize_t to_len,
1655 Py_ssize_t maxcount)
1657 char *self_s, *result_s;
1658 Py_ssize_t self_len, result_len;
1659 Py_ssize_t count, i, product;
1660 PyByteArrayObject *result;
1662 self_len = PyByteArray_GET_SIZE(self);
1664 /* 1 at the end plus 1 after every character */
1665 count = self_len+1;
1666 if (maxcount < count)
1667 count = maxcount;
1669 /* Check for overflow */
1670 /* result_len = count * to_len + self_len; */
1671 product = count * to_len;
1672 if (product / to_len != count) {
1673 PyErr_SetString(PyExc_OverflowError,
1674 "replace string is too long");
1675 return NULL;
1677 result_len = product + self_len;
1678 if (result_len < 0) {
1679 PyErr_SetString(PyExc_OverflowError,
1680 "replace string is too long");
1681 return NULL;
1684 if (! (result = (PyByteArrayObject *)
1685 PyByteArray_FromStringAndSize(NULL, result_len)) )
1686 return NULL;
1688 self_s = PyByteArray_AS_STRING(self);
1689 result_s = PyByteArray_AS_STRING(result);
1691 /* TODO: special case single character, which doesn't need memcpy */
1693 /* Lay the first one down (guaranteed this will occur) */
1694 Py_MEMCPY(result_s, to_s, to_len);
1695 result_s += to_len;
1696 count -= 1;
1698 for (i=0; i<count; i++) {
1699 *result_s++ = *self_s++;
1700 Py_MEMCPY(result_s, to_s, to_len);
1701 result_s += to_len;
1704 /* Copy the rest of the original string */
1705 Py_MEMCPY(result_s, self_s, self_len-i);
1707 return result;
1710 /* Special case for deleting a single character */
1711 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1712 Py_LOCAL(PyByteArrayObject *)
1713 replace_delete_single_character(PyByteArrayObject *self,
1714 char from_c, Py_ssize_t maxcount)
1716 char *self_s, *result_s;
1717 char *start, *next, *end;
1718 Py_ssize_t self_len, result_len;
1719 Py_ssize_t count;
1720 PyByteArrayObject *result;
1722 self_len = PyByteArray_GET_SIZE(self);
1723 self_s = PyByteArray_AS_STRING(self);
1725 count = countchar(self_s, self_len, from_c, maxcount);
1726 if (count == 0) {
1727 return return_self(self);
1730 result_len = self_len - count; /* from_len == 1 */
1731 assert(result_len>=0);
1733 if ( (result = (PyByteArrayObject *)
1734 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1735 return NULL;
1736 result_s = PyByteArray_AS_STRING(result);
1738 start = self_s;
1739 end = self_s + self_len;
1740 while (count-- > 0) {
1741 next = findchar(start, end-start, from_c);
1742 if (next == NULL)
1743 break;
1744 Py_MEMCPY(result_s, start, next-start);
1745 result_s += (next-start);
1746 start = next+1;
1748 Py_MEMCPY(result_s, start, end-start);
1750 return result;
1753 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1755 Py_LOCAL(PyByteArrayObject *)
1756 replace_delete_substring(PyByteArrayObject *self,
1757 const char *from_s, Py_ssize_t from_len,
1758 Py_ssize_t maxcount)
1760 char *self_s, *result_s;
1761 char *start, *next, *end;
1762 Py_ssize_t self_len, result_len;
1763 Py_ssize_t count, offset;
1764 PyByteArrayObject *result;
1766 self_len = PyByteArray_GET_SIZE(self);
1767 self_s = PyByteArray_AS_STRING(self);
1769 count = countstring(self_s, self_len,
1770 from_s, from_len,
1771 0, self_len, 1,
1772 maxcount);
1774 if (count == 0) {
1775 /* no matches */
1776 return return_self(self);
1779 result_len = self_len - (count * from_len);
1780 assert (result_len>=0);
1782 if ( (result = (PyByteArrayObject *)
1783 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1784 return NULL;
1786 result_s = PyByteArray_AS_STRING(result);
1788 start = self_s;
1789 end = self_s + self_len;
1790 while (count-- > 0) {
1791 offset = findstring(start, end-start,
1792 from_s, from_len,
1793 0, end-start, FORWARD);
1794 if (offset == -1)
1795 break;
1796 next = start + offset;
1798 Py_MEMCPY(result_s, start, next-start);
1800 result_s += (next-start);
1801 start = next+from_len;
1803 Py_MEMCPY(result_s, start, end-start);
1804 return result;
1807 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1808 Py_LOCAL(PyByteArrayObject *)
1809 replace_single_character_in_place(PyByteArrayObject *self,
1810 char from_c, char to_c,
1811 Py_ssize_t maxcount)
1813 char *self_s, *result_s, *start, *end, *next;
1814 Py_ssize_t self_len;
1815 PyByteArrayObject *result;
1817 /* The result string will be the same size */
1818 self_s = PyByteArray_AS_STRING(self);
1819 self_len = PyByteArray_GET_SIZE(self);
1821 next = findchar(self_s, self_len, from_c);
1823 if (next == NULL) {
1824 /* No matches; return the original bytes */
1825 return return_self(self);
1828 /* Need to make a new bytes */
1829 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1830 if (result == NULL)
1831 return NULL;
1832 result_s = PyByteArray_AS_STRING(result);
1833 Py_MEMCPY(result_s, self_s, self_len);
1835 /* change everything in-place, starting with this one */
1836 start = result_s + (next-self_s);
1837 *start = to_c;
1838 start++;
1839 end = result_s + self_len;
1841 while (--maxcount > 0) {
1842 next = findchar(start, end-start, from_c);
1843 if (next == NULL)
1844 break;
1845 *next = to_c;
1846 start = next+1;
1849 return result;
1852 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1853 Py_LOCAL(PyByteArrayObject *)
1854 replace_substring_in_place(PyByteArrayObject *self,
1855 const char *from_s, Py_ssize_t from_len,
1856 const char *to_s, Py_ssize_t to_len,
1857 Py_ssize_t maxcount)
1859 char *result_s, *start, *end;
1860 char *self_s;
1861 Py_ssize_t self_len, offset;
1862 PyByteArrayObject *result;
1864 /* The result bytes will be the same size */
1866 self_s = PyByteArray_AS_STRING(self);
1867 self_len = PyByteArray_GET_SIZE(self);
1869 offset = findstring(self_s, self_len,
1870 from_s, from_len,
1871 0, self_len, FORWARD);
1872 if (offset == -1) {
1873 /* No matches; return the original bytes */
1874 return return_self(self);
1877 /* Need to make a new bytes */
1878 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1879 if (result == NULL)
1880 return NULL;
1881 result_s = PyByteArray_AS_STRING(result);
1882 Py_MEMCPY(result_s, self_s, self_len);
1884 /* change everything in-place, starting with this one */
1885 start = result_s + offset;
1886 Py_MEMCPY(start, to_s, from_len);
1887 start += from_len;
1888 end = result_s + self_len;
1890 while ( --maxcount > 0) {
1891 offset = findstring(start, end-start,
1892 from_s, from_len,
1893 0, end-start, FORWARD);
1894 if (offset==-1)
1895 break;
1896 Py_MEMCPY(start+offset, to_s, from_len);
1897 start += offset+from_len;
1900 return result;
1903 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1904 Py_LOCAL(PyByteArrayObject *)
1905 replace_single_character(PyByteArrayObject *self,
1906 char from_c,
1907 const char *to_s, Py_ssize_t to_len,
1908 Py_ssize_t maxcount)
1910 char *self_s, *result_s;
1911 char *start, *next, *end;
1912 Py_ssize_t self_len, result_len;
1913 Py_ssize_t count, product;
1914 PyByteArrayObject *result;
1916 self_s = PyByteArray_AS_STRING(self);
1917 self_len = PyByteArray_GET_SIZE(self);
1919 count = countchar(self_s, self_len, from_c, maxcount);
1920 if (count == 0) {
1921 /* no matches, return unchanged */
1922 return return_self(self);
1925 /* use the difference between current and new, hence the "-1" */
1926 /* result_len = self_len + count * (to_len-1) */
1927 product = count * (to_len-1);
1928 if (product / (to_len-1) != count) {
1929 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1930 return NULL;
1932 result_len = self_len + product;
1933 if (result_len < 0) {
1934 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1935 return NULL;
1938 if ( (result = (PyByteArrayObject *)
1939 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1940 return NULL;
1941 result_s = PyByteArray_AS_STRING(result);
1943 start = self_s;
1944 end = self_s + self_len;
1945 while (count-- > 0) {
1946 next = findchar(start, end-start, from_c);
1947 if (next == NULL)
1948 break;
1950 if (next == start) {
1951 /* replace with the 'to' */
1952 Py_MEMCPY(result_s, to_s, to_len);
1953 result_s += to_len;
1954 start += 1;
1955 } else {
1956 /* copy the unchanged old then the 'to' */
1957 Py_MEMCPY(result_s, start, next-start);
1958 result_s += (next-start);
1959 Py_MEMCPY(result_s, to_s, to_len);
1960 result_s += to_len;
1961 start = next+1;
1964 /* Copy the remainder of the remaining bytes */
1965 Py_MEMCPY(result_s, start, end-start);
1967 return result;
1970 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1971 Py_LOCAL(PyByteArrayObject *)
1972 replace_substring(PyByteArrayObject *self,
1973 const char *from_s, Py_ssize_t from_len,
1974 const char *to_s, Py_ssize_t to_len,
1975 Py_ssize_t maxcount)
1977 char *self_s, *result_s;
1978 char *start, *next, *end;
1979 Py_ssize_t self_len, result_len;
1980 Py_ssize_t count, offset, product;
1981 PyByteArrayObject *result;
1983 self_s = PyByteArray_AS_STRING(self);
1984 self_len = PyByteArray_GET_SIZE(self);
1986 count = countstring(self_s, self_len,
1987 from_s, from_len,
1988 0, self_len, FORWARD, maxcount);
1989 if (count == 0) {
1990 /* no matches, return unchanged */
1991 return return_self(self);
1994 /* Check for overflow */
1995 /* result_len = self_len + count * (to_len-from_len) */
1996 product = count * (to_len-from_len);
1997 if (product / (to_len-from_len) != count) {
1998 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1999 return NULL;
2001 result_len = self_len + product;
2002 if (result_len < 0) {
2003 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2004 return NULL;
2007 if ( (result = (PyByteArrayObject *)
2008 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
2009 return NULL;
2010 result_s = PyByteArray_AS_STRING(result);
2012 start = self_s;
2013 end = self_s + self_len;
2014 while (count-- > 0) {
2015 offset = findstring(start, end-start,
2016 from_s, from_len,
2017 0, end-start, FORWARD);
2018 if (offset == -1)
2019 break;
2020 next = start+offset;
2021 if (next == start) {
2022 /* replace with the 'to' */
2023 Py_MEMCPY(result_s, to_s, to_len);
2024 result_s += to_len;
2025 start += from_len;
2026 } else {
2027 /* copy the unchanged old then the 'to' */
2028 Py_MEMCPY(result_s, start, next-start);
2029 result_s += (next-start);
2030 Py_MEMCPY(result_s, to_s, to_len);
2031 result_s += to_len;
2032 start = next+from_len;
2035 /* Copy the remainder of the remaining bytes */
2036 Py_MEMCPY(result_s, start, end-start);
2038 return result;
2042 Py_LOCAL(PyByteArrayObject *)
2043 replace(PyByteArrayObject *self,
2044 const char *from_s, Py_ssize_t from_len,
2045 const char *to_s, Py_ssize_t to_len,
2046 Py_ssize_t maxcount)
2048 if (maxcount < 0) {
2049 maxcount = PY_SSIZE_T_MAX;
2050 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
2051 /* nothing to do; return the original bytes */
2052 return return_self(self);
2055 if (maxcount == 0 ||
2056 (from_len == 0 && to_len == 0)) {
2057 /* nothing to do; return the original bytes */
2058 return return_self(self);
2061 /* Handle zero-length special cases */
2063 if (from_len == 0) {
2064 /* insert the 'to' bytes everywhere. */
2065 /* >>> "Python".replace("", ".") */
2066 /* '.P.y.t.h.o.n.' */
2067 return replace_interleave(self, to_s, to_len, maxcount);
2070 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2071 /* point for an empty self bytes to generate a non-empty bytes */
2072 /* Special case so the remaining code always gets a non-empty bytes */
2073 if (PyByteArray_GET_SIZE(self) == 0) {
2074 return return_self(self);
2077 if (to_len == 0) {
2078 /* delete all occurances of 'from' bytes */
2079 if (from_len == 1) {
2080 return replace_delete_single_character(
2081 self, from_s[0], maxcount);
2082 } else {
2083 return replace_delete_substring(self, from_s, from_len, maxcount);
2087 /* Handle special case where both bytes have the same length */
2089 if (from_len == to_len) {
2090 if (from_len == 1) {
2091 return replace_single_character_in_place(
2092 self,
2093 from_s[0],
2094 to_s[0],
2095 maxcount);
2096 } else {
2097 return replace_substring_in_place(
2098 self, from_s, from_len, to_s, to_len, maxcount);
2102 /* Otherwise use the more generic algorithms */
2103 if (from_len == 1) {
2104 return replace_single_character(self, from_s[0],
2105 to_s, to_len, maxcount);
2106 } else {
2107 /* len('from')>=2, len('to')>=1 */
2108 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2113 PyDoc_STRVAR(replace__doc__,
2114 "B.replace(old, new[, count]) -> bytes\n\
2116 Return a copy of B with all occurrences of subsection\n\
2117 old replaced by new. If the optional argument count is\n\
2118 given, only the first count occurrences are replaced.");
2120 static PyObject *
2121 bytes_replace(PyByteArrayObject *self, PyObject *args)
2123 Py_ssize_t count = -1;
2124 PyObject *from, *to, *res;
2125 Py_buffer vfrom, vto;
2127 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2128 return NULL;
2130 if (_getbuffer(from, &vfrom) < 0)
2131 return NULL;
2132 if (_getbuffer(to, &vto) < 0) {
2133 PyObject_ReleaseBuffer(from, &vfrom);
2134 return NULL;
2137 res = (PyObject *)replace((PyByteArrayObject *) self,
2138 vfrom.buf, vfrom.len,
2139 vto.buf, vto.len, count);
2141 PyObject_ReleaseBuffer(from, &vfrom);
2142 PyObject_ReleaseBuffer(to, &vto);
2143 return res;
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesized so that any expression, not just a
   plain identifier, can be passed safely. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] as a new bytearray to 'list'.
   Expects locals 'str' and 'list' and a label 'onError' at the
   expansion site. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] as a new bytearray in the next slot of 'list':
   directly into the preallocated slots while count < MAX_PREALLOC,
   by appending afterwards.  Expects locals 'str', 'list', 'count' and a
   label 'onError' at the expansion site. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2193 Py_LOCAL_INLINE(PyObject *)
2194 split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2196 register Py_ssize_t i, j, count = 0;
2197 PyObject *str;
2198 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2200 if (list == NULL)
2201 return NULL;
2203 i = j = 0;
2204 while ((j < len) && (maxcount-- > 0)) {
2205 for(; j < len; j++) {
2206 /* I found that using memchr makes no difference */
2207 if (s[j] == ch) {
2208 SPLIT_ADD(s, i, j);
2209 i = j = j + 1;
2210 break;
2214 if (i <= len) {
2215 SPLIT_ADD(s, i, len);
2217 FIX_PREALLOC_SIZE(list);
2218 return list;
2220 onError:
2221 Py_DECREF(list);
2222 return NULL;
2226 Py_LOCAL_INLINE(PyObject *)
2227 split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2229 register Py_ssize_t i, j, count = 0;
2230 PyObject *str;
2231 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2233 if (list == NULL)
2234 return NULL;
2236 for (i = j = 0; i < len; ) {
2237 /* find a token */
2238 while (i < len && ISSPACE(s[i]))
2239 i++;
2240 j = i;
2241 while (i < len && !ISSPACE(s[i]))
2242 i++;
2243 if (j < i) {
2244 if (maxcount-- <= 0)
2245 break;
2246 SPLIT_ADD(s, j, i);
2247 while (i < len && ISSPACE(s[i]))
2248 i++;
2249 j = i;
2252 if (j < len) {
2253 SPLIT_ADD(s, j, len);
2255 FIX_PREALLOC_SIZE(list);
2256 return list;
2258 onError:
2259 Py_DECREF(list);
2260 return NULL;
2263 PyDoc_STRVAR(split__doc__,
2264 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2266 Return a list of the sections in B, using sep as the delimiter.\n\
2267 If sep is not given, B is split on ASCII whitespace characters\n\
2268 (space, tab, return, newline, formfeed, vertical tab).\n\
2269 If maxsplit is given, at most maxsplit splits are done.");
2271 static PyObject *
2272 bytes_split(PyByteArrayObject *self, PyObject *args)
2274 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2275 Py_ssize_t maxsplit = -1, count = 0;
2276 const char *s = PyByteArray_AS_STRING(self), *sub;
2277 PyObject *list, *str, *subobj = Py_None;
2278 Py_buffer vsub;
2279 #ifdef USE_FAST
2280 Py_ssize_t pos;
2281 #endif
2283 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2284 return NULL;
2285 if (maxsplit < 0)
2286 maxsplit = PY_SSIZE_T_MAX;
2288 if (subobj == Py_None)
2289 return split_whitespace(s, len, maxsplit);
2291 if (_getbuffer(subobj, &vsub) < 0)
2292 return NULL;
2293 sub = vsub.buf;
2294 n = vsub.len;
2296 if (n == 0) {
2297 PyErr_SetString(PyExc_ValueError, "empty separator");
2298 PyObject_ReleaseBuffer(subobj, &vsub);
2299 return NULL;
2301 if (n == 1)
2302 return split_char(s, len, sub[0], maxsplit);
2304 list = PyList_New(PREALLOC_SIZE(maxsplit));
2305 if (list == NULL) {
2306 PyObject_ReleaseBuffer(subobj, &vsub);
2307 return NULL;
2310 #ifdef USE_FAST
2311 i = j = 0;
2312 while (maxsplit-- > 0) {
2313 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2314 if (pos < 0)
2315 break;
2316 j = i+pos;
2317 SPLIT_ADD(s, i, j);
2318 i = j + n;
2320 #else
2321 i = j = 0;
2322 while ((j+n <= len) && (maxsplit-- > 0)) {
2323 for (; j+n <= len; j++) {
2324 if (Py_STRING_MATCH(s, j, sub, n)) {
2325 SPLIT_ADD(s, i, j);
2326 i = j = j + n;
2327 break;
2331 #endif
2332 SPLIT_ADD(s, i, len);
2333 FIX_PREALLOC_SIZE(list);
2334 PyObject_ReleaseBuffer(subobj, &vsub);
2335 return list;
2337 onError:
2338 Py_DECREF(list);
2339 PyObject_ReleaseBuffer(subobj, &vsub);
2340 return NULL;
2343 /* stringlib's partition shares nullbytes in some cases.
2344 undo this, we don't want the nullbytes to be shared. */
2345 static PyObject *
2346 make_nullbytes_unique(PyObject *result)
2348 if (result != NULL) {
2349 int i;
2350 assert(PyTuple_Check(result));
2351 assert(PyTuple_GET_SIZE(result) == 3);
2352 for (i = 0; i < 3; i++) {
2353 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2354 PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
2355 if (new == NULL) {
2356 Py_DECREF(result);
2357 result = NULL;
2358 break;
2360 Py_DECREF(nullbytes);
2361 PyTuple_SET_ITEM(result, i, new);
2365 return result;
2368 PyDoc_STRVAR(partition__doc__,
2369 "B.partition(sep) -> (head, sep, tail)\n\
2371 Searches for the separator sep in B, and returns the part before it,\n\
2372 the separator itself, and the part after it. If the separator is not\n\
2373 found, returns B and two empty bytearray objects.");
2375 static PyObject *
2376 bytes_partition(PyByteArrayObject *self, PyObject *sep_obj)
2378 PyObject *bytesep, *result;
2380 bytesep = PyByteArray_FromObject(sep_obj);
2381 if (! bytesep)
2382 return NULL;
2384 result = stringlib_partition(
2385 (PyObject*) self,
2386 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2387 bytesep,
2388 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2391 Py_DECREF(bytesep);
2392 return make_nullbytes_unique(result);
2395 PyDoc_STRVAR(rpartition__doc__,
2396 "B.rpartition(sep) -> (tail, sep, head)\n\
2398 Searches for the separator sep in B, starting at the end of B,\n\
2399 and returns the part before it, the separator itself, and the\n\
2400 part after it. If the separator is not found, returns two empty\n\
2401 bytearray objects and B.");
2403 static PyObject *
2404 bytes_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2406 PyObject *bytesep, *result;
2408 bytesep = PyByteArray_FromObject(sep_obj);
2409 if (! bytesep)
2410 return NULL;
2412 result = stringlib_rpartition(
2413 (PyObject*) self,
2414 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2415 bytesep,
2416 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2419 Py_DECREF(bytesep);
2420 return make_nullbytes_unique(result);
2423 Py_LOCAL_INLINE(PyObject *)
2424 rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2426 register Py_ssize_t i, j, count=0;
2427 PyObject *str;
2428 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2430 if (list == NULL)
2431 return NULL;
2433 i = j = len - 1;
2434 while ((i >= 0) && (maxcount-- > 0)) {
2435 for (; i >= 0; i--) {
2436 if (s[i] == ch) {
2437 SPLIT_ADD(s, i + 1, j + 1);
2438 j = i = i - 1;
2439 break;
2443 if (j >= -1) {
2444 SPLIT_ADD(s, 0, j + 1);
2446 FIX_PREALLOC_SIZE(list);
2447 if (PyList_Reverse(list) < 0)
2448 goto onError;
2450 return list;
2452 onError:
2453 Py_DECREF(list);
2454 return NULL;
2457 Py_LOCAL_INLINE(PyObject *)
2458 rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2460 register Py_ssize_t i, j, count = 0;
2461 PyObject *str;
2462 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2464 if (list == NULL)
2465 return NULL;
2467 for (i = j = len - 1; i >= 0; ) {
2468 /* find a token */
2469 while (i >= 0 && ISSPACE(s[i]))
2470 i--;
2471 j = i;
2472 while (i >= 0 && !ISSPACE(s[i]))
2473 i--;
2474 if (j > i) {
2475 if (maxcount-- <= 0)
2476 break;
2477 SPLIT_ADD(s, i + 1, j + 1);
2478 while (i >= 0 && ISSPACE(s[i]))
2479 i--;
2480 j = i;
2483 if (j >= 0) {
2484 SPLIT_ADD(s, 0, j + 1);
2486 FIX_PREALLOC_SIZE(list);
2487 if (PyList_Reverse(list) < 0)
2488 goto onError;
2490 return list;
2492 onError:
2493 Py_DECREF(list);
2494 return NULL;
2497 PyDoc_STRVAR(rsplit__doc__,
2498 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2500 Return a list of the sections in B, using sep as the delimiter,\n\
2501 starting at the end of B and working to the front.\n\
2502 If sep is not given, B is split on ASCII whitespace characters\n\
2503 (space, tab, return, newline, formfeed, vertical tab).\n\
2504 If maxsplit is given, at most maxsplit splits are done.");
2506 static PyObject *
2507 bytes_rsplit(PyByteArrayObject *self, PyObject *args)
2509 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2510 Py_ssize_t maxsplit = -1, count = 0;
2511 const char *s = PyByteArray_AS_STRING(self), *sub;
2512 PyObject *list, *str, *subobj = Py_None;
2513 Py_buffer vsub;
2515 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2516 return NULL;
2517 if (maxsplit < 0)
2518 maxsplit = PY_SSIZE_T_MAX;
2520 if (subobj == Py_None)
2521 return rsplit_whitespace(s, len, maxsplit);
2523 if (_getbuffer(subobj, &vsub) < 0)
2524 return NULL;
2525 sub = vsub.buf;
2526 n = vsub.len;
2528 if (n == 0) {
2529 PyErr_SetString(PyExc_ValueError, "empty separator");
2530 PyObject_ReleaseBuffer(subobj, &vsub);
2531 return NULL;
2533 else if (n == 1)
2534 return rsplit_char(s, len, sub[0], maxsplit);
2536 list = PyList_New(PREALLOC_SIZE(maxsplit));
2537 if (list == NULL) {
2538 PyObject_ReleaseBuffer(subobj, &vsub);
2539 return NULL;
2542 j = len;
2543 i = j - n;
2545 while ( (i >= 0) && (maxsplit-- > 0) ) {
2546 for (; i>=0; i--) {
2547 if (Py_STRING_MATCH(s, i, sub, n)) {
2548 SPLIT_ADD(s, i + n, j);
2549 j = i;
2550 i -= n;
2551 break;
2555 SPLIT_ADD(s, 0, j);
2556 FIX_PREALLOC_SIZE(list);
2557 if (PyList_Reverse(list) < 0)
2558 goto onError;
2559 PyObject_ReleaseBuffer(subobj, &vsub);
2560 return list;
2562 onError:
2563 Py_DECREF(list);
2564 PyObject_ReleaseBuffer(subobj, &vsub);
2565 return NULL;
2568 PyDoc_STRVAR(reverse__doc__,
2569 "B.reverse() -> None\n\
2571 Reverse the order of the values in B in place.");
2572 static PyObject *
2573 bytes_reverse(PyByteArrayObject *self, PyObject *unused)
2575 char swap, *head, *tail;
2576 Py_ssize_t i, j, n = Py_SIZE(self);
2578 j = n / 2;
2579 head = self->ob_bytes;
2580 tail = head + n - 1;
2581 for (i = 0; i < j; i++) {
2582 swap = *head;
2583 *head++ = *tail;
2584 *tail-- = swap;
2587 Py_RETURN_NONE;
2590 PyDoc_STRVAR(insert__doc__,
2591 "B.insert(index, int) -> None\n\
2593 Insert a single item into the bytearray before the given index.");
2594 static PyObject *
2595 bytes_insert(PyByteArrayObject *self, PyObject *args)
2597 int value;
2598 Py_ssize_t where, n = Py_SIZE(self);
2600 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2601 return NULL;
2603 if (n == PY_SSIZE_T_MAX) {
2604 PyErr_SetString(PyExc_OverflowError,
2605 "cannot add more objects to bytes");
2606 return NULL;
2608 if (value < 0 || value >= 256) {
2609 PyErr_SetString(PyExc_ValueError,
2610 "byte must be in range(0, 256)");
2611 return NULL;
2613 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2614 return NULL;
2616 if (where < 0) {
2617 where += n;
2618 if (where < 0)
2619 where = 0;
2621 if (where > n)
2622 where = n;
2623 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2624 self->ob_bytes[where] = value;
2626 Py_RETURN_NONE;
2629 PyDoc_STRVAR(append__doc__,
2630 "B.append(int) -> None\n\
2632 Append a single item to the end of B.");
2633 static PyObject *
2634 bytes_append(PyByteArrayObject *self, PyObject *arg)
2636 int value;
2637 Py_ssize_t n = Py_SIZE(self);
2639 if (! _getbytevalue(arg, &value))
2640 return NULL;
2641 if (n == PY_SSIZE_T_MAX) {
2642 PyErr_SetString(PyExc_OverflowError,
2643 "cannot add more objects to bytes");
2644 return NULL;
2646 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2647 return NULL;
2649 self->ob_bytes[n] = value;
2651 Py_RETURN_NONE;
2654 PyDoc_STRVAR(extend__doc__,
2655 "B.extend(iterable int) -> None\n\
2657 Append all the elements from the iterator or sequence to the\n\
2658 end of B.");
2659 static PyObject *
2660 bytes_extend(PyByteArrayObject *self, PyObject *arg)
2662 PyObject *it, *item, *bytes_obj;
2663 Py_ssize_t buf_size = 0, len = 0;
2664 int value;
2665 char *buf;
2667 /* bytes_setslice code only accepts something supporting PEP 3118. */
2668 if (PyObject_CheckBuffer(arg)) {
2669 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2670 return NULL;
2672 Py_RETURN_NONE;
2675 it = PyObject_GetIter(arg);
2676 if (it == NULL)
2677 return NULL;
2679 /* Try to determine the length of the argument. 32 is abitrary. */
2680 buf_size = _PyObject_LengthHint(arg, 32);
2682 bytes_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2683 if (bytes_obj == NULL)
2684 return NULL;
2685 buf = PyByteArray_AS_STRING(bytes_obj);
2687 while ((item = PyIter_Next(it)) != NULL) {
2688 if (! _getbytevalue(item, &value)) {
2689 Py_DECREF(item);
2690 Py_DECREF(it);
2691 Py_DECREF(bytes_obj);
2692 return NULL;
2694 buf[len++] = value;
2695 Py_DECREF(item);
2697 if (len >= buf_size) {
2698 buf_size = len + (len >> 1) + 1;
2699 if (PyByteArray_Resize((PyObject *)bytes_obj, buf_size) < 0) {
2700 Py_DECREF(it);
2701 Py_DECREF(bytes_obj);
2702 return NULL;
2704 /* Recompute the `buf' pointer, since the resizing operation may
2705 have invalidated it. */
2706 buf = PyByteArray_AS_STRING(bytes_obj);
2709 Py_DECREF(it);
2711 /* Resize down to exact size. */
2712 if (PyByteArray_Resize((PyObject *)bytes_obj, len) < 0) {
2713 Py_DECREF(bytes_obj);
2714 return NULL;
2717 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), bytes_obj) == -1)
2718 return NULL;
2719 Py_DECREF(bytes_obj);
2721 Py_RETURN_NONE;
2724 PyDoc_STRVAR(pop__doc__,
2725 "B.pop([index]) -> int\n\
2727 Remove and return a single item from B. If no index\n\
2728 argument is give, will pop the last value.");
2729 static PyObject *
2730 bytes_pop(PyByteArrayObject *self, PyObject *args)
2732 int value;
2733 Py_ssize_t where = -1, n = Py_SIZE(self);
2735 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2736 return NULL;
2738 if (n == 0) {
2739 PyErr_SetString(PyExc_OverflowError,
2740 "cannot pop an empty bytes");
2741 return NULL;
2743 if (where < 0)
2744 where += Py_SIZE(self);
2745 if (where < 0 || where >= Py_SIZE(self)) {
2746 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2747 return NULL;
2750 value = self->ob_bytes[where];
2751 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2752 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2753 return NULL;
2755 return PyInt_FromLong(value);
2758 PyDoc_STRVAR(remove__doc__,
2759 "B.remove(int) -> None\n\
2761 Remove the first occurance of a value in B.");
2762 static PyObject *
2763 bytes_remove(PyByteArrayObject *self, PyObject *arg)
2765 int value;
2766 Py_ssize_t where, n = Py_SIZE(self);
2768 if (! _getbytevalue(arg, &value))
2769 return NULL;
2771 for (where = 0; where < n; where++) {
2772 if (self->ob_bytes[where] == value)
2773 break;
2775 if (where == n) {
2776 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2777 return NULL;
2780 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2781 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2782 return NULL;
2784 Py_RETURN_NONE;
2787 /* XXX These two helpers could be optimized if argsize == 1 */
2789 static Py_ssize_t
2790 lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2791 void *argptr, Py_ssize_t argsize)
2793 Py_ssize_t i = 0;
2794 while (i < mysize && memchr(argptr, myptr[i], argsize))
2795 i++;
2796 return i;
2799 static Py_ssize_t
2800 rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2801 void *argptr, Py_ssize_t argsize)
2803 Py_ssize_t i = mysize - 1;
2804 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2805 i--;
2806 return i + 1;
2809 PyDoc_STRVAR(strip__doc__,
2810 "B.strip([bytes]) -> bytearray\n\
2812 Strip leading and trailing bytes contained in the argument.\n\
2813 If the argument is omitted, strip ASCII whitespace.");
2814 static PyObject *
2815 bytes_strip(PyByteArrayObject *self, PyObject *args)
2817 Py_ssize_t left, right, mysize, argsize;
2818 void *myptr, *argptr;
2819 PyObject *arg = Py_None;
2820 Py_buffer varg;
2821 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2822 return NULL;
2823 if (arg == Py_None) {
2824 argptr = "\t\n\r\f\v ";
2825 argsize = 6;
2827 else {
2828 if (_getbuffer(arg, &varg) < 0)
2829 return NULL;
2830 argptr = varg.buf;
2831 argsize = varg.len;
2833 myptr = self->ob_bytes;
2834 mysize = Py_SIZE(self);
2835 left = lstrip_helper(myptr, mysize, argptr, argsize);
2836 if (left == mysize)
2837 right = left;
2838 else
2839 right = rstrip_helper(myptr, mysize, argptr, argsize);
2840 if (arg != Py_None)
2841 PyObject_ReleaseBuffer(arg, &varg);
2842 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2845 PyDoc_STRVAR(lstrip__doc__,
2846 "B.lstrip([bytes]) -> bytearray\n\
2848 Strip leading bytes contained in the argument.\n\
2849 If the argument is omitted, strip leading ASCII whitespace.");
2850 static PyObject *
2851 bytes_lstrip(PyByteArrayObject *self, PyObject *args)
2853 Py_ssize_t left, right, mysize, argsize;
2854 void *myptr, *argptr;
2855 PyObject *arg = Py_None;
2856 Py_buffer varg;
2857 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2858 return NULL;
2859 if (arg == Py_None) {
2860 argptr = "\t\n\r\f\v ";
2861 argsize = 6;
2863 else {
2864 if (_getbuffer(arg, &varg) < 0)
2865 return NULL;
2866 argptr = varg.buf;
2867 argsize = varg.len;
2869 myptr = self->ob_bytes;
2870 mysize = Py_SIZE(self);
2871 left = lstrip_helper(myptr, mysize, argptr, argsize);
2872 right = mysize;
2873 if (arg != Py_None)
2874 PyObject_ReleaseBuffer(arg, &varg);
2875 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2878 PyDoc_STRVAR(rstrip__doc__,
2879 "B.rstrip([bytes]) -> bytearray\n\
2881 Strip trailing bytes contained in the argument.\n\
2882 If the argument is omitted, strip trailing ASCII whitespace.");
2883 static PyObject *
2884 bytes_rstrip(PyByteArrayObject *self, PyObject *args)
2886 Py_ssize_t left, right, mysize, argsize;
2887 void *myptr, *argptr;
2888 PyObject *arg = Py_None;
2889 Py_buffer varg;
2890 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2891 return NULL;
2892 if (arg == Py_None) {
2893 argptr = "\t\n\r\f\v ";
2894 argsize = 6;
2896 else {
2897 if (_getbuffer(arg, &varg) < 0)
2898 return NULL;
2899 argptr = varg.buf;
2900 argsize = varg.len;
2902 myptr = self->ob_bytes;
2903 mysize = Py_SIZE(self);
2904 left = 0;
2905 right = rstrip_helper(myptr, mysize, argptr, argsize);
2906 if (arg != Py_None)
2907 PyObject_ReleaseBuffer(arg, &varg);
2908 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2911 PyDoc_STRVAR(decode_doc,
2912 "B.decode([encoding[, errors]]) -> unicode object.\n\
2914 Decodes B using the codec registered for encoding. encoding defaults\n\
2915 to the default encoding. errors may be given to set a different error\n\
2916 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2917 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2918 as well as any other name registered with codecs.register_error that is\n\
2919 able to handle UnicodeDecodeErrors.");
2921 static PyObject *
2922 bytes_decode(PyObject *self, PyObject *args)
2924 const char *encoding = NULL;
2925 const char *errors = NULL;
2927 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2928 return NULL;
2929 if (encoding == NULL)
2930 encoding = PyUnicode_GetDefaultEncoding();
2931 return PyCodec_Decode(self, encoding, errors);
2934 PyDoc_STRVAR(alloc_doc,
2935 "B.__alloc__() -> int\n\
2937 Returns the number of bytes actually allocated.");
2939 static PyObject *
2940 bytes_alloc(PyByteArrayObject *self)
2942 return PyInt_FromSsize_t(self->ob_alloc);
2945 PyDoc_STRVAR(join_doc,
2946 "B.join(iterable_of_bytes) -> bytes\n\
2948 Concatenates any number of bytearray objects, with B in between each pair.");
2950 static PyObject *
2951 bytes_join(PyByteArrayObject *self, PyObject *it)
2953 PyObject *seq;
2954 Py_ssize_t mysize = Py_SIZE(self);
2955 Py_ssize_t i;
2956 Py_ssize_t n;
2957 PyObject **items;
2958 Py_ssize_t totalsize = 0;
2959 PyObject *result;
2960 char *dest;
2962 seq = PySequence_Fast(it, "can only join an iterable");
2963 if (seq == NULL)
2964 return NULL;
2965 n = PySequence_Fast_GET_SIZE(seq);
2966 items = PySequence_Fast_ITEMS(seq);
2968 /* Compute the total size, and check that they are all bytes */
2969 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2970 for (i = 0; i < n; i++) {
2971 PyObject *obj = items[i];
2972 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2973 PyErr_Format(PyExc_TypeError,
2974 "can only join an iterable of bytes "
2975 "(item %ld has type '%.100s')",
2976 /* XXX %ld isn't right on Win64 */
2977 (long)i, Py_TYPE(obj)->tp_name);
2978 goto error;
2980 if (i > 0)
2981 totalsize += mysize;
2982 totalsize += Py_SIZE(obj);
2983 if (totalsize < 0) {
2984 PyErr_NoMemory();
2985 goto error;
2989 /* Allocate the result, and copy the bytes */
2990 result = PyByteArray_FromStringAndSize(NULL, totalsize);
2991 if (result == NULL)
2992 goto error;
2993 dest = PyByteArray_AS_STRING(result);
2994 for (i = 0; i < n; i++) {
2995 PyObject *obj = items[i];
2996 Py_ssize_t size = Py_SIZE(obj);
2997 char *buf;
2998 if (PyByteArray_Check(obj))
2999 buf = PyByteArray_AS_STRING(obj);
3000 else
3001 buf = PyBytes_AS_STRING(obj);
3002 if (i) {
3003 memcpy(dest, self->ob_bytes, mysize);
3004 dest += mysize;
3006 memcpy(dest, buf, size);
3007 dest += size;
3010 /* Done */
3011 Py_DECREF(seq);
3012 return result;
3014 /* Error handling */
3015 error:
3016 Py_DECREF(seq);
3017 return NULL;
3020 PyDoc_STRVAR(fromhex_doc,
3021 "bytearray.fromhex(string) -> bytearray\n\
3023 Create a bytearray object from a string of hexadecimal numbers.\n\
3024 Spaces between two numbers are accepted.\n\
3025 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3027 static int
3028 hex_digit_to_int(Py_UNICODE c)
3030 if (c >= 128)
3031 return -1;
3032 if (ISDIGIT(c))
3033 return c - '0';
3034 else {
3035 if (ISUPPER(c))
3036 c = TOLOWER(c);
3037 if (c >= 'a' && c <= 'f')
3038 return c - 'a' + 10;
3040 return -1;
3043 static PyObject *
3044 bytes_fromhex(PyObject *cls, PyObject *args)
3046 PyObject *newbytes, *hexobj;
3047 char *buf;
3048 Py_UNICODE *hex;
3049 Py_ssize_t hexlen, byteslen, i, j;
3050 int top, bot;
3052 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
3053 return NULL;
3054 assert(PyUnicode_Check(hexobj));
3055 hexlen = PyUnicode_GET_SIZE(hexobj);
3056 hex = PyUnicode_AS_UNICODE(hexobj);
3057 byteslen = hexlen/2; /* This overestimates if there are spaces */
3058 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
3059 if (!newbytes)
3060 return NULL;
3061 buf = PyByteArray_AS_STRING(newbytes);
3062 for (i = j = 0; i < hexlen; i += 2) {
3063 /* skip over spaces in the input */
3064 while (hex[i] == ' ')
3065 i++;
3066 if (i >= hexlen)
3067 break;
3068 top = hex_digit_to_int(hex[i]);
3069 bot = hex_digit_to_int(hex[i+1]);
3070 if (top == -1 || bot == -1) {
3071 PyErr_Format(PyExc_ValueError,
3072 "non-hexadecimal number found in "
3073 "fromhex() arg at position %zd", i);
3074 goto error;
3076 buf[j++] = (top << 4) + bot;
3078 if (PyByteArray_Resize(newbytes, j) < 0)
3079 goto error;
3080 return newbytes;
3082 error:
3083 Py_DECREF(newbytes);
3084 return NULL;
3087 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3089 static PyObject *
3090 bytes_reduce(PyByteArrayObject *self)
3092 PyObject *latin1, *dict;
3093 if (self->ob_bytes)
3094 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3095 Py_SIZE(self), NULL);
3096 else
3097 latin1 = PyUnicode_FromString("");
3099 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3100 if (dict == NULL) {
3101 PyErr_Clear();
3102 dict = Py_None;
3103 Py_INCREF(dict);
3106 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3109 static PySequenceMethods bytes_as_sequence = {
3110 (lenfunc)bytes_length, /* sq_length */
3111 (binaryfunc)PyByteArray_Concat, /* sq_concat */
3112 (ssizeargfunc)bytes_repeat, /* sq_repeat */
3113 (ssizeargfunc)bytes_getitem, /* sq_item */
3114 0, /* sq_slice */
3115 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
3116 0, /* sq_ass_slice */
3117 (objobjproc)bytes_contains, /* sq_contains */
3118 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
3119 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
3122 static PyMappingMethods bytes_as_mapping = {
3123 (lenfunc)bytes_length,
3124 (binaryfunc)bytes_subscript,
3125 (objobjargproc)bytes_ass_subscript,
3128 static PyBufferProcs bytes_as_buffer = {
3129 (readbufferproc)bytes_buffer_getreadbuf,
3130 (writebufferproc)bytes_buffer_getwritebuf,
3131 (segcountproc)bytes_buffer_getsegcount,
3132 (charbufferproc)bytes_buffer_getcharbuf,
3133 (getbufferproc)bytes_getbuffer,
3134 (releasebufferproc)bytes_releasebuffer,
3137 static PyMethodDef
3138 bytes_methods[] = {
3139 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
3140 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
3141 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
3142 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3143 _Py_capitalize__doc__},
3144 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3145 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
3146 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
3147 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
3148 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3149 expandtabs__doc__},
3150 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
3151 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
3152 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
3153 fromhex_doc},
3154 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3155 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
3156 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3157 _Py_isalnum__doc__},
3158 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3159 _Py_isalpha__doc__},
3160 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3161 _Py_isdigit__doc__},
3162 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3163 _Py_islower__doc__},
3164 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3165 _Py_isspace__doc__},
3166 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3167 _Py_istitle__doc__},
3168 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3169 _Py_isupper__doc__},
3170 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
3171 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3172 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
3173 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
3174 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
3175 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
3176 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
3177 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
3178 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
3179 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3180 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3181 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3182 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
3183 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
3184 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
3185 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
3186 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3187 splitlines__doc__},
3188 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
3189 startswith__doc__},
3190 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
3191 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3192 _Py_swapcase__doc__},
3193 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3194 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
3195 translate__doc__},
3196 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3197 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
3198 {NULL}
3201 PyDoc_STRVAR(bytes_doc,
3202 "bytearray(iterable_of_ints) -> bytearray.\n\
3203 bytearray(string, encoding[, errors]) -> bytearray.\n\
3204 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3205 bytearray(memory_view) -> bytearray.\n\
3207 Construct an mutable bytearray object from:\n\
3208 - an iterable yielding integers in range(256)\n\
3209 - a text string encoded using the specified encoding\n\
3210 - a bytes or a bytearray object\n\
3211 - any object implementing the buffer API.\n\
3213 bytearray(int) -> bytearray.\n\
3215 Construct a zero-initialized bytearray of the given length.");
/* Forward declaration: bytes_iter is installed in the tp_iter slot of the
   type object below. */
3218 static PyObject *bytes_iter(PyObject *seq);
/* Type object for "bytearray".  It wires up the sequence, mapping and
   buffer slot tables, the method table, the rich comparison, the iterator
   factory and bytes_init as the initializer.  The flags mark the type as
   subclassable (Py_TPFLAGS_BASETYPE) and as supporting the new buffer
   interface (Py_TPFLAGS_HAVE_NEWBUFFER). */
3220 PyTypeObject PyByteArray_Type = {
3221 PyVarObject_HEAD_INIT(&PyType_Type, 0)
/* tp_name */
3222 "bytearray",
/* tp_basicsize */
3223 sizeof(PyByteArrayObject),
/* NOTE(review): the embedded line numbering skips 3224 between the
   basicsize entry and tp_dealloc; the tp_itemsize initializer is presumably
   on that missing line -- confirm against the upstream file.  The closing
   of the initializer after tp_free is likewise not visible here. */
3225 (destructor)bytes_dealloc, /* tp_dealloc */
3226 0, /* tp_print */
3227 0, /* tp_getattr */
3228 0, /* tp_setattr */
3229 0, /* tp_compare */
3230 (reprfunc)bytes_repr, /* tp_repr */
3231 0, /* tp_as_number */
3232 &bytes_as_sequence, /* tp_as_sequence */
3233 &bytes_as_mapping, /* tp_as_mapping */
3234 0, /* tp_hash */
3235 0, /* tp_call */
3236 bytes_str, /* tp_str */
3237 PyObject_GenericGetAttr, /* tp_getattro */
3238 0, /* tp_setattro */
3239 &bytes_as_buffer, /* tp_as_buffer */
3240 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3241 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3242 bytes_doc, /* tp_doc */
3243 0, /* tp_traverse */
3244 0, /* tp_clear */
3245 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3246 0, /* tp_weaklistoffset */
3247 bytes_iter, /* tp_iter */
3248 0, /* tp_iternext */
3249 bytes_methods, /* tp_methods */
3250 0, /* tp_members */
3251 0, /* tp_getset */
3252 0, /* tp_base */
3253 0, /* tp_dict */
3254 0, /* tp_descr_get */
3255 0, /* tp_descr_set */
3256 0, /* tp_dictoffset */
3257 (initproc)bytes_init, /* tp_init */
3258 PyType_GenericAlloc, /* tp_alloc */
3259 PyType_GenericNew, /* tp_new */
3260 PyObject_Del, /* tp_free */
3263 /*********************** Bytes Iterator ****************************/
3265 typedef struct {
3266 PyObject_HEAD
3267 Py_ssize_t it_index;
3268 PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
3269 } bytesiterobject;
3271 static void
3272 bytesiter_dealloc(bytesiterobject *it)
3274 _PyObject_GC_UNTRACK(it);
3275 Py_XDECREF(it->it_seq);
3276 PyObject_GC_Del(it);
3279 static int
3280 bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
3282 Py_VISIT(it->it_seq);
3283 return 0;
3286 static PyObject *
3287 bytesiter_next(bytesiterobject *it)
3289 PyByteArrayObject *seq;
3290 PyObject *item;
3292 assert(it != NULL);
3293 seq = it->it_seq;
3294 if (seq == NULL)
3295 return NULL;
3296 assert(PyByteArray_Check(seq));
3298 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
3299 item = PyInt_FromLong(
3300 (unsigned char)seq->ob_bytes[it->it_index]);
3301 if (item != NULL)
3302 ++it->it_index;
3303 return item;
3306 Py_DECREF(seq);
3307 it->it_seq = NULL;
3308 return NULL;
3311 static PyObject *
3312 bytesiter_length_hint(bytesiterobject *it)
3314 Py_ssize_t len = 0;
3315 if (it->it_seq)
3316 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3317 return PyInt_FromSsize_t(len);
3320 PyDoc_STRVAR(length_hint_doc,
3321 "Private method returning an estimate of len(list(it)).");
3323 static PyMethodDef bytesiter_methods[] = {
3324 {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
3325 length_hint_doc},
3326 {NULL, NULL} /* sentinel */
/* Type object for the iterator returned by iterating a bytearray.  It is a
   GC-tracked type (Py_TPFLAGS_HAVE_GC with bytesiter_traverse), returns
   itself from tp_iter (PyObject_SelfIter) and produces items through
   bytesiter_next. */
/* NOTE(review): the embedded line numbering jumps from 3358 (tp_methods)
   to 3362, so whatever follows tp_methods -- presumably a trailing
   initializer and the closing of the struct -- is not visible here;
   confirm against the upstream file. */
3329 PyTypeObject PyByteArrayIter_Type = {
3330 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3331 "bytearray_iterator", /* tp_name */
3332 sizeof(bytesiterobject), /* tp_basicsize */
3333 0, /* tp_itemsize */
3334 /* methods */
3335 (destructor)bytesiter_dealloc, /* tp_dealloc */
3336 0, /* tp_print */
3337 0, /* tp_getattr */
3338 0, /* tp_setattr */
3339 0, /* tp_compare */
3340 0, /* tp_repr */
3341 0, /* tp_as_number */
3342 0, /* tp_as_sequence */
3343 0, /* tp_as_mapping */
3344 0, /* tp_hash */
3345 0, /* tp_call */
3346 0, /* tp_str */
3347 PyObject_GenericGetAttr, /* tp_getattro */
3348 0, /* tp_setattro */
3349 0, /* tp_as_buffer */
3350 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3351 0, /* tp_doc */
3352 (traverseproc)bytesiter_traverse, /* tp_traverse */
3353 0, /* tp_clear */
3354 0, /* tp_richcompare */
3355 0, /* tp_weaklistoffset */
3356 PyObject_SelfIter, /* tp_iter */
3357 (iternextfunc)bytesiter_next, /* tp_iternext */
3358 bytesiter_methods, /* tp_methods */
3362 static PyObject *
3363 bytes_iter(PyObject *seq)
3365 bytesiterobject *it;
3367 if (!PyByteArray_Check(seq)) {
3368 PyErr_BadInternalCall();
3369 return NULL;
3371 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3372 if (it == NULL)
3373 return NULL;
3374 it->it_index = 0;
3375 Py_INCREF(seq);
3376 it->it_seq = (PyByteArrayObject *)seq;
3377 _PyObject_GC_TRACK(it);
3378 return (PyObject *)it;