1 /* PyBytes (bytearray) implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "structmember.h"
6 #include "bytes_methods.h"
8 static PyByteArrayObject
*nullbytes
= NULL
;
11 PyByteArray_Fini(void)
17 PyByteArray_Init(void)
19 nullbytes
= PyObject_New(PyByteArrayObject
, &PyByteArray_Type
);
20 if (nullbytes
== NULL
)
22 nullbytes
->ob_bytes
= NULL
;
23 Py_SIZE(nullbytes
) = nullbytes
->ob_alloc
= 0;
24 nullbytes
->ob_exports
= 0;
28 /* end nullbytes support */
33 _getbytevalue(PyObject
* arg
, int *value
)
37 if (PyBytes_CheckExact(arg
)) {
38 if (Py_SIZE(arg
) != 1) {
39 PyErr_SetString(PyExc_ValueError
, "string must be of size 1");
42 *value
= Py_CHARMASK(((PyBytesObject
*)arg
)->ob_sval
[0]);
45 else if (PyInt_Check(arg
) || PyLong_Check(arg
)) {
46 face_value
= PyLong_AsLong(arg
);
49 PyObject
*index
= PyNumber_Index(arg
);
51 PyErr_Format(PyExc_TypeError
,
52 "an integer or string of size 1 is required");
55 face_value
= PyLong_AsLong(index
);
59 if (face_value
< 0 || face_value
>= 256) {
60 /* this includes the OverflowError in case the long is too large */
61 PyErr_SetString(PyExc_ValueError
, "byte must be in range(0, 256)");
70 bytearray_buffer_getreadbuf(PyByteArrayObject
*self
, Py_ssize_t index
, const void **ptr
)
73 PyErr_SetString(PyExc_SystemError
,
74 "accessing non-existent bytes segment");
77 *ptr
= (void *)self
->ob_bytes
;
82 bytearray_buffer_getwritebuf(PyByteArrayObject
*self
, Py_ssize_t index
, const void **ptr
)
85 PyErr_SetString(PyExc_SystemError
,
86 "accessing non-existent bytes segment");
89 *ptr
= (void *)self
->ob_bytes
;
94 bytearray_buffer_getsegcount(PyByteArrayObject
*self
, Py_ssize_t
*lenp
)
97 *lenp
= Py_SIZE(self
);
102 bytearray_buffer_getcharbuf(PyByteArrayObject
*self
, Py_ssize_t index
, const char **ptr
)
105 PyErr_SetString(PyExc_SystemError
,
106 "accessing non-existent bytes segment");
109 *ptr
= self
->ob_bytes
;
110 return Py_SIZE(self
);
114 bytearray_getbuffer(PyByteArrayObject
*obj
, Py_buffer
*view
, int flags
)
122 if (obj
->ob_bytes
== NULL
)
126 ret
= PyBuffer_FillInfo(view
, (PyObject
*)obj
, ptr
, Py_SIZE(obj
), 0, flags
);
134 bytearray_releasebuffer(PyByteArrayObject
*obj
, Py_buffer
*view
)
140 _getbuffer(PyObject
*obj
, Py_buffer
*view
)
142 PyBufferProcs
*buffer
= Py_TYPE(obj
)->tp_as_buffer
;
144 if (buffer
== NULL
|| buffer
->bf_getbuffer
== NULL
)
146 PyErr_Format(PyExc_TypeError
,
147 "Type %.100s doesn't support the buffer API",
148 Py_TYPE(obj
)->tp_name
);
152 if (buffer
->bf_getbuffer(obj
, view
, PyBUF_SIMPLE
) < 0)
158 _canresize(PyByteArrayObject
*self
)
160 if (self
->ob_exports
> 0) {
161 PyErr_SetString(PyExc_BufferError
,
162 "Existing exports of data: object cannot be re-sized");
168 /* Direct API functions */
171 PyByteArray_FromObject(PyObject
*input
)
173 return PyObject_CallFunctionObjArgs((PyObject
*)&PyByteArray_Type
,
178 PyByteArray_FromStringAndSize(const char *bytes
, Py_ssize_t size
)
180 PyByteArrayObject
*new;
184 PyErr_SetString(PyExc_SystemError
,
185 "Negative size passed to PyByteArray_FromStringAndSize");
189 new = PyObject_New(PyByteArrayObject
, &PyByteArray_Type
);
194 new->ob_bytes
= NULL
;
199 new->ob_bytes
= PyMem_Malloc(alloc
);
200 if (new->ob_bytes
== NULL
) {
202 return PyErr_NoMemory();
205 memcpy(new->ob_bytes
, bytes
, size
);
206 new->ob_bytes
[size
] = '\0'; /* Trailing null byte */
209 new->ob_alloc
= alloc
;
212 return (PyObject
*)new;
216 PyByteArray_Size(PyObject
*self
)
218 assert(self
!= NULL
);
219 assert(PyByteArray_Check(self
));
221 return PyByteArray_GET_SIZE(self
);
225 PyByteArray_AsString(PyObject
*self
)
227 assert(self
!= NULL
);
228 assert(PyByteArray_Check(self
));
230 return PyByteArray_AS_STRING(self
);
234 PyByteArray_Resize(PyObject
*self
, Py_ssize_t size
)
237 Py_ssize_t alloc
= ((PyByteArrayObject
*)self
)->ob_alloc
;
239 assert(self
!= NULL
);
240 assert(PyByteArray_Check(self
));
243 if (size
== Py_SIZE(self
)) {
246 if (!_canresize((PyByteArrayObject
*)self
)) {
250 if (size
< alloc
/ 2) {
251 /* Major downsize; resize down to exact size */
254 else if (size
< alloc
) {
255 /* Within allocated size; quick exit */
256 Py_SIZE(self
) = size
;
257 ((PyByteArrayObject
*)self
)->ob_bytes
[size
] = '\0'; /* Trailing null */
260 else if (size
<= alloc
* 1.125) {
261 /* Moderate upsize; overallocate similar to list_resize() */
262 alloc
= size
+ (size
>> 3) + (size
< 9 ? 3 : 6);
265 /* Major upsize; resize up to exact size */
269 sval
= PyMem_Realloc(((PyByteArrayObject
*)self
)->ob_bytes
, alloc
);
275 ((PyByteArrayObject
*)self
)->ob_bytes
= sval
;
276 Py_SIZE(self
) = size
;
277 ((PyByteArrayObject
*)self
)->ob_alloc
= alloc
;
278 ((PyByteArrayObject
*)self
)->ob_bytes
[size
] = '\0'; /* Trailing null byte */
284 PyByteArray_Concat(PyObject
*a
, PyObject
*b
)
288 PyByteArrayObject
*result
= NULL
;
292 if (_getbuffer(a
, &va
) < 0 ||
293 _getbuffer(b
, &vb
) < 0) {
294 PyErr_Format(PyExc_TypeError
, "can't concat %.100s to %.100s",
295 Py_TYPE(a
)->tp_name
, Py_TYPE(b
)->tp_name
);
299 size
= va
.len
+ vb
.len
;
305 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, size
);
306 if (result
!= NULL
) {
307 memcpy(result
->ob_bytes
, va
.buf
, va
.len
);
308 memcpy(result
->ob_bytes
+ va
.len
, vb
.buf
, vb
.len
);
313 PyBuffer_Release(&va
);
315 PyBuffer_Release(&vb
);
316 return (PyObject
*)result
;
319 /* Functions stuffed into the type object */
322 bytearray_length(PyByteArrayObject
*self
)
324 return Py_SIZE(self
);
328 bytearray_iconcat(PyByteArrayObject
*self
, PyObject
*other
)
334 if (_getbuffer(other
, &vo
) < 0) {
335 PyErr_Format(PyExc_TypeError
, "can't concat %.100s to %.100s",
336 Py_TYPE(other
)->tp_name
, Py_TYPE(self
)->tp_name
);
340 mysize
= Py_SIZE(self
);
341 size
= mysize
+ vo
.len
;
343 PyBuffer_Release(&vo
);
344 return PyErr_NoMemory();
346 if (size
< self
->ob_alloc
) {
347 Py_SIZE(self
) = size
;
348 self
->ob_bytes
[Py_SIZE(self
)] = '\0'; /* Trailing null byte */
350 else if (PyByteArray_Resize((PyObject
*)self
, size
) < 0) {
351 PyBuffer_Release(&vo
);
354 memcpy(self
->ob_bytes
+ mysize
, vo
.buf
, vo
.len
);
355 PyBuffer_Release(&vo
);
357 return (PyObject
*)self
;
361 bytearray_repeat(PyByteArrayObject
*self
, Py_ssize_t count
)
363 PyByteArrayObject
*result
;
369 mysize
= Py_SIZE(self
);
370 size
= mysize
* count
;
371 if (count
!= 0 && size
/ count
!= mysize
)
372 return PyErr_NoMemory();
373 result
= (PyByteArrayObject
*)PyByteArray_FromStringAndSize(NULL
, size
);
374 if (result
!= NULL
&& size
!= 0) {
376 memset(result
->ob_bytes
, self
->ob_bytes
[0], size
);
379 for (i
= 0; i
< count
; i
++)
380 memcpy(result
->ob_bytes
+ i
*mysize
, self
->ob_bytes
, mysize
);
383 return (PyObject
*)result
;
387 bytearray_irepeat(PyByteArrayObject
*self
, Py_ssize_t count
)
394 mysize
= Py_SIZE(self
);
395 size
= mysize
* count
;
396 if (count
!= 0 && size
/ count
!= mysize
)
397 return PyErr_NoMemory();
398 if (size
< self
->ob_alloc
) {
399 Py_SIZE(self
) = size
;
400 self
->ob_bytes
[Py_SIZE(self
)] = '\0'; /* Trailing null byte */
402 else if (PyByteArray_Resize((PyObject
*)self
, size
) < 0)
406 memset(self
->ob_bytes
, self
->ob_bytes
[0], size
);
409 for (i
= 1; i
< count
; i
++)
410 memcpy(self
->ob_bytes
+ i
*mysize
, self
->ob_bytes
, mysize
);
414 return (PyObject
*)self
;
418 bytearray_getitem(PyByteArrayObject
*self
, Py_ssize_t i
)
422 if (i
< 0 || i
>= Py_SIZE(self
)) {
423 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
426 return PyInt_FromLong((unsigned char)(self
->ob_bytes
[i
]));
430 bytearray_subscript(PyByteArrayObject
*self
, PyObject
*index
)
432 if (PyIndex_Check(index
)) {
433 Py_ssize_t i
= PyNumber_AsSsize_t(index
, PyExc_IndexError
);
435 if (i
== -1 && PyErr_Occurred())
439 i
+= PyByteArray_GET_SIZE(self
);
441 if (i
< 0 || i
>= Py_SIZE(self
)) {
442 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
445 return PyInt_FromLong((unsigned char)(self
->ob_bytes
[i
]));
447 else if (PySlice_Check(index
)) {
448 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
449 if (PySlice_GetIndicesEx((PySliceObject
*)index
,
450 PyByteArray_GET_SIZE(self
),
451 &start
, &stop
, &step
, &slicelength
) < 0) {
455 if (slicelength
<= 0)
456 return PyByteArray_FromStringAndSize("", 0);
457 else if (step
== 1) {
458 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ start
,
462 char *source_buf
= PyByteArray_AS_STRING(self
);
463 char *result_buf
= (char *)PyMem_Malloc(slicelength
);
466 if (result_buf
== NULL
)
467 return PyErr_NoMemory();
469 for (cur
= start
, i
= 0; i
< slicelength
;
471 result_buf
[i
] = source_buf
[cur
];
473 result
= PyByteArray_FromStringAndSize(result_buf
, slicelength
);
474 PyMem_Free(result_buf
);
479 PyErr_SetString(PyExc_TypeError
, "bytearray indices must be integers");
485 bytearray_setslice(PyByteArrayObject
*self
, Py_ssize_t lo
, Py_ssize_t hi
,
488 Py_ssize_t avail
, needed
;
494 if (values
== (PyObject
*)self
) {
495 /* Make a copy and call this function recursively */
497 values
= PyByteArray_FromObject(values
);
500 err
= bytearray_setslice(self
, lo
, hi
, values
);
504 if (values
== NULL
) {
510 if (_getbuffer(values
, &vbytes
) < 0) {
511 PyErr_Format(PyExc_TypeError
,
512 "can't set bytearray slice from %.100s",
513 Py_TYPE(values
)->tp_name
);
524 if (hi
> Py_SIZE(self
))
531 if (avail
!= needed
) {
532 if (avail
> needed
) {
533 if (!_canresize(self
)) {
539 | |<----avail----->|<-----tomove------>|
540 | |<-needed->|<-----tomove------>|
543 memmove(self
->ob_bytes
+ lo
+ needed
, self
->ob_bytes
+ hi
,
546 /* XXX(nnorwitz): need to verify this can't overflow! */
547 if (PyByteArray_Resize((PyObject
*)self
,
548 Py_SIZE(self
) + needed
- avail
) < 0) {
552 if (avail
< needed
) {
555 | |<-avail->|<-----tomove------>|
556 | |<----needed---->|<-----tomove------>|
559 memmove(self
->ob_bytes
+ lo
+ needed
, self
->ob_bytes
+ hi
,
560 Py_SIZE(self
) - lo
- needed
);
565 memcpy(self
->ob_bytes
+ lo
, bytes
, needed
);
569 if (vbytes
.len
!= -1)
570 PyBuffer_Release(&vbytes
);
575 bytearray_setitem(PyByteArrayObject
*self
, Py_ssize_t i
, PyObject
*value
)
582 if (i
< 0 || i
>= Py_SIZE(self
)) {
583 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
588 return bytearray_setslice(self
, i
, i
+1, NULL
);
590 if (!_getbytevalue(value
, &ival
))
593 self
->ob_bytes
[i
] = ival
;
598 bytearray_ass_subscript(PyByteArrayObject
*self
, PyObject
*index
, PyObject
*values
)
600 Py_ssize_t start
, stop
, step
, slicelen
, needed
;
603 if (PyIndex_Check(index
)) {
604 Py_ssize_t i
= PyNumber_AsSsize_t(index
, PyExc_IndexError
);
606 if (i
== -1 && PyErr_Occurred())
610 i
+= PyByteArray_GET_SIZE(self
);
612 if (i
< 0 || i
>= Py_SIZE(self
)) {
613 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
617 if (values
== NULL
) {
618 /* Fall through to slice assignment */
626 if (!_getbytevalue(values
, &ival
))
628 self
->ob_bytes
[i
] = (char)ival
;
632 else if (PySlice_Check(index
)) {
633 if (PySlice_GetIndicesEx((PySliceObject
*)index
,
634 PyByteArray_GET_SIZE(self
),
635 &start
, &stop
, &step
, &slicelen
) < 0) {
640 PyErr_SetString(PyExc_TypeError
, "bytearray indices must be integer");
644 if (values
== NULL
) {
648 else if (values
== (PyObject
*)self
|| !PyByteArray_Check(values
)) {
649 /* Make a copy and call this function recursively */
651 values
= PyByteArray_FromObject(values
);
654 err
= bytearray_ass_subscript(self
, index
, values
);
659 assert(PyByteArray_Check(values
));
660 bytes
= ((PyByteArrayObject
*)values
)->ob_bytes
;
661 needed
= Py_SIZE(values
);
663 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
664 if ((step
< 0 && start
< stop
) ||
665 (step
> 0 && start
> stop
))
668 if (slicelen
!= needed
) {
669 if (!_canresize(self
))
671 if (slicelen
> needed
) {
673 0 start stop old_size
674 | |<---slicelen--->|<-----tomove------>|
675 | |<-needed->|<-----tomove------>|
678 memmove(self
->ob_bytes
+ start
+ needed
, self
->ob_bytes
+ stop
,
679 Py_SIZE(self
) - stop
);
681 if (PyByteArray_Resize((PyObject
*)self
,
682 Py_SIZE(self
) + needed
- slicelen
) < 0)
684 if (slicelen
< needed
) {
687 | |<-avail->|<-----tomove------>|
688 | |<----needed---->|<-----tomove------>|
691 memmove(self
->ob_bytes
+ start
+ needed
, self
->ob_bytes
+ stop
,
692 Py_SIZE(self
) - start
- needed
);
697 memcpy(self
->ob_bytes
+ start
, bytes
, needed
);
706 if (!_canresize(self
))
710 start
= stop
+ step
* (slicelen
- 1) - 1;
713 for (cur
= start
, i
= 0;
714 i
< slicelen
; cur
+= step
, i
++) {
715 Py_ssize_t lim
= step
- 1;
717 if (cur
+ step
>= PyByteArray_GET_SIZE(self
))
718 lim
= PyByteArray_GET_SIZE(self
) - cur
- 1;
720 memmove(self
->ob_bytes
+ cur
- i
,
721 self
->ob_bytes
+ cur
+ 1, lim
);
723 /* Move the tail of the bytes, in one chunk */
724 cur
= start
+ slicelen
*step
;
725 if (cur
< PyByteArray_GET_SIZE(self
)) {
726 memmove(self
->ob_bytes
+ cur
- slicelen
,
727 self
->ob_bytes
+ cur
,
728 PyByteArray_GET_SIZE(self
) - cur
);
730 if (PyByteArray_Resize((PyObject
*)self
,
731 PyByteArray_GET_SIZE(self
) - slicelen
) < 0)
740 if (needed
!= slicelen
) {
741 PyErr_Format(PyExc_ValueError
,
742 "attempt to assign bytes of size %zd "
743 "to extended slice of size %zd",
747 for (cur
= start
, i
= 0; i
< slicelen
; cur
+= step
, i
++)
748 self
->ob_bytes
[cur
] = bytes
[i
];
755 bytearray_init(PyByteArrayObject
*self
, PyObject
*args
, PyObject
*kwds
)
757 static char *kwlist
[] = {"source", "encoding", "errors", 0};
758 PyObject
*arg
= NULL
;
759 const char *encoding
= NULL
;
760 const char *errors
= NULL
;
763 PyObject
*(*iternext
)(PyObject
*);
765 if (Py_SIZE(self
) != 0) {
766 /* Empty previous contents (yes, do this first of all!) */
767 if (PyByteArray_Resize((PyObject
*)self
, 0) < 0)
771 /* Parse arguments */
772 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|Oss:bytearray", kwlist
,
773 &arg
, &encoding
, &errors
))
776 /* Make a quick exit if no first argument */
778 if (encoding
!= NULL
|| errors
!= NULL
) {
779 PyErr_SetString(PyExc_TypeError
,
780 "encoding or errors without sequence argument");
786 if (PyBytes_Check(arg
)) {
787 PyObject
*new, *encoded
;
788 if (encoding
!= NULL
) {
789 encoded
= PyCodec_Encode(arg
, encoding
, errors
);
792 assert(PyBytes_Check(encoded
));
798 new = bytearray_iconcat(self
, arg
);
806 #ifdef Py_USING_UNICODE
807 if (PyUnicode_Check(arg
)) {
808 /* Encode via the codec registry */
809 PyObject
*encoded
, *new;
810 if (encoding
== NULL
) {
811 PyErr_SetString(PyExc_TypeError
,
812 "unicode argument without an encoding");
815 encoded
= PyCodec_Encode(arg
, encoding
, errors
);
818 assert(PyBytes_Check(encoded
));
819 new = bytearray_iconcat(self
, encoded
);
828 /* If it's not unicode, there can't be encoding or errors */
829 if (encoding
!= NULL
|| errors
!= NULL
) {
830 PyErr_SetString(PyExc_TypeError
,
831 "encoding or errors without a string argument");
836 count
= PyNumber_AsSsize_t(arg
, PyExc_ValueError
);
837 if (count
== -1 && PyErr_Occurred())
841 PyErr_SetString(PyExc_ValueError
, "negative count");
845 if (PyByteArray_Resize((PyObject
*)self
, count
))
847 memset(self
->ob_bytes
, 0, count
);
852 /* Use the buffer API */
853 if (PyObject_CheckBuffer(arg
)) {
856 if (PyObject_GetBuffer(arg
, &view
, PyBUF_FULL_RO
) < 0)
859 if (PyByteArray_Resize((PyObject
*)self
, size
) < 0) goto fail
;
860 if (PyBuffer_ToContiguous(self
->ob_bytes
, &view
, size
, 'C') < 0)
862 PyBuffer_Release(&view
);
865 PyBuffer_Release(&view
);
869 /* XXX Optimize this if the argument is a list or tuple */
871 /* Get the iterator */
872 it
= PyObject_GetIter(arg
);
875 iternext
= *Py_TYPE(it
)->tp_iternext
;
877 /* Run the iterator to exhaustion */
882 /* Get the next item */
885 if (PyErr_Occurred()) {
886 if (!PyErr_ExceptionMatches(PyExc_StopIteration
))
893 /* Interpret it as an int (__index__) */
894 rc
= _getbytevalue(item
, &value
);
899 /* Append the byte */
900 if (Py_SIZE(self
) < self
->ob_alloc
)
902 else if (PyByteArray_Resize((PyObject
*)self
, Py_SIZE(self
)+1) < 0)
904 self
->ob_bytes
[Py_SIZE(self
)-1] = value
;
907 /* Clean up and return success */
912 /* Error handling when it != NULL */
917 /* Mostly copied from string_repr, but without the
918 "smart quote" functionality. */
920 bytearray_repr(PyByteArrayObject
*self
)
922 static const char *hexdigits
= "0123456789abcdef";
923 const char *quote_prefix
= "bytearray(b";
924 const char *quote_postfix
= ")";
925 Py_ssize_t length
= Py_SIZE(self
);
926 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
927 size_t newsize
= 14 + 4 * length
;
929 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 - 3 != length
) {
930 PyErr_SetString(PyExc_OverflowError
,
931 "bytearray object is too large to make repr");
934 v
= PyString_FromStringAndSize(NULL
, newsize
);
939 register Py_ssize_t i
;
944 /* Figure out which quote to use; single is preferred */
948 start
= PyByteArray_AS_STRING(self
);
949 for (test
= start
; test
< start
+length
; ++test
) {
951 quote
= '\''; /* back to single */
954 else if (*test
== '\'')
961 p
= PyString_AS_STRING(v
);
962 while (*quote_prefix
)
963 *p
++ = *quote_prefix
++;
966 for (i
= 0; i
< length
; i
++) {
967 /* There's at least enough room for a hex escape
968 and a closing quote. */
969 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 5);
970 c
= self
->ob_bytes
[i
];
971 if (c
== '\'' || c
== '\\')
972 *p
++ = '\\', *p
++ = c
;
974 *p
++ = '\\', *p
++ = 't';
976 *p
++ = '\\', *p
++ = 'n';
978 *p
++ = '\\', *p
++ = 'r';
980 *p
++ = '\\', *p
++ = 'x', *p
++ = '0', *p
++ = '0';
981 else if (c
< ' ' || c
>= 0x7f) {
984 *p
++ = hexdigits
[(c
& 0xf0) >> 4];
985 *p
++ = hexdigits
[c
& 0xf];
990 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 1);
992 while (*quote_postfix
) {
993 *p
++ = *quote_postfix
++;
996 if (_PyString_Resize(&v
, (p
- PyString_AS_STRING(v
)))) {
1005 bytearray_str(PyObject
*op
)
1008 if (Py_BytesWarningFlag
) {
1009 if (PyErr_WarnEx(PyExc_BytesWarning
,
1010 "str() on a bytearray instance", 1))
1013 return bytearray_repr((PyByteArrayObject
*)op
);
1015 return PyBytes_FromStringAndSize(((PyByteArrayObject
*)op
)->ob_bytes
, Py_SIZE(op
));
1019 bytearray_richcompare(PyObject
*self
, PyObject
*other
, int op
)
1021 Py_ssize_t self_size
, other_size
;
1022 Py_buffer self_bytes
, other_bytes
;
1027 /* Bytes can be compared to anything that supports the (binary)
1028 buffer API. Except that a comparison with Unicode is always an
1029 error, even if the comparison is for equality. */
1030 #ifdef Py_USING_UNICODE
1031 if (PyObject_IsInstance(self
, (PyObject
*)&PyUnicode_Type
) ||
1032 PyObject_IsInstance(other
, (PyObject
*)&PyUnicode_Type
)) {
1033 if (Py_BytesWarningFlag
&& op
== Py_EQ
) {
1034 if (PyErr_WarnEx(PyExc_BytesWarning
,
1035 "Comparsion between bytearray and string", 1))
1039 Py_INCREF(Py_NotImplemented
);
1040 return Py_NotImplemented
;
1044 self_size
= _getbuffer(self
, &self_bytes
);
1045 if (self_size
< 0) {
1047 Py_INCREF(Py_NotImplemented
);
1048 return Py_NotImplemented
;
1051 other_size
= _getbuffer(other
, &other_bytes
);
1052 if (other_size
< 0) {
1054 PyBuffer_Release(&self_bytes
);
1055 Py_INCREF(Py_NotImplemented
);
1056 return Py_NotImplemented
;
1059 if (self_size
!= other_size
&& (op
== Py_EQ
|| op
== Py_NE
)) {
1060 /* Shortcut: if the lengths differ, the objects differ */
1061 cmp
= (op
== Py_NE
);
1064 minsize
= self_size
;
1065 if (other_size
< minsize
)
1066 minsize
= other_size
;
1068 cmp
= memcmp(self_bytes
.buf
, other_bytes
.buf
, minsize
);
1069 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1072 if (self_size
< other_size
)
1074 else if (self_size
> other_size
)
1079 case Py_LT
: cmp
= cmp
< 0; break;
1080 case Py_LE
: cmp
= cmp
<= 0; break;
1081 case Py_EQ
: cmp
= cmp
== 0; break;
1082 case Py_NE
: cmp
= cmp
!= 0; break;
1083 case Py_GT
: cmp
= cmp
> 0; break;
1084 case Py_GE
: cmp
= cmp
>= 0; break;
1088 res
= cmp
? Py_True
: Py_False
;
1089 PyBuffer_Release(&self_bytes
);
1090 PyBuffer_Release(&other_bytes
);
1096 bytearray_dealloc(PyByteArrayObject
*self
)
1098 if (self
->ob_exports
> 0) {
1099 PyErr_SetString(PyExc_SystemError
,
1100 "deallocated bytearray object has exported buffers");
1103 if (self
->ob_bytes
!= 0) {
1104 PyMem_Free(self
->ob_bytes
);
1106 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1110 /* -------------------------------------------------------------------- */
1113 #define STRINGLIB_CHAR char
1114 #define STRINGLIB_CMP memcmp
1115 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1116 #define STRINGLIB_STR PyByteArray_AS_STRING
1117 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1118 #define STRINGLIB_EMPTY nullbytes
1119 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1120 #define STRINGLIB_MUTABLE 1
1121 #define FROM_BYTEARRAY 1
1123 #include "stringlib/fastsearch.h"
1124 #include "stringlib/count.h"
1125 #include "stringlib/find.h"
1126 #include "stringlib/partition.h"
1127 #include "stringlib/ctype.h"
1128 #include "stringlib/transmogrify.h"
1131 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1132 were copied from the old char* style string object. */
1134 Py_LOCAL_INLINE(void)
1135 _adjust_indices(Py_ssize_t
*start
, Py_ssize_t
*end
, Py_ssize_t len
)
1150 Py_LOCAL_INLINE(Py_ssize_t
)
1151 bytearray_find_internal(PyByteArrayObject
*self
, PyObject
*args
, int dir
)
1155 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1158 if (!PyArg_ParseTuple(args
, "O|O&O&:find/rfind/index/rindex", &subobj
,
1159 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1161 if (_getbuffer(subobj
, &subbuf
) < 0)
1164 res
= stringlib_find_slice(
1165 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
1166 subbuf
.buf
, subbuf
.len
, start
, end
);
1168 res
= stringlib_rfind_slice(
1169 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
1170 subbuf
.buf
, subbuf
.len
, start
, end
);
1171 PyBuffer_Release(&subbuf
);
1175 PyDoc_STRVAR(find__doc__
,
1176 "B.find(sub [,start [,end]]) -> int\n\
1178 Return the lowest index in B where subsection sub is found,\n\
1179 such that sub is contained within s[start,end]. Optional\n\
1180 arguments start and end are interpreted as in slice notation.\n\
1182 Return -1 on failure.");
1185 bytearray_find(PyByteArrayObject
*self
, PyObject
*args
)
1187 Py_ssize_t result
= bytearray_find_internal(self
, args
, +1);
1190 return PyInt_FromSsize_t(result
);
1193 PyDoc_STRVAR(count__doc__
,
1194 "B.count(sub [,start [,end]]) -> int\n\
1196 Return the number of non-overlapping occurrences of subsection sub in\n\
1197 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1198 as in slice notation.");
1201 bytearray_count(PyByteArrayObject
*self
, PyObject
*args
)
1204 const char *str
= PyByteArray_AS_STRING(self
);
1205 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
1207 PyObject
*count_obj
;
1209 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
1210 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1213 if (_getbuffer(sub_obj
, &vsub
) < 0)
1216 _adjust_indices(&start
, &end
, PyByteArray_GET_SIZE(self
));
1218 count_obj
= PyInt_FromSsize_t(
1219 stringlib_count(str
+ start
, end
- start
, vsub
.buf
, vsub
.len
)
1221 PyBuffer_Release(&vsub
);
1226 PyDoc_STRVAR(index__doc__
,
1227 "B.index(sub [,start [,end]]) -> int\n\
1229 Like B.find() but raise ValueError when the subsection is not found.");
1232 bytearray_index(PyByteArrayObject
*self
, PyObject
*args
)
1234 Py_ssize_t result
= bytearray_find_internal(self
, args
, +1);
1238 PyErr_SetString(PyExc_ValueError
,
1239 "subsection not found");
1242 return PyInt_FromSsize_t(result
);
1246 PyDoc_STRVAR(rfind__doc__
,
1247 "B.rfind(sub [,start [,end]]) -> int\n\
1249 Return the highest index in B where subsection sub is found,\n\
1250 such that sub is contained within s[start,end]. Optional\n\
1251 arguments start and end are interpreted as in slice notation.\n\
1253 Return -1 on failure.");
1256 bytearray_rfind(PyByteArrayObject
*self
, PyObject
*args
)
1258 Py_ssize_t result
= bytearray_find_internal(self
, args
, -1);
1261 return PyInt_FromSsize_t(result
);
1265 PyDoc_STRVAR(rindex__doc__
,
1266 "B.rindex(sub [,start [,end]]) -> int\n\
1268 Like B.rfind() but raise ValueError when the subsection is not found.");
1271 bytearray_rindex(PyByteArrayObject
*self
, PyObject
*args
)
1273 Py_ssize_t result
= bytearray_find_internal(self
, args
, -1);
1277 PyErr_SetString(PyExc_ValueError
,
1278 "subsection not found");
1281 return PyInt_FromSsize_t(result
);
1286 bytearray_contains(PyObject
*self
, PyObject
*arg
)
1288 Py_ssize_t ival
= PyNumber_AsSsize_t(arg
, PyExc_ValueError
);
1289 if (ival
== -1 && PyErr_Occurred()) {
1293 if (_getbuffer(arg
, &varg
) < 0)
1295 pos
= stringlib_find(PyByteArray_AS_STRING(self
), Py_SIZE(self
),
1296 varg
.buf
, varg
.len
, 0);
1297 PyBuffer_Release(&varg
);
1300 if (ival
< 0 || ival
>= 256) {
1301 PyErr_SetString(PyExc_ValueError
, "byte must be in range(0, 256)");
1305 return memchr(PyByteArray_AS_STRING(self
), ival
, Py_SIZE(self
)) != NULL
;
1309 /* Matches the end (direction >= 0) or start (direction < 0) of self
1310 * against substr, using the start and end arguments. Returns
1311 * -1 on error, 0 if not found and 1 if found.
1314 _bytearray_tailmatch(PyByteArrayObject
*self
, PyObject
*substr
, Py_ssize_t start
,
1315 Py_ssize_t end
, int direction
)
1317 Py_ssize_t len
= PyByteArray_GET_SIZE(self
);
1322 str
= PyByteArray_AS_STRING(self
);
1324 if (_getbuffer(substr
, &vsubstr
) < 0)
1327 _adjust_indices(&start
, &end
, len
);
1329 if (direction
< 0) {
1331 if (start
+vsubstr
.len
> len
) {
1336 if (end
-start
< vsubstr
.len
|| start
> len
) {
1340 if (end
-vsubstr
.len
> start
)
1341 start
= end
- vsubstr
.len
;
1343 if (end
-start
>= vsubstr
.len
)
1344 rv
= ! memcmp(str
+start
, vsubstr
.buf
, vsubstr
.len
);
1347 PyBuffer_Release(&vsubstr
);
1352 PyDoc_STRVAR(startswith__doc__
,
1353 "B.startswith(prefix [,start [,end]]) -> bool\n\
1355 Return True if B starts with the specified prefix, False otherwise.\n\
1356 With optional start, test B beginning at that position.\n\
1357 With optional end, stop comparing B at that position.\n\
1358 prefix can also be a tuple of strings to try.");
1361 bytearray_startswith(PyByteArrayObject
*self
, PyObject
*args
)
1363 Py_ssize_t start
= 0;
1364 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1368 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
1369 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1371 if (PyTuple_Check(subobj
)) {
1373 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
1374 result
= _bytearray_tailmatch(self
,
1375 PyTuple_GET_ITEM(subobj
, i
),
1385 result
= _bytearray_tailmatch(self
, subobj
, start
, end
, -1);
1389 return PyBool_FromLong(result
);
1392 PyDoc_STRVAR(endswith__doc__
,
1393 "B.endswith(suffix [,start [,end]]) -> bool\n\
1395 Return True if B ends with the specified suffix, False otherwise.\n\
1396 With optional start, test B beginning at that position.\n\
1397 With optional end, stop comparing B at that position.\n\
1398 suffix can also be a tuple of strings to try.");
1401 bytearray_endswith(PyByteArrayObject
*self
, PyObject
*args
)
1403 Py_ssize_t start
= 0;
1404 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1408 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
1409 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1411 if (PyTuple_Check(subobj
)) {
1413 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
1414 result
= _bytearray_tailmatch(self
,
1415 PyTuple_GET_ITEM(subobj
, i
),
1425 result
= _bytearray_tailmatch(self
, subobj
, start
, end
, +1);
1429 return PyBool_FromLong(result
);
1433 PyDoc_STRVAR(translate__doc__
,
1434 "B.translate(table[, deletechars]) -> bytearray\n\
1436 Return a copy of B, where all characters occurring in the\n\
1437 optional argument deletechars are removed, and the remaining\n\
1438 characters have been mapped through the given translation\n\
1439 table, which must be a bytes object of length 256.");
1442 bytearray_translate(PyByteArrayObject
*self
, PyObject
*args
)
1444 register char *input
, *output
;
1445 register const char *table
;
1446 register Py_ssize_t i
, c
;
1447 PyObject
*input_obj
= (PyObject
*)self
;
1448 const char *output_start
;
1450 PyObject
*result
= NULL
;
1451 int trans_table
[256];
1452 PyObject
*tableobj
= NULL
, *delobj
= NULL
;
1453 Py_buffer vtable
, vdel
;
1455 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
1456 &tableobj
, &delobj
))
1459 if (tableobj
== Py_None
) {
1462 } else if (_getbuffer(tableobj
, &vtable
) < 0) {
1465 if (vtable
.len
!= 256) {
1466 PyErr_SetString(PyExc_ValueError
,
1467 "translation table must be 256 characters long");
1470 table
= (const char*)vtable
.buf
;
1473 if (delobj
!= NULL
) {
1474 if (_getbuffer(delobj
, &vdel
) < 0) {
1475 delobj
= NULL
; /* don't try to release vdel buffer on exit */
1484 inlen
= PyByteArray_GET_SIZE(input_obj
);
1485 result
= PyByteArray_FromStringAndSize((char *)NULL
, inlen
);
1488 output_start
= output
= PyByteArray_AsString(result
);
1489 input
= PyByteArray_AS_STRING(input_obj
);
1491 if (vdel
.len
== 0 && table
!= NULL
) {
1492 /* If no deletions are required, use faster code */
1493 for (i
= inlen
; --i
>= 0; ) {
1494 c
= Py_CHARMASK(*input
++);
1495 *output
++ = table
[c
];
1500 if (table
== NULL
) {
1501 for (i
= 0; i
< 256; i
++)
1502 trans_table
[i
] = Py_CHARMASK(i
);
1504 for (i
= 0; i
< 256; i
++)
1505 trans_table
[i
] = Py_CHARMASK(table
[i
]);
1508 for (i
= 0; i
< vdel
.len
; i
++)
1509 trans_table
[(int) Py_CHARMASK( ((unsigned char*)vdel
.buf
)[i
] )] = -1;
1511 for (i
= inlen
; --i
>= 0; ) {
1512 c
= Py_CHARMASK(*input
++);
1513 if (trans_table
[c
] != -1)
1514 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
1517 /* Fix the size of the resulting string */
1519 PyByteArray_Resize(result
, output
- output_start
);
1522 if (tableobj
!= NULL
)
1523 PyBuffer_Release(&vtable
);
1525 PyBuffer_Release(&vdel
);
1533 /* find and count characters and substrings */
1535 #define findchar(target, target_len, c) \
1536 ((char *)memchr((const void *)(target), c, target_len))
1538 /* Don't call if length < 2 */
1539 #define Py_STRING_MATCH(target, offset, pattern, length) \
1540 (target[offset] == pattern[0] && \
1541 target[offset+length-1] == pattern[length-1] && \
1542 !memcmp(target+offset+1, pattern+1, length-2) )
1545 /* Bytes ops must return a string, create a copy */
1546 Py_LOCAL(PyByteArrayObject
*)
1547 return_self(PyByteArrayObject
*self
)
1549 return (PyByteArrayObject
*)PyByteArray_FromStringAndSize(
1550 PyByteArray_AS_STRING(self
),
1551 PyByteArray_GET_SIZE(self
));
1554 Py_LOCAL_INLINE(Py_ssize_t
)
1555 countchar(const char *target
, Py_ssize_t target_len
, char c
, Py_ssize_t maxcount
)
1558 const char *start
=target
;
1559 const char *end
=target
+target_len
;
1561 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
1563 if (count
>= maxcount
)
1570 Py_LOCAL(Py_ssize_t
)
1571 findstring(const char *target
, Py_ssize_t target_len
,
1572 const char *pattern
, Py_ssize_t pattern_len
,
1578 start
+= target_len
;
1582 if (end
> target_len
) {
1584 } else if (end
< 0) {
1590 /* zero-length substrings always match at the first attempt */
1591 if (pattern_len
== 0)
1592 return (direction
> 0) ? start
: end
;
1596 if (direction
< 0) {
1597 for (; end
>= start
; end
--)
1598 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
))
1601 for (; start
<= end
; start
++)
1602 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
))
1608 Py_LOCAL_INLINE(Py_ssize_t
)
1609 countstring(const char *target
, Py_ssize_t target_len
,
1610 const char *pattern
, Py_ssize_t pattern_len
,
1613 int direction
, Py_ssize_t maxcount
)
1618 start
+= target_len
;
1622 if (end
> target_len
) {
1624 } else if (end
< 0) {
1630 /* zero-length substrings match everywhere */
1631 if (pattern_len
== 0 || maxcount
== 0) {
1632 if (target_len
+1 < maxcount
)
1633 return target_len
+1;
1638 if (direction
< 0) {
1639 for (; (end
>= start
); end
--)
1640 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
)) {
1642 if (--maxcount
<= 0) break;
1643 end
-= pattern_len
-1;
1646 for (; (start
<= end
); start
++)
1647 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
)) {
1649 if (--maxcount
<= 0)
1651 start
+= pattern_len
-1;
1658 /* Algorithms for different cases of string replacement */
1660 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1661 Py_LOCAL(PyByteArrayObject
*)
1662 replace_interleave(PyByteArrayObject
*self
,
1663 const char *to_s
, Py_ssize_t to_len
,
1664 Py_ssize_t maxcount
)
1666 char *self_s
, *result_s
;
1667 Py_ssize_t self_len
, result_len
;
1668 Py_ssize_t count
, i
, product
;
1669 PyByteArrayObject
*result
;
1671 self_len
= PyByteArray_GET_SIZE(self
);
1673 /* 1 at the end plus 1 after every character */
1675 if (maxcount
< count
)
1678 /* Check for overflow */
1679 /* result_len = count * to_len + self_len; */
1680 product
= count
* to_len
;
1681 if (product
/ to_len
!= count
) {
1682 PyErr_SetString(PyExc_OverflowError
,
1683 "replace string is too long");
1686 result_len
= product
+ self_len
;
1687 if (result_len
< 0) {
1688 PyErr_SetString(PyExc_OverflowError
,
1689 "replace string is too long");
1693 if (! (result
= (PyByteArrayObject
*)
1694 PyByteArray_FromStringAndSize(NULL
, result_len
)) )
1697 self_s
= PyByteArray_AS_STRING(self
);
1698 result_s
= PyByteArray_AS_STRING(result
);
1700 /* TODO: special case single character, which doesn't need memcpy */
1702 /* Lay the first one down (guaranteed this will occur) */
1703 Py_MEMCPY(result_s
, to_s
, to_len
);
1707 for (i
=0; i
<count
; i
++) {
1708 *result_s
++ = *self_s
++;
1709 Py_MEMCPY(result_s
, to_s
, to_len
);
1713 /* Copy the rest of the original string */
1714 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
1719 /* Special case for deleting a single character */
1720 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1721 Py_LOCAL(PyByteArrayObject
*)
1722 replace_delete_single_character(PyByteArrayObject
*self
,
1723 char from_c
, Py_ssize_t maxcount
)
1725 char *self_s
, *result_s
;
1726 char *start
, *next
, *end
;
1727 Py_ssize_t self_len
, result_len
;
1729 PyByteArrayObject
*result
;
1731 self_len
= PyByteArray_GET_SIZE(self
);
1732 self_s
= PyByteArray_AS_STRING(self
);
1734 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
1736 return return_self(self
);
1739 result_len
= self_len
- count
; /* from_len == 1 */
1740 assert(result_len
>=0);
1742 if ( (result
= (PyByteArrayObject
*)
1743 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1745 result_s
= PyByteArray_AS_STRING(result
);
1748 end
= self_s
+ self_len
;
1749 while (count
-- > 0) {
1750 next
= findchar(start
, end
-start
, from_c
);
1753 Py_MEMCPY(result_s
, start
, next
-start
);
1754 result_s
+= (next
-start
);
1757 Py_MEMCPY(result_s
, start
, end
-start
);
1762 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1764 Py_LOCAL(PyByteArrayObject
*)
1765 replace_delete_substring(PyByteArrayObject
*self
,
1766 const char *from_s
, Py_ssize_t from_len
,
1767 Py_ssize_t maxcount
)
1769 char *self_s
, *result_s
;
1770 char *start
, *next
, *end
;
1771 Py_ssize_t self_len
, result_len
;
1772 Py_ssize_t count
, offset
;
1773 PyByteArrayObject
*result
;
1775 self_len
= PyByteArray_GET_SIZE(self
);
1776 self_s
= PyByteArray_AS_STRING(self
);
1778 count
= countstring(self_s
, self_len
,
1785 return return_self(self
);
1788 result_len
= self_len
- (count
* from_len
);
1789 assert (result_len
>=0);
1791 if ( (result
= (PyByteArrayObject
*)
1792 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1795 result_s
= PyByteArray_AS_STRING(result
);
1798 end
= self_s
+ self_len
;
1799 while (count
-- > 0) {
1800 offset
= findstring(start
, end
-start
,
1802 0, end
-start
, FORWARD
);
1805 next
= start
+ offset
;
1807 Py_MEMCPY(result_s
, start
, next
-start
);
1809 result_s
+= (next
-start
);
1810 start
= next
+from_len
;
1812 Py_MEMCPY(result_s
, start
, end
-start
);
1816 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1817 Py_LOCAL(PyByteArrayObject
*)
1818 replace_single_character_in_place(PyByteArrayObject
*self
,
1819 char from_c
, char to_c
,
1820 Py_ssize_t maxcount
)
1822 char *self_s
, *result_s
, *start
, *end
, *next
;
1823 Py_ssize_t self_len
;
1824 PyByteArrayObject
*result
;
1826 /* The result string will be the same size */
1827 self_s
= PyByteArray_AS_STRING(self
);
1828 self_len
= PyByteArray_GET_SIZE(self
);
1830 next
= findchar(self_s
, self_len
, from_c
);
1833 /* No matches; return the original bytes */
1834 return return_self(self
);
1837 /* Need to make a new bytes */
1838 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, self_len
);
1841 result_s
= PyByteArray_AS_STRING(result
);
1842 Py_MEMCPY(result_s
, self_s
, self_len
);
1844 /* change everything in-place, starting with this one */
1845 start
= result_s
+ (next
-self_s
);
1848 end
= result_s
+ self_len
;
1850 while (--maxcount
> 0) {
1851 next
= findchar(start
, end
-start
, from_c
);
1861 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1862 Py_LOCAL(PyByteArrayObject
*)
1863 replace_substring_in_place(PyByteArrayObject
*self
,
1864 const char *from_s
, Py_ssize_t from_len
,
1865 const char *to_s
, Py_ssize_t to_len
,
1866 Py_ssize_t maxcount
)
1868 char *result_s
, *start
, *end
;
1870 Py_ssize_t self_len
, offset
;
1871 PyByteArrayObject
*result
;
1873 /* The result bytes will be the same size */
1875 self_s
= PyByteArray_AS_STRING(self
);
1876 self_len
= PyByteArray_GET_SIZE(self
);
1878 offset
= findstring(self_s
, self_len
,
1880 0, self_len
, FORWARD
);
1882 /* No matches; return the original bytes */
1883 return return_self(self
);
1886 /* Need to make a new bytes */
1887 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, self_len
);
1890 result_s
= PyByteArray_AS_STRING(result
);
1891 Py_MEMCPY(result_s
, self_s
, self_len
);
1893 /* change everything in-place, starting with this one */
1894 start
= result_s
+ offset
;
1895 Py_MEMCPY(start
, to_s
, from_len
);
1897 end
= result_s
+ self_len
;
1899 while ( --maxcount
> 0) {
1900 offset
= findstring(start
, end
-start
,
1902 0, end
-start
, FORWARD
);
1905 Py_MEMCPY(start
+offset
, to_s
, from_len
);
1906 start
+= offset
+from_len
;
1912 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1913 Py_LOCAL(PyByteArrayObject
*)
1914 replace_single_character(PyByteArrayObject
*self
,
1916 const char *to_s
, Py_ssize_t to_len
,
1917 Py_ssize_t maxcount
)
1919 char *self_s
, *result_s
;
1920 char *start
, *next
, *end
;
1921 Py_ssize_t self_len
, result_len
;
1922 Py_ssize_t count
, product
;
1923 PyByteArrayObject
*result
;
1925 self_s
= PyByteArray_AS_STRING(self
);
1926 self_len
= PyByteArray_GET_SIZE(self
);
1928 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
1930 /* no matches, return unchanged */
1931 return return_self(self
);
1934 /* use the difference between current and new, hence the "-1" */
1935 /* result_len = self_len + count * (to_len-1) */
1936 product
= count
* (to_len
-1);
1937 if (product
/ (to_len
-1) != count
) {
1938 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1941 result_len
= self_len
+ product
;
1942 if (result_len
< 0) {
1943 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1947 if ( (result
= (PyByteArrayObject
*)
1948 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1950 result_s
= PyByteArray_AS_STRING(result
);
1953 end
= self_s
+ self_len
;
1954 while (count
-- > 0) {
1955 next
= findchar(start
, end
-start
, from_c
);
1959 if (next
== start
) {
1960 /* replace with the 'to' */
1961 Py_MEMCPY(result_s
, to_s
, to_len
);
1965 /* copy the unchanged old then the 'to' */
1966 Py_MEMCPY(result_s
, start
, next
-start
);
1967 result_s
+= (next
-start
);
1968 Py_MEMCPY(result_s
, to_s
, to_len
);
1973 /* Copy the remainder of the remaining bytes */
1974 Py_MEMCPY(result_s
, start
, end
-start
);
1979 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1980 Py_LOCAL(PyByteArrayObject
*)
1981 replace_substring(PyByteArrayObject
*self
,
1982 const char *from_s
, Py_ssize_t from_len
,
1983 const char *to_s
, Py_ssize_t to_len
,
1984 Py_ssize_t maxcount
)
1986 char *self_s
, *result_s
;
1987 char *start
, *next
, *end
;
1988 Py_ssize_t self_len
, result_len
;
1989 Py_ssize_t count
, offset
, product
;
1990 PyByteArrayObject
*result
;
1992 self_s
= PyByteArray_AS_STRING(self
);
1993 self_len
= PyByteArray_GET_SIZE(self
);
1995 count
= countstring(self_s
, self_len
,
1997 0, self_len
, FORWARD
, maxcount
);
1999 /* no matches, return unchanged */
2000 return return_self(self
);
2003 /* Check for overflow */
2004 /* result_len = self_len + count * (to_len-from_len) */
2005 product
= count
* (to_len
-from_len
);
2006 if (product
/ (to_len
-from_len
) != count
) {
2007 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
2010 result_len
= self_len
+ product
;
2011 if (result_len
< 0) {
2012 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
2016 if ( (result
= (PyByteArrayObject
*)
2017 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
2019 result_s
= PyByteArray_AS_STRING(result
);
2022 end
= self_s
+ self_len
;
2023 while (count
-- > 0) {
2024 offset
= findstring(start
, end
-start
,
2026 0, end
-start
, FORWARD
);
2029 next
= start
+offset
;
2030 if (next
== start
) {
2031 /* replace with the 'to' */
2032 Py_MEMCPY(result_s
, to_s
, to_len
);
2036 /* copy the unchanged old then the 'to' */
2037 Py_MEMCPY(result_s
, start
, next
-start
);
2038 result_s
+= (next
-start
);
2039 Py_MEMCPY(result_s
, to_s
, to_len
);
2041 start
= next
+from_len
;
2044 /* Copy the remainder of the remaining bytes */
2045 Py_MEMCPY(result_s
, start
, end
-start
);
2051 Py_LOCAL(PyByteArrayObject
*)
2052 replace(PyByteArrayObject
*self
,
2053 const char *from_s
, Py_ssize_t from_len
,
2054 const char *to_s
, Py_ssize_t to_len
,
2055 Py_ssize_t maxcount
)
2058 maxcount
= PY_SSIZE_T_MAX
;
2059 } else if (maxcount
== 0 || PyByteArray_GET_SIZE(self
) == 0) {
2060 /* nothing to do; return the original bytes */
2061 return return_self(self
);
2064 if (maxcount
== 0 ||
2065 (from_len
== 0 && to_len
== 0)) {
2066 /* nothing to do; return the original bytes */
2067 return return_self(self
);
2070 /* Handle zero-length special cases */
2072 if (from_len
== 0) {
2073 /* insert the 'to' bytes everywhere. */
2074 /* >>> "Python".replace("", ".") */
2075 /* '.P.y.t.h.o.n.' */
2076 return replace_interleave(self
, to_s
, to_len
, maxcount
);
2079 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2080 /* point for an empty self bytes to generate a non-empty bytes */
2081 /* Special case so the remaining code always gets a non-empty bytes */
2082 if (PyByteArray_GET_SIZE(self
) == 0) {
2083 return return_self(self
);
2087 /* delete all occurrences of 'from' bytes */
2088 if (from_len
== 1) {
2089 return replace_delete_single_character(
2090 self
, from_s
[0], maxcount
);
2092 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
2096 /* Handle special case where both bytes have the same length */
2098 if (from_len
== to_len
) {
2099 if (from_len
== 1) {
2100 return replace_single_character_in_place(
2106 return replace_substring_in_place(
2107 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2111 /* Otherwise use the more generic algorithms */
2112 if (from_len
== 1) {
2113 return replace_single_character(self
, from_s
[0],
2114 to_s
, to_len
, maxcount
);
2116 /* len('from')>=2, len('to')>=1 */
2117 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2122 PyDoc_STRVAR(replace__doc__
,
2123 "B.replace(old, new[, count]) -> bytes\n\
2125 Return a copy of B with all occurrences of subsection\n\
2126 old replaced by new. If the optional argument count is\n\
2127 given, only the first count occurrences are replaced.");
2130 bytearray_replace(PyByteArrayObject
*self
, PyObject
*args
)
2132 Py_ssize_t count
= -1;
2133 PyObject
*from
, *to
, *res
;
2134 Py_buffer vfrom
, vto
;
2136 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
2139 if (_getbuffer(from
, &vfrom
) < 0)
2141 if (_getbuffer(to
, &vto
) < 0) {
2142 PyBuffer_Release(&vfrom
);
2146 res
= (PyObject
*)replace((PyByteArrayObject
*) self
,
2147 vfrom
.buf
, vfrom
.len
,
2148 vto
.buf
, vto
.len
, count
);
2150 PyBuffer_Release(&vfrom
);
2151 PyBuffer_Release(&vto
);
2156 /* Overallocate the initial list to reduce the number of reallocs for small
2157 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
2158 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
2159 text (roughly 11 words per line) and field delimited data (usually 1-10
2160 fields). For large strings the split algorithms are bandwidth limited
2161 so increasing the preallocation likely will not improve things.*/
2163 #define MAX_PREALLOC 12
2165 /* 5 splits gives 6 elements */
2166 #define PREALLOC_SIZE(maxsplit) \
2167 (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
2169 #define SPLIT_APPEND(data, left, right) \
2170 str = PyByteArray_FromStringAndSize((data) + (left), \
2171 (right) - (left)); \
2174 if (PyList_Append(list, str)) { \
2181 #define SPLIT_ADD(data, left, right) { \
2182 str = PyByteArray_FromStringAndSize((data) + (left), \
2183 (right) - (left)); \
2186 if (count < MAX_PREALLOC) { \
2187 PyList_SET_ITEM(list, count, str); \
2189 if (PyList_Append(list, str)) { \
2198 /* Always force the list to the expected size. */
2199 #define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2202 Py_LOCAL_INLINE(PyObject
*)
2203 split_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
2205 register Py_ssize_t i
, j
, count
= 0;
2207 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2213 while ((j
< len
) && (maxcount
-- > 0)) {
2214 for(; j
< len
; j
++) {
2215 /* I found that using memchr makes no difference */
2224 SPLIT_ADD(s
, i
, len
);
2226 FIX_PREALLOC_SIZE(list
);
2235 Py_LOCAL_INLINE(PyObject
*)
2236 split_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxcount
)
2238 register Py_ssize_t i
, j
, count
= 0;
2240 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2245 for (i
= j
= 0; i
< len
; ) {
2247 while (i
< len
&& ISSPACE(s
[i
]))
2250 while (i
< len
&& !ISSPACE(s
[i
]))
2253 if (maxcount
-- <= 0)
2256 while (i
< len
&& ISSPACE(s
[i
]))
2262 SPLIT_ADD(s
, j
, len
);
2264 FIX_PREALLOC_SIZE(list
);
2272 PyDoc_STRVAR(split__doc__
,
2273 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2275 Return a list of the sections in B, using sep as the delimiter.\n\
2276 If sep is not given, B is split on ASCII whitespace characters\n\
2277 (space, tab, return, newline, formfeed, vertical tab).\n\
2278 If maxsplit is given, at most maxsplit splits are done.");
2281 bytearray_split(PyByteArrayObject
*self
, PyObject
*args
)
2283 Py_ssize_t len
= PyByteArray_GET_SIZE(self
), n
, i
, j
;
2284 Py_ssize_t maxsplit
= -1, count
= 0;
2285 const char *s
= PyByteArray_AS_STRING(self
), *sub
;
2286 PyObject
*list
, *str
, *subobj
= Py_None
;
2292 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
2295 maxsplit
= PY_SSIZE_T_MAX
;
2297 if (subobj
== Py_None
)
2298 return split_whitespace(s
, len
, maxsplit
);
2300 if (_getbuffer(subobj
, &vsub
) < 0)
2306 PyErr_SetString(PyExc_ValueError
, "empty separator");
2307 PyBuffer_Release(&vsub
);
2311 list
= split_char(s
, len
, sub
[0], maxsplit
);
2312 PyBuffer_Release(&vsub
);
2316 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
2318 PyBuffer_Release(&vsub
);
2324 while (maxsplit
-- > 0) {
2325 pos
= fastsearch(s
+i
, len
-i
, sub
, n
, FAST_SEARCH
);
2334 while ((j
+n
<= len
) && (maxsplit
-- > 0)) {
2335 for (; j
+n
<= len
; j
++) {
2336 if (Py_STRING_MATCH(s
, j
, sub
, n
)) {
2344 SPLIT_ADD(s
, i
, len
);
2345 FIX_PREALLOC_SIZE(list
);
2346 PyBuffer_Release(&vsub
);
2351 PyBuffer_Release(&vsub
);
2355 /* stringlib's partition shares nullbytes in some cases.
2356 undo this, we don't want the nullbytes to be shared. */
2358 make_nullbytes_unique(PyObject
*result
)
2360 if (result
!= NULL
) {
2362 assert(PyTuple_Check(result
));
2363 assert(PyTuple_GET_SIZE(result
) == 3);
2364 for (i
= 0; i
< 3; i
++) {
2365 if (PyTuple_GET_ITEM(result
, i
) == (PyObject
*)nullbytes
) {
2366 PyObject
*new = PyByteArray_FromStringAndSize(NULL
, 0);
2372 Py_DECREF(nullbytes
);
2373 PyTuple_SET_ITEM(result
, i
, new);
2380 PyDoc_STRVAR(partition__doc__
,
2381 "B.partition(sep) -> (head, sep, tail)\n\
2383 Searches for the separator sep in B, and returns the part before it,\n\
2384 the separator itself, and the part after it. If the separator is not\n\
2385 found, returns B and two empty bytearray objects.");
2388 bytearray_partition(PyByteArrayObject
*self
, PyObject
*sep_obj
)
2390 PyObject
*bytesep
, *result
;
2392 bytesep
= PyByteArray_FromObject(sep_obj
);
2396 result
= stringlib_partition(
2398 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
2400 PyByteArray_AS_STRING(bytesep
), PyByteArray_GET_SIZE(bytesep
)
2404 return make_nullbytes_unique(result
);
2407 PyDoc_STRVAR(rpartition__doc__
,
2408 "B.rpartition(sep) -> (tail, sep, head)\n\
2410 Searches for the separator sep in B, starting at the end of B,\n\
2411 and returns the part before it, the separator itself, and the\n\
2412 part after it. If the separator is not found, returns two empty\n\
2413 bytearray objects and B.");
2416 bytearray_rpartition(PyByteArrayObject
*self
, PyObject
*sep_obj
)
2418 PyObject
*bytesep
, *result
;
2420 bytesep
= PyByteArray_FromObject(sep_obj
);
2424 result
= stringlib_rpartition(
2426 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
2428 PyByteArray_AS_STRING(bytesep
), PyByteArray_GET_SIZE(bytesep
)
2432 return make_nullbytes_unique(result
);
2435 Py_LOCAL_INLINE(PyObject
*)
2436 rsplit_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
2438 register Py_ssize_t i
, j
, count
=0;
2440 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2446 while ((i
>= 0) && (maxcount
-- > 0)) {
2447 for (; i
>= 0; i
--) {
2449 SPLIT_ADD(s
, i
+ 1, j
+ 1);
2456 SPLIT_ADD(s
, 0, j
+ 1);
2458 FIX_PREALLOC_SIZE(list
);
2459 if (PyList_Reverse(list
) < 0)
2469 Py_LOCAL_INLINE(PyObject
*)
2470 rsplit_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxcount
)
2472 register Py_ssize_t i
, j
, count
= 0;
2474 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2479 for (i
= j
= len
- 1; i
>= 0; ) {
2481 while (i
>= 0 && ISSPACE(s
[i
]))
2484 while (i
>= 0 && !ISSPACE(s
[i
]))
2487 if (maxcount
-- <= 0)
2489 SPLIT_ADD(s
, i
+ 1, j
+ 1);
2490 while (i
>= 0 && ISSPACE(s
[i
]))
2496 SPLIT_ADD(s
, 0, j
+ 1);
2498 FIX_PREALLOC_SIZE(list
);
2499 if (PyList_Reverse(list
) < 0)
2509 PyDoc_STRVAR(rsplit__doc__
,
2510 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2512 Return a list of the sections in B, using sep as the delimiter,\n\
2513 starting at the end of B and working to the front.\n\
2514 If sep is not given, B is split on ASCII whitespace characters\n\
2515 (space, tab, return, newline, formfeed, vertical tab).\n\
2516 If maxsplit is given, at most maxsplit splits are done.");
2519 bytearray_rsplit(PyByteArrayObject
*self
, PyObject
*args
)
2521 Py_ssize_t len
= PyByteArray_GET_SIZE(self
), n
, i
, j
;
2522 Py_ssize_t maxsplit
= -1, count
= 0;
2523 const char *s
= PyByteArray_AS_STRING(self
), *sub
;
2524 PyObject
*list
, *str
, *subobj
= Py_None
;
2527 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
2530 maxsplit
= PY_SSIZE_T_MAX
;
2532 if (subobj
== Py_None
)
2533 return rsplit_whitespace(s
, len
, maxsplit
);
2535 if (_getbuffer(subobj
, &vsub
) < 0)
2541 PyErr_SetString(PyExc_ValueError
, "empty separator");
2542 PyBuffer_Release(&vsub
);
2546 list
= rsplit_char(s
, len
, sub
[0], maxsplit
);
2547 PyBuffer_Release(&vsub
);
2551 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
2553 PyBuffer_Release(&vsub
);
2560 while ( (i
>= 0) && (maxsplit
-- > 0) ) {
2562 if (Py_STRING_MATCH(s
, i
, sub
, n
)) {
2563 SPLIT_ADD(s
, i
+ n
, j
);
2571 FIX_PREALLOC_SIZE(list
);
2572 if (PyList_Reverse(list
) < 0)
2574 PyBuffer_Release(&vsub
);
2579 PyBuffer_Release(&vsub
);
2583 PyDoc_STRVAR(reverse__doc__
,
2584 "B.reverse() -> None\n\
2586 Reverse the order of the values in B in place.");
2588 bytearray_reverse(PyByteArrayObject
*self
, PyObject
*unused
)
2590 char swap
, *head
, *tail
;
2591 Py_ssize_t i
, j
, n
= Py_SIZE(self
);
2594 head
= self
->ob_bytes
;
2595 tail
= head
+ n
- 1;
2596 for (i
= 0; i
< j
; i
++) {
2605 PyDoc_STRVAR(insert__doc__
,
2606 "B.insert(index, int) -> None\n\
2608 Insert a single item into the bytearray before the given index.");
2610 bytearray_insert(PyByteArrayObject
*self
, PyObject
*args
)
2614 Py_ssize_t where
, n
= Py_SIZE(self
);
2616 if (!PyArg_ParseTuple(args
, "nO:insert", &where
, &value
))
2619 if (n
== PY_SSIZE_T_MAX
) {
2620 PyErr_SetString(PyExc_OverflowError
,
2621 "cannot add more objects to bytes");
2624 if (!_getbytevalue(value
, &ival
))
2626 if (PyByteArray_Resize((PyObject
*)self
, n
+ 1) < 0)
2636 memmove(self
->ob_bytes
+ where
+ 1, self
->ob_bytes
+ where
, n
- where
);
2637 self
->ob_bytes
[where
] = ival
;
2642 PyDoc_STRVAR(append__doc__
,
2643 "B.append(int) -> None\n\
2645 Append a single item to the end of B.");
2647 bytearray_append(PyByteArrayObject
*self
, PyObject
*arg
)
2650 Py_ssize_t n
= Py_SIZE(self
);
2652 if (! _getbytevalue(arg
, &value
))
2654 if (n
== PY_SSIZE_T_MAX
) {
2655 PyErr_SetString(PyExc_OverflowError
,
2656 "cannot add more objects to bytes");
2659 if (PyByteArray_Resize((PyObject
*)self
, n
+ 1) < 0)
2662 self
->ob_bytes
[n
] = value
;
2667 PyDoc_STRVAR(extend__doc__
,
2668 "B.extend(iterable int) -> None\n\
2670 Append all the elements from the iterator or sequence to the\n\
2673 bytearray_extend(PyByteArrayObject
*self
, PyObject
*arg
)
2675 PyObject
*it
, *item
, *bytearray_obj
;
2676 Py_ssize_t buf_size
= 0, len
= 0;
2680 /* bytearray_setslice code only accepts something supporting PEP 3118. */
2681 if (PyObject_CheckBuffer(arg
)) {
2682 if (bytearray_setslice(self
, Py_SIZE(self
), Py_SIZE(self
), arg
) == -1)
2688 it
= PyObject_GetIter(arg
);
2692 /* Try to determine the length of the argument. 32 is abitrary. */
2693 buf_size
= _PyObject_LengthHint(arg
, 32);
2694 if (buf_size
== -1) {
2699 bytearray_obj
= PyByteArray_FromStringAndSize(NULL
, buf_size
);
2700 if (bytearray_obj
== NULL
)
2702 buf
= PyByteArray_AS_STRING(bytearray_obj
);
2704 while ((item
= PyIter_Next(it
)) != NULL
) {
2705 if (! _getbytevalue(item
, &value
)) {
2708 Py_DECREF(bytearray_obj
);
2714 if (len
>= buf_size
) {
2715 buf_size
= len
+ (len
>> 1) + 1;
2716 if (PyByteArray_Resize((PyObject
*)bytearray_obj
, buf_size
) < 0) {
2718 Py_DECREF(bytearray_obj
);
2721 /* Recompute the `buf' pointer, since the resizing operation may
2722 have invalidated it. */
2723 buf
= PyByteArray_AS_STRING(bytearray_obj
);
2728 /* Resize down to exact size. */
2729 if (PyByteArray_Resize((PyObject
*)bytearray_obj
, len
) < 0) {
2730 Py_DECREF(bytearray_obj
);
2734 if (bytearray_setslice(self
, Py_SIZE(self
), Py_SIZE(self
), bytearray_obj
) == -1)
2736 Py_DECREF(bytearray_obj
);
2741 PyDoc_STRVAR(pop__doc__
,
2742 "B.pop([index]) -> int\n\
2744 Remove and return a single item from B. If no index\n\
2745 argument is given, will pop the last value.");
2747 bytearray_pop(PyByteArrayObject
*self
, PyObject
*args
)
2750 Py_ssize_t where
= -1, n
= Py_SIZE(self
);
2752 if (!PyArg_ParseTuple(args
, "|n:pop", &where
))
2756 PyErr_SetString(PyExc_OverflowError
,
2757 "cannot pop an empty bytes");
2761 where
+= Py_SIZE(self
);
2762 if (where
< 0 || where
>= Py_SIZE(self
)) {
2763 PyErr_SetString(PyExc_IndexError
, "pop index out of range");
2766 if (!_canresize(self
))
2769 value
= self
->ob_bytes
[where
];
2770 memmove(self
->ob_bytes
+ where
, self
->ob_bytes
+ where
+ 1, n
- where
);
2771 if (PyByteArray_Resize((PyObject
*)self
, n
- 1) < 0)
2774 return PyInt_FromLong(value
);
2777 PyDoc_STRVAR(remove__doc__
,
2778 "B.remove(int) -> None\n\
2780 Remove the first occurance of a value in B.");
2782 bytearray_remove(PyByteArrayObject
*self
, PyObject
*arg
)
2785 Py_ssize_t where
, n
= Py_SIZE(self
);
2787 if (! _getbytevalue(arg
, &value
))
2790 for (where
= 0; where
< n
; where
++) {
2791 if (self
->ob_bytes
[where
] == value
)
2795 PyErr_SetString(PyExc_ValueError
, "value not found in bytes");
2798 if (!_canresize(self
))
2801 memmove(self
->ob_bytes
+ where
, self
->ob_bytes
+ where
+ 1, n
- where
);
2802 if (PyByteArray_Resize((PyObject
*)self
, n
- 1) < 0)
2808 /* XXX These two helpers could be optimized if argsize == 1 */
2811 lstrip_helper(unsigned char *myptr
, Py_ssize_t mysize
,
2812 void *argptr
, Py_ssize_t argsize
)
2815 while (i
< mysize
&& memchr(argptr
, myptr
[i
], argsize
))
2821 rstrip_helper(unsigned char *myptr
, Py_ssize_t mysize
,
2822 void *argptr
, Py_ssize_t argsize
)
2824 Py_ssize_t i
= mysize
- 1;
2825 while (i
>= 0 && memchr(argptr
, myptr
[i
], argsize
))
2830 PyDoc_STRVAR(strip__doc__
,
2831 "B.strip([bytes]) -> bytearray\n\
2833 Strip leading and trailing bytes contained in the argument.\n\
2834 If the argument is omitted, strip ASCII whitespace.");
2836 bytearray_strip(PyByteArrayObject
*self
, PyObject
*args
)
2838 Py_ssize_t left
, right
, mysize
, argsize
;
2839 void *myptr
, *argptr
;
2840 PyObject
*arg
= Py_None
;
2842 if (!PyArg_ParseTuple(args
, "|O:strip", &arg
))
2844 if (arg
== Py_None
) {
2845 argptr
= "\t\n\r\f\v ";
2849 if (_getbuffer(arg
, &varg
) < 0)
2854 myptr
= self
->ob_bytes
;
2855 mysize
= Py_SIZE(self
);
2856 left
= lstrip_helper(myptr
, mysize
, argptr
, argsize
);
2860 right
= rstrip_helper(myptr
, mysize
, argptr
, argsize
);
2862 PyBuffer_Release(&varg
);
2863 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2866 PyDoc_STRVAR(lstrip__doc__
,
2867 "B.lstrip([bytes]) -> bytearray\n\
2869 Strip leading bytes contained in the argument.\n\
2870 If the argument is omitted, strip leading ASCII whitespace.");
2872 bytearray_lstrip(PyByteArrayObject
*self
, PyObject
*args
)
2874 Py_ssize_t left
, right
, mysize
, argsize
;
2875 void *myptr
, *argptr
;
2876 PyObject
*arg
= Py_None
;
2878 if (!PyArg_ParseTuple(args
, "|O:lstrip", &arg
))
2880 if (arg
== Py_None
) {
2881 argptr
= "\t\n\r\f\v ";
2885 if (_getbuffer(arg
, &varg
) < 0)
2890 myptr
= self
->ob_bytes
;
2891 mysize
= Py_SIZE(self
);
2892 left
= lstrip_helper(myptr
, mysize
, argptr
, argsize
);
2895 PyBuffer_Release(&varg
);
2896 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2899 PyDoc_STRVAR(rstrip__doc__
,
2900 "B.rstrip([bytes]) -> bytearray\n\
2902 Strip trailing bytes contained in the argument.\n\
2903 If the argument is omitted, strip trailing ASCII whitespace.");
2905 bytearray_rstrip(PyByteArrayObject
*self
, PyObject
*args
)
2907 Py_ssize_t left
, right
, mysize
, argsize
;
2908 void *myptr
, *argptr
;
2909 PyObject
*arg
= Py_None
;
2911 if (!PyArg_ParseTuple(args
, "|O:rstrip", &arg
))
2913 if (arg
== Py_None
) {
2914 argptr
= "\t\n\r\f\v ";
2918 if (_getbuffer(arg
, &varg
) < 0)
2923 myptr
= self
->ob_bytes
;
2924 mysize
= Py_SIZE(self
);
2926 right
= rstrip_helper(myptr
, mysize
, argptr
, argsize
);
2928 PyBuffer_Release(&varg
);
2929 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2932 PyDoc_STRVAR(decode_doc
,
2933 "B.decode([encoding[, errors]]) -> unicode object.\n\
2935 Decodes B using the codec registered for encoding. encoding defaults\n\
2936 to the default encoding. errors may be given to set a different error\n\
2937 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2938 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2939 as well as any other name registered with codecs.register_error that is\n\
2940 able to handle UnicodeDecodeErrors.");
2943 bytearray_decode(PyObject
*self
, PyObject
*args
)
2945 const char *encoding
= NULL
;
2946 const char *errors
= NULL
;
2948 if (!PyArg_ParseTuple(args
, "|ss:decode", &encoding
, &errors
))
2950 if (encoding
== NULL
) {
2951 #ifdef Py_USING_UNICODE
2952 encoding
= PyUnicode_GetDefaultEncoding();
2954 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
2958 return PyCodec_Decode(self
, encoding
, errors
);
2961 PyDoc_STRVAR(alloc_doc
,
2962 "B.__alloc__() -> int\n\
2964 Returns the number of bytes actually allocated.");
2967 bytearray_alloc(PyByteArrayObject
*self
)
2969 return PyInt_FromSsize_t(self
->ob_alloc
);
2972 PyDoc_STRVAR(join_doc
,
2973 "B.join(iterable_of_bytes) -> bytes\n\
2975 Concatenates any number of bytearray objects, with B in between each pair.");
2978 bytearray_join(PyByteArrayObject
*self
, PyObject
*it
)
2981 Py_ssize_t mysize
= Py_SIZE(self
);
2985 Py_ssize_t totalsize
= 0;
2989 seq
= PySequence_Fast(it
, "can only join an iterable");
2992 n
= PySequence_Fast_GET_SIZE(seq
);
2993 items
= PySequence_Fast_ITEMS(seq
);
2995 /* Compute the total size, and check that they are all bytes */
2996 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2997 for (i
= 0; i
< n
; i
++) {
2998 PyObject
*obj
= items
[i
];
2999 if (!PyByteArray_Check(obj
) && !PyBytes_Check(obj
)) {
3000 PyErr_Format(PyExc_TypeError
,
3001 "can only join an iterable of bytes "
3002 "(item %ld has type '%.100s')",
3003 /* XXX %ld isn't right on Win64 */
3004 (long)i
, Py_TYPE(obj
)->tp_name
);
3008 totalsize
+= mysize
;
3009 totalsize
+= Py_SIZE(obj
);
3010 if (totalsize
< 0) {
3016 /* Allocate the result, and copy the bytes */
3017 result
= PyByteArray_FromStringAndSize(NULL
, totalsize
);
3020 dest
= PyByteArray_AS_STRING(result
);
3021 for (i
= 0; i
< n
; i
++) {
3022 PyObject
*obj
= items
[i
];
3023 Py_ssize_t size
= Py_SIZE(obj
);
3025 if (PyByteArray_Check(obj
))
3026 buf
= PyByteArray_AS_STRING(obj
);
3028 buf
= PyBytes_AS_STRING(obj
);
3030 memcpy(dest
, self
->ob_bytes
, mysize
);
3033 memcpy(dest
, buf
, size
);
3041 /* Error handling */
3047 PyDoc_STRVAR(fromhex_doc
,
3048 "bytearray.fromhex(string) -> bytearray\n\
3050 Create a bytearray object from a string of hexadecimal numbers.\n\
3051 Spaces between two numbers are accepted.\n\
3052 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3055 hex_digit_to_int(char c
)
3062 if (c
>= 'a' && c
<= 'f')
3063 return c
- 'a' + 10;
3069 bytearray_fromhex(PyObject
*cls
, PyObject
*args
)
3074 Py_ssize_t hexlen
, byteslen
, i
, j
;
3077 if (!PyArg_ParseTuple(args
, "s#:fromhex", &hex
, &hexlen
))
3079 byteslen
= hexlen
/2; /* This overestimates if there are spaces */
3080 newbytes
= PyByteArray_FromStringAndSize(NULL
, byteslen
);
3083 buf
= PyByteArray_AS_STRING(newbytes
);
3084 for (i
= j
= 0; i
< hexlen
; i
+= 2) {
3085 /* skip over spaces in the input */
3086 while (hex
[i
] == ' ')
3090 top
= hex_digit_to_int(hex
[i
]);
3091 bot
= hex_digit_to_int(hex
[i
+1]);
3092 if (top
== -1 || bot
== -1) {
3093 PyErr_Format(PyExc_ValueError
,
3094 "non-hexadecimal number found in "
3095 "fromhex() arg at position %zd", i
);
3098 buf
[j
++] = (top
<< 4) + bot
;
3100 if (PyByteArray_Resize(newbytes
, j
) < 0)
3105 Py_DECREF(newbytes
);
3109 PyDoc_STRVAR(reduce_doc
, "Return state information for pickling.");
3112 bytearray_reduce(PyByteArrayObject
*self
)
3114 PyObject
*latin1
, *dict
;
3116 #ifdef Py_USING_UNICODE
3117 latin1
= PyUnicode_DecodeLatin1(self
->ob_bytes
,
3118 Py_SIZE(self
), NULL
);
3120 latin1
= PyString_FromStringAndSize(self
->ob_bytes
, Py_SIZE(self
))
3123 #ifdef Py_USING_UNICODE
3124 latin1
= PyUnicode_FromString("");
3126 latin1
= PyString_FromString("");
3129 dict
= PyObject_GetAttrString((PyObject
*)self
, "__dict__");
3136 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self
), latin1
, "latin-1", dict
);
3139 PyDoc_STRVAR(sizeof_doc
,
3140 "B.__sizeof__() -> int\n\
3142 Returns the size of B in memory, in bytes");
/*
 * bytearray.__sizeof__(): report the memory footprint of `self`.
 *
 * The value is sizeof(PyByteArrayObject) plus ob_alloc * sizeof(char)
 * (ob_alloc is presumably the allocated capacity of the byte buffer --
 * confirm against the struct definition), returned as a Python int.
 */
3144 bytearray_sizeof(PyByteArrayObject
*self
)
3148 res
= sizeof(PyByteArrayObject
) + self
->ob_alloc
* sizeof(char);
3149 return PyInt_FromSsize_t(res
);
/*
 * Sequence-protocol slot table for bytearray.  Each entry is labelled with
 * the slot it fills; sq_ass_slice is left empty (0).
 */
3152 static PySequenceMethods bytearray_as_sequence
= {
3153 (lenfunc
)bytearray_length
, /* sq_length */
3154 (binaryfunc
)PyByteArray_Concat
, /* sq_concat */
3155 (ssizeargfunc
)bytearray_repeat
, /* sq_repeat */
3156 (ssizeargfunc
)bytearray_getitem
, /* sq_item */
3158 (ssizeobjargproc
)bytearray_setitem
, /* sq_ass_item */
3159 0, /* sq_ass_slice */
3160 (objobjproc
)bytearray_contains
, /* sq_contains */
3161 (binaryfunc
)bytearray_iconcat
, /* sq_inplace_concat */
3162 (ssizeargfunc
)bytearray_irepeat
, /* sq_inplace_repeat */
/*
 * Mapping-protocol slot table for bytearray: a length function, a subscript
 * getter and a subscript assignment handler, in that order.
 */
3165 static PyMappingMethods bytearray_as_mapping
= {
3166 (lenfunc
)bytearray_length
,
3167 (binaryfunc
)bytearray_subscript
,
3168 (objobjargproc
)bytearray_ass_subscript
,
/*
 * Buffer-protocol slot table for bytearray.  The first four entries are the
 * segment-based accessors (read buffer, write buffer, segment count, char
 * buffer); the last two are the getbuffer / releasebuffer pair.
 */
3171 static PyBufferProcs bytearray_as_buffer
= {
3172 (readbufferproc
)bytearray_buffer_getreadbuf
,
3173 (writebufferproc
)bytearray_buffer_getwritebuf
,
3174 (segcountproc
)bytearray_buffer_getsegcount
,
3175 (charbufferproc
)bytearray_buffer_getcharbuf
,
3176 (getbufferproc
)bytearray_getbuffer
,
3177 (releasebufferproc
)bytearray_releasebuffer
,
/*
 * Method table for bytearray.  Each entry gives the Python-visible name, the
 * C implementation, the calling-convention flags and the docstring.
 * Entries are listed alphabetically after the three double-underscore
 * methods.  "fromhex" is the only entry flagged METH_CLASS.  The
 * stringlib_* implementations are shared helpers defined outside this
 * table.
 */
3181 bytearray_methods
[] = {
3182 {"__alloc__", (PyCFunction
)bytearray_alloc
, METH_NOARGS
, alloc_doc
},
3183 {"__reduce__", (PyCFunction
)bytearray_reduce
, METH_NOARGS
, reduce_doc
},
3184 {"__sizeof__", (PyCFunction
)bytearray_sizeof
, METH_NOARGS
, sizeof_doc
},
3185 {"append", (PyCFunction
)bytearray_append
, METH_O
, append__doc__
},
3186 {"capitalize", (PyCFunction
)stringlib_capitalize
, METH_NOARGS
,
3187 _Py_capitalize__doc__
},
3188 {"center", (PyCFunction
)stringlib_center
, METH_VARARGS
, center__doc__
},
3189 {"count", (PyCFunction
)bytearray_count
, METH_VARARGS
, count__doc__
},
3190 {"decode", (PyCFunction
)bytearray_decode
, METH_VARARGS
, decode_doc
},
3191 {"endswith", (PyCFunction
)bytearray_endswith
, METH_VARARGS
, endswith__doc__
},
3192 {"expandtabs", (PyCFunction
)stringlib_expandtabs
, METH_VARARGS
,
3194 {"extend", (PyCFunction
)bytearray_extend
, METH_O
, extend__doc__
},
3195 {"find", (PyCFunction
)bytearray_find
, METH_VARARGS
, find__doc__
},
3196 {"fromhex", (PyCFunction
)bytearray_fromhex
, METH_VARARGS
|METH_CLASS
,
3198 {"index", (PyCFunction
)bytearray_index
, METH_VARARGS
, index__doc__
},
3199 {"insert", (PyCFunction
)bytearray_insert
, METH_VARARGS
, insert__doc__
},
3200 {"isalnum", (PyCFunction
)stringlib_isalnum
, METH_NOARGS
,
3201 _Py_isalnum__doc__
},
3202 {"isalpha", (PyCFunction
)stringlib_isalpha
, METH_NOARGS
,
3203 _Py_isalpha__doc__
},
3204 {"isdigit", (PyCFunction
)stringlib_isdigit
, METH_NOARGS
,
3205 _Py_isdigit__doc__
},
3206 {"islower", (PyCFunction
)stringlib_islower
, METH_NOARGS
,
3207 _Py_islower__doc__
},
3208 {"isspace", (PyCFunction
)stringlib_isspace
, METH_NOARGS
,
3209 _Py_isspace__doc__
},
3210 {"istitle", (PyCFunction
)stringlib_istitle
, METH_NOARGS
,
3211 _Py_istitle__doc__
},
3212 {"isupper", (PyCFunction
)stringlib_isupper
, METH_NOARGS
,
3213 _Py_isupper__doc__
},
3214 {"join", (PyCFunction
)bytearray_join
, METH_O
, join_doc
},
3215 {"ljust", (PyCFunction
)stringlib_ljust
, METH_VARARGS
, ljust__doc__
},
3216 {"lower", (PyCFunction
)stringlib_lower
, METH_NOARGS
, _Py_lower__doc__
},
3217 {"lstrip", (PyCFunction
)bytearray_lstrip
, METH_VARARGS
, lstrip__doc__
},
3218 {"partition", (PyCFunction
)bytearray_partition
, METH_O
, partition__doc__
},
3219 {"pop", (PyCFunction
)bytearray_pop
, METH_VARARGS
, pop__doc__
},
3220 {"remove", (PyCFunction
)bytearray_remove
, METH_O
, remove__doc__
},
3221 {"replace", (PyCFunction
)bytearray_replace
, METH_VARARGS
, replace__doc__
},
3222 {"reverse", (PyCFunction
)bytearray_reverse
, METH_NOARGS
, reverse__doc__
},
3223 {"rfind", (PyCFunction
)bytearray_rfind
, METH_VARARGS
, rfind__doc__
},
3224 {"rindex", (PyCFunction
)bytearray_rindex
, METH_VARARGS
, rindex__doc__
},
3225 {"rjust", (PyCFunction
)stringlib_rjust
, METH_VARARGS
, rjust__doc__
},
3226 {"rpartition", (PyCFunction
)bytearray_rpartition
, METH_O
, rpartition__doc__
},
3227 {"rsplit", (PyCFunction
)bytearray_rsplit
, METH_VARARGS
, rsplit__doc__
},
3228 {"rstrip", (PyCFunction
)bytearray_rstrip
, METH_VARARGS
, rstrip__doc__
},
3229 {"split", (PyCFunction
)bytearray_split
, METH_VARARGS
, split__doc__
},
3230 {"splitlines", (PyCFunction
)stringlib_splitlines
, METH_VARARGS
,
3232 {"startswith", (PyCFunction
)bytearray_startswith
, METH_VARARGS
,
3234 {"strip", (PyCFunction
)bytearray_strip
, METH_VARARGS
, strip__doc__
},
3235 {"swapcase", (PyCFunction
)stringlib_swapcase
, METH_NOARGS
,
3236 _Py_swapcase__doc__
},
3237 {"title", (PyCFunction
)stringlib_title
, METH_NOARGS
, _Py_title__doc__
},
3238 {"translate", (PyCFunction
)bytearray_translate
, METH_VARARGS
,
3240 {"upper", (PyCFunction
)stringlib_upper
, METH_NOARGS
, _Py_upper__doc__
},
3241 {"zfill", (PyCFunction
)stringlib_zfill
, METH_VARARGS
, zfill__doc__
},
/*
 * Type-level docstring for bytearray (installed as tp_doc); lists the
 * accepted constructor argument forms.
 *
 * NOTE(review): "Construct an mutable bytearray" in the text below is a
 * grammatical slip for "a mutable"; left untouched here because the string
 * is user-visible runtime text.
 */
3245 PyDoc_STRVAR(bytearray_doc
,
3246 "bytearray(iterable_of_ints) -> bytearray.\n\
3247 bytearray(string, encoding[, errors]) -> bytearray.\n\
3248 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3249 bytearray(memory_view) -> bytearray.\n\
3251 Construct an mutable bytearray object from:\n\
3252 - an iterable yielding integers in range(256)\n\
3253 - a text string encoded using the specified encoding\n\
3254 - a bytes or a bytearray object\n\
3255 - any object implementing the buffer API.\n\
3257 bytearray(int) -> bytearray.\n\
3259 Construct a zero-initialized bytearray of the given length.");
/* Forward declaration: the type object below stores bytearray_iter in its
   tp_iter slot before the function is defined. */
3262 static PyObject
*bytearray_iter(PyObject
*seq
);
/*
 * Type object for bytearray.
 *
 * Wires in the sequence, mapping and buffer slot tables, the method table
 * and the docstring.  The flags mark the type as subclassable
 * (Py_TPFLAGS_BASETYPE) and as supporting the new-style buffer interface
 * (Py_TPFLAGS_HAVE_NEWBUFFER).  Instances are created through the generic
 * alloc/new functions, initialised by bytearray_init and released with
 * PyObject_Del.
 */
3264 PyTypeObject PyByteArray_Type
= {
3265 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3267 sizeof(PyByteArrayObject
),
3269 (destructor
)bytearray_dealloc
, /* tp_dealloc */
3274 (reprfunc
)bytearray_repr
, /* tp_repr */
3275 0, /* tp_as_number */
3276 &bytearray_as_sequence
, /* tp_as_sequence */
3277 &bytearray_as_mapping
, /* tp_as_mapping */
3280 bytearray_str
, /* tp_str */
3281 PyObject_GenericGetAttr
, /* tp_getattro */
3282 0, /* tp_setattro */
3283 &bytearray_as_buffer
, /* tp_as_buffer */
3284 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
3285 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
3286 bytearray_doc
, /* tp_doc */
3287 0, /* tp_traverse */
3289 (richcmpfunc
)bytearray_richcompare
, /* tp_richcompare */
3290 0, /* tp_weaklistoffset */
3291 bytearray_iter
, /* tp_iter */
3292 0, /* tp_iternext */
3293 bytearray_methods
, /* tp_methods */
3298 0, /* tp_descr_get */
3299 0, /* tp_descr_set */
3300 0, /* tp_dictoffset */
3301 (initproc
)bytearray_init
, /* tp_init */
3302 PyType_GenericAlloc
, /* tp_alloc */
3303 PyType_GenericNew
, /* tp_new */
3304 PyObject_Del
, /* tp_free */
3307 /*********************** Bytes Iterator ****************************/
/* Iterator state fields: the position of the next byte to yield and the
   bytearray being iterated over. */
3311 Py_ssize_t it_index
;
3312 PyByteArrayObject
*it_seq
; /* Set to NULL when iterator is exhausted */
/*
 * Destructor for the bytearray iterator: removes the object from GC
 * tracking, drops the reference to the underlying bytearray (it_seq may
 * already be NULL, hence Py_XDECREF) and frees the GC-allocated object.
 */
3316 bytearrayiter_dealloc(bytesiterobject
*it
)
3318 _PyObject_GC_UNTRACK(it
);
3319 Py_XDECREF(it
->it_seq
);
3320 PyObject_GC_Del(it
);
/* GC traversal hook for the bytearray iterator: it_seq is the only object
   reference visited. */
3324 bytearrayiter_traverse(bytesiterobject
*it
, visitproc visit
, void *arg
)
3326 Py_VISIT(it
->it_seq
);
/*
 * tp_iternext for the bytearray iterator.
 *
 * While it_index is below the current size of the sequence, the byte at
 * that index is returned as a Python int; the cast to unsigned char keeps
 * the value non-negative even where plain char is signed.
 *
 * NOTE(review): advancing it_index and the exhausted-iterator path are not
 * visible in this listing -- confirm against the full source.
 */
3331 bytearrayiter_next(bytesiterobject
*it
)
3333 PyByteArrayObject
*seq
;
3340 assert(PyByteArray_Check(seq
));
3342 if (it
->it_index
< PyByteArray_GET_SIZE(seq
)) {
3343 item
= PyInt_FromLong(
3344 (unsigned char)seq
->ob_bytes
[it
->it_index
]);
/*
 * __length_hint__ for the bytearray iterator: the number of bytes still to
 * be yielded, computed as the sequence size minus it_index and returned as
 * a Python int.
 *
 * NOTE(review): it_seq is documented as NULL once the iterator is
 * exhausted; any guard for that case is not visible in this listing.
 */
3356 bytesarrayiter_length_hint(bytesiterobject
*it
)
3360 len
= PyByteArray_GET_SIZE(it
->it_seq
) - it
->it_index
;
3361 return PyInt_FromSsize_t(len
);
/* Docstring attached to the iterator's __length_hint__ method. */
3364 PyDoc_STRVAR(length_hint_doc
,
3365 "Private method returning an estimate of len(list(it)).");
/* Method table for the bytearray iterator: only __length_hint__, followed
   by the terminating sentinel entry. */
3367 static PyMethodDef bytearrayiter_methods
[] = {
3368 {"__length_hint__", (PyCFunction
)bytesarrayiter_length_hint
, METH_NOARGS
,
3370 {NULL
, NULL
} /* sentinel */
/*
 * Type object for "bytearray_iterator".
 *
 * The type participates in cyclic GC (Py_TPFLAGS_HAVE_GC with a tp_traverse
 * hook), returns itself from tp_iter via PyObject_SelfIter, and produces
 * items through bytearrayiter_next.
 */
3373 PyTypeObject PyByteArrayIter_Type
= {
3374 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3375 "bytearray_iterator", /* tp_name */
3376 sizeof(bytesiterobject
), /* tp_basicsize */
3377 0, /* tp_itemsize */
3379 (destructor
)bytearrayiter_dealloc
, /* tp_dealloc */
3385 0, /* tp_as_number */
3386 0, /* tp_as_sequence */
3387 0, /* tp_as_mapping */
3391 PyObject_GenericGetAttr
, /* tp_getattro */
3392 0, /* tp_setattro */
3393 0, /* tp_as_buffer */
3394 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
, /* tp_flags */
3396 (traverseproc
)bytearrayiter_traverse
, /* tp_traverse */
3398 0, /* tp_richcompare */
3399 0, /* tp_weaklistoffset */
3400 PyObject_SelfIter
, /* tp_iter */
3401 (iternextfunc
)bytearrayiter_next
, /* tp_iternext */
3402 bytearrayiter_methods
, /* tp_methods */
/*
 * tp_iter for bytearray: create a new iterator over `seq`.
 *
 * A non-bytearray argument is reported through PyErr_BadInternalCall().
 * Otherwise a GC-allocated bytesiterobject is created, pointed at `seq`,
 * registered with the garbage collector and returned.
 *
 * NOTE(review): the initialisation of it_index and the reference taken on
 * `seq` are not visible in this listing -- confirm against the full source.
 */
3407 bytearray_iter(PyObject
*seq
)
3409 bytesiterobject
*it
;
3411 if (!PyByteArray_Check(seq
)) {
3412 PyErr_BadInternalCall();
3415 it
= PyObject_GC_New(bytesiterobject
, &PyByteArrayIter_Type
);
3420 it
->it_seq
= (PyByteArrayObject
*)seq
;
3421 _PyObject_GC_TRACK(it
);
3422 return (PyObject
*)it
;