1 /* PyBytes (bytearray) implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "structmember.h"
6 #include "bytes_methods.h"
8 static PyByteArrayObject
*nullbytes
= NULL
;
11 PyByteArray_Fini(void)
17 PyByteArray_Init(void)
19 nullbytes
= PyObject_New(PyByteArrayObject
, &PyByteArray_Type
);
20 if (nullbytes
== NULL
)
22 nullbytes
->ob_bytes
= NULL
;
23 Py_SIZE(nullbytes
) = nullbytes
->ob_alloc
= 0;
24 nullbytes
->ob_exports
= 0;
28 /* end nullbytes support */
33 _getbytevalue(PyObject
* arg
, int *value
)
37 if (PyBytes_CheckExact(arg
)) {
38 if (Py_SIZE(arg
) != 1) {
39 PyErr_SetString(PyExc_ValueError
, "string must be of size 1");
42 *value
= Py_CHARMASK(((PyBytesObject
*)arg
)->ob_sval
[0]);
45 else if (PyInt_Check(arg
) || PyLong_Check(arg
)) {
46 face_value
= PyLong_AsLong(arg
);
49 PyObject
*index
= PyNumber_Index(arg
);
51 PyErr_Format(PyExc_TypeError
,
52 "an integer or string of size 1 is required");
55 face_value
= PyLong_AsLong(index
);
59 if (face_value
< 0 || face_value
>= 256) {
60 /* this includes the OverflowError in case the long is too large */
61 PyErr_SetString(PyExc_ValueError
, "byte must be in range(0, 256)");
70 bytearray_buffer_getreadbuf(PyByteArrayObject
*self
, Py_ssize_t index
, const void **ptr
)
73 PyErr_SetString(PyExc_SystemError
,
74 "accessing non-existent bytes segment");
77 *ptr
= (void *)self
->ob_bytes
;
82 bytearray_buffer_getwritebuf(PyByteArrayObject
*self
, Py_ssize_t index
, const void **ptr
)
85 PyErr_SetString(PyExc_SystemError
,
86 "accessing non-existent bytes segment");
89 *ptr
= (void *)self
->ob_bytes
;
94 bytearray_buffer_getsegcount(PyByteArrayObject
*self
, Py_ssize_t
*lenp
)
97 *lenp
= Py_SIZE(self
);
102 bytearray_buffer_getcharbuf(PyByteArrayObject
*self
, Py_ssize_t index
, const char **ptr
)
105 PyErr_SetString(PyExc_SystemError
,
106 "accessing non-existent bytes segment");
109 *ptr
= self
->ob_bytes
;
110 return Py_SIZE(self
);
114 bytearray_getbuffer(PyByteArrayObject
*obj
, Py_buffer
*view
, int flags
)
122 if (obj
->ob_bytes
== NULL
)
126 ret
= PyBuffer_FillInfo(view
, (PyObject
*)obj
, ptr
, Py_SIZE(obj
), 0, flags
);
134 bytearray_releasebuffer(PyByteArrayObject
*obj
, Py_buffer
*view
)
140 _getbuffer(PyObject
*obj
, Py_buffer
*view
)
142 PyBufferProcs
*buffer
= Py_TYPE(obj
)->tp_as_buffer
;
144 if (buffer
== NULL
|| buffer
->bf_getbuffer
== NULL
)
146 PyErr_Format(PyExc_TypeError
,
147 "Type %.100s doesn't support the buffer API",
148 Py_TYPE(obj
)->tp_name
);
152 if (buffer
->bf_getbuffer(obj
, view
, PyBUF_SIMPLE
) < 0)
158 _canresize(PyByteArrayObject
*self
)
160 if (self
->ob_exports
> 0) {
161 PyErr_SetString(PyExc_BufferError
,
162 "Existing exports of data: object cannot be re-sized");
168 /* Direct API functions */
171 PyByteArray_FromObject(PyObject
*input
)
173 return PyObject_CallFunctionObjArgs((PyObject
*)&PyByteArray_Type
,
178 PyByteArray_FromStringAndSize(const char *bytes
, Py_ssize_t size
)
180 PyByteArrayObject
*new;
184 PyErr_SetString(PyExc_SystemError
,
185 "Negative size passed to PyByteArray_FromStringAndSize");
189 new = PyObject_New(PyByteArrayObject
, &PyByteArray_Type
);
194 new->ob_bytes
= NULL
;
199 new->ob_bytes
= PyMem_Malloc(alloc
);
200 if (new->ob_bytes
== NULL
) {
202 return PyErr_NoMemory();
205 memcpy(new->ob_bytes
, bytes
, size
);
206 new->ob_bytes
[size
] = '\0'; /* Trailing null byte */
209 new->ob_alloc
= alloc
;
212 return (PyObject
*)new;
216 PyByteArray_Size(PyObject
*self
)
218 assert(self
!= NULL
);
219 assert(PyByteArray_Check(self
));
221 return PyByteArray_GET_SIZE(self
);
225 PyByteArray_AsString(PyObject
*self
)
227 assert(self
!= NULL
);
228 assert(PyByteArray_Check(self
));
230 return PyByteArray_AS_STRING(self
);
234 PyByteArray_Resize(PyObject
*self
, Py_ssize_t size
)
237 Py_ssize_t alloc
= ((PyByteArrayObject
*)self
)->ob_alloc
;
239 assert(self
!= NULL
);
240 assert(PyByteArray_Check(self
));
243 if (size
== Py_SIZE(self
)) {
246 if (!_canresize((PyByteArrayObject
*)self
)) {
250 if (size
< alloc
/ 2) {
251 /* Major downsize; resize down to exact size */
254 else if (size
< alloc
) {
255 /* Within allocated size; quick exit */
256 Py_SIZE(self
) = size
;
257 ((PyByteArrayObject
*)self
)->ob_bytes
[size
] = '\0'; /* Trailing null */
260 else if (size
<= alloc
* 1.125) {
261 /* Moderate upsize; overallocate similar to list_resize() */
262 alloc
= size
+ (size
>> 3) + (size
< 9 ? 3 : 6);
265 /* Major upsize; resize up to exact size */
269 sval
= PyMem_Realloc(((PyByteArrayObject
*)self
)->ob_bytes
, alloc
);
275 ((PyByteArrayObject
*)self
)->ob_bytes
= sval
;
276 Py_SIZE(self
) = size
;
277 ((PyByteArrayObject
*)self
)->ob_alloc
= alloc
;
278 ((PyByteArrayObject
*)self
)->ob_bytes
[size
] = '\0'; /* Trailing null byte */
284 PyByteArray_Concat(PyObject
*a
, PyObject
*b
)
288 PyByteArrayObject
*result
= NULL
;
292 if (_getbuffer(a
, &va
) < 0 ||
293 _getbuffer(b
, &vb
) < 0) {
294 PyErr_Format(PyExc_TypeError
, "can't concat %.100s to %.100s",
295 Py_TYPE(a
)->tp_name
, Py_TYPE(b
)->tp_name
);
299 size
= va
.len
+ vb
.len
;
305 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, size
);
306 if (result
!= NULL
) {
307 memcpy(result
->ob_bytes
, va
.buf
, va
.len
);
308 memcpy(result
->ob_bytes
+ va
.len
, vb
.buf
, vb
.len
);
313 PyBuffer_Release(&va
);
315 PyBuffer_Release(&vb
);
316 return (PyObject
*)result
;
319 /* Functions stuffed into the type object */
322 bytearray_length(PyByteArrayObject
*self
)
324 return Py_SIZE(self
);
328 bytearray_iconcat(PyByteArrayObject
*self
, PyObject
*other
)
334 if (_getbuffer(other
, &vo
) < 0) {
335 PyErr_Format(PyExc_TypeError
, "can't concat %.100s to %.100s",
336 Py_TYPE(other
)->tp_name
, Py_TYPE(self
)->tp_name
);
340 mysize
= Py_SIZE(self
);
341 size
= mysize
+ vo
.len
;
343 PyBuffer_Release(&vo
);
344 return PyErr_NoMemory();
346 if (size
< self
->ob_alloc
) {
347 Py_SIZE(self
) = size
;
348 self
->ob_bytes
[Py_SIZE(self
)] = '\0'; /* Trailing null byte */
350 else if (PyByteArray_Resize((PyObject
*)self
, size
) < 0) {
351 PyBuffer_Release(&vo
);
354 memcpy(self
->ob_bytes
+ mysize
, vo
.buf
, vo
.len
);
355 PyBuffer_Release(&vo
);
357 return (PyObject
*)self
;
361 bytearray_repeat(PyByteArrayObject
*self
, Py_ssize_t count
)
363 PyByteArrayObject
*result
;
369 mysize
= Py_SIZE(self
);
370 size
= mysize
* count
;
371 if (count
!= 0 && size
/ count
!= mysize
)
372 return PyErr_NoMemory();
373 result
= (PyByteArrayObject
*)PyByteArray_FromStringAndSize(NULL
, size
);
374 if (result
!= NULL
&& size
!= 0) {
376 memset(result
->ob_bytes
, self
->ob_bytes
[0], size
);
379 for (i
= 0; i
< count
; i
++)
380 memcpy(result
->ob_bytes
+ i
*mysize
, self
->ob_bytes
, mysize
);
383 return (PyObject
*)result
;
387 bytearray_irepeat(PyByteArrayObject
*self
, Py_ssize_t count
)
394 mysize
= Py_SIZE(self
);
395 size
= mysize
* count
;
396 if (count
!= 0 && size
/ count
!= mysize
)
397 return PyErr_NoMemory();
398 if (size
< self
->ob_alloc
) {
399 Py_SIZE(self
) = size
;
400 self
->ob_bytes
[Py_SIZE(self
)] = '\0'; /* Trailing null byte */
402 else if (PyByteArray_Resize((PyObject
*)self
, size
) < 0)
406 memset(self
->ob_bytes
, self
->ob_bytes
[0], size
);
409 for (i
= 1; i
< count
; i
++)
410 memcpy(self
->ob_bytes
+ i
*mysize
, self
->ob_bytes
, mysize
);
414 return (PyObject
*)self
;
418 bytearray_getitem(PyByteArrayObject
*self
, Py_ssize_t i
)
422 if (i
< 0 || i
>= Py_SIZE(self
)) {
423 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
426 return PyInt_FromLong((unsigned char)(self
->ob_bytes
[i
]));
430 bytearray_subscript(PyByteArrayObject
*self
, PyObject
*index
)
432 if (PyIndex_Check(index
)) {
433 Py_ssize_t i
= PyNumber_AsSsize_t(index
, PyExc_IndexError
);
435 if (i
== -1 && PyErr_Occurred())
439 i
+= PyByteArray_GET_SIZE(self
);
441 if (i
< 0 || i
>= Py_SIZE(self
)) {
442 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
445 return PyInt_FromLong((unsigned char)(self
->ob_bytes
[i
]));
447 else if (PySlice_Check(index
)) {
448 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
449 if (PySlice_GetIndicesEx((PySliceObject
*)index
,
450 PyByteArray_GET_SIZE(self
),
451 &start
, &stop
, &step
, &slicelength
) < 0) {
455 if (slicelength
<= 0)
456 return PyByteArray_FromStringAndSize("", 0);
457 else if (step
== 1) {
458 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ start
,
462 char *source_buf
= PyByteArray_AS_STRING(self
);
463 char *result_buf
= (char *)PyMem_Malloc(slicelength
);
466 if (result_buf
== NULL
)
467 return PyErr_NoMemory();
469 for (cur
= start
, i
= 0; i
< slicelength
;
471 result_buf
[i
] = source_buf
[cur
];
473 result
= PyByteArray_FromStringAndSize(result_buf
, slicelength
);
474 PyMem_Free(result_buf
);
479 PyErr_SetString(PyExc_TypeError
, "bytearray indices must be integers");
485 bytearray_setslice(PyByteArrayObject
*self
, Py_ssize_t lo
, Py_ssize_t hi
,
488 Py_ssize_t avail
, needed
;
494 if (values
== (PyObject
*)self
) {
495 /* Make a copy and call this function recursively */
497 values
= PyByteArray_FromObject(values
);
500 err
= bytearray_setslice(self
, lo
, hi
, values
);
504 if (values
== NULL
) {
510 if (_getbuffer(values
, &vbytes
) < 0) {
511 PyErr_Format(PyExc_TypeError
,
512 "can't set bytearray slice from %.100s",
513 Py_TYPE(values
)->tp_name
);
524 if (hi
> Py_SIZE(self
))
531 if (avail
!= needed
) {
532 if (avail
> needed
) {
533 if (!_canresize(self
)) {
539 | |<----avail----->|<-----tomove------>|
540 | |<-needed->|<-----tomove------>|
543 memmove(self
->ob_bytes
+ lo
+ needed
, self
->ob_bytes
+ hi
,
546 /* XXX(nnorwitz): need to verify this can't overflow! */
547 if (PyByteArray_Resize((PyObject
*)self
,
548 Py_SIZE(self
) + needed
- avail
) < 0) {
552 if (avail
< needed
) {
555 | |<-avail->|<-----tomove------>|
556 | |<----needed---->|<-----tomove------>|
559 memmove(self
->ob_bytes
+ lo
+ needed
, self
->ob_bytes
+ hi
,
560 Py_SIZE(self
) - lo
- needed
);
565 memcpy(self
->ob_bytes
+ lo
, bytes
, needed
);
569 if (vbytes
.len
!= -1)
570 PyBuffer_Release(&vbytes
);
575 bytearray_setitem(PyByteArrayObject
*self
, Py_ssize_t i
, PyObject
*value
)
582 if (i
< 0 || i
>= Py_SIZE(self
)) {
583 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
588 return bytearray_setslice(self
, i
, i
+1, NULL
);
590 if (!_getbytevalue(value
, &ival
))
593 self
->ob_bytes
[i
] = ival
;
598 bytearray_ass_subscript(PyByteArrayObject
*self
, PyObject
*index
, PyObject
*values
)
600 Py_ssize_t start
, stop
, step
, slicelen
, needed
;
603 if (PyIndex_Check(index
)) {
604 Py_ssize_t i
= PyNumber_AsSsize_t(index
, PyExc_IndexError
);
606 if (i
== -1 && PyErr_Occurred())
610 i
+= PyByteArray_GET_SIZE(self
);
612 if (i
< 0 || i
>= Py_SIZE(self
)) {
613 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
617 if (values
== NULL
) {
618 /* Fall through to slice assignment */
626 if (!_getbytevalue(values
, &ival
))
628 self
->ob_bytes
[i
] = (char)ival
;
632 else if (PySlice_Check(index
)) {
633 if (PySlice_GetIndicesEx((PySliceObject
*)index
,
634 PyByteArray_GET_SIZE(self
),
635 &start
, &stop
, &step
, &slicelen
) < 0) {
640 PyErr_SetString(PyExc_TypeError
, "bytearray indices must be integer");
644 if (values
== NULL
) {
648 else if (values
== (PyObject
*)self
|| !PyByteArray_Check(values
)) {
649 /* Make a copy and call this function recursively */
651 values
= PyByteArray_FromObject(values
);
654 err
= bytearray_ass_subscript(self
, index
, values
);
659 assert(PyByteArray_Check(values
));
660 bytes
= ((PyByteArrayObject
*)values
)->ob_bytes
;
661 needed
= Py_SIZE(values
);
663 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
664 if ((step
< 0 && start
< stop
) ||
665 (step
> 0 && start
> stop
))
668 if (slicelen
!= needed
) {
669 if (!_canresize(self
))
671 if (slicelen
> needed
) {
673 0 start stop old_size
674 | |<---slicelen--->|<-----tomove------>|
675 | |<-needed->|<-----tomove------>|
678 memmove(self
->ob_bytes
+ start
+ needed
, self
->ob_bytes
+ stop
,
679 Py_SIZE(self
) - stop
);
681 if (PyByteArray_Resize((PyObject
*)self
,
682 Py_SIZE(self
) + needed
- slicelen
) < 0)
684 if (slicelen
< needed
) {
687 | |<-avail->|<-----tomove------>|
688 | |<----needed---->|<-----tomove------>|
691 memmove(self
->ob_bytes
+ start
+ needed
, self
->ob_bytes
+ stop
,
692 Py_SIZE(self
) - start
- needed
);
697 memcpy(self
->ob_bytes
+ start
, bytes
, needed
);
706 if (!_canresize(self
))
710 start
= stop
+ step
* (slicelen
- 1) - 1;
713 for (cur
= start
, i
= 0;
714 i
< slicelen
; cur
+= step
, i
++) {
715 Py_ssize_t lim
= step
- 1;
717 if (cur
+ step
>= PyByteArray_GET_SIZE(self
))
718 lim
= PyByteArray_GET_SIZE(self
) - cur
- 1;
720 memmove(self
->ob_bytes
+ cur
- i
,
721 self
->ob_bytes
+ cur
+ 1, lim
);
723 /* Move the tail of the bytes, in one chunk */
724 cur
= start
+ slicelen
*step
;
725 if (cur
< PyByteArray_GET_SIZE(self
)) {
726 memmove(self
->ob_bytes
+ cur
- slicelen
,
727 self
->ob_bytes
+ cur
,
728 PyByteArray_GET_SIZE(self
) - cur
);
730 if (PyByteArray_Resize((PyObject
*)self
,
731 PyByteArray_GET_SIZE(self
) - slicelen
) < 0)
740 if (needed
!= slicelen
) {
741 PyErr_Format(PyExc_ValueError
,
742 "attempt to assign bytes of size %zd "
743 "to extended slice of size %zd",
747 for (cur
= start
, i
= 0; i
< slicelen
; cur
+= step
, i
++)
748 self
->ob_bytes
[cur
] = bytes
[i
];
755 bytearray_init(PyByteArrayObject
*self
, PyObject
*args
, PyObject
*kwds
)
757 static char *kwlist
[] = {"source", "encoding", "errors", 0};
758 PyObject
*arg
= NULL
;
759 const char *encoding
= NULL
;
760 const char *errors
= NULL
;
763 PyObject
*(*iternext
)(PyObject
*);
765 if (Py_SIZE(self
) != 0) {
766 /* Empty previous contents (yes, do this first of all!) */
767 if (PyByteArray_Resize((PyObject
*)self
, 0) < 0)
771 /* Parse arguments */
772 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|Oss:bytearray", kwlist
,
773 &arg
, &encoding
, &errors
))
776 /* Make a quick exit if no first argument */
778 if (encoding
!= NULL
|| errors
!= NULL
) {
779 PyErr_SetString(PyExc_TypeError
,
780 "encoding or errors without sequence argument");
786 if (PyBytes_Check(arg
)) {
787 PyObject
*new, *encoded
;
788 if (encoding
!= NULL
) {
789 encoded
= PyCodec_Encode(arg
, encoding
, errors
);
792 assert(PyBytes_Check(encoded
));
798 new = bytearray_iconcat(self
, arg
);
806 #ifdef Py_USING_UNICODE
807 if (PyUnicode_Check(arg
)) {
808 /* Encode via the codec registry */
809 PyObject
*encoded
, *new;
810 if (encoding
== NULL
) {
811 PyErr_SetString(PyExc_TypeError
,
812 "unicode argument without an encoding");
815 encoded
= PyCodec_Encode(arg
, encoding
, errors
);
818 assert(PyBytes_Check(encoded
));
819 new = bytearray_iconcat(self
, encoded
);
828 /* If it's not unicode, there can't be encoding or errors */
829 if (encoding
!= NULL
|| errors
!= NULL
) {
830 PyErr_SetString(PyExc_TypeError
,
831 "encoding or errors without a string argument");
836 count
= PyNumber_AsSsize_t(arg
, PyExc_ValueError
);
837 if (count
== -1 && PyErr_Occurred())
841 PyErr_SetString(PyExc_ValueError
, "negative count");
845 if (PyByteArray_Resize((PyObject
*)self
, count
))
847 memset(self
->ob_bytes
, 0, count
);
852 /* Use the buffer API */
853 if (PyObject_CheckBuffer(arg
)) {
856 if (PyObject_GetBuffer(arg
, &view
, PyBUF_FULL_RO
) < 0)
859 if (PyByteArray_Resize((PyObject
*)self
, size
) < 0) goto fail
;
860 if (PyBuffer_ToContiguous(self
->ob_bytes
, &view
, size
, 'C') < 0)
862 PyBuffer_Release(&view
);
865 PyBuffer_Release(&view
);
869 /* XXX Optimize this if the argument is a list or tuple */
871 /* Get the iterator */
872 it
= PyObject_GetIter(arg
);
875 iternext
= *Py_TYPE(it
)->tp_iternext
;
877 /* Run the iterator to exhaustion */
882 /* Get the next item */
885 if (PyErr_Occurred()) {
886 if (!PyErr_ExceptionMatches(PyExc_StopIteration
))
893 /* Interpret it as an int (__index__) */
894 rc
= _getbytevalue(item
, &value
);
899 /* Append the byte */
900 if (Py_SIZE(self
) < self
->ob_alloc
)
902 else if (PyByteArray_Resize((PyObject
*)self
, Py_SIZE(self
)+1) < 0)
904 self
->ob_bytes
[Py_SIZE(self
)-1] = value
;
907 /* Clean up and return success */
912 /* Error handling when it != NULL */
917 /* Mostly copied from string_repr, but without the
918 "smart quote" functionality. */
920 bytearray_repr(PyByteArrayObject
*self
)
922 static const char *hexdigits
= "0123456789abcdef";
923 const char *quote_prefix
= "bytearray(b";
924 const char *quote_postfix
= ")";
925 Py_ssize_t length
= Py_SIZE(self
);
926 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
927 size_t newsize
= 14 + 4 * length
;
929 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 - 3 != length
) {
930 PyErr_SetString(PyExc_OverflowError
,
931 "bytearray object is too large to make repr");
934 v
= PyString_FromStringAndSize(NULL
, newsize
);
939 register Py_ssize_t i
;
944 /* Figure out which quote to use; single is preferred */
948 start
= PyByteArray_AS_STRING(self
);
949 for (test
= start
; test
< start
+length
; ++test
) {
951 quote
= '\''; /* back to single */
954 else if (*test
== '\'')
961 p
= PyString_AS_STRING(v
);
962 while (*quote_prefix
)
963 *p
++ = *quote_prefix
++;
966 for (i
= 0; i
< length
; i
++) {
967 /* There's at least enough room for a hex escape
968 and a closing quote. */
969 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 5);
970 c
= self
->ob_bytes
[i
];
971 if (c
== '\'' || c
== '\\')
972 *p
++ = '\\', *p
++ = c
;
974 *p
++ = '\\', *p
++ = 't';
976 *p
++ = '\\', *p
++ = 'n';
978 *p
++ = '\\', *p
++ = 'r';
980 *p
++ = '\\', *p
++ = 'x', *p
++ = '0', *p
++ = '0';
981 else if (c
< ' ' || c
>= 0x7f) {
984 *p
++ = hexdigits
[(c
& 0xf0) >> 4];
985 *p
++ = hexdigits
[c
& 0xf];
990 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 1);
992 while (*quote_postfix
) {
993 *p
++ = *quote_postfix
++;
996 if (_PyString_Resize(&v
, (p
- PyString_AS_STRING(v
)))) {
1005 bytearray_str(PyObject
*op
)
1008 if (Py_BytesWarningFlag
) {
1009 if (PyErr_WarnEx(PyExc_BytesWarning
,
1010 "str() on a bytearray instance", 1))
1013 return bytearray_repr((PyByteArrayObject
*)op
);
1015 return PyBytes_FromStringAndSize(((PyByteArrayObject
*)op
)->ob_bytes
, Py_SIZE(op
));
1019 bytearray_richcompare(PyObject
*self
, PyObject
*other
, int op
)
1021 Py_ssize_t self_size
, other_size
;
1022 Py_buffer self_bytes
, other_bytes
;
1027 /* Bytes can be compared to anything that supports the (binary)
1028 buffer API. Except that a comparison with Unicode is always an
1029 error, even if the comparison is for equality. */
1030 #ifdef Py_USING_UNICODE
1031 if (PyObject_IsInstance(self
, (PyObject
*)&PyUnicode_Type
) ||
1032 PyObject_IsInstance(other
, (PyObject
*)&PyUnicode_Type
)) {
1033 if (Py_BytesWarningFlag
&& op
== Py_EQ
) {
1034 if (PyErr_WarnEx(PyExc_BytesWarning
,
1035 "Comparsion between bytearray and string", 1))
1039 Py_INCREF(Py_NotImplemented
);
1040 return Py_NotImplemented
;
1044 self_size
= _getbuffer(self
, &self_bytes
);
1045 if (self_size
< 0) {
1047 Py_INCREF(Py_NotImplemented
);
1048 return Py_NotImplemented
;
1051 other_size
= _getbuffer(other
, &other_bytes
);
1052 if (other_size
< 0) {
1054 PyBuffer_Release(&self_bytes
);
1055 Py_INCREF(Py_NotImplemented
);
1056 return Py_NotImplemented
;
1059 if (self_size
!= other_size
&& (op
== Py_EQ
|| op
== Py_NE
)) {
1060 /* Shortcut: if the lengths differ, the objects differ */
1061 cmp
= (op
== Py_NE
);
1064 minsize
= self_size
;
1065 if (other_size
< minsize
)
1066 minsize
= other_size
;
1068 cmp
= memcmp(self_bytes
.buf
, other_bytes
.buf
, minsize
);
1069 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1072 if (self_size
< other_size
)
1074 else if (self_size
> other_size
)
1079 case Py_LT
: cmp
= cmp
< 0; break;
1080 case Py_LE
: cmp
= cmp
<= 0; break;
1081 case Py_EQ
: cmp
= cmp
== 0; break;
1082 case Py_NE
: cmp
= cmp
!= 0; break;
1083 case Py_GT
: cmp
= cmp
> 0; break;
1084 case Py_GE
: cmp
= cmp
>= 0; break;
1088 res
= cmp
? Py_True
: Py_False
;
1089 PyBuffer_Release(&self_bytes
);
1090 PyBuffer_Release(&other_bytes
);
1096 bytearray_dealloc(PyByteArrayObject
*self
)
1098 if (self
->ob_exports
> 0) {
1099 PyErr_SetString(PyExc_SystemError
,
1100 "deallocated bytearray object has exported buffers");
1103 if (self
->ob_bytes
!= 0) {
1104 PyMem_Free(self
->ob_bytes
);
1106 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1110 /* -------------------------------------------------------------------- */
1113 #define STRINGLIB_CHAR char
1114 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1115 #define STRINGLIB_STR PyByteArray_AS_STRING
1116 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1117 #define STRINGLIB_EMPTY nullbytes
1118 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1119 #define STRINGLIB_MUTABLE 1
1120 #define FROM_BYTEARRAY 1
1122 #include "stringlib/fastsearch.h"
1123 #include "stringlib/count.h"
1124 #include "stringlib/find.h"
1125 #include "stringlib/partition.h"
1126 #include "stringlib/ctype.h"
1127 #include "stringlib/transmogrify.h"
1130 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1131 were copied from the old char* style string object. */
1133 Py_LOCAL_INLINE(void)
1134 _adjust_indices(Py_ssize_t
*start
, Py_ssize_t
*end
, Py_ssize_t len
)
1149 Py_LOCAL_INLINE(Py_ssize_t
)
1150 bytearray_find_internal(PyByteArrayObject
*self
, PyObject
*args
, int dir
)
1154 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1157 if (!PyArg_ParseTuple(args
, "O|O&O&:find/rfind/index/rindex", &subobj
,
1158 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1160 if (_getbuffer(subobj
, &subbuf
) < 0)
1163 res
= stringlib_find_slice(
1164 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
1165 subbuf
.buf
, subbuf
.len
, start
, end
);
1167 res
= stringlib_rfind_slice(
1168 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
1169 subbuf
.buf
, subbuf
.len
, start
, end
);
1170 PyBuffer_Release(&subbuf
);
1174 PyDoc_STRVAR(find__doc__
,
1175 "B.find(sub [,start [,end]]) -> int\n\
1177 Return the lowest index in B where subsection sub is found,\n\
1178 such that sub is contained within s[start,end]. Optional\n\
1179 arguments start and end are interpreted as in slice notation.\n\
1181 Return -1 on failure.");
1184 bytearray_find(PyByteArrayObject
*self
, PyObject
*args
)
1186 Py_ssize_t result
= bytearray_find_internal(self
, args
, +1);
1189 return PyInt_FromSsize_t(result
);
1192 PyDoc_STRVAR(count__doc__
,
1193 "B.count(sub [,start [,end]]) -> int\n\
1195 Return the number of non-overlapping occurrences of subsection sub in\n\
1196 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1197 as in slice notation.");
1200 bytearray_count(PyByteArrayObject
*self
, PyObject
*args
)
1203 const char *str
= PyByteArray_AS_STRING(self
);
1204 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
1206 PyObject
*count_obj
;
1208 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
1209 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1212 if (_getbuffer(sub_obj
, &vsub
) < 0)
1215 _adjust_indices(&start
, &end
, PyByteArray_GET_SIZE(self
));
1217 count_obj
= PyInt_FromSsize_t(
1218 stringlib_count(str
+ start
, end
- start
, vsub
.buf
, vsub
.len
)
1220 PyBuffer_Release(&vsub
);
1225 PyDoc_STRVAR(index__doc__
,
1226 "B.index(sub [,start [,end]]) -> int\n\
1228 Like B.find() but raise ValueError when the subsection is not found.");
1231 bytearray_index(PyByteArrayObject
*self
, PyObject
*args
)
1233 Py_ssize_t result
= bytearray_find_internal(self
, args
, +1);
1237 PyErr_SetString(PyExc_ValueError
,
1238 "subsection not found");
1241 return PyInt_FromSsize_t(result
);
1245 PyDoc_STRVAR(rfind__doc__
,
1246 "B.rfind(sub [,start [,end]]) -> int\n\
1248 Return the highest index in B where subsection sub is found,\n\
1249 such that sub is contained within s[start,end]. Optional\n\
1250 arguments start and end are interpreted as in slice notation.\n\
1252 Return -1 on failure.");
1255 bytearray_rfind(PyByteArrayObject
*self
, PyObject
*args
)
1257 Py_ssize_t result
= bytearray_find_internal(self
, args
, -1);
1260 return PyInt_FromSsize_t(result
);
1264 PyDoc_STRVAR(rindex__doc__
,
1265 "B.rindex(sub [,start [,end]]) -> int\n\
1267 Like B.rfind() but raise ValueError when the subsection is not found.");
1270 bytearray_rindex(PyByteArrayObject
*self
, PyObject
*args
)
1272 Py_ssize_t result
= bytearray_find_internal(self
, args
, -1);
1276 PyErr_SetString(PyExc_ValueError
,
1277 "subsection not found");
1280 return PyInt_FromSsize_t(result
);
1285 bytearray_contains(PyObject
*self
, PyObject
*arg
)
1287 Py_ssize_t ival
= PyNumber_AsSsize_t(arg
, PyExc_ValueError
);
1288 if (ival
== -1 && PyErr_Occurred()) {
1292 if (_getbuffer(arg
, &varg
) < 0)
1294 pos
= stringlib_find(PyByteArray_AS_STRING(self
), Py_SIZE(self
),
1295 varg
.buf
, varg
.len
, 0);
1296 PyBuffer_Release(&varg
);
1299 if (ival
< 0 || ival
>= 256) {
1300 PyErr_SetString(PyExc_ValueError
, "byte must be in range(0, 256)");
1304 return memchr(PyByteArray_AS_STRING(self
), ival
, Py_SIZE(self
)) != NULL
;
1308 /* Matches the end (direction >= 0) or start (direction < 0) of self
1309 * against substr, using the start and end arguments. Returns
1310 * -1 on error, 0 if not found and 1 if found.
1313 _bytearray_tailmatch(PyByteArrayObject
*self
, PyObject
*substr
, Py_ssize_t start
,
1314 Py_ssize_t end
, int direction
)
1316 Py_ssize_t len
= PyByteArray_GET_SIZE(self
);
1321 str
= PyByteArray_AS_STRING(self
);
1323 if (_getbuffer(substr
, &vsubstr
) < 0)
1326 _adjust_indices(&start
, &end
, len
);
1328 if (direction
< 0) {
1330 if (start
+vsubstr
.len
> len
) {
1335 if (end
-start
< vsubstr
.len
|| start
> len
) {
1339 if (end
-vsubstr
.len
> start
)
1340 start
= end
- vsubstr
.len
;
1342 if (end
-start
>= vsubstr
.len
)
1343 rv
= ! memcmp(str
+start
, vsubstr
.buf
, vsubstr
.len
);
1346 PyBuffer_Release(&vsubstr
);
1351 PyDoc_STRVAR(startswith__doc__
,
1352 "B.startswith(prefix [,start [,end]]) -> bool\n\
1354 Return True if B starts with the specified prefix, False otherwise.\n\
1355 With optional start, test B beginning at that position.\n\
1356 With optional end, stop comparing B at that position.\n\
1357 prefix can also be a tuple of strings to try.");
1360 bytearray_startswith(PyByteArrayObject
*self
, PyObject
*args
)
1362 Py_ssize_t start
= 0;
1363 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1367 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
1368 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1370 if (PyTuple_Check(subobj
)) {
1372 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
1373 result
= _bytearray_tailmatch(self
,
1374 PyTuple_GET_ITEM(subobj
, i
),
1384 result
= _bytearray_tailmatch(self
, subobj
, start
, end
, -1);
1388 return PyBool_FromLong(result
);
1391 PyDoc_STRVAR(endswith__doc__
,
1392 "B.endswith(suffix [,start [,end]]) -> bool\n\
1394 Return True if B ends with the specified suffix, False otherwise.\n\
1395 With optional start, test B beginning at that position.\n\
1396 With optional end, stop comparing B at that position.\n\
1397 suffix can also be a tuple of strings to try.");
1400 bytearray_endswith(PyByteArrayObject
*self
, PyObject
*args
)
1402 Py_ssize_t start
= 0;
1403 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1407 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
1408 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1410 if (PyTuple_Check(subobj
)) {
1412 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
1413 result
= _bytearray_tailmatch(self
,
1414 PyTuple_GET_ITEM(subobj
, i
),
1424 result
= _bytearray_tailmatch(self
, subobj
, start
, end
, +1);
1428 return PyBool_FromLong(result
);
1432 PyDoc_STRVAR(translate__doc__
,
1433 "B.translate(table[, deletechars]) -> bytearray\n\
1435 Return a copy of B, where all characters occurring in the\n\
1436 optional argument deletechars are removed, and the remaining\n\
1437 characters have been mapped through the given translation\n\
1438 table, which must be a bytes object of length 256.");
1441 bytearray_translate(PyByteArrayObject
*self
, PyObject
*args
)
1443 register char *input
, *output
;
1444 register const char *table
;
1445 register Py_ssize_t i
, c
;
1446 PyObject
*input_obj
= (PyObject
*)self
;
1447 const char *output_start
;
1449 PyObject
*result
= NULL
;
1450 int trans_table
[256];
1451 PyObject
*tableobj
= NULL
, *delobj
= NULL
;
1452 Py_buffer vtable
, vdel
;
1454 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
1455 &tableobj
, &delobj
))
1458 if (tableobj
== Py_None
) {
1461 } else if (_getbuffer(tableobj
, &vtable
) < 0) {
1464 if (vtable
.len
!= 256) {
1465 PyErr_SetString(PyExc_ValueError
,
1466 "translation table must be 256 characters long");
1467 PyBuffer_Release(&vtable
);
1470 table
= (const char*)vtable
.buf
;
1473 if (delobj
!= NULL
) {
1474 if (_getbuffer(delobj
, &vdel
) < 0) {
1475 if (tableobj
!= NULL
)
1476 PyBuffer_Release(&vtable
);
1485 inlen
= PyByteArray_GET_SIZE(input_obj
);
1486 result
= PyByteArray_FromStringAndSize((char *)NULL
, inlen
);
1489 output_start
= output
= PyByteArray_AsString(result
);
1490 input
= PyByteArray_AS_STRING(input_obj
);
1492 if (vdel
.len
== 0 && table
!= NULL
) {
1493 /* If no deletions are required, use faster code */
1494 for (i
= inlen
; --i
>= 0; ) {
1495 c
= Py_CHARMASK(*input
++);
1496 *output
++ = table
[c
];
1501 if (table
== NULL
) {
1502 for (i
= 0; i
< 256; i
++)
1503 trans_table
[i
] = Py_CHARMASK(i
);
1505 for (i
= 0; i
< 256; i
++)
1506 trans_table
[i
] = Py_CHARMASK(table
[i
]);
1509 for (i
= 0; i
< vdel
.len
; i
++)
1510 trans_table
[(int) Py_CHARMASK( ((unsigned char*)vdel
.buf
)[i
] )] = -1;
1512 for (i
= inlen
; --i
>= 0; ) {
1513 c
= Py_CHARMASK(*input
++);
1514 if (trans_table
[c
] != -1)
1515 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
1518 /* Fix the size of the resulting string */
1520 PyByteArray_Resize(result
, output
- output_start
);
1523 if (tableobj
!= NULL
)
1524 PyBuffer_Release(&vtable
);
1526 PyBuffer_Release(&vdel
);
1534 /* find and count characters and substrings */
1536 #define findchar(target, target_len, c) \
1537 ((char *)memchr((const void *)(target), c, target_len))
1539 /* Don't call if length < 2 */
1540 #define Py_STRING_MATCH(target, offset, pattern, length) \
1541 (target[offset] == pattern[0] && \
1542 target[offset+length-1] == pattern[length-1] && \
1543 !memcmp(target+offset+1, pattern+1, length-2) )
1546 /* Bytes ops must return a string, create a copy */
1547 Py_LOCAL(PyByteArrayObject
*)
1548 return_self(PyByteArrayObject
*self
)
1550 return (PyByteArrayObject
*)PyByteArray_FromStringAndSize(
1551 PyByteArray_AS_STRING(self
),
1552 PyByteArray_GET_SIZE(self
));
1555 Py_LOCAL_INLINE(Py_ssize_t
)
1556 countchar(const char *target
, Py_ssize_t target_len
, char c
, Py_ssize_t maxcount
)
1559 const char *start
=target
;
1560 const char *end
=target
+target_len
;
1562 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
1564 if (count
>= maxcount
)
1571 Py_LOCAL(Py_ssize_t
)
1572 findstring(const char *target
, Py_ssize_t target_len
,
1573 const char *pattern
, Py_ssize_t pattern_len
,
1579 start
+= target_len
;
1583 if (end
> target_len
) {
1585 } else if (end
< 0) {
1591 /* zero-length substrings always match at the first attempt */
1592 if (pattern_len
== 0)
1593 return (direction
> 0) ? start
: end
;
1597 if (direction
< 0) {
1598 for (; end
>= start
; end
--)
1599 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
))
1602 for (; start
<= end
; start
++)
1603 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
))
1609 Py_LOCAL_INLINE(Py_ssize_t
)
1610 countstring(const char *target
, Py_ssize_t target_len
,
1611 const char *pattern
, Py_ssize_t pattern_len
,
1614 int direction
, Py_ssize_t maxcount
)
1619 start
+= target_len
;
1623 if (end
> target_len
) {
1625 } else if (end
< 0) {
1631 /* zero-length substrings match everywhere */
1632 if (pattern_len
== 0 || maxcount
== 0) {
1633 if (target_len
+1 < maxcount
)
1634 return target_len
+1;
1639 if (direction
< 0) {
1640 for (; (end
>= start
); end
--)
1641 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
)) {
1643 if (--maxcount
<= 0) break;
1644 end
-= pattern_len
-1;
1647 for (; (start
<= end
); start
++)
1648 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
)) {
1650 if (--maxcount
<= 0)
1652 start
+= pattern_len
-1;
1659 /* Algorithms for different cases of string replacement */
1661 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1662 Py_LOCAL(PyByteArrayObject
*)
1663 replace_interleave(PyByteArrayObject
*self
,
1664 const char *to_s
, Py_ssize_t to_len
,
1665 Py_ssize_t maxcount
)
1667 char *self_s
, *result_s
;
1668 Py_ssize_t self_len
, result_len
;
1669 Py_ssize_t count
, i
, product
;
1670 PyByteArrayObject
*result
;
1672 self_len
= PyByteArray_GET_SIZE(self
);
1674 /* 1 at the end plus 1 after every character */
1676 if (maxcount
< count
)
1679 /* Check for overflow */
1680 /* result_len = count * to_len + self_len; */
1681 product
= count
* to_len
;
1682 if (product
/ to_len
!= count
) {
1683 PyErr_SetString(PyExc_OverflowError
,
1684 "replace string is too long");
1687 result_len
= product
+ self_len
;
1688 if (result_len
< 0) {
1689 PyErr_SetString(PyExc_OverflowError
,
1690 "replace string is too long");
1694 if (! (result
= (PyByteArrayObject
*)
1695 PyByteArray_FromStringAndSize(NULL
, result_len
)) )
1698 self_s
= PyByteArray_AS_STRING(self
);
1699 result_s
= PyByteArray_AS_STRING(result
);
1701 /* TODO: special case single character, which doesn't need memcpy */
1703 /* Lay the first one down (guaranteed this will occur) */
1704 Py_MEMCPY(result_s
, to_s
, to_len
);
1708 for (i
=0; i
<count
; i
++) {
1709 *result_s
++ = *self_s
++;
1710 Py_MEMCPY(result_s
, to_s
, to_len
);
1714 /* Copy the rest of the original string */
1715 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
1720 /* Special case for deleting a single character */
1721 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1722 Py_LOCAL(PyByteArrayObject
*)
1723 replace_delete_single_character(PyByteArrayObject
*self
,
1724 char from_c
, Py_ssize_t maxcount
)
1726 char *self_s
, *result_s
;
1727 char *start
, *next
, *end
;
1728 Py_ssize_t self_len
, result_len
;
1730 PyByteArrayObject
*result
;
1732 self_len
= PyByteArray_GET_SIZE(self
);
1733 self_s
= PyByteArray_AS_STRING(self
);
1735 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
1737 return return_self(self
);
1740 result_len
= self_len
- count
; /* from_len == 1 */
1741 assert(result_len
>=0);
1743 if ( (result
= (PyByteArrayObject
*)
1744 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1746 result_s
= PyByteArray_AS_STRING(result
);
1749 end
= self_s
+ self_len
;
1750 while (count
-- > 0) {
1751 next
= findchar(start
, end
-start
, from_c
);
1754 Py_MEMCPY(result_s
, start
, next
-start
);
1755 result_s
+= (next
-start
);
1758 Py_MEMCPY(result_s
, start
, end
-start
);
1763 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1765 Py_LOCAL(PyByteArrayObject
*)
1766 replace_delete_substring(PyByteArrayObject
*self
,
1767 const char *from_s
, Py_ssize_t from_len
,
1768 Py_ssize_t maxcount
)
1770 char *self_s
, *result_s
;
1771 char *start
, *next
, *end
;
1772 Py_ssize_t self_len
, result_len
;
1773 Py_ssize_t count
, offset
;
1774 PyByteArrayObject
*result
;
1776 self_len
= PyByteArray_GET_SIZE(self
);
1777 self_s
= PyByteArray_AS_STRING(self
);
1779 count
= countstring(self_s
, self_len
,
1786 return return_self(self
);
1789 result_len
= self_len
- (count
* from_len
);
1790 assert (result_len
>=0);
1792 if ( (result
= (PyByteArrayObject
*)
1793 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1796 result_s
= PyByteArray_AS_STRING(result
);
1799 end
= self_s
+ self_len
;
1800 while (count
-- > 0) {
1801 offset
= findstring(start
, end
-start
,
1803 0, end
-start
, FORWARD
);
1806 next
= start
+ offset
;
1808 Py_MEMCPY(result_s
, start
, next
-start
);
1810 result_s
+= (next
-start
);
1811 start
= next
+from_len
;
1813 Py_MEMCPY(result_s
, start
, end
-start
);
1817 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1818 Py_LOCAL(PyByteArrayObject
*)
1819 replace_single_character_in_place(PyByteArrayObject
*self
,
1820 char from_c
, char to_c
,
1821 Py_ssize_t maxcount
)
1823 char *self_s
, *result_s
, *start
, *end
, *next
;
1824 Py_ssize_t self_len
;
1825 PyByteArrayObject
*result
;
1827 /* The result string will be the same size */
1828 self_s
= PyByteArray_AS_STRING(self
);
1829 self_len
= PyByteArray_GET_SIZE(self
);
1831 next
= findchar(self_s
, self_len
, from_c
);
1834 /* No matches; return the original bytes */
1835 return return_self(self
);
1838 /* Need to make a new bytes */
1839 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, self_len
);
1842 result_s
= PyByteArray_AS_STRING(result
);
1843 Py_MEMCPY(result_s
, self_s
, self_len
);
1845 /* change everything in-place, starting with this one */
1846 start
= result_s
+ (next
-self_s
);
1849 end
= result_s
+ self_len
;
1851 while (--maxcount
> 0) {
1852 next
= findchar(start
, end
-start
, from_c
);
1862 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1863 Py_LOCAL(PyByteArrayObject
*)
1864 replace_substring_in_place(PyByteArrayObject
*self
,
1865 const char *from_s
, Py_ssize_t from_len
,
1866 const char *to_s
, Py_ssize_t to_len
,
1867 Py_ssize_t maxcount
)
1869 char *result_s
, *start
, *end
;
1871 Py_ssize_t self_len
, offset
;
1872 PyByteArrayObject
*result
;
1874 /* The result bytes will be the same size */
1876 self_s
= PyByteArray_AS_STRING(self
);
1877 self_len
= PyByteArray_GET_SIZE(self
);
1879 offset
= findstring(self_s
, self_len
,
1881 0, self_len
, FORWARD
);
1883 /* No matches; return the original bytes */
1884 return return_self(self
);
1887 /* Need to make a new bytes */
1888 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, self_len
);
1891 result_s
= PyByteArray_AS_STRING(result
);
1892 Py_MEMCPY(result_s
, self_s
, self_len
);
1894 /* change everything in-place, starting with this one */
1895 start
= result_s
+ offset
;
1896 Py_MEMCPY(start
, to_s
, from_len
);
1898 end
= result_s
+ self_len
;
1900 while ( --maxcount
> 0) {
1901 offset
= findstring(start
, end
-start
,
1903 0, end
-start
, FORWARD
);
1906 Py_MEMCPY(start
+offset
, to_s
, from_len
);
1907 start
+= offset
+from_len
;
1913 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1914 Py_LOCAL(PyByteArrayObject
*)
1915 replace_single_character(PyByteArrayObject
*self
,
1917 const char *to_s
, Py_ssize_t to_len
,
1918 Py_ssize_t maxcount
)
1920 char *self_s
, *result_s
;
1921 char *start
, *next
, *end
;
1922 Py_ssize_t self_len
, result_len
;
1923 Py_ssize_t count
, product
;
1924 PyByteArrayObject
*result
;
1926 self_s
= PyByteArray_AS_STRING(self
);
1927 self_len
= PyByteArray_GET_SIZE(self
);
1929 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
1931 /* no matches, return unchanged */
1932 return return_self(self
);
1935 /* use the difference between current and new, hence the "-1" */
1936 /* result_len = self_len + count * (to_len-1) */
1937 product
= count
* (to_len
-1);
1938 if (product
/ (to_len
-1) != count
) {
1939 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1942 result_len
= self_len
+ product
;
1943 if (result_len
< 0) {
1944 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1948 if ( (result
= (PyByteArrayObject
*)
1949 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1951 result_s
= PyByteArray_AS_STRING(result
);
1954 end
= self_s
+ self_len
;
1955 while (count
-- > 0) {
1956 next
= findchar(start
, end
-start
, from_c
);
1960 if (next
== start
) {
1961 /* replace with the 'to' */
1962 Py_MEMCPY(result_s
, to_s
, to_len
);
1966 /* copy the unchanged old then the 'to' */
1967 Py_MEMCPY(result_s
, start
, next
-start
);
1968 result_s
+= (next
-start
);
1969 Py_MEMCPY(result_s
, to_s
, to_len
);
1974 /* Copy the remainder of the remaining bytes */
1975 Py_MEMCPY(result_s
, start
, end
-start
);
1980 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1981 Py_LOCAL(PyByteArrayObject
*)
1982 replace_substring(PyByteArrayObject
*self
,
1983 const char *from_s
, Py_ssize_t from_len
,
1984 const char *to_s
, Py_ssize_t to_len
,
1985 Py_ssize_t maxcount
)
1987 char *self_s
, *result_s
;
1988 char *start
, *next
, *end
;
1989 Py_ssize_t self_len
, result_len
;
1990 Py_ssize_t count
, offset
, product
;
1991 PyByteArrayObject
*result
;
1993 self_s
= PyByteArray_AS_STRING(self
);
1994 self_len
= PyByteArray_GET_SIZE(self
);
1996 count
= countstring(self_s
, self_len
,
1998 0, self_len
, FORWARD
, maxcount
);
2000 /* no matches, return unchanged */
2001 return return_self(self
);
2004 /* Check for overflow */
2005 /* result_len = self_len + count * (to_len-from_len) */
2006 product
= count
* (to_len
-from_len
);
2007 if (product
/ (to_len
-from_len
) != count
) {
2008 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
2011 result_len
= self_len
+ product
;
2012 if (result_len
< 0) {
2013 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
2017 if ( (result
= (PyByteArrayObject
*)
2018 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
2020 result_s
= PyByteArray_AS_STRING(result
);
2023 end
= self_s
+ self_len
;
2024 while (count
-- > 0) {
2025 offset
= findstring(start
, end
-start
,
2027 0, end
-start
, FORWARD
);
2030 next
= start
+offset
;
2031 if (next
== start
) {
2032 /* replace with the 'to' */
2033 Py_MEMCPY(result_s
, to_s
, to_len
);
2037 /* copy the unchanged old then the 'to' */
2038 Py_MEMCPY(result_s
, start
, next
-start
);
2039 result_s
+= (next
-start
);
2040 Py_MEMCPY(result_s
, to_s
, to_len
);
2042 start
= next
+from_len
;
2045 /* Copy the remainder of the remaining bytes */
2046 Py_MEMCPY(result_s
, start
, end
-start
);
2052 Py_LOCAL(PyByteArrayObject
*)
2053 replace(PyByteArrayObject
*self
,
2054 const char *from_s
, Py_ssize_t from_len
,
2055 const char *to_s
, Py_ssize_t to_len
,
2056 Py_ssize_t maxcount
)
2059 maxcount
= PY_SSIZE_T_MAX
;
2060 } else if (maxcount
== 0 || PyByteArray_GET_SIZE(self
) == 0) {
2061 /* nothing to do; return the original bytes */
2062 return return_self(self
);
2065 if (maxcount
== 0 ||
2066 (from_len
== 0 && to_len
== 0)) {
2067 /* nothing to do; return the original bytes */
2068 return return_self(self
);
2071 /* Handle zero-length special cases */
2073 if (from_len
== 0) {
2074 /* insert the 'to' bytes everywhere. */
2075 /* >>> "Python".replace("", ".") */
2076 /* '.P.y.t.h.o.n.' */
2077 return replace_interleave(self
, to_s
, to_len
, maxcount
);
2080 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2081 /* point for an empty self bytes to generate a non-empty bytes */
2082 /* Special case so the remaining code always gets a non-empty bytes */
2083 if (PyByteArray_GET_SIZE(self
) == 0) {
2084 return return_self(self
);
2088 /* delete all occurrences of 'from' bytes */
2089 if (from_len
== 1) {
2090 return replace_delete_single_character(
2091 self
, from_s
[0], maxcount
);
2093 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
2097 /* Handle special case where both bytes have the same length */
2099 if (from_len
== to_len
) {
2100 if (from_len
== 1) {
2101 return replace_single_character_in_place(
2107 return replace_substring_in_place(
2108 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2112 /* Otherwise use the more generic algorithms */
2113 if (from_len
== 1) {
2114 return replace_single_character(self
, from_s
[0],
2115 to_s
, to_len
, maxcount
);
2117 /* len('from')>=2, len('to')>=1 */
2118 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2123 PyDoc_STRVAR(replace__doc__
,
2124 "B.replace(old, new[, count]) -> bytes\n\
2126 Return a copy of B with all occurrences of subsection\n\
2127 old replaced by new. If the optional argument count is\n\
2128 given, only the first count occurrences are replaced.");
2131 bytearray_replace(PyByteArrayObject
*self
, PyObject
*args
)
2133 Py_ssize_t count
= -1;
2134 PyObject
*from
, *to
, *res
;
2135 Py_buffer vfrom
, vto
;
2137 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
2140 if (_getbuffer(from
, &vfrom
) < 0)
2142 if (_getbuffer(to
, &vto
) < 0) {
2143 PyBuffer_Release(&vfrom
);
2147 res
= (PyObject
*)replace((PyByteArrayObject
*) self
,
2148 vfrom
.buf
, vfrom
.len
,
2149 vto
.buf
, vto
.len
, count
);
2151 PyBuffer_Release(&vfrom
);
2152 PyBuffer_Release(&vto
);
2157 /* Overallocate the initial list to reduce the number of reallocs for small
2158 split sizes. E.g., "A A A A A A A A A A".split() (10 elements) has three
2159 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
2160 text (roughly 11 words per line) and field-delimited data (usually 1-10
2161 fields). For large strings the split algorithms are bandwidth-limited,
2162 so increasing the preallocation likely will not improve things. */
2164 #define MAX_PREALLOC 12
2166 /* 5 splits gives 6 elements */
2167 #define PREALLOC_SIZE(maxsplit) \
2168 (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
2170 #define SPLIT_APPEND(data, left, right) \
2171 str = PyByteArray_FromStringAndSize((data) + (left), \
2172 (right) - (left)); \
2175 if (PyList_Append(list, str)) { \
2182 #define SPLIT_ADD(data, left, right) { \
2183 str = PyByteArray_FromStringAndSize((data) + (left), \
2184 (right) - (left)); \
2187 if (count < MAX_PREALLOC) { \
2188 PyList_SET_ITEM(list, count, str); \
2190 if (PyList_Append(list, str)) { \
2199 /* Always force the list to the expected size. */
2200 #define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2203 Py_LOCAL_INLINE(PyObject
*)
2204 split_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
2206 register Py_ssize_t i
, j
, count
= 0;
2208 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2214 while ((j
< len
) && (maxcount
-- > 0)) {
2215 for(; j
< len
; j
++) {
2216 /* I found that using memchr makes no difference */
2225 SPLIT_ADD(s
, i
, len
);
2227 FIX_PREALLOC_SIZE(list
);
2236 Py_LOCAL_INLINE(PyObject
*)
2237 split_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxcount
)
2239 register Py_ssize_t i
, j
, count
= 0;
2241 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2246 for (i
= j
= 0; i
< len
; ) {
2248 while (i
< len
&& Py_ISSPACE(s
[i
]))
2251 while (i
< len
&& !Py_ISSPACE(s
[i
]))
2254 if (maxcount
-- <= 0)
2257 while (i
< len
&& Py_ISSPACE(s
[i
]))
2263 SPLIT_ADD(s
, j
, len
);
2265 FIX_PREALLOC_SIZE(list
);
2273 PyDoc_STRVAR(split__doc__
,
2274 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2276 Return a list of the sections in B, using sep as the delimiter.\n\
2277 If sep is not given, B is split on ASCII whitespace characters\n\
2278 (space, tab, return, newline, formfeed, vertical tab).\n\
2279 If maxsplit is given, at most maxsplit splits are done.");
2282 bytearray_split(PyByteArrayObject
*self
, PyObject
*args
)
2284 Py_ssize_t len
= PyByteArray_GET_SIZE(self
), n
, i
, j
, pos
;
2285 Py_ssize_t maxsplit
= -1, count
= 0;
2286 const char *s
= PyByteArray_AS_STRING(self
), *sub
;
2287 PyObject
*list
, *str
, *subobj
= Py_None
;
2290 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
2293 maxsplit
= PY_SSIZE_T_MAX
;
2295 if (subobj
== Py_None
)
2296 return split_whitespace(s
, len
, maxsplit
);
2298 if (_getbuffer(subobj
, &vsub
) < 0)
2304 PyErr_SetString(PyExc_ValueError
, "empty separator");
2305 PyBuffer_Release(&vsub
);
2309 list
= split_char(s
, len
, sub
[0], maxsplit
);
2310 PyBuffer_Release(&vsub
);
2314 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
2316 PyBuffer_Release(&vsub
);
2321 while (maxsplit
-- > 0) {
2322 pos
= fastsearch(s
+i
, len
-i
, sub
, n
, FAST_SEARCH
);
2329 SPLIT_ADD(s
, i
, len
);
2330 FIX_PREALLOC_SIZE(list
);
2331 PyBuffer_Release(&vsub
);
2336 PyBuffer_Release(&vsub
);
2340 /* stringlib's partition shares nullbytes in some cases.
2341 Undo this; we don't want the nullbytes to be shared. */
2343 make_nullbytes_unique(PyObject
*result
)
2345 if (result
!= NULL
) {
2347 assert(PyTuple_Check(result
));
2348 assert(PyTuple_GET_SIZE(result
) == 3);
2349 for (i
= 0; i
< 3; i
++) {
2350 if (PyTuple_GET_ITEM(result
, i
) == (PyObject
*)nullbytes
) {
2351 PyObject
*new = PyByteArray_FromStringAndSize(NULL
, 0);
2357 Py_DECREF(nullbytes
);
2358 PyTuple_SET_ITEM(result
, i
, new);
2365 PyDoc_STRVAR(partition__doc__
,
2366 "B.partition(sep) -> (head, sep, tail)\n\
2368 Searches for the separator sep in B, and returns the part before it,\n\
2369 the separator itself, and the part after it. If the separator is not\n\
2370 found, returns B and two empty bytearray objects.");
2373 bytearray_partition(PyByteArrayObject
*self
, PyObject
*sep_obj
)
2375 PyObject
*bytesep
, *result
;
2377 bytesep
= PyByteArray_FromObject(sep_obj
);
2381 result
= stringlib_partition(
2383 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
2385 PyByteArray_AS_STRING(bytesep
), PyByteArray_GET_SIZE(bytesep
)
2389 return make_nullbytes_unique(result
);
2392 PyDoc_STRVAR(rpartition__doc__
,
2393 "B.rpartition(sep) -> (tail, sep, head)\n\
2395 Searches for the separator sep in B, starting at the end of B,\n\
2396 and returns the part before it, the separator itself, and the\n\
2397 part after it. If the separator is not found, returns two empty\n\
2398 bytearray objects and B.");
2401 bytearray_rpartition(PyByteArrayObject
*self
, PyObject
*sep_obj
)
2403 PyObject
*bytesep
, *result
;
2405 bytesep
= PyByteArray_FromObject(sep_obj
);
2409 result
= stringlib_rpartition(
2411 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
2413 PyByteArray_AS_STRING(bytesep
), PyByteArray_GET_SIZE(bytesep
)
2417 return make_nullbytes_unique(result
);
2420 Py_LOCAL_INLINE(PyObject
*)
2421 rsplit_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
2423 register Py_ssize_t i
, j
, count
=0;
2425 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2431 while ((i
>= 0) && (maxcount
-- > 0)) {
2432 for (; i
>= 0; i
--) {
2434 SPLIT_ADD(s
, i
+ 1, j
+ 1);
2441 SPLIT_ADD(s
, 0, j
+ 1);
2443 FIX_PREALLOC_SIZE(list
);
2444 if (PyList_Reverse(list
) < 0)
2454 Py_LOCAL_INLINE(PyObject
*)
2455 rsplit_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxcount
)
2457 register Py_ssize_t i
, j
, count
= 0;
2459 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2464 for (i
= j
= len
- 1; i
>= 0; ) {
2466 while (i
>= 0 && Py_ISSPACE(s
[i
]))
2469 while (i
>= 0 && !Py_ISSPACE(s
[i
]))
2472 if (maxcount
-- <= 0)
2474 SPLIT_ADD(s
, i
+ 1, j
+ 1);
2475 while (i
>= 0 && Py_ISSPACE(s
[i
]))
2481 SPLIT_ADD(s
, 0, j
+ 1);
2483 FIX_PREALLOC_SIZE(list
);
2484 if (PyList_Reverse(list
) < 0)
2494 PyDoc_STRVAR(rsplit__doc__
,
2495 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2497 Return a list of the sections in B, using sep as the delimiter,\n\
2498 starting at the end of B and working to the front.\n\
2499 If sep is not given, B is split on ASCII whitespace characters\n\
2500 (space, tab, return, newline, formfeed, vertical tab).\n\
2501 If maxsplit is given, at most maxsplit splits are done.");
2504 bytearray_rsplit(PyByteArrayObject
*self
, PyObject
*args
)
2506 Py_ssize_t len
= PyByteArray_GET_SIZE(self
), n
, j
, pos
;
2507 Py_ssize_t maxsplit
= -1, count
= 0;
2508 const char *s
= PyByteArray_AS_STRING(self
), *sub
;
2509 PyObject
*list
, *str
, *subobj
= Py_None
;
2512 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
2515 maxsplit
= PY_SSIZE_T_MAX
;
2517 if (subobj
== Py_None
)
2518 return rsplit_whitespace(s
, len
, maxsplit
);
2520 if (_getbuffer(subobj
, &vsub
) < 0)
2526 PyErr_SetString(PyExc_ValueError
, "empty separator");
2527 PyBuffer_Release(&vsub
);
2531 list
= rsplit_char(s
, len
, sub
[0], maxsplit
);
2532 PyBuffer_Release(&vsub
);
2536 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
2538 PyBuffer_Release(&vsub
);
2544 while (maxsplit
-- > 0) {
2545 pos
= fastsearch(s
, j
, sub
, n
, FAST_RSEARCH
);
2548 SPLIT_ADD(s
, pos
+ n
, j
);
2552 FIX_PREALLOC_SIZE(list
);
2553 if (PyList_Reverse(list
) < 0)
2555 PyBuffer_Release(&vsub
);
2560 PyBuffer_Release(&vsub
);
2564 PyDoc_STRVAR(reverse__doc__
,
2565 "B.reverse() -> None\n\
2567 Reverse the order of the values in B in place.");
2569 bytearray_reverse(PyByteArrayObject
*self
, PyObject
*unused
)
2571 char swap
, *head
, *tail
;
2572 Py_ssize_t i
, j
, n
= Py_SIZE(self
);
2575 head
= self
->ob_bytes
;
2576 tail
= head
+ n
- 1;
2577 for (i
= 0; i
< j
; i
++) {
2586 PyDoc_STRVAR(insert__doc__
,
2587 "B.insert(index, int) -> None\n\
2589 Insert a single item into the bytearray before the given index.");
2591 bytearray_insert(PyByteArrayObject
*self
, PyObject
*args
)
2595 Py_ssize_t where
, n
= Py_SIZE(self
);
2597 if (!PyArg_ParseTuple(args
, "nO:insert", &where
, &value
))
2600 if (n
== PY_SSIZE_T_MAX
) {
2601 PyErr_SetString(PyExc_OverflowError
,
2602 "cannot add more objects to bytearray");
2605 if (!_getbytevalue(value
, &ival
))
2607 if (PyByteArray_Resize((PyObject
*)self
, n
+ 1) < 0)
2617 memmove(self
->ob_bytes
+ where
+ 1, self
->ob_bytes
+ where
, n
- where
);
2618 self
->ob_bytes
[where
] = ival
;
2623 PyDoc_STRVAR(append__doc__
,
2624 "B.append(int) -> None\n\
2626 Append a single item to the end of B.");
2628 bytearray_append(PyByteArrayObject
*self
, PyObject
*arg
)
2631 Py_ssize_t n
= Py_SIZE(self
);
2633 if (! _getbytevalue(arg
, &value
))
2635 if (n
== PY_SSIZE_T_MAX
) {
2636 PyErr_SetString(PyExc_OverflowError
,
2637 "cannot add more objects to bytearray");
2640 if (PyByteArray_Resize((PyObject
*)self
, n
+ 1) < 0)
2643 self
->ob_bytes
[n
] = value
;
2648 PyDoc_STRVAR(extend__doc__
,
2649 "B.extend(iterable int) -> None\n\
2651 Append all the elements from the iterator or sequence to the\n\
2654 bytearray_extend(PyByteArrayObject
*self
, PyObject
*arg
)
2656 PyObject
*it
, *item
, *bytearray_obj
;
2657 Py_ssize_t buf_size
= 0, len
= 0;
2661 /* bytearray_setslice code only accepts something supporting PEP 3118. */
2662 if (PyObject_CheckBuffer(arg
)) {
2663 if (bytearray_setslice(self
, Py_SIZE(self
), Py_SIZE(self
), arg
) == -1)
2669 it
= PyObject_GetIter(arg
);
2673 /* Try to determine the length of the argument. 32 is arbitrary. */
2674 buf_size
= _PyObject_LengthHint(arg
, 32);
2675 if (buf_size
== -1) {
2680 bytearray_obj
= PyByteArray_FromStringAndSize(NULL
, buf_size
);
2681 if (bytearray_obj
== NULL
)
2683 buf
= PyByteArray_AS_STRING(bytearray_obj
);
2685 while ((item
= PyIter_Next(it
)) != NULL
) {
2686 if (! _getbytevalue(item
, &value
)) {
2689 Py_DECREF(bytearray_obj
);
2695 if (len
>= buf_size
) {
2696 buf_size
= len
+ (len
>> 1) + 1;
2697 if (PyByteArray_Resize((PyObject
*)bytearray_obj
, buf_size
) < 0) {
2699 Py_DECREF(bytearray_obj
);
2702 /* Recompute the `buf' pointer, since the resizing operation may
2703 have invalidated it. */
2704 buf
= PyByteArray_AS_STRING(bytearray_obj
);
2709 /* Resize down to exact size. */
2710 if (PyByteArray_Resize((PyObject
*)bytearray_obj
, len
) < 0) {
2711 Py_DECREF(bytearray_obj
);
2715 if (bytearray_setslice(self
, Py_SIZE(self
), Py_SIZE(self
), bytearray_obj
) == -1)
2717 Py_DECREF(bytearray_obj
);
2722 PyDoc_STRVAR(pop__doc__
,
2723 "B.pop([index]) -> int\n\
2725 Remove and return a single item from B. If no index\n\
2726 argument is given, will pop the last value.");
2728 bytearray_pop(PyByteArrayObject
*self
, PyObject
*args
)
2731 Py_ssize_t where
= -1, n
= Py_SIZE(self
);
2733 if (!PyArg_ParseTuple(args
, "|n:pop", &where
))
2737 PyErr_SetString(PyExc_OverflowError
,
2738 "cannot pop an empty bytearray");
2742 where
+= Py_SIZE(self
);
2743 if (where
< 0 || where
>= Py_SIZE(self
)) {
2744 PyErr_SetString(PyExc_IndexError
, "pop index out of range");
2747 if (!_canresize(self
))
2750 value
= self
->ob_bytes
[where
];
2751 memmove(self
->ob_bytes
+ where
, self
->ob_bytes
+ where
+ 1, n
- where
);
2752 if (PyByteArray_Resize((PyObject
*)self
, n
- 1) < 0)
2755 return PyInt_FromLong((unsigned char)value
);
2758 PyDoc_STRVAR(remove__doc__
,
2759 "B.remove(int) -> None\n\
2761 Remove the first occurance of a value in B.");
2763 bytearray_remove(PyByteArrayObject
*self
, PyObject
*arg
)
2766 Py_ssize_t where
, n
= Py_SIZE(self
);
2768 if (! _getbytevalue(arg
, &value
))
2771 for (where
= 0; where
< n
; where
++) {
2772 if (self
->ob_bytes
[where
] == value
)
2776 PyErr_SetString(PyExc_ValueError
, "value not found in bytearray");
2779 if (!_canresize(self
))
2782 memmove(self
->ob_bytes
+ where
, self
->ob_bytes
+ where
+ 1, n
- where
);
2783 if (PyByteArray_Resize((PyObject
*)self
, n
- 1) < 0)
2789 /* XXX These two helpers could be optimized if argsize == 1 */
2792 lstrip_helper(unsigned char *myptr
, Py_ssize_t mysize
,
2793 void *argptr
, Py_ssize_t argsize
)
2796 while (i
< mysize
&& memchr(argptr
, myptr
[i
], argsize
))
2802 rstrip_helper(unsigned char *myptr
, Py_ssize_t mysize
,
2803 void *argptr
, Py_ssize_t argsize
)
2805 Py_ssize_t i
= mysize
- 1;
2806 while (i
>= 0 && memchr(argptr
, myptr
[i
], argsize
))
2811 PyDoc_STRVAR(strip__doc__
,
2812 "B.strip([bytes]) -> bytearray\n\
2814 Strip leading and trailing bytes contained in the argument.\n\
2815 If the argument is omitted, strip ASCII whitespace.");
2817 bytearray_strip(PyByteArrayObject
*self
, PyObject
*args
)
2819 Py_ssize_t left
, right
, mysize
, argsize
;
2820 void *myptr
, *argptr
;
2821 PyObject
*arg
= Py_None
;
2823 if (!PyArg_ParseTuple(args
, "|O:strip", &arg
))
2825 if (arg
== Py_None
) {
2826 argptr
= "\t\n\r\f\v ";
2830 if (_getbuffer(arg
, &varg
) < 0)
2835 myptr
= self
->ob_bytes
;
2836 mysize
= Py_SIZE(self
);
2837 left
= lstrip_helper(myptr
, mysize
, argptr
, argsize
);
2841 right
= rstrip_helper(myptr
, mysize
, argptr
, argsize
);
2843 PyBuffer_Release(&varg
);
2844 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2847 PyDoc_STRVAR(lstrip__doc__
,
2848 "B.lstrip([bytes]) -> bytearray\n\
2850 Strip leading bytes contained in the argument.\n\
2851 If the argument is omitted, strip leading ASCII whitespace.");
2853 bytearray_lstrip(PyByteArrayObject
*self
, PyObject
*args
)
2855 Py_ssize_t left
, right
, mysize
, argsize
;
2856 void *myptr
, *argptr
;
2857 PyObject
*arg
= Py_None
;
2859 if (!PyArg_ParseTuple(args
, "|O:lstrip", &arg
))
2861 if (arg
== Py_None
) {
2862 argptr
= "\t\n\r\f\v ";
2866 if (_getbuffer(arg
, &varg
) < 0)
2871 myptr
= self
->ob_bytes
;
2872 mysize
= Py_SIZE(self
);
2873 left
= lstrip_helper(myptr
, mysize
, argptr
, argsize
);
2876 PyBuffer_Release(&varg
);
2877 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2880 PyDoc_STRVAR(rstrip__doc__
,
2881 "B.rstrip([bytes]) -> bytearray\n\
2883 Strip trailing bytes contained in the argument.\n\
2884 If the argument is omitted, strip trailing ASCII whitespace.");
2886 bytearray_rstrip(PyByteArrayObject
*self
, PyObject
*args
)
2888 Py_ssize_t left
, right
, mysize
, argsize
;
2889 void *myptr
, *argptr
;
2890 PyObject
*arg
= Py_None
;
2892 if (!PyArg_ParseTuple(args
, "|O:rstrip", &arg
))
2894 if (arg
== Py_None
) {
2895 argptr
= "\t\n\r\f\v ";
2899 if (_getbuffer(arg
, &varg
) < 0)
2904 myptr
= self
->ob_bytes
;
2905 mysize
= Py_SIZE(self
);
2907 right
= rstrip_helper(myptr
, mysize
, argptr
, argsize
);
2909 PyBuffer_Release(&varg
);
2910 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2913 PyDoc_STRVAR(decode_doc
,
2914 "B.decode([encoding[, errors]]) -> unicode object.\n\
2916 Decodes B using the codec registered for encoding. encoding defaults\n\
2917 to the default encoding. errors may be given to set a different error\n\
2918 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2919 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2920 as well as any other name registered with codecs.register_error that is\n\
2921 able to handle UnicodeDecodeErrors.");
2924 bytearray_decode(PyObject
*self
, PyObject
*args
, PyObject
*kwargs
)
2926 const char *encoding
= NULL
;
2927 const char *errors
= NULL
;
2928 static char *kwlist
[] = {"encoding", "errors", 0};
2930 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|ss:decode", kwlist
, &encoding
, &errors
))
2932 if (encoding
== NULL
) {
2933 #ifdef Py_USING_UNICODE
2934 encoding
= PyUnicode_GetDefaultEncoding();
2936 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
2940 return PyCodec_Decode(self
, encoding
, errors
);
2943 PyDoc_STRVAR(alloc_doc
,
2944 "B.__alloc__() -> int\n\
2946 Returns the number of bytes actually allocated.");
2949 bytearray_alloc(PyByteArrayObject
*self
)
2951 return PyInt_FromSsize_t(self
->ob_alloc
);
2954 PyDoc_STRVAR(join_doc
,
2955 "B.join(iterable_of_bytes) -> bytes\n\
2957 Concatenates any number of bytearray objects, with B in between each pair.");
2960 bytearray_join(PyByteArrayObject
*self
, PyObject
*it
)
2963 Py_ssize_t mysize
= Py_SIZE(self
);
2967 Py_ssize_t totalsize
= 0;
2971 seq
= PySequence_Fast(it
, "can only join an iterable");
2974 n
= PySequence_Fast_GET_SIZE(seq
);
2975 items
= PySequence_Fast_ITEMS(seq
);
2977 /* Compute the total size, and check that they are all bytes */
2978 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2979 for (i
= 0; i
< n
; i
++) {
2980 PyObject
*obj
= items
[i
];
2981 if (!PyByteArray_Check(obj
) && !PyBytes_Check(obj
)) {
2982 PyErr_Format(PyExc_TypeError
,
2983 "can only join an iterable of bytes "
2984 "(item %ld has type '%.100s')",
2985 /* XXX %ld isn't right on Win64 */
2986 (long)i
, Py_TYPE(obj
)->tp_name
);
2990 totalsize
+= mysize
;
2991 totalsize
+= Py_SIZE(obj
);
2992 if (totalsize
< 0) {
2998 /* Allocate the result, and copy the bytes */
2999 result
= PyByteArray_FromStringAndSize(NULL
, totalsize
);
3002 dest
= PyByteArray_AS_STRING(result
);
3003 for (i
= 0; i
< n
; i
++) {
3004 PyObject
*obj
= items
[i
];
3005 Py_ssize_t size
= Py_SIZE(obj
);
3007 if (PyByteArray_Check(obj
))
3008 buf
= PyByteArray_AS_STRING(obj
);
3010 buf
= PyBytes_AS_STRING(obj
);
3012 memcpy(dest
, self
->ob_bytes
, mysize
);
3015 memcpy(dest
, buf
, size
);
3023 /* Error handling */
3029 PyDoc_STRVAR(fromhex_doc
,
3030 "bytearray.fromhex(string) -> bytearray\n\
3032 Create a bytearray object from a string of hexadecimal numbers.\n\
3033 Spaces between two numbers are accepted.\n\
3034 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3037 hex_digit_to_int(char c
)
3044 if (c
>= 'a' && c
<= 'f')
3045 return c
- 'a' + 10;
3051 bytearray_fromhex(PyObject
*cls
, PyObject
*args
)
3056 Py_ssize_t hexlen
, byteslen
, i
, j
;
3059 if (!PyArg_ParseTuple(args
, "s#:fromhex", &hex
, &hexlen
))
3061 byteslen
= hexlen
/2; /* This overestimates if there are spaces */
3062 newbytes
= PyByteArray_FromStringAndSize(NULL
, byteslen
);
3065 buf
= PyByteArray_AS_STRING(newbytes
);
3066 for (i
= j
= 0; i
< hexlen
; i
+= 2) {
3067 /* skip over spaces in the input */
3068 while (hex
[i
] == ' ')
3072 top
= hex_digit_to_int(hex
[i
]);
3073 bot
= hex_digit_to_int(hex
[i
+1]);
3074 if (top
== -1 || bot
== -1) {
3075 PyErr_Format(PyExc_ValueError
,
3076 "non-hexadecimal number found in "
3077 "fromhex() arg at position %zd", i
);
3080 buf
[j
++] = (top
<< 4) + bot
;
3082 if (PyByteArray_Resize(newbytes
, j
) < 0)
3087 Py_DECREF(newbytes
);
3091 PyDoc_STRVAR(reduce_doc
, "Return state information for pickling.");
3094 bytearray_reduce(PyByteArrayObject
*self
)
3096 PyObject
*latin1
, *dict
;
3098 #ifdef Py_USING_UNICODE
3099 latin1
= PyUnicode_DecodeLatin1(self
->ob_bytes
,
3100 Py_SIZE(self
), NULL
);
3102 latin1
= PyString_FromStringAndSize(self
->ob_bytes
, Py_SIZE(self
));
3105 #ifdef Py_USING_UNICODE
3106 latin1
= PyUnicode_FromString("");
3108 latin1
= PyString_FromString("");
3111 dict
= PyObject_GetAttrString((PyObject
*)self
, "__dict__");
3118 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self
), latin1
, "latin-1", dict
);
3121 PyDoc_STRVAR(sizeof_doc
,
3122 "B.__sizeof__() -> int\n\
3124 Returns the size of B in memory, in bytes");
3126 bytearray_sizeof(PyByteArrayObject
*self
)
3130 res
= sizeof(PyByteArrayObject
) + self
->ob_alloc
* sizeof(char);
3131 return PyInt_FromSsize_t(res
);
3134 static PySequenceMethods bytearray_as_sequence
= {
3135 (lenfunc
)bytearray_length
, /* sq_length */
3136 (binaryfunc
)PyByteArray_Concat
, /* sq_concat */
3137 (ssizeargfunc
)bytearray_repeat
, /* sq_repeat */
3138 (ssizeargfunc
)bytearray_getitem
, /* sq_item */
3140 (ssizeobjargproc
)bytearray_setitem
, /* sq_ass_item */
3141 0, /* sq_ass_slice */
3142 (objobjproc
)bytearray_contains
, /* sq_contains */
3143 (binaryfunc
)bytearray_iconcat
, /* sq_inplace_concat */
3144 (ssizeargfunc
)bytearray_irepeat
, /* sq_inplace_repeat */
3147 static PyMappingMethods bytearray_as_mapping
= {
3148 (lenfunc
)bytearray_length
,
3149 (binaryfunc
)bytearray_subscript
,
3150 (objobjargproc
)bytearray_ass_subscript
,
3153 static PyBufferProcs bytearray_as_buffer
= {
3154 (readbufferproc
)bytearray_buffer_getreadbuf
,
3155 (writebufferproc
)bytearray_buffer_getwritebuf
,
3156 (segcountproc
)bytearray_buffer_getsegcount
,
3157 (charbufferproc
)bytearray_buffer_getcharbuf
,
3158 (getbufferproc
)bytearray_getbuffer
,
3159 (releasebufferproc
)bytearray_releasebuffer
,
3163 bytearray_methods
[] = {
3164 {"__alloc__", (PyCFunction
)bytearray_alloc
, METH_NOARGS
, alloc_doc
},
3165 {"__reduce__", (PyCFunction
)bytearray_reduce
, METH_NOARGS
, reduce_doc
},
3166 {"__sizeof__", (PyCFunction
)bytearray_sizeof
, METH_NOARGS
, sizeof_doc
},
3167 {"append", (PyCFunction
)bytearray_append
, METH_O
, append__doc__
},
3168 {"capitalize", (PyCFunction
)stringlib_capitalize
, METH_NOARGS
,
3169 _Py_capitalize__doc__
},
3170 {"center", (PyCFunction
)stringlib_center
, METH_VARARGS
, center__doc__
},
3171 {"count", (PyCFunction
)bytearray_count
, METH_VARARGS
, count__doc__
},
3172 {"decode", (PyCFunction
)bytearray_decode
, METH_VARARGS
| METH_KEYWORDS
, decode_doc
},
3173 {"endswith", (PyCFunction
)bytearray_endswith
, METH_VARARGS
, endswith__doc__
},
3174 {"expandtabs", (PyCFunction
)stringlib_expandtabs
, METH_VARARGS
,
3176 {"extend", (PyCFunction
)bytearray_extend
, METH_O
, extend__doc__
},
3177 {"find", (PyCFunction
)bytearray_find
, METH_VARARGS
, find__doc__
},
3178 {"fromhex", (PyCFunction
)bytearray_fromhex
, METH_VARARGS
|METH_CLASS
,
3180 {"index", (PyCFunction
)bytearray_index
, METH_VARARGS
, index__doc__
},
3181 {"insert", (PyCFunction
)bytearray_insert
, METH_VARARGS
, insert__doc__
},
3182 {"isalnum", (PyCFunction
)stringlib_isalnum
, METH_NOARGS
,
3183 _Py_isalnum__doc__
},
3184 {"isalpha", (PyCFunction
)stringlib_isalpha
, METH_NOARGS
,
3185 _Py_isalpha__doc__
},
3186 {"isdigit", (PyCFunction
)stringlib_isdigit
, METH_NOARGS
,
3187 _Py_isdigit__doc__
},
3188 {"islower", (PyCFunction
)stringlib_islower
, METH_NOARGS
,
3189 _Py_islower__doc__
},
3190 {"isspace", (PyCFunction
)stringlib_isspace
, METH_NOARGS
,
3191 _Py_isspace__doc__
},
3192 {"istitle", (PyCFunction
)stringlib_istitle
, METH_NOARGS
,
3193 _Py_istitle__doc__
},
3194 {"isupper", (PyCFunction
)stringlib_isupper
, METH_NOARGS
,
3195 _Py_isupper__doc__
},
3196 {"join", (PyCFunction
)bytearray_join
, METH_O
, join_doc
},
3197 {"ljust", (PyCFunction
)stringlib_ljust
, METH_VARARGS
, ljust__doc__
},
3198 {"lower", (PyCFunction
)stringlib_lower
, METH_NOARGS
, _Py_lower__doc__
},
3199 {"lstrip", (PyCFunction
)bytearray_lstrip
, METH_VARARGS
, lstrip__doc__
},
3200 {"partition", (PyCFunction
)bytearray_partition
, METH_O
, partition__doc__
},
3201 {"pop", (PyCFunction
)bytearray_pop
, METH_VARARGS
, pop__doc__
},
3202 {"remove", (PyCFunction
)bytearray_remove
, METH_O
, remove__doc__
},
3203 {"replace", (PyCFunction
)bytearray_replace
, METH_VARARGS
, replace__doc__
},
3204 {"reverse", (PyCFunction
)bytearray_reverse
, METH_NOARGS
, reverse__doc__
},
3205 {"rfind", (PyCFunction
)bytearray_rfind
, METH_VARARGS
, rfind__doc__
},
3206 {"rindex", (PyCFunction
)bytearray_rindex
, METH_VARARGS
, rindex__doc__
},
3207 {"rjust", (PyCFunction
)stringlib_rjust
, METH_VARARGS
, rjust__doc__
},
3208 {"rpartition", (PyCFunction
)bytearray_rpartition
, METH_O
, rpartition__doc__
},
3209 {"rsplit", (PyCFunction
)bytearray_rsplit
, METH_VARARGS
, rsplit__doc__
},
3210 {"rstrip", (PyCFunction
)bytearray_rstrip
, METH_VARARGS
, rstrip__doc__
},
3211 {"split", (PyCFunction
)bytearray_split
, METH_VARARGS
, split__doc__
},
3212 {"splitlines", (PyCFunction
)stringlib_splitlines
, METH_VARARGS
,
3214 {"startswith", (PyCFunction
)bytearray_startswith
, METH_VARARGS
,
3216 {"strip", (PyCFunction
)bytearray_strip
, METH_VARARGS
, strip__doc__
},
3217 {"swapcase", (PyCFunction
)stringlib_swapcase
, METH_NOARGS
,
3218 _Py_swapcase__doc__
},
3219 {"title", (PyCFunction
)stringlib_title
, METH_NOARGS
, _Py_title__doc__
},
3220 {"translate", (PyCFunction
)bytearray_translate
, METH_VARARGS
,
3222 {"upper", (PyCFunction
)stringlib_upper
, METH_NOARGS
, _Py_upper__doc__
},
3223 {"zfill", (PyCFunction
)stringlib_zfill
, METH_VARARGS
, zfill__doc__
},
3227 PyDoc_STRVAR(bytearray_doc
,
3228 "bytearray(iterable_of_ints) -> bytearray.\n\
3229 bytearray(string, encoding[, errors]) -> bytearray.\n\
3230 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3231 bytearray(memory_view) -> bytearray.\n\
3233 Construct an mutable bytearray object from:\n\
3234 - an iterable yielding integers in range(256)\n\
3235 - a text string encoded using the specified encoding\n\
3236 - a bytes or a bytearray object\n\
3237 - any object implementing the buffer API.\n\
3239 bytearray(int) -> bytearray.\n\
3241 Construct a zero-initialized bytearray of the given length.");
3244 static PyObject
*bytearray_iter(PyObject
*seq
);
3246 PyTypeObject PyByteArray_Type
= {
3247 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3249 sizeof(PyByteArrayObject
),
3251 (destructor
)bytearray_dealloc
, /* tp_dealloc */
3256 (reprfunc
)bytearray_repr
, /* tp_repr */
3257 0, /* tp_as_number */
3258 &bytearray_as_sequence
, /* tp_as_sequence */
3259 &bytearray_as_mapping
, /* tp_as_mapping */
3262 bytearray_str
, /* tp_str */
3263 PyObject_GenericGetAttr
, /* tp_getattro */
3264 0, /* tp_setattro */
3265 &bytearray_as_buffer
, /* tp_as_buffer */
3266 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
3267 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
3268 bytearray_doc
, /* tp_doc */
3269 0, /* tp_traverse */
3271 (richcmpfunc
)bytearray_richcompare
, /* tp_richcompare */
3272 0, /* tp_weaklistoffset */
3273 bytearray_iter
, /* tp_iter */
3274 0, /* tp_iternext */
3275 bytearray_methods
, /* tp_methods */
3280 0, /* tp_descr_get */
3281 0, /* tp_descr_set */
3282 0, /* tp_dictoffset */
3283 (initproc
)bytearray_init
, /* tp_init */
3284 PyType_GenericAlloc
, /* tp_alloc */
3285 PyType_GenericNew
, /* tp_new */
3286 PyObject_Del
, /* tp_free */
3289 /*********************** Bytes Iterator ****************************/
3293 Py_ssize_t it_index
;
3294 PyByteArrayObject
*it_seq
; /* Set to NULL when iterator is exhausted */
3298 bytearrayiter_dealloc(bytesiterobject
*it
)
3300 _PyObject_GC_UNTRACK(it
);
3301 Py_XDECREF(it
->it_seq
);
3302 PyObject_GC_Del(it
);
3306 bytearrayiter_traverse(bytesiterobject
*it
, visitproc visit
, void *arg
)
3308 Py_VISIT(it
->it_seq
);
3313 bytearrayiter_next(bytesiterobject
*it
)
3315 PyByteArrayObject
*seq
;
3322 assert(PyByteArray_Check(seq
));
3324 if (it
->it_index
< PyByteArray_GET_SIZE(seq
)) {
3325 item
= PyInt_FromLong(
3326 (unsigned char)seq
->ob_bytes
[it
->it_index
]);
3338 bytesarrayiter_length_hint(bytesiterobject
*it
)
3342 len
= PyByteArray_GET_SIZE(it
->it_seq
) - it
->it_index
;
3343 return PyInt_FromSsize_t(len
);
3346 PyDoc_STRVAR(length_hint_doc
,
3347 "Private method returning an estimate of len(list(it)).");
3349 static PyMethodDef bytearrayiter_methods
[] = {
3350 {"__length_hint__", (PyCFunction
)bytesarrayiter_length_hint
, METH_NOARGS
,
3352 {NULL
, NULL
} /* sentinel */
3355 PyTypeObject PyByteArrayIter_Type
= {
3356 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3357 "bytearray_iterator", /* tp_name */
3358 sizeof(bytesiterobject
), /* tp_basicsize */
3359 0, /* tp_itemsize */
3361 (destructor
)bytearrayiter_dealloc
, /* tp_dealloc */
3367 0, /* tp_as_number */
3368 0, /* tp_as_sequence */
3369 0, /* tp_as_mapping */
3373 PyObject_GenericGetAttr
, /* tp_getattro */
3374 0, /* tp_setattro */
3375 0, /* tp_as_buffer */
3376 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
, /* tp_flags */
3378 (traverseproc
)bytearrayiter_traverse
, /* tp_traverse */
3380 0, /* tp_richcompare */
3381 0, /* tp_weaklistoffset */
3382 PyObject_SelfIter
, /* tp_iter */
3383 (iternextfunc
)bytearrayiter_next
, /* tp_iternext */
3384 bytearrayiter_methods
, /* tp_methods */
3389 bytearray_iter(PyObject
*seq
)
3391 bytesiterobject
*it
;
3393 if (!PyByteArray_Check(seq
)) {
3394 PyErr_BadInternalCall();
3397 it
= PyObject_GC_New(bytesiterobject
, &PyByteArrayIter_Type
);
3402 it
->it_seq
= (PyByteArrayObject
*)seq
;
3403 _PyObject_GC_TRACK(it
);
3404 return (PyObject
*)it
;