1 /* PyBytes (bytearray) implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "structmember.h"
6 #include "bytes_methods.h"
8 static PyByteArrayObject
*nullbytes
= NULL
;
11 PyByteArray_Fini(void)
17 PyByteArray_Init(void)
19 nullbytes
= PyObject_New(PyByteArrayObject
, &PyByteArray_Type
);
20 if (nullbytes
== NULL
)
22 nullbytes
->ob_bytes
= NULL
;
23 Py_SIZE(nullbytes
) = nullbytes
->ob_alloc
= 0;
24 nullbytes
->ob_exports
= 0;
28 /* end nullbytes support */
33 _getbytevalue(PyObject
* arg
, int *value
)
37 if (PyBytes_CheckExact(arg
)) {
38 if (Py_SIZE(arg
) != 1) {
39 PyErr_SetString(PyExc_ValueError
, "string must be of size 1");
42 *value
= Py_CHARMASK(((PyBytesObject
*)arg
)->ob_sval
[0]);
45 else if (PyInt_Check(arg
) || PyLong_Check(arg
)) {
46 face_value
= PyLong_AsLong(arg
);
49 PyObject
*index
= PyNumber_Index(arg
);
51 PyErr_Format(PyExc_TypeError
,
52 "an integer or string of size 1 is required");
55 face_value
= PyLong_AsLong(index
);
59 if (face_value
< 0 || face_value
>= 256) {
60 /* this includes the OverflowError in case the long is too large */
61 PyErr_SetString(PyExc_ValueError
, "byte must be in range(0, 256)");
70 bytes_buffer_getreadbuf(PyByteArrayObject
*self
, Py_ssize_t index
, const void **ptr
)
73 PyErr_SetString(PyExc_SystemError
,
74 "accessing non-existent bytes segment");
77 *ptr
= (void *)self
->ob_bytes
;
82 bytes_buffer_getwritebuf(PyByteArrayObject
*self
, Py_ssize_t index
, const void **ptr
)
85 PyErr_SetString(PyExc_SystemError
,
86 "accessing non-existent bytes segment");
89 *ptr
= (void *)self
->ob_bytes
;
94 bytes_buffer_getsegcount(PyByteArrayObject
*self
, Py_ssize_t
*lenp
)
97 *lenp
= Py_SIZE(self
);
102 bytes_buffer_getcharbuf(PyByteArrayObject
*self
, Py_ssize_t index
, const char **ptr
)
105 PyErr_SetString(PyExc_SystemError
,
106 "accessing non-existent bytes segment");
109 *ptr
= self
->ob_bytes
;
110 return Py_SIZE(self
);
114 bytes_getbuffer(PyByteArrayObject
*obj
, Py_buffer
*view
, int flags
)
122 if (obj
->ob_bytes
== NULL
)
126 ret
= PyBuffer_FillInfo(view
, (PyObject
*)obj
, ptr
, Py_SIZE(obj
), 0, flags
);
134 bytes_releasebuffer(PyByteArrayObject
*obj
, Py_buffer
*view
)
140 _getbuffer(PyObject
*obj
, Py_buffer
*view
)
142 PyBufferProcs
*buffer
= Py_TYPE(obj
)->tp_as_buffer
;
144 if (buffer
== NULL
|| buffer
->bf_getbuffer
== NULL
)
146 PyErr_Format(PyExc_TypeError
,
147 "Type %.100s doesn't support the buffer API",
148 Py_TYPE(obj
)->tp_name
);
152 if (buffer
->bf_getbuffer(obj
, view
, PyBUF_SIMPLE
) < 0)
158 _canresize(PyByteArrayObject
*self
)
160 if (self
->ob_exports
> 0) {
161 PyErr_SetString(PyExc_BufferError
,
162 "Existing exports of data: object cannot be re-sized");
168 /* Direct API functions */
171 PyByteArray_FromObject(PyObject
*input
)
173 return PyObject_CallFunctionObjArgs((PyObject
*)&PyByteArray_Type
,
178 PyByteArray_FromStringAndSize(const char *bytes
, Py_ssize_t size
)
180 PyByteArrayObject
*new;
184 PyErr_SetString(PyExc_SystemError
,
185 "Negative size passed to PyByteArray_FromStringAndSize");
189 new = PyObject_New(PyByteArrayObject
, &PyByteArray_Type
);
194 new->ob_bytes
= NULL
;
199 new->ob_bytes
= PyMem_Malloc(alloc
);
200 if (new->ob_bytes
== NULL
) {
202 return PyErr_NoMemory();
205 memcpy(new->ob_bytes
, bytes
, size
);
206 new->ob_bytes
[size
] = '\0'; /* Trailing null byte */
209 new->ob_alloc
= alloc
;
212 return (PyObject
*)new;
216 PyByteArray_Size(PyObject
*self
)
218 assert(self
!= NULL
);
219 assert(PyByteArray_Check(self
));
221 return PyByteArray_GET_SIZE(self
);
225 PyByteArray_AsString(PyObject
*self
)
227 assert(self
!= NULL
);
228 assert(PyByteArray_Check(self
));
230 return PyByteArray_AS_STRING(self
);
234 PyByteArray_Resize(PyObject
*self
, Py_ssize_t size
)
237 Py_ssize_t alloc
= ((PyByteArrayObject
*)self
)->ob_alloc
;
239 assert(self
!= NULL
);
240 assert(PyByteArray_Check(self
));
243 if (size
== Py_SIZE(self
)) {
246 if (!_canresize((PyByteArrayObject
*)self
)) {
250 if (size
< alloc
/ 2) {
251 /* Major downsize; resize down to exact size */
254 else if (size
< alloc
) {
255 /* Within allocated size; quick exit */
256 Py_SIZE(self
) = size
;
257 ((PyByteArrayObject
*)self
)->ob_bytes
[size
] = '\0'; /* Trailing null */
260 else if (size
<= alloc
* 1.125) {
261 /* Moderate upsize; overallocate similar to list_resize() */
262 alloc
= size
+ (size
>> 3) + (size
< 9 ? 3 : 6);
265 /* Major upsize; resize up to exact size */
269 sval
= PyMem_Realloc(((PyByteArrayObject
*)self
)->ob_bytes
, alloc
);
275 ((PyByteArrayObject
*)self
)->ob_bytes
= sval
;
276 Py_SIZE(self
) = size
;
277 ((PyByteArrayObject
*)self
)->ob_alloc
= alloc
;
278 ((PyByteArrayObject
*)self
)->ob_bytes
[size
] = '\0'; /* Trailing null byte */
284 PyByteArray_Concat(PyObject
*a
, PyObject
*b
)
288 PyByteArrayObject
*result
= NULL
;
292 if (_getbuffer(a
, &va
) < 0 ||
293 _getbuffer(b
, &vb
) < 0) {
294 PyErr_Format(PyExc_TypeError
, "can't concat %.100s to %.100s",
295 Py_TYPE(a
)->tp_name
, Py_TYPE(b
)->tp_name
);
299 size
= va
.len
+ vb
.len
;
301 return PyErr_NoMemory();
305 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, size
);
306 if (result
!= NULL
) {
307 memcpy(result
->ob_bytes
, va
.buf
, va
.len
);
308 memcpy(result
->ob_bytes
+ va
.len
, vb
.buf
, vb
.len
);
313 PyBuffer_Release(&va
);
315 PyBuffer_Release(&vb
);
316 return (PyObject
*)result
;
319 /* Functions stuffed into the type object */
322 bytes_length(PyByteArrayObject
*self
)
324 return Py_SIZE(self
);
328 bytes_iconcat(PyByteArrayObject
*self
, PyObject
*other
)
334 if (_getbuffer(other
, &vo
) < 0) {
335 PyErr_Format(PyExc_TypeError
, "can't concat %.100s to %.100s",
336 Py_TYPE(other
)->tp_name
, Py_TYPE(self
)->tp_name
);
340 mysize
= Py_SIZE(self
);
341 size
= mysize
+ vo
.len
;
343 PyBuffer_Release(&vo
);
344 return PyErr_NoMemory();
346 if (size
< self
->ob_alloc
) {
347 Py_SIZE(self
) = size
;
348 self
->ob_bytes
[Py_SIZE(self
)] = '\0'; /* Trailing null byte */
350 else if (PyByteArray_Resize((PyObject
*)self
, size
) < 0) {
351 PyBuffer_Release(&vo
);
354 memcpy(self
->ob_bytes
+ mysize
, vo
.buf
, vo
.len
);
355 PyBuffer_Release(&vo
);
357 return (PyObject
*)self
;
361 bytes_repeat(PyByteArrayObject
*self
, Py_ssize_t count
)
363 PyByteArrayObject
*result
;
369 mysize
= Py_SIZE(self
);
370 size
= mysize
* count
;
371 if (count
!= 0 && size
/ count
!= mysize
)
372 return PyErr_NoMemory();
373 result
= (PyByteArrayObject
*)PyByteArray_FromStringAndSize(NULL
, size
);
374 if (result
!= NULL
&& size
!= 0) {
376 memset(result
->ob_bytes
, self
->ob_bytes
[0], size
);
379 for (i
= 0; i
< count
; i
++)
380 memcpy(result
->ob_bytes
+ i
*mysize
, self
->ob_bytes
, mysize
);
383 return (PyObject
*)result
;
387 bytes_irepeat(PyByteArrayObject
*self
, Py_ssize_t count
)
394 mysize
= Py_SIZE(self
);
395 size
= mysize
* count
;
396 if (count
!= 0 && size
/ count
!= mysize
)
397 return PyErr_NoMemory();
398 if (size
< self
->ob_alloc
) {
399 Py_SIZE(self
) = size
;
400 self
->ob_bytes
[Py_SIZE(self
)] = '\0'; /* Trailing null byte */
402 else if (PyByteArray_Resize((PyObject
*)self
, size
) < 0)
406 memset(self
->ob_bytes
, self
->ob_bytes
[0], size
);
409 for (i
= 1; i
< count
; i
++)
410 memcpy(self
->ob_bytes
+ i
*mysize
, self
->ob_bytes
, mysize
);
414 return (PyObject
*)self
;
418 bytes_getitem(PyByteArrayObject
*self
, Py_ssize_t i
)
422 if (i
< 0 || i
>= Py_SIZE(self
)) {
423 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
426 return PyInt_FromLong((unsigned char)(self
->ob_bytes
[i
]));
430 bytes_subscript(PyByteArrayObject
*self
, PyObject
*index
)
432 if (PyIndex_Check(index
)) {
433 Py_ssize_t i
= PyNumber_AsSsize_t(index
, PyExc_IndexError
);
435 if (i
== -1 && PyErr_Occurred())
439 i
+= PyByteArray_GET_SIZE(self
);
441 if (i
< 0 || i
>= Py_SIZE(self
)) {
442 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
445 return PyInt_FromLong((unsigned char)(self
->ob_bytes
[i
]));
447 else if (PySlice_Check(index
)) {
448 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
449 if (PySlice_GetIndicesEx((PySliceObject
*)index
,
450 PyByteArray_GET_SIZE(self
),
451 &start
, &stop
, &step
, &slicelength
) < 0) {
455 if (slicelength
<= 0)
456 return PyByteArray_FromStringAndSize("", 0);
457 else if (step
== 1) {
458 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ start
,
462 char *source_buf
= PyByteArray_AS_STRING(self
);
463 char *result_buf
= (char *)PyMem_Malloc(slicelength
);
466 if (result_buf
== NULL
)
467 return PyErr_NoMemory();
469 for (cur
= start
, i
= 0; i
< slicelength
;
471 result_buf
[i
] = source_buf
[cur
];
473 result
= PyByteArray_FromStringAndSize(result_buf
, slicelength
);
474 PyMem_Free(result_buf
);
479 PyErr_SetString(PyExc_TypeError
, "bytearray indices must be integers");
485 bytes_setslice(PyByteArrayObject
*self
, Py_ssize_t lo
, Py_ssize_t hi
,
488 Py_ssize_t avail
, needed
;
494 if (values
== (PyObject
*)self
) {
495 /* Make a copy and call this function recursively */
497 values
= PyByteArray_FromObject(values
);
500 err
= bytes_setslice(self
, lo
, hi
, values
);
504 if (values
== NULL
) {
510 if (_getbuffer(values
, &vbytes
) < 0) {
511 PyErr_Format(PyExc_TypeError
,
512 "can't set bytearray slice from %.100s",
513 Py_TYPE(values
)->tp_name
);
524 if (hi
> Py_SIZE(self
))
531 if (avail
!= needed
) {
532 if (avail
> needed
) {
533 if (!_canresize(self
)) {
539 | |<----avail----->|<-----tomove------>|
540 | |<-needed->|<-----tomove------>|
543 memmove(self
->ob_bytes
+ lo
+ needed
, self
->ob_bytes
+ hi
,
546 /* XXX(nnorwitz): need to verify this can't overflow! */
547 if (PyByteArray_Resize((PyObject
*)self
,
548 Py_SIZE(self
) + needed
- avail
) < 0) {
552 if (avail
< needed
) {
555 | |<-avail->|<-----tomove------>|
556 | |<----needed---->|<-----tomove------>|
559 memmove(self
->ob_bytes
+ lo
+ needed
, self
->ob_bytes
+ hi
,
560 Py_SIZE(self
) - lo
- needed
);
565 memcpy(self
->ob_bytes
+ lo
, bytes
, needed
);
569 if (vbytes
.len
!= -1)
570 PyBuffer_Release(&vbytes
);
575 bytes_setitem(PyByteArrayObject
*self
, Py_ssize_t i
, PyObject
*value
)
582 if (i
< 0 || i
>= Py_SIZE(self
)) {
583 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
588 return bytes_setslice(self
, i
, i
+1, NULL
);
590 if (!_getbytevalue(value
, &ival
))
593 self
->ob_bytes
[i
] = ival
;
598 bytes_ass_subscript(PyByteArrayObject
*self
, PyObject
*index
, PyObject
*values
)
600 Py_ssize_t start
, stop
, step
, slicelen
, needed
;
603 if (PyIndex_Check(index
)) {
604 Py_ssize_t i
= PyNumber_AsSsize_t(index
, PyExc_IndexError
);
606 if (i
== -1 && PyErr_Occurred())
610 i
+= PyByteArray_GET_SIZE(self
);
612 if (i
< 0 || i
>= Py_SIZE(self
)) {
613 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
617 if (values
== NULL
) {
618 /* Fall through to slice assignment */
626 if (!_getbytevalue(values
, &ival
))
628 self
->ob_bytes
[i
] = (char)ival
;
632 else if (PySlice_Check(index
)) {
633 if (PySlice_GetIndicesEx((PySliceObject
*)index
,
634 PyByteArray_GET_SIZE(self
),
635 &start
, &stop
, &step
, &slicelen
) < 0) {
640 PyErr_SetString(PyExc_TypeError
, "bytearray indices must be integer");
644 if (values
== NULL
) {
648 else if (values
== (PyObject
*)self
|| !PyByteArray_Check(values
)) {
649 /* Make a copy an call this function recursively */
651 values
= PyByteArray_FromObject(values
);
654 err
= bytes_ass_subscript(self
, index
, values
);
659 assert(PyByteArray_Check(values
));
660 bytes
= ((PyByteArrayObject
*)values
)->ob_bytes
;
661 needed
= Py_SIZE(values
);
663 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
664 if ((step
< 0 && start
< stop
) ||
665 (step
> 0 && start
> stop
))
668 if (slicelen
!= needed
) {
669 if (!_canresize(self
))
671 if (slicelen
> needed
) {
673 0 start stop old_size
674 | |<---slicelen--->|<-----tomove------>|
675 | |<-needed->|<-----tomove------>|
678 memmove(self
->ob_bytes
+ start
+ needed
, self
->ob_bytes
+ stop
,
679 Py_SIZE(self
) - stop
);
681 if (PyByteArray_Resize((PyObject
*)self
,
682 Py_SIZE(self
) + needed
- slicelen
) < 0)
684 if (slicelen
< needed
) {
687 | |<-avail->|<-----tomove------>|
688 | |<----needed---->|<-----tomove------>|
691 memmove(self
->ob_bytes
+ start
+ needed
, self
->ob_bytes
+ stop
,
692 Py_SIZE(self
) - start
- needed
);
697 memcpy(self
->ob_bytes
+ start
, bytes
, needed
);
706 if (!_canresize(self
))
710 start
= stop
+ step
* (slicelen
- 1) - 1;
713 for (cur
= start
, i
= 0;
714 i
< slicelen
; cur
+= step
, i
++) {
715 Py_ssize_t lim
= step
- 1;
717 if (cur
+ step
>= PyByteArray_GET_SIZE(self
))
718 lim
= PyByteArray_GET_SIZE(self
) - cur
- 1;
720 memmove(self
->ob_bytes
+ cur
- i
,
721 self
->ob_bytes
+ cur
+ 1, lim
);
723 /* Move the tail of the bytes, in one chunk */
724 cur
= start
+ slicelen
*step
;
725 if (cur
< PyByteArray_GET_SIZE(self
)) {
726 memmove(self
->ob_bytes
+ cur
- slicelen
,
727 self
->ob_bytes
+ cur
,
728 PyByteArray_GET_SIZE(self
) - cur
);
730 if (PyByteArray_Resize((PyObject
*)self
,
731 PyByteArray_GET_SIZE(self
) - slicelen
) < 0)
740 if (needed
!= slicelen
) {
741 PyErr_Format(PyExc_ValueError
,
742 "attempt to assign bytes of size %zd "
743 "to extended slice of size %zd",
747 for (cur
= start
, i
= 0; i
< slicelen
; cur
+= step
, i
++)
748 self
->ob_bytes
[cur
] = bytes
[i
];
755 bytes_init(PyByteArrayObject
*self
, PyObject
*args
, PyObject
*kwds
)
757 static char *kwlist
[] = {"source", "encoding", "errors", 0};
758 PyObject
*arg
= NULL
;
759 const char *encoding
= NULL
;
760 const char *errors
= NULL
;
763 PyObject
*(*iternext
)(PyObject
*);
765 if (Py_SIZE(self
) != 0) {
766 /* Empty previous contents (yes, do this first of all!) */
767 if (PyByteArray_Resize((PyObject
*)self
, 0) < 0)
771 /* Parse arguments */
772 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|Oss:bytearray", kwlist
,
773 &arg
, &encoding
, &errors
))
776 /* Make a quick exit if no first argument */
778 if (encoding
!= NULL
|| errors
!= NULL
) {
779 PyErr_SetString(PyExc_TypeError
,
780 "encoding or errors without sequence argument");
786 if (PyBytes_Check(arg
)) {
787 PyObject
*new, *encoded
;
788 if (encoding
!= NULL
) {
789 encoded
= PyCodec_Encode(arg
, encoding
, errors
);
792 assert(PyBytes_Check(encoded
));
798 new = bytes_iconcat(self
, arg
);
806 if (PyUnicode_Check(arg
)) {
807 /* Encode via the codec registry */
808 PyObject
*encoded
, *new;
809 if (encoding
== NULL
) {
810 PyErr_SetString(PyExc_TypeError
,
811 "unicode argument without an encoding");
814 encoded
= PyCodec_Encode(arg
, encoding
, errors
);
817 assert(PyBytes_Check(encoded
));
818 new = bytes_iconcat(self
, encoded
);
826 /* If it's not unicode, there can't be encoding or errors */
827 if (encoding
!= NULL
|| errors
!= NULL
) {
828 PyErr_SetString(PyExc_TypeError
,
829 "encoding or errors without a string argument");
834 count
= PyNumber_AsSsize_t(arg
, PyExc_ValueError
);
835 if (count
== -1 && PyErr_Occurred())
839 PyErr_SetString(PyExc_ValueError
, "negative count");
843 if (PyByteArray_Resize((PyObject
*)self
, count
))
845 memset(self
->ob_bytes
, 0, count
);
850 /* Use the buffer API */
851 if (PyObject_CheckBuffer(arg
)) {
854 if (PyObject_GetBuffer(arg
, &view
, PyBUF_FULL_RO
) < 0)
857 if (PyByteArray_Resize((PyObject
*)self
, size
) < 0) goto fail
;
858 if (PyBuffer_ToContiguous(self
->ob_bytes
, &view
, size
, 'C') < 0)
860 PyBuffer_Release(&view
);
863 PyBuffer_Release(&view
);
867 /* XXX Optimize this if the arguments is a list, tuple */
869 /* Get the iterator */
870 it
= PyObject_GetIter(arg
);
873 iternext
= *Py_TYPE(it
)->tp_iternext
;
875 /* Run the iterator to exhaustion */
880 /* Get the next item */
883 if (PyErr_Occurred()) {
884 if (!PyErr_ExceptionMatches(PyExc_StopIteration
))
891 /* Interpret it as an int (__index__) */
892 rc
= _getbytevalue(item
, &value
);
897 /* Append the byte */
898 if (Py_SIZE(self
) < self
->ob_alloc
)
900 else if (PyByteArray_Resize((PyObject
*)self
, Py_SIZE(self
)+1) < 0)
902 self
->ob_bytes
[Py_SIZE(self
)-1] = value
;
905 /* Clean up and return success */
910 /* Error handling when it != NULL */
915 /* Mostly copied from string_repr, but without the
916 "smart quote" functionality. */
918 bytes_repr(PyByteArrayObject
*self
)
920 static const char *hexdigits
= "0123456789abcdef";
921 const char *quote_prefix
= "bytearray(b";
922 const char *quote_postfix
= ")";
923 Py_ssize_t length
= Py_SIZE(self
);
924 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
925 size_t newsize
= 14 + 4 * length
;
927 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 - 3 != length
) {
928 PyErr_SetString(PyExc_OverflowError
,
929 "bytearray object is too large to make repr");
932 v
= PyUnicode_FromUnicode(NULL
, newsize
);
937 register Py_ssize_t i
;
938 register Py_UNICODE c
;
939 register Py_UNICODE
*p
;
942 /* Figure out which quote to use; single is preferred */
946 start
= PyByteArray_AS_STRING(self
);
947 for (test
= start
; test
< start
+length
; ++test
) {
949 quote
= '\''; /* back to single */
952 else if (*test
== '\'')
959 p
= PyUnicode_AS_UNICODE(v
);
960 while (*quote_prefix
)
961 *p
++ = *quote_prefix
++;
964 for (i
= 0; i
< length
; i
++) {
965 /* There's at least enough room for a hex escape
966 and a closing quote. */
967 assert(newsize
- (p
- PyUnicode_AS_UNICODE(v
)) >= 5);
968 c
= self
->ob_bytes
[i
];
969 if (c
== '\'' || c
== '\\')
970 *p
++ = '\\', *p
++ = c
;
972 *p
++ = '\\', *p
++ = 't';
974 *p
++ = '\\', *p
++ = 'n';
976 *p
++ = '\\', *p
++ = 'r';
978 *p
++ = '\\', *p
++ = 'x', *p
++ = '0', *p
++ = '0';
979 else if (c
< ' ' || c
>= 0x7f) {
982 *p
++ = hexdigits
[(c
& 0xf0) >> 4];
983 *p
++ = hexdigits
[c
& 0xf];
988 assert(newsize
- (p
- PyUnicode_AS_UNICODE(v
)) >= 1);
990 while (*quote_postfix
) {
991 *p
++ = *quote_postfix
++;
994 if (PyUnicode_Resize(&v
, (p
- PyUnicode_AS_UNICODE(v
)))) {
1003 bytes_str(PyObject
*op
)
1006 if (Py_BytesWarningFlag
) {
1007 if (PyErr_WarnEx(PyExc_BytesWarning
,
1008 "str() on a bytearray instance", 1))
1011 return bytes_repr((PyByteArrayObject
*)op
);
1013 return PyBytes_FromStringAndSize(((PyByteArrayObject
*)op
)->ob_bytes
, Py_SIZE(op
));
1017 bytes_richcompare(PyObject
*self
, PyObject
*other
, int op
)
1019 Py_ssize_t self_size
, other_size
;
1020 Py_buffer self_bytes
, other_bytes
;
1025 /* Bytes can be compared to anything that supports the (binary)
1026 buffer API. Except that a comparison with Unicode is always an
1027 error, even if the comparison is for equality. */
1028 if (PyObject_IsInstance(self
, (PyObject
*)&PyUnicode_Type
) ||
1029 PyObject_IsInstance(other
, (PyObject
*)&PyUnicode_Type
)) {
1030 if (Py_BytesWarningFlag
&& op
== Py_EQ
) {
1031 if (PyErr_WarnEx(PyExc_BytesWarning
,
1032 "Comparsion between bytearray and string", 1))
1036 Py_INCREF(Py_NotImplemented
);
1037 return Py_NotImplemented
;
1040 self_size
= _getbuffer(self
, &self_bytes
);
1041 if (self_size
< 0) {
1043 Py_INCREF(Py_NotImplemented
);
1044 return Py_NotImplemented
;
1047 other_size
= _getbuffer(other
, &other_bytes
);
1048 if (other_size
< 0) {
1050 PyBuffer_Release(&self_bytes
);
1051 Py_INCREF(Py_NotImplemented
);
1052 return Py_NotImplemented
;
1055 if (self_size
!= other_size
&& (op
== Py_EQ
|| op
== Py_NE
)) {
1056 /* Shortcut: if the lengths differ, the objects differ */
1057 cmp
= (op
== Py_NE
);
1060 minsize
= self_size
;
1061 if (other_size
< minsize
)
1062 minsize
= other_size
;
1064 cmp
= memcmp(self_bytes
.buf
, other_bytes
.buf
, minsize
);
1065 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1068 if (self_size
< other_size
)
1070 else if (self_size
> other_size
)
1075 case Py_LT
: cmp
= cmp
< 0; break;
1076 case Py_LE
: cmp
= cmp
<= 0; break;
1077 case Py_EQ
: cmp
= cmp
== 0; break;
1078 case Py_NE
: cmp
= cmp
!= 0; break;
1079 case Py_GT
: cmp
= cmp
> 0; break;
1080 case Py_GE
: cmp
= cmp
>= 0; break;
1084 res
= cmp
? Py_True
: Py_False
;
1085 PyBuffer_Release(&self_bytes
);
1086 PyBuffer_Release(&other_bytes
);
1092 bytes_dealloc(PyByteArrayObject
*self
)
1094 if (self
->ob_exports
> 0) {
1095 PyErr_SetString(PyExc_SystemError
,
1096 "deallocated bytearray object has exported buffers");
1099 if (self
->ob_bytes
!= 0) {
1100 PyMem_Free(self
->ob_bytes
);
1102 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1106 /* -------------------------------------------------------------------- */
1109 #define STRINGLIB_CHAR char
1110 #define STRINGLIB_CMP memcmp
1111 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1112 #define STRINGLIB_STR PyByteArray_AS_STRING
1113 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1114 #define STRINGLIB_EMPTY nullbytes
1115 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1116 #define STRINGLIB_MUTABLE 1
1117 #define FROM_BYTEARRAY 1
1119 #include "stringlib/fastsearch.h"
1120 #include "stringlib/count.h"
1121 #include "stringlib/find.h"
1122 #include "stringlib/partition.h"
1123 #include "stringlib/ctype.h"
1124 #include "stringlib/transmogrify.h"
1127 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1128 were copied from the old char* style string object. */
1130 Py_LOCAL_INLINE(void)
1131 _adjust_indices(Py_ssize_t
*start
, Py_ssize_t
*end
, Py_ssize_t len
)
1146 Py_LOCAL_INLINE(Py_ssize_t
)
1147 bytes_find_internal(PyByteArrayObject
*self
, PyObject
*args
, int dir
)
1151 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1154 if (!PyArg_ParseTuple(args
, "O|O&O&:find/rfind/index/rindex", &subobj
,
1155 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1157 if (_getbuffer(subobj
, &subbuf
) < 0)
1160 res
= stringlib_find_slice(
1161 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
1162 subbuf
.buf
, subbuf
.len
, start
, end
);
1164 res
= stringlib_rfind_slice(
1165 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
1166 subbuf
.buf
, subbuf
.len
, start
, end
);
1167 PyBuffer_Release(&subbuf
);
1171 PyDoc_STRVAR(find__doc__
,
1172 "B.find(sub [,start [,end]]) -> int\n\
1174 Return the lowest index in B where subsection sub is found,\n\
1175 such that sub is contained within s[start,end]. Optional\n\
1176 arguments start and end are interpreted as in slice notation.\n\
1178 Return -1 on failure.");
1181 bytes_find(PyByteArrayObject
*self
, PyObject
*args
)
1183 Py_ssize_t result
= bytes_find_internal(self
, args
, +1);
1186 return PyInt_FromSsize_t(result
);
1189 PyDoc_STRVAR(count__doc__
,
1190 "B.count(sub [,start [,end]]) -> int\n\
1192 Return the number of non-overlapping occurrences of subsection sub in\n\
1193 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1194 as in slice notation.");
1197 bytes_count(PyByteArrayObject
*self
, PyObject
*args
)
1200 const char *str
= PyByteArray_AS_STRING(self
);
1201 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
1203 PyObject
*count_obj
;
1205 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
1206 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1209 if (_getbuffer(sub_obj
, &vsub
) < 0)
1212 _adjust_indices(&start
, &end
, PyByteArray_GET_SIZE(self
));
1214 count_obj
= PyInt_FromSsize_t(
1215 stringlib_count(str
+ start
, end
- start
, vsub
.buf
, vsub
.len
)
1217 PyBuffer_Release(&vsub
);
1222 PyDoc_STRVAR(index__doc__
,
1223 "B.index(sub [,start [,end]]) -> int\n\
1225 Like B.find() but raise ValueError when the subsection is not found.");
1228 bytes_index(PyByteArrayObject
*self
, PyObject
*args
)
1230 Py_ssize_t result
= bytes_find_internal(self
, args
, +1);
1234 PyErr_SetString(PyExc_ValueError
,
1235 "subsection not found");
1238 return PyInt_FromSsize_t(result
);
1242 PyDoc_STRVAR(rfind__doc__
,
1243 "B.rfind(sub [,start [,end]]) -> int\n\
1245 Return the highest index in B where subsection sub is found,\n\
1246 such that sub is contained within s[start,end]. Optional\n\
1247 arguments start and end are interpreted as in slice notation.\n\
1249 Return -1 on failure.");
1252 bytes_rfind(PyByteArrayObject
*self
, PyObject
*args
)
1254 Py_ssize_t result
= bytes_find_internal(self
, args
, -1);
1257 return PyInt_FromSsize_t(result
);
1261 PyDoc_STRVAR(rindex__doc__
,
1262 "B.rindex(sub [,start [,end]]) -> int\n\
1264 Like B.rfind() but raise ValueError when the subsection is not found.");
1267 bytes_rindex(PyByteArrayObject
*self
, PyObject
*args
)
1269 Py_ssize_t result
= bytes_find_internal(self
, args
, -1);
1273 PyErr_SetString(PyExc_ValueError
,
1274 "subsection not found");
1277 return PyInt_FromSsize_t(result
);
1282 bytes_contains(PyObject
*self
, PyObject
*arg
)
1284 Py_ssize_t ival
= PyNumber_AsSsize_t(arg
, PyExc_ValueError
);
1285 if (ival
== -1 && PyErr_Occurred()) {
1289 if (_getbuffer(arg
, &varg
) < 0)
1291 pos
= stringlib_find(PyByteArray_AS_STRING(self
), Py_SIZE(self
),
1292 varg
.buf
, varg
.len
, 0);
1293 PyBuffer_Release(&varg
);
1296 if (ival
< 0 || ival
>= 256) {
1297 PyErr_SetString(PyExc_ValueError
, "byte must be in range(0, 256)");
1301 return memchr(PyByteArray_AS_STRING(self
), ival
, Py_SIZE(self
)) != NULL
;
1305 /* Matches the end (direction >= 0) or start (direction < 0) of self
1306 * against substr, using the start and end arguments. Returns
1307 * -1 on error, 0 if not found and 1 if found.
1310 _bytes_tailmatch(PyByteArrayObject
*self
, PyObject
*substr
, Py_ssize_t start
,
1311 Py_ssize_t end
, int direction
)
1313 Py_ssize_t len
= PyByteArray_GET_SIZE(self
);
1318 str
= PyByteArray_AS_STRING(self
);
1320 if (_getbuffer(substr
, &vsubstr
) < 0)
1323 _adjust_indices(&start
, &end
, len
);
1325 if (direction
< 0) {
1327 if (start
+vsubstr
.len
> len
) {
1332 if (end
-start
< vsubstr
.len
|| start
> len
) {
1336 if (end
-vsubstr
.len
> start
)
1337 start
= end
- vsubstr
.len
;
1339 if (end
-start
>= vsubstr
.len
)
1340 rv
= ! memcmp(str
+start
, vsubstr
.buf
, vsubstr
.len
);
1343 PyBuffer_Release(&vsubstr
);
1348 PyDoc_STRVAR(startswith__doc__
,
1349 "B.startswith(prefix [,start [,end]]) -> bool\n\
1351 Return True if B starts with the specified prefix, False otherwise.\n\
1352 With optional start, test B beginning at that position.\n\
1353 With optional end, stop comparing B at that position.\n\
1354 prefix can also be a tuple of strings to try.");
1357 bytes_startswith(PyByteArrayObject
*self
, PyObject
*args
)
1359 Py_ssize_t start
= 0;
1360 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1364 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
1365 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1367 if (PyTuple_Check(subobj
)) {
1369 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
1370 result
= _bytes_tailmatch(self
,
1371 PyTuple_GET_ITEM(subobj
, i
),
1381 result
= _bytes_tailmatch(self
, subobj
, start
, end
, -1);
1385 return PyBool_FromLong(result
);
1388 PyDoc_STRVAR(endswith__doc__
,
1389 "B.endswith(suffix [,start [,end]]) -> bool\n\
1391 Return True if B ends with the specified suffix, False otherwise.\n\
1392 With optional start, test B beginning at that position.\n\
1393 With optional end, stop comparing B at that position.\n\
1394 suffix can also be a tuple of strings to try.");
1397 bytes_endswith(PyByteArrayObject
*self
, PyObject
*args
)
1399 Py_ssize_t start
= 0;
1400 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1404 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
1405 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1407 if (PyTuple_Check(subobj
)) {
1409 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
1410 result
= _bytes_tailmatch(self
,
1411 PyTuple_GET_ITEM(subobj
, i
),
1421 result
= _bytes_tailmatch(self
, subobj
, start
, end
, +1);
1425 return PyBool_FromLong(result
);
1429 PyDoc_STRVAR(translate__doc__
,
1430 "B.translate(table[, deletechars]) -> bytearray\n\
1432 Return a copy of B, where all characters occurring in the\n\
1433 optional argument deletechars are removed, and the remaining\n\
1434 characters have been mapped through the given translation\n\
1435 table, which must be a bytes object of length 256.");
1438 bytes_translate(PyByteArrayObject
*self
, PyObject
*args
)
1440 register char *input
, *output
;
1441 register const char *table
;
1442 register Py_ssize_t i
, c
;
1443 PyObject
*input_obj
= (PyObject
*)self
;
1444 const char *output_start
;
1446 PyObject
*result
= NULL
;
1447 int trans_table
[256];
1448 PyObject
*tableobj
= NULL
, *delobj
= NULL
;
1449 Py_buffer vtable
, vdel
;
1451 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
1452 &tableobj
, &delobj
))
1455 if (tableobj
== Py_None
) {
1458 } else if (_getbuffer(tableobj
, &vtable
) < 0) {
1461 if (vtable
.len
!= 256) {
1462 PyErr_SetString(PyExc_ValueError
,
1463 "translation table must be 256 characters long");
1466 table
= (const char*)vtable
.buf
;
1469 if (delobj
!= NULL
) {
1470 if (_getbuffer(delobj
, &vdel
) < 0) {
1471 delobj
= NULL
; /* don't try to release vdel buffer on exit */
1480 inlen
= PyByteArray_GET_SIZE(input_obj
);
1481 result
= PyByteArray_FromStringAndSize((char *)NULL
, inlen
);
1484 output_start
= output
= PyByteArray_AsString(result
);
1485 input
= PyByteArray_AS_STRING(input_obj
);
1487 if (vdel
.len
== 0 && table
!= NULL
) {
1488 /* If no deletions are required, use faster code */
1489 for (i
= inlen
; --i
>= 0; ) {
1490 c
= Py_CHARMASK(*input
++);
1491 *output
++ = table
[c
];
1496 if (table
== NULL
) {
1497 for (i
= 0; i
< 256; i
++)
1498 trans_table
[i
] = Py_CHARMASK(i
);
1500 for (i
= 0; i
< 256; i
++)
1501 trans_table
[i
] = Py_CHARMASK(table
[i
]);
1504 for (i
= 0; i
< vdel
.len
; i
++)
1505 trans_table
[(int) Py_CHARMASK( ((unsigned char*)vdel
.buf
)[i
] )] = -1;
1507 for (i
= inlen
; --i
>= 0; ) {
1508 c
= Py_CHARMASK(*input
++);
1509 if (trans_table
[c
] != -1)
1510 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
1513 /* Fix the size of the resulting string */
1515 PyByteArray_Resize(result
, output
- output_start
);
1518 if (tableobj
!= NULL
)
1519 PyBuffer_Release(&vtable
);
1521 PyBuffer_Release(&vdel
);
1529 /* find and count characters and substrings */
1531 #define findchar(target, target_len, c) \
1532 ((char *)memchr((const void *)(target), c, target_len))
1534 /* Don't call if length < 2 */
1535 #define Py_STRING_MATCH(target, offset, pattern, length) \
1536 (target[offset] == pattern[0] && \
1537 target[offset+length-1] == pattern[length-1] && \
1538 !memcmp(target+offset+1, pattern+1, length-2) )
1541 /* Bytes ops must return a string, create a copy */
1542 Py_LOCAL(PyByteArrayObject
*)
1543 return_self(PyByteArrayObject
*self
)
1545 return (PyByteArrayObject
*)PyByteArray_FromStringAndSize(
1546 PyByteArray_AS_STRING(self
),
1547 PyByteArray_GET_SIZE(self
));
1550 Py_LOCAL_INLINE(Py_ssize_t
)
1551 countchar(const char *target
, Py_ssize_t target_len
, char c
, Py_ssize_t maxcount
)
1554 const char *start
=target
;
1555 const char *end
=target
+target_len
;
1557 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
1559 if (count
>= maxcount
)
1566 Py_LOCAL(Py_ssize_t
)
1567 findstring(const char *target
, Py_ssize_t target_len
,
1568 const char *pattern
, Py_ssize_t pattern_len
,
1574 start
+= target_len
;
1578 if (end
> target_len
) {
1580 } else if (end
< 0) {
1586 /* zero-length substrings always match at the first attempt */
1587 if (pattern_len
== 0)
1588 return (direction
> 0) ? start
: end
;
1592 if (direction
< 0) {
1593 for (; end
>= start
; end
--)
1594 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
))
1597 for (; start
<= end
; start
++)
1598 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
))
1604 Py_LOCAL_INLINE(Py_ssize_t
)
1605 countstring(const char *target
, Py_ssize_t target_len
,
1606 const char *pattern
, Py_ssize_t pattern_len
,
1609 int direction
, Py_ssize_t maxcount
)
1614 start
+= target_len
;
1618 if (end
> target_len
) {
1620 } else if (end
< 0) {
1626 /* zero-length substrings match everywhere */
1627 if (pattern_len
== 0 || maxcount
== 0) {
1628 if (target_len
+1 < maxcount
)
1629 return target_len
+1;
1634 if (direction
< 0) {
1635 for (; (end
>= start
); end
--)
1636 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
)) {
1638 if (--maxcount
<= 0) break;
1639 end
-= pattern_len
-1;
1642 for (; (start
<= end
); start
++)
1643 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
)) {
1645 if (--maxcount
<= 0)
1647 start
+= pattern_len
-1;
1654 /* Algorithms for different cases of string replacement */
1656 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1657 Py_LOCAL(PyByteArrayObject
*)
1658 replace_interleave(PyByteArrayObject
*self
,
1659 const char *to_s
, Py_ssize_t to_len
,
1660 Py_ssize_t maxcount
)
1662 char *self_s
, *result_s
;
1663 Py_ssize_t self_len
, result_len
;
1664 Py_ssize_t count
, i
, product
;
1665 PyByteArrayObject
*result
;
1667 self_len
= PyByteArray_GET_SIZE(self
);
1669 /* 1 at the end plus 1 after every character */
1671 if (maxcount
< count
)
1674 /* Check for overflow */
1675 /* result_len = count * to_len + self_len; */
1676 product
= count
* to_len
;
1677 if (product
/ to_len
!= count
) {
1678 PyErr_SetString(PyExc_OverflowError
,
1679 "replace string is too long");
1682 result_len
= product
+ self_len
;
1683 if (result_len
< 0) {
1684 PyErr_SetString(PyExc_OverflowError
,
1685 "replace string is too long");
1689 if (! (result
= (PyByteArrayObject
*)
1690 PyByteArray_FromStringAndSize(NULL
, result_len
)) )
1693 self_s
= PyByteArray_AS_STRING(self
);
1694 result_s
= PyByteArray_AS_STRING(result
);
1696 /* TODO: special case single character, which doesn't need memcpy */
1698 /* Lay the first one down (guaranteed this will occur) */
1699 Py_MEMCPY(result_s
, to_s
, to_len
);
1703 for (i
=0; i
<count
; i
++) {
1704 *result_s
++ = *self_s
++;
1705 Py_MEMCPY(result_s
, to_s
, to_len
);
1709 /* Copy the rest of the original string */
1710 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
1715 /* Special case for deleting a single character */
1716 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1717 Py_LOCAL(PyByteArrayObject
*)
1718 replace_delete_single_character(PyByteArrayObject
*self
,
1719 char from_c
, Py_ssize_t maxcount
)
1721 char *self_s
, *result_s
;
1722 char *start
, *next
, *end
;
1723 Py_ssize_t self_len
, result_len
;
1725 PyByteArrayObject
*result
;
1727 self_len
= PyByteArray_GET_SIZE(self
);
1728 self_s
= PyByteArray_AS_STRING(self
);
1730 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
1732 return return_self(self
);
1735 result_len
= self_len
- count
; /* from_len == 1 */
1736 assert(result_len
>=0);
1738 if ( (result
= (PyByteArrayObject
*)
1739 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1741 result_s
= PyByteArray_AS_STRING(result
);
1744 end
= self_s
+ self_len
;
1745 while (count
-- > 0) {
1746 next
= findchar(start
, end
-start
, from_c
);
1749 Py_MEMCPY(result_s
, start
, next
-start
);
1750 result_s
+= (next
-start
);
1753 Py_MEMCPY(result_s
, start
, end
-start
);
1758 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1760 Py_LOCAL(PyByteArrayObject
*)
1761 replace_delete_substring(PyByteArrayObject
*self
,
1762 const char *from_s
, Py_ssize_t from_len
,
1763 Py_ssize_t maxcount
)
1765 char *self_s
, *result_s
;
1766 char *start
, *next
, *end
;
1767 Py_ssize_t self_len
, result_len
;
1768 Py_ssize_t count
, offset
;
1769 PyByteArrayObject
*result
;
1771 self_len
= PyByteArray_GET_SIZE(self
);
1772 self_s
= PyByteArray_AS_STRING(self
);
1774 count
= countstring(self_s
, self_len
,
1781 return return_self(self
);
1784 result_len
= self_len
- (count
* from_len
);
1785 assert (result_len
>=0);
1787 if ( (result
= (PyByteArrayObject
*)
1788 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1791 result_s
= PyByteArray_AS_STRING(result
);
1794 end
= self_s
+ self_len
;
1795 while (count
-- > 0) {
1796 offset
= findstring(start
, end
-start
,
1798 0, end
-start
, FORWARD
);
1801 next
= start
+ offset
;
1803 Py_MEMCPY(result_s
, start
, next
-start
);
1805 result_s
+= (next
-start
);
1806 start
= next
+from_len
;
1808 Py_MEMCPY(result_s
, start
, end
-start
);
1812 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1813 Py_LOCAL(PyByteArrayObject
*)
1814 replace_single_character_in_place(PyByteArrayObject
*self
,
1815 char from_c
, char to_c
,
1816 Py_ssize_t maxcount
)
1818 char *self_s
, *result_s
, *start
, *end
, *next
;
1819 Py_ssize_t self_len
;
1820 PyByteArrayObject
*result
;
1822 /* The result string will be the same size */
1823 self_s
= PyByteArray_AS_STRING(self
);
1824 self_len
= PyByteArray_GET_SIZE(self
);
1826 next
= findchar(self_s
, self_len
, from_c
);
1829 /* No matches; return the original bytes */
1830 return return_self(self
);
1833 /* Need to make a new bytes */
1834 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, self_len
);
1837 result_s
= PyByteArray_AS_STRING(result
);
1838 Py_MEMCPY(result_s
, self_s
, self_len
);
1840 /* change everything in-place, starting with this one */
1841 start
= result_s
+ (next
-self_s
);
1844 end
= result_s
+ self_len
;
1846 while (--maxcount
> 0) {
1847 next
= findchar(start
, end
-start
, from_c
);
1857 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1858 Py_LOCAL(PyByteArrayObject
*)
1859 replace_substring_in_place(PyByteArrayObject
*self
,
1860 const char *from_s
, Py_ssize_t from_len
,
1861 const char *to_s
, Py_ssize_t to_len
,
1862 Py_ssize_t maxcount
)
1864 char *result_s
, *start
, *end
;
1866 Py_ssize_t self_len
, offset
;
1867 PyByteArrayObject
*result
;
1869 /* The result bytes will be the same size */
1871 self_s
= PyByteArray_AS_STRING(self
);
1872 self_len
= PyByteArray_GET_SIZE(self
);
1874 offset
= findstring(self_s
, self_len
,
1876 0, self_len
, FORWARD
);
1878 /* No matches; return the original bytes */
1879 return return_self(self
);
1882 /* Need to make a new bytes */
1883 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, self_len
);
1886 result_s
= PyByteArray_AS_STRING(result
);
1887 Py_MEMCPY(result_s
, self_s
, self_len
);
1889 /* change everything in-place, starting with this one */
1890 start
= result_s
+ offset
;
1891 Py_MEMCPY(start
, to_s
, from_len
);
1893 end
= result_s
+ self_len
;
1895 while ( --maxcount
> 0) {
1896 offset
= findstring(start
, end
-start
,
1898 0, end
-start
, FORWARD
);
1901 Py_MEMCPY(start
+offset
, to_s
, from_len
);
1902 start
+= offset
+from_len
;
1908 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1909 Py_LOCAL(PyByteArrayObject
*)
1910 replace_single_character(PyByteArrayObject
*self
,
1912 const char *to_s
, Py_ssize_t to_len
,
1913 Py_ssize_t maxcount
)
1915 char *self_s
, *result_s
;
1916 char *start
, *next
, *end
;
1917 Py_ssize_t self_len
, result_len
;
1918 Py_ssize_t count
, product
;
1919 PyByteArrayObject
*result
;
1921 self_s
= PyByteArray_AS_STRING(self
);
1922 self_len
= PyByteArray_GET_SIZE(self
);
1924 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
1926 /* no matches, return unchanged */
1927 return return_self(self
);
1930 /* use the difference between current and new, hence the "-1" */
1931 /* result_len = self_len + count * (to_len-1) */
1932 product
= count
* (to_len
-1);
1933 if (product
/ (to_len
-1) != count
) {
1934 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1937 result_len
= self_len
+ product
;
1938 if (result_len
< 0) {
1939 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1943 if ( (result
= (PyByteArrayObject
*)
1944 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1946 result_s
= PyByteArray_AS_STRING(result
);
1949 end
= self_s
+ self_len
;
1950 while (count
-- > 0) {
1951 next
= findchar(start
, end
-start
, from_c
);
1955 if (next
== start
) {
1956 /* replace with the 'to' */
1957 Py_MEMCPY(result_s
, to_s
, to_len
);
1961 /* copy the unchanged old then the 'to' */
1962 Py_MEMCPY(result_s
, start
, next
-start
);
1963 result_s
+= (next
-start
);
1964 Py_MEMCPY(result_s
, to_s
, to_len
);
1969 /* Copy the remainder of the remaining bytes */
1970 Py_MEMCPY(result_s
, start
, end
-start
);
1975 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1976 Py_LOCAL(PyByteArrayObject
*)
1977 replace_substring(PyByteArrayObject
*self
,
1978 const char *from_s
, Py_ssize_t from_len
,
1979 const char *to_s
, Py_ssize_t to_len
,
1980 Py_ssize_t maxcount
)
1982 char *self_s
, *result_s
;
1983 char *start
, *next
, *end
;
1984 Py_ssize_t self_len
, result_len
;
1985 Py_ssize_t count
, offset
, product
;
1986 PyByteArrayObject
*result
;
1988 self_s
= PyByteArray_AS_STRING(self
);
1989 self_len
= PyByteArray_GET_SIZE(self
);
1991 count
= countstring(self_s
, self_len
,
1993 0, self_len
, FORWARD
, maxcount
);
1995 /* no matches, return unchanged */
1996 return return_self(self
);
1999 /* Check for overflow */
2000 /* result_len = self_len + count * (to_len-from_len) */
2001 product
= count
* (to_len
-from_len
);
2002 if (product
/ (to_len
-from_len
) != count
) {
2003 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
2006 result_len
= self_len
+ product
;
2007 if (result_len
< 0) {
2008 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
2012 if ( (result
= (PyByteArrayObject
*)
2013 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
2015 result_s
= PyByteArray_AS_STRING(result
);
2018 end
= self_s
+ self_len
;
2019 while (count
-- > 0) {
2020 offset
= findstring(start
, end
-start
,
2022 0, end
-start
, FORWARD
);
2025 next
= start
+offset
;
2026 if (next
== start
) {
2027 /* replace with the 'to' */
2028 Py_MEMCPY(result_s
, to_s
, to_len
);
2032 /* copy the unchanged old then the 'to' */
2033 Py_MEMCPY(result_s
, start
, next
-start
);
2034 result_s
+= (next
-start
);
2035 Py_MEMCPY(result_s
, to_s
, to_len
);
2037 start
= next
+from_len
;
2040 /* Copy the remainder of the remaining bytes */
2041 Py_MEMCPY(result_s
, start
, end
-start
);
2047 Py_LOCAL(PyByteArrayObject
*)
2048 replace(PyByteArrayObject
*self
,
2049 const char *from_s
, Py_ssize_t from_len
,
2050 const char *to_s
, Py_ssize_t to_len
,
2051 Py_ssize_t maxcount
)
2054 maxcount
= PY_SSIZE_T_MAX
;
2055 } else if (maxcount
== 0 || PyByteArray_GET_SIZE(self
) == 0) {
2056 /* nothing to do; return the original bytes */
2057 return return_self(self
);
2060 if (maxcount
== 0 ||
2061 (from_len
== 0 && to_len
== 0)) {
2062 /* nothing to do; return the original bytes */
2063 return return_self(self
);
2066 /* Handle zero-length special cases */
2068 if (from_len
== 0) {
2069 /* insert the 'to' bytes everywhere. */
2070 /* >>> "Python".replace("", ".") */
2071 /* '.P.y.t.h.o.n.' */
2072 return replace_interleave(self
, to_s
, to_len
, maxcount
);
2075 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2076 /* point for an empty self bytes to generate a non-empty bytes */
2077 /* Special case so the remaining code always gets a non-empty bytes */
2078 if (PyByteArray_GET_SIZE(self
) == 0) {
2079 return return_self(self
);
2083 /* delete all occurances of 'from' bytes */
2084 if (from_len
== 1) {
2085 return replace_delete_single_character(
2086 self
, from_s
[0], maxcount
);
2088 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
2092 /* Handle special case where both bytes have the same length */
2094 if (from_len
== to_len
) {
2095 if (from_len
== 1) {
2096 return replace_single_character_in_place(
2102 return replace_substring_in_place(
2103 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2107 /* Otherwise use the more generic algorithms */
2108 if (from_len
== 1) {
2109 return replace_single_character(self
, from_s
[0],
2110 to_s
, to_len
, maxcount
);
2112 /* len('from')>=2, len('to')>=1 */
2113 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2118 PyDoc_STRVAR(replace__doc__
,
2119 "B.replace(old, new[, count]) -> bytes\n\
2121 Return a copy of B with all occurrences of subsection\n\
2122 old replaced by new. If the optional argument count is\n\
2123 given, only the first count occurrences are replaced.");
2126 bytes_replace(PyByteArrayObject
*self
, PyObject
*args
)
2128 Py_ssize_t count
= -1;
2129 PyObject
*from
, *to
, *res
;
2130 Py_buffer vfrom
, vto
;
2132 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
2135 if (_getbuffer(from
, &vfrom
) < 0)
2137 if (_getbuffer(to
, &vto
) < 0) {
2138 PyBuffer_Release(&vfrom
);
2142 res
= (PyObject
*)replace((PyByteArrayObject
*) self
,
2143 vfrom
.buf
, vfrom
.len
,
2144 vto
.buf
, vto
.len
, count
);
2146 PyBuffer_Release(&vfrom
);
2147 PyBuffer_Release(&vto
);
2152 /* Overallocate the initial list to reduce the number of reallocs for small
2153 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
2154 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
2155 text (roughly 11 words per line) and field delimited data (usually 1-10
2156 fields). For large strings the split algorithms are bandwidth limited
2157 so increasing the preallocation likely will not improve things.*/
/* Upper bound on the number of list slots the split routines
   preallocate. */
#define MAX_PREALLOC 12

/* Number of slots to preallocate for at most `maxsplit` splits:
   5 splits gives 6 elements, capped at MAX_PREALLOC.  The argument is
   parenthesized so that an expression argument (e.g. a conditional)
   expands as a single operand. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
2165 #define SPLIT_APPEND(data, left, right) \
2166 str = PyByteArray_FromStringAndSize((data) + (left), \
2167 (right) - (left)); \
2170 if (PyList_Append(list, str)) { \
2177 #define SPLIT_ADD(data, left, right) { \
2178 str = PyByteArray_FromStringAndSize((data) + (left), \
2179 (right) - (left)); \
2182 if (count < MAX_PREALLOC) { \
2183 PyList_SET_ITEM(list, count, str); \
2185 if (PyList_Append(list, str)) { \
2194 /* Always force the list to the expected size. */
2195 #define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2198 Py_LOCAL_INLINE(PyObject
*)
2199 split_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
2201 register Py_ssize_t i
, j
, count
= 0;
2203 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2209 while ((j
< len
) && (maxcount
-- > 0)) {
2210 for(; j
< len
; j
++) {
2211 /* I found that using memchr makes no difference */
2220 SPLIT_ADD(s
, i
, len
);
2222 FIX_PREALLOC_SIZE(list
);
2231 Py_LOCAL_INLINE(PyObject
*)
2232 split_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxcount
)
2234 register Py_ssize_t i
, j
, count
= 0;
2236 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2241 for (i
= j
= 0; i
< len
; ) {
2243 while (i
< len
&& ISSPACE(s
[i
]))
2246 while (i
< len
&& !ISSPACE(s
[i
]))
2249 if (maxcount
-- <= 0)
2252 while (i
< len
&& ISSPACE(s
[i
]))
2258 SPLIT_ADD(s
, j
, len
);
2260 FIX_PREALLOC_SIZE(list
);
2268 PyDoc_STRVAR(split__doc__
,
2269 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2271 Return a list of the sections in B, using sep as the delimiter.\n\
2272 If sep is not given, B is split on ASCII whitespace characters\n\
2273 (space, tab, return, newline, formfeed, vertical tab).\n\
2274 If maxsplit is given, at most maxsplit splits are done.");
2277 bytes_split(PyByteArrayObject
*self
, PyObject
*args
)
2279 Py_ssize_t len
= PyByteArray_GET_SIZE(self
), n
, i
, j
;
2280 Py_ssize_t maxsplit
= -1, count
= 0;
2281 const char *s
= PyByteArray_AS_STRING(self
), *sub
;
2282 PyObject
*list
, *str
, *subobj
= Py_None
;
2288 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
2291 maxsplit
= PY_SSIZE_T_MAX
;
2293 if (subobj
== Py_None
)
2294 return split_whitespace(s
, len
, maxsplit
);
2296 if (_getbuffer(subobj
, &vsub
) < 0)
2302 PyErr_SetString(PyExc_ValueError
, "empty separator");
2303 PyBuffer_Release(&vsub
);
2307 list
= split_char(s
, len
, sub
[0], maxsplit
);
2308 PyBuffer_Release(&vsub
);
2312 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
2314 PyBuffer_Release(&vsub
);
2320 while (maxsplit
-- > 0) {
2321 pos
= fastsearch(s
+i
, len
-i
, sub
, n
, FAST_SEARCH
);
2330 while ((j
+n
<= len
) && (maxsplit
-- > 0)) {
2331 for (; j
+n
<= len
; j
++) {
2332 if (Py_STRING_MATCH(s
, j
, sub
, n
)) {
2340 SPLIT_ADD(s
, i
, len
);
2341 FIX_PREALLOC_SIZE(list
);
2342 PyBuffer_Release(&vsub
);
2347 PyBuffer_Release(&vsub
);
2351 /* stringlib's partition shares nullbytes in some cases.
2352 undo this, we don't want the nullbytes to be shared. */
2354 make_nullbytes_unique(PyObject
*result
)
2356 if (result
!= NULL
) {
2358 assert(PyTuple_Check(result
));
2359 assert(PyTuple_GET_SIZE(result
) == 3);
2360 for (i
= 0; i
< 3; i
++) {
2361 if (PyTuple_GET_ITEM(result
, i
) == (PyObject
*)nullbytes
) {
2362 PyObject
*new = PyByteArray_FromStringAndSize(NULL
, 0);
2368 Py_DECREF(nullbytes
);
2369 PyTuple_SET_ITEM(result
, i
, new);
2376 PyDoc_STRVAR(partition__doc__
,
2377 "B.partition(sep) -> (head, sep, tail)\n\
2379 Searches for the separator sep in B, and returns the part before it,\n\
2380 the separator itself, and the part after it. If the separator is not\n\
2381 found, returns B and two empty bytearray objects.");
2384 bytes_partition(PyByteArrayObject
*self
, PyObject
*sep_obj
)
2386 PyObject
*bytesep
, *result
;
2388 bytesep
= PyByteArray_FromObject(sep_obj
);
2392 result
= stringlib_partition(
2394 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
2396 PyByteArray_AS_STRING(bytesep
), PyByteArray_GET_SIZE(bytesep
)
2400 return make_nullbytes_unique(result
);
2403 PyDoc_STRVAR(rpartition__doc__
,
2404 "B.rpartition(sep) -> (tail, sep, head)\n\
2406 Searches for the separator sep in B, starting at the end of B,\n\
2407 and returns the part before it, the separator itself, and the\n\
2408 part after it. If the separator is not found, returns two empty\n\
2409 bytearray objects and B.");
2412 bytes_rpartition(PyByteArrayObject
*self
, PyObject
*sep_obj
)
2414 PyObject
*bytesep
, *result
;
2416 bytesep
= PyByteArray_FromObject(sep_obj
);
2420 result
= stringlib_rpartition(
2422 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
2424 PyByteArray_AS_STRING(bytesep
), PyByteArray_GET_SIZE(bytesep
)
2428 return make_nullbytes_unique(result
);
2431 Py_LOCAL_INLINE(PyObject
*)
2432 rsplit_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
2434 register Py_ssize_t i
, j
, count
=0;
2436 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2442 while ((i
>= 0) && (maxcount
-- > 0)) {
2443 for (; i
>= 0; i
--) {
2445 SPLIT_ADD(s
, i
+ 1, j
+ 1);
2452 SPLIT_ADD(s
, 0, j
+ 1);
2454 FIX_PREALLOC_SIZE(list
);
2455 if (PyList_Reverse(list
) < 0)
2465 Py_LOCAL_INLINE(PyObject
*)
2466 rsplit_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxcount
)
2468 register Py_ssize_t i
, j
, count
= 0;
2470 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2475 for (i
= j
= len
- 1; i
>= 0; ) {
2477 while (i
>= 0 && ISSPACE(s
[i
]))
2480 while (i
>= 0 && !ISSPACE(s
[i
]))
2483 if (maxcount
-- <= 0)
2485 SPLIT_ADD(s
, i
+ 1, j
+ 1);
2486 while (i
>= 0 && ISSPACE(s
[i
]))
2492 SPLIT_ADD(s
, 0, j
+ 1);
2494 FIX_PREALLOC_SIZE(list
);
2495 if (PyList_Reverse(list
) < 0)
2505 PyDoc_STRVAR(rsplit__doc__
,
2506 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2508 Return a list of the sections in B, using sep as the delimiter,\n\
2509 starting at the end of B and working to the front.\n\
2510 If sep is not given, B is split on ASCII whitespace characters\n\
2511 (space, tab, return, newline, formfeed, vertical tab).\n\
2512 If maxsplit is given, at most maxsplit splits are done.");
2515 bytes_rsplit(PyByteArrayObject
*self
, PyObject
*args
)
2517 Py_ssize_t len
= PyByteArray_GET_SIZE(self
), n
, i
, j
;
2518 Py_ssize_t maxsplit
= -1, count
= 0;
2519 const char *s
= PyByteArray_AS_STRING(self
), *sub
;
2520 PyObject
*list
, *str
, *subobj
= Py_None
;
2523 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
2526 maxsplit
= PY_SSIZE_T_MAX
;
2528 if (subobj
== Py_None
)
2529 return rsplit_whitespace(s
, len
, maxsplit
);
2531 if (_getbuffer(subobj
, &vsub
) < 0)
2537 PyErr_SetString(PyExc_ValueError
, "empty separator");
2538 PyBuffer_Release(&vsub
);
2542 list
= rsplit_char(s
, len
, sub
[0], maxsplit
);
2543 PyBuffer_Release(&vsub
);
2547 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
2549 PyBuffer_Release(&vsub
);
2556 while ( (i
>= 0) && (maxsplit
-- > 0) ) {
2558 if (Py_STRING_MATCH(s
, i
, sub
, n
)) {
2559 SPLIT_ADD(s
, i
+ n
, j
);
2567 FIX_PREALLOC_SIZE(list
);
2568 if (PyList_Reverse(list
) < 0)
2570 PyBuffer_Release(&vsub
);
2575 PyBuffer_Release(&vsub
);
2579 PyDoc_STRVAR(reverse__doc__
,
2580 "B.reverse() -> None\n\
2582 Reverse the order of the values in B in place.");
2584 bytes_reverse(PyByteArrayObject
*self
, PyObject
*unused
)
2586 char swap
, *head
, *tail
;
2587 Py_ssize_t i
, j
, n
= Py_SIZE(self
);
2590 head
= self
->ob_bytes
;
2591 tail
= head
+ n
- 1;
2592 for (i
= 0; i
< j
; i
++) {
2601 PyDoc_STRVAR(insert__doc__
,
2602 "B.insert(index, int) -> None\n\
2604 Insert a single item into the bytearray before the given index.");
2606 bytes_insert(PyByteArrayObject
*self
, PyObject
*args
)
2610 Py_ssize_t where
, n
= Py_SIZE(self
);
2612 if (!PyArg_ParseTuple(args
, "nO:insert", &where
, &value
))
2615 if (n
== PY_SSIZE_T_MAX
) {
2616 PyErr_SetString(PyExc_OverflowError
,
2617 "cannot add more objects to bytes");
2620 if (!_getbytevalue(value
, &ival
))
2622 if (PyByteArray_Resize((PyObject
*)self
, n
+ 1) < 0)
2632 memmove(self
->ob_bytes
+ where
+ 1, self
->ob_bytes
+ where
, n
- where
);
2633 self
->ob_bytes
[where
] = ival
;
2638 PyDoc_STRVAR(append__doc__
,
2639 "B.append(int) -> None\n\
2641 Append a single item to the end of B.");
2643 bytes_append(PyByteArrayObject
*self
, PyObject
*arg
)
2646 Py_ssize_t n
= Py_SIZE(self
);
2648 if (! _getbytevalue(arg
, &value
))
2650 if (n
== PY_SSIZE_T_MAX
) {
2651 PyErr_SetString(PyExc_OverflowError
,
2652 "cannot add more objects to bytes");
2655 if (PyByteArray_Resize((PyObject
*)self
, n
+ 1) < 0)
2658 self
->ob_bytes
[n
] = value
;
2663 PyDoc_STRVAR(extend__doc__
,
2664 "B.extend(iterable int) -> None\n\
2666 Append all the elements from the iterator or sequence to the\n\
2669 bytes_extend(PyByteArrayObject
*self
, PyObject
*arg
)
2671 PyObject
*it
, *item
, *bytes_obj
;
2672 Py_ssize_t buf_size
= 0, len
= 0;
2676 /* bytes_setslice code only accepts something supporting PEP 3118. */
2677 if (PyObject_CheckBuffer(arg
)) {
2678 if (bytes_setslice(self
, Py_SIZE(self
), Py_SIZE(self
), arg
) == -1)
2684 it
= PyObject_GetIter(arg
);
2688 /* Try to determine the length of the argument. 32 is abitrary. */
2689 buf_size
= _PyObject_LengthHint(arg
, 32);
2691 bytes_obj
= PyByteArray_FromStringAndSize(NULL
, buf_size
);
2692 if (bytes_obj
== NULL
)
2694 buf
= PyByteArray_AS_STRING(bytes_obj
);
2696 while ((item
= PyIter_Next(it
)) != NULL
) {
2697 if (! _getbytevalue(item
, &value
)) {
2700 Py_DECREF(bytes_obj
);
2706 if (len
>= buf_size
) {
2707 buf_size
= len
+ (len
>> 1) + 1;
2708 if (PyByteArray_Resize((PyObject
*)bytes_obj
, buf_size
) < 0) {
2710 Py_DECREF(bytes_obj
);
2713 /* Recompute the `buf' pointer, since the resizing operation may
2714 have invalidated it. */
2715 buf
= PyByteArray_AS_STRING(bytes_obj
);
2720 /* Resize down to exact size. */
2721 if (PyByteArray_Resize((PyObject
*)bytes_obj
, len
) < 0) {
2722 Py_DECREF(bytes_obj
);
2726 if (bytes_setslice(self
, Py_SIZE(self
), Py_SIZE(self
), bytes_obj
) == -1)
2728 Py_DECREF(bytes_obj
);
2733 PyDoc_STRVAR(pop__doc__
,
2734 "B.pop([index]) -> int\n\
2736 Remove and return a single item from B. If no index\n\
2737 argument is given, will pop the last value.");
2739 bytes_pop(PyByteArrayObject
*self
, PyObject
*args
)
2742 Py_ssize_t where
= -1, n
= Py_SIZE(self
);
2744 if (!PyArg_ParseTuple(args
, "|n:pop", &where
))
2748 PyErr_SetString(PyExc_OverflowError
,
2749 "cannot pop an empty bytes");
2753 where
+= Py_SIZE(self
);
2754 if (where
< 0 || where
>= Py_SIZE(self
)) {
2755 PyErr_SetString(PyExc_IndexError
, "pop index out of range");
2758 if (!_canresize(self
))
2761 value
= self
->ob_bytes
[where
];
2762 memmove(self
->ob_bytes
+ where
, self
->ob_bytes
+ where
+ 1, n
- where
);
2763 if (PyByteArray_Resize((PyObject
*)self
, n
- 1) < 0)
2766 return PyInt_FromLong(value
);
2769 PyDoc_STRVAR(remove__doc__
,
2770 "B.remove(int) -> None\n\
2772 Remove the first occurance of a value in B.");
2774 bytes_remove(PyByteArrayObject
*self
, PyObject
*arg
)
2777 Py_ssize_t where
, n
= Py_SIZE(self
);
2779 if (! _getbytevalue(arg
, &value
))
2782 for (where
= 0; where
< n
; where
++) {
2783 if (self
->ob_bytes
[where
] == value
)
2787 PyErr_SetString(PyExc_ValueError
, "value not found in bytes");
2790 if (!_canresize(self
))
2793 memmove(self
->ob_bytes
+ where
, self
->ob_bytes
+ where
+ 1, n
- where
);
2794 if (PyByteArray_Resize((PyObject
*)self
, n
- 1) < 0)
2800 /* XXX These two helpers could be optimized if argsize == 1 */
2803 lstrip_helper(unsigned char *myptr
, Py_ssize_t mysize
,
2804 void *argptr
, Py_ssize_t argsize
)
2807 while (i
< mysize
&& memchr(argptr
, myptr
[i
], argsize
))
2813 rstrip_helper(unsigned char *myptr
, Py_ssize_t mysize
,
2814 void *argptr
, Py_ssize_t argsize
)
2816 Py_ssize_t i
= mysize
- 1;
2817 while (i
>= 0 && memchr(argptr
, myptr
[i
], argsize
))
2822 PyDoc_STRVAR(strip__doc__
,
2823 "B.strip([bytes]) -> bytearray\n\
2825 Strip leading and trailing bytes contained in the argument.\n\
2826 If the argument is omitted, strip ASCII whitespace.");
2828 bytes_strip(PyByteArrayObject
*self
, PyObject
*args
)
2830 Py_ssize_t left
, right
, mysize
, argsize
;
2831 void *myptr
, *argptr
;
2832 PyObject
*arg
= Py_None
;
2834 if (!PyArg_ParseTuple(args
, "|O:strip", &arg
))
2836 if (arg
== Py_None
) {
2837 argptr
= "\t\n\r\f\v ";
2841 if (_getbuffer(arg
, &varg
) < 0)
2846 myptr
= self
->ob_bytes
;
2847 mysize
= Py_SIZE(self
);
2848 left
= lstrip_helper(myptr
, mysize
, argptr
, argsize
);
2852 right
= rstrip_helper(myptr
, mysize
, argptr
, argsize
);
2854 PyBuffer_Release(&varg
);
2855 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2858 PyDoc_STRVAR(lstrip__doc__
,
2859 "B.lstrip([bytes]) -> bytearray\n\
2861 Strip leading bytes contained in the argument.\n\
2862 If the argument is omitted, strip leading ASCII whitespace.");
2864 bytes_lstrip(PyByteArrayObject
*self
, PyObject
*args
)
2866 Py_ssize_t left
, right
, mysize
, argsize
;
2867 void *myptr
, *argptr
;
2868 PyObject
*arg
= Py_None
;
2870 if (!PyArg_ParseTuple(args
, "|O:lstrip", &arg
))
2872 if (arg
== Py_None
) {
2873 argptr
= "\t\n\r\f\v ";
2877 if (_getbuffer(arg
, &varg
) < 0)
2882 myptr
= self
->ob_bytes
;
2883 mysize
= Py_SIZE(self
);
2884 left
= lstrip_helper(myptr
, mysize
, argptr
, argsize
);
2887 PyBuffer_Release(&varg
);
2888 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2891 PyDoc_STRVAR(rstrip__doc__
,
2892 "B.rstrip([bytes]) -> bytearray\n\
2894 Strip trailing bytes contained in the argument.\n\
2895 If the argument is omitted, strip trailing ASCII whitespace.");
2897 bytes_rstrip(PyByteArrayObject
*self
, PyObject
*args
)
2899 Py_ssize_t left
, right
, mysize
, argsize
;
2900 void *myptr
, *argptr
;
2901 PyObject
*arg
= Py_None
;
2903 if (!PyArg_ParseTuple(args
, "|O:rstrip", &arg
))
2905 if (arg
== Py_None
) {
2906 argptr
= "\t\n\r\f\v ";
2910 if (_getbuffer(arg
, &varg
) < 0)
2915 myptr
= self
->ob_bytes
;
2916 mysize
= Py_SIZE(self
);
2918 right
= rstrip_helper(myptr
, mysize
, argptr
, argsize
);
2920 PyBuffer_Release(&varg
);
2921 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2924 PyDoc_STRVAR(decode_doc
,
2925 "B.decode([encoding[, errors]]) -> unicode object.\n\
2927 Decodes B using the codec registered for encoding. encoding defaults\n\
2928 to the default encoding. errors may be given to set a different error\n\
2929 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2930 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2931 as well as any other name registered with codecs.register_error that is\n\
2932 able to handle UnicodeDecodeErrors.");
2935 bytes_decode(PyObject
*self
, PyObject
*args
)
2937 const char *encoding
= NULL
;
2938 const char *errors
= NULL
;
2940 if (!PyArg_ParseTuple(args
, "|ss:decode", &encoding
, &errors
))
2942 if (encoding
== NULL
)
2943 encoding
= PyUnicode_GetDefaultEncoding();
2944 return PyCodec_Decode(self
, encoding
, errors
);
2947 PyDoc_STRVAR(alloc_doc
,
2948 "B.__alloc__() -> int\n\
2950 Returns the number of bytes actually allocated.");
2953 bytes_alloc(PyByteArrayObject
*self
)
2955 return PyInt_FromSsize_t(self
->ob_alloc
);
2958 PyDoc_STRVAR(join_doc
,
2959 "B.join(iterable_of_bytes) -> bytes\n\
2961 Concatenates any number of bytearray objects, with B in between each pair.");
2964 bytes_join(PyByteArrayObject
*self
, PyObject
*it
)
2967 Py_ssize_t mysize
= Py_SIZE(self
);
2971 Py_ssize_t totalsize
= 0;
2975 seq
= PySequence_Fast(it
, "can only join an iterable");
2978 n
= PySequence_Fast_GET_SIZE(seq
);
2979 items
= PySequence_Fast_ITEMS(seq
);
2981 /* Compute the total size, and check that they are all bytes */
2982 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2983 for (i
= 0; i
< n
; i
++) {
2984 PyObject
*obj
= items
[i
];
2985 if (!PyByteArray_Check(obj
) && !PyBytes_Check(obj
)) {
2986 PyErr_Format(PyExc_TypeError
,
2987 "can only join an iterable of bytes "
2988 "(item %ld has type '%.100s')",
2989 /* XXX %ld isn't right on Win64 */
2990 (long)i
, Py_TYPE(obj
)->tp_name
);
2994 totalsize
+= mysize
;
2995 totalsize
+= Py_SIZE(obj
);
2996 if (totalsize
< 0) {
3002 /* Allocate the result, and copy the bytes */
3003 result
= PyByteArray_FromStringAndSize(NULL
, totalsize
);
3006 dest
= PyByteArray_AS_STRING(result
);
3007 for (i
= 0; i
< n
; i
++) {
3008 PyObject
*obj
= items
[i
];
3009 Py_ssize_t size
= Py_SIZE(obj
);
3011 if (PyByteArray_Check(obj
))
3012 buf
= PyByteArray_AS_STRING(obj
);
3014 buf
= PyBytes_AS_STRING(obj
);
3016 memcpy(dest
, self
->ob_bytes
, mysize
);
3019 memcpy(dest
, buf
, size
);
3027 /* Error handling */
3033 PyDoc_STRVAR(fromhex_doc
,
3034 "bytearray.fromhex(string) -> bytearray\n\
3036 Create a bytearray object from a string of hexadecimal numbers.\n\
3037 Spaces between two numbers are accepted.\n\
3038 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3041 hex_digit_to_int(Py_UNICODE c
)
3050 if (c
>= 'a' && c
<= 'f')
3051 return c
- 'a' + 10;
3057 bytes_fromhex(PyObject
*cls
, PyObject
*args
)
3059 PyObject
*newbytes
, *hexobj
;
3062 Py_ssize_t hexlen
, byteslen
, i
, j
;
3065 if (!PyArg_ParseTuple(args
, "U:fromhex", &hexobj
))
3067 assert(PyUnicode_Check(hexobj
));
3068 hexlen
= PyUnicode_GET_SIZE(hexobj
);
3069 hex
= PyUnicode_AS_UNICODE(hexobj
);
3070 byteslen
= hexlen
/2; /* This overestimates if there are spaces */
3071 newbytes
= PyByteArray_FromStringAndSize(NULL
, byteslen
);
3074 buf
= PyByteArray_AS_STRING(newbytes
);
3075 for (i
= j
= 0; i
< hexlen
; i
+= 2) {
3076 /* skip over spaces in the input */
3077 while (hex
[i
] == ' ')
3081 top
= hex_digit_to_int(hex
[i
]);
3082 bot
= hex_digit_to_int(hex
[i
+1]);
3083 if (top
== -1 || bot
== -1) {
3084 PyErr_Format(PyExc_ValueError
,
3085 "non-hexadecimal number found in "
3086 "fromhex() arg at position %zd", i
);
3089 buf
[j
++] = (top
<< 4) + bot
;
3091 if (PyByteArray_Resize(newbytes
, j
) < 0)
3096 Py_DECREF(newbytes
);
3100 PyDoc_STRVAR(reduce_doc
, "Return state information for pickling.");
3103 bytes_reduce(PyByteArrayObject
*self
)
3105 PyObject
*latin1
, *dict
;
3107 latin1
= PyUnicode_DecodeLatin1(self
->ob_bytes
,
3108 Py_SIZE(self
), NULL
);
3110 latin1
= PyUnicode_FromString("");
3112 dict
= PyObject_GetAttrString((PyObject
*)self
, "__dict__");
3119 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self
), latin1
, "latin-1", dict
);
3122 PyDoc_STRVAR(sizeof_doc
,
3123 "B.__sizeof__() -> int\n\
3125 Returns the size of B in memory, in bytes");
3127 bytes_sizeof(PyByteArrayObject
*self
)
3131 res
= sizeof(PyByteArrayObject
) + self
->ob_alloc
* sizeof(char);
3132 return PyInt_FromSsize_t(res
);
/* Sequence-protocol slot table for bytearray (installed as tp_as_sequence in
 * PyByteArray_Type).  Each handler is cast to the function-pointer type of
 * its slot; the trailing comment on every entry names the slot it fills.
 * sq_ass_slice is 0, so this table installs no handler for it.
 */
3135 static PySequenceMethods bytes_as_sequence
= {
3136 (lenfunc
)bytes_length
, /* sq_length */
3137 (binaryfunc
)PyByteArray_Concat
, /* sq_concat */
3138 (ssizeargfunc
)bytes_repeat
, /* sq_repeat */
3139 (ssizeargfunc
)bytes_getitem
, /* sq_item */
3141 (ssizeobjargproc
)bytes_setitem
, /* sq_ass_item */
3142 0, /* sq_ass_slice */
3143 (objobjproc
)bytes_contains
, /* sq_contains */
/* In-place variants backing the augmented-assignment slots. */
3144 (binaryfunc
)bytes_iconcat
, /* sq_inplace_concat */
3145 (ssizeargfunc
)bytes_irepeat
, /* sq_inplace_repeat */
/* Mapping-protocol slot table for bytearray (installed as tp_as_mapping in
 * PyByteArray_Type).  The three entries fill, in declaration order of
 * PyMappingMethods: mp_length, mp_subscript and mp_ass_subscript.
 */
3148 static PyMappingMethods bytes_as_mapping
= {
/* mp_length: shares bytes_length with the sequence table's sq_length. */
3149 (lenfunc
)bytes_length
,
/* mp_subscript */
3150 (binaryfunc
)bytes_subscript
,
/* mp_ass_subscript */
3151 (objobjargproc
)bytes_ass_subscript
,
/* Buffer-protocol slot table for bytearray (installed as tp_as_buffer in
 * PyByteArray_Type).  The first four entries are the old-style segment
 * accessors, the last two are the Py_buffer based get/release pair.
 */
3154 static PyBufferProcs bytes_as_buffer
= {
/* Old-style buffer interface: read, write, segment count, char buffer. */
3155 (readbufferproc
)bytes_buffer_getreadbuf
,
3156 (writebufferproc
)bytes_buffer_getwritebuf
,
3157 (segcountproc
)bytes_buffer_getsegcount
,
3158 (charbufferproc
)bytes_buffer_getcharbuf
,
/* New-style buffer interface: acquire and release a Py_buffer view. */
3159 (getbufferproc
)bytes_getbuffer
,
3160 (releasebufferproc
)bytes_releasebuffer
,
/* Entries of the bytearray method table (referenced as tp_methods in
 * PyByteArray_Type).  Each entry gives the Python-visible name, the C
 * implementation cast to PyCFunction, the calling-convention flags and the
 * docstring symbol:
 *   METH_NOARGS  - called with no arguments besides self
 *   METH_O       - called with exactly one object argument
 *   METH_VARARGS - called with an argument tuple
 * Implementations named stringlib_* and docstrings named _Py_*__doc__ are
 * not defined in this part of the file; presumably they come from the shared
 * string helpers (bytes_methods.h is included at the top) -- confirm.
 */
3165 {"__alloc__", (PyCFunction
)bytes_alloc
, METH_NOARGS
, alloc_doc
},
3166 {"__reduce__", (PyCFunction
)bytes_reduce
, METH_NOARGS
, reduce_doc
},
3167 {"__sizeof__", (PyCFunction
)bytes_sizeof
, METH_NOARGS
, sizeof_doc
},
3168 {"append", (PyCFunction
)bytes_append
, METH_O
, append__doc__
},
3169 {"capitalize", (PyCFunction
)stringlib_capitalize
, METH_NOARGS
,
3170 _Py_capitalize__doc__
},
3171 {"center", (PyCFunction
)stringlib_center
, METH_VARARGS
, center__doc__
},
3172 {"count", (PyCFunction
)bytes_count
, METH_VARARGS
, count__doc__
},
3173 {"decode", (PyCFunction
)bytes_decode
, METH_VARARGS
, decode_doc
},
3174 {"endswith", (PyCFunction
)bytes_endswith
, METH_VARARGS
, endswith__doc__
},
3175 {"expandtabs", (PyCFunction
)stringlib_expandtabs
, METH_VARARGS
,
3177 {"extend", (PyCFunction
)bytes_extend
, METH_O
, extend__doc__
},
3178 {"find", (PyCFunction
)bytes_find
, METH_VARARGS
, find__doc__
},
/* fromhex is the only entry flagged METH_CLASS: it is invoked on the type
 * rather than on an instance. */
3179 {"fromhex", (PyCFunction
)bytes_fromhex
, METH_VARARGS
|METH_CLASS
,
3181 {"index", (PyCFunction
)bytes_index
, METH_VARARGS
, index__doc__
},
3182 {"insert", (PyCFunction
)bytes_insert
, METH_VARARGS
, insert__doc__
},
3183 {"isalnum", (PyCFunction
)stringlib_isalnum
, METH_NOARGS
,
3184 _Py_isalnum__doc__
},
3185 {"isalpha", (PyCFunction
)stringlib_isalpha
, METH_NOARGS
,
3186 _Py_isalpha__doc__
},
3187 {"isdigit", (PyCFunction
)stringlib_isdigit
, METH_NOARGS
,
3188 _Py_isdigit__doc__
},
3189 {"islower", (PyCFunction
)stringlib_islower
, METH_NOARGS
,
3190 _Py_islower__doc__
},
3191 {"isspace", (PyCFunction
)stringlib_isspace
, METH_NOARGS
,
3192 _Py_isspace__doc__
},
3193 {"istitle", (PyCFunction
)stringlib_istitle
, METH_NOARGS
,
3194 _Py_istitle__doc__
},
3195 {"isupper", (PyCFunction
)stringlib_isupper
, METH_NOARGS
,
3196 _Py_isupper__doc__
},
3197 {"join", (PyCFunction
)bytes_join
, METH_O
, join_doc
},
3198 {"ljust", (PyCFunction
)stringlib_ljust
, METH_VARARGS
, ljust__doc__
},
3199 {"lower", (PyCFunction
)stringlib_lower
, METH_NOARGS
, _Py_lower__doc__
},
3200 {"lstrip", (PyCFunction
)bytes_lstrip
, METH_VARARGS
, lstrip__doc__
},
3201 {"partition", (PyCFunction
)bytes_partition
, METH_O
, partition__doc__
},
3202 {"pop", (PyCFunction
)bytes_pop
, METH_VARARGS
, pop__doc__
},
3203 {"remove", (PyCFunction
)bytes_remove
, METH_O
, remove__doc__
},
3204 {"replace", (PyCFunction
)bytes_replace
, METH_VARARGS
, replace__doc__
},
3205 {"reverse", (PyCFunction
)bytes_reverse
, METH_NOARGS
, reverse__doc__
},
3206 {"rfind", (PyCFunction
)bytes_rfind
, METH_VARARGS
, rfind__doc__
},
3207 {"rindex", (PyCFunction
)bytes_rindex
, METH_VARARGS
, rindex__doc__
},
3208 {"rjust", (PyCFunction
)stringlib_rjust
, METH_VARARGS
, rjust__doc__
},
3209 {"rpartition", (PyCFunction
)bytes_rpartition
, METH_O
, rpartition__doc__
},
3210 {"rsplit", (PyCFunction
)bytes_rsplit
, METH_VARARGS
, rsplit__doc__
},
3211 {"rstrip", (PyCFunction
)bytes_rstrip
, METH_VARARGS
, rstrip__doc__
},
3212 {"split", (PyCFunction
)bytes_split
, METH_VARARGS
, split__doc__
},
3213 {"splitlines", (PyCFunction
)stringlib_splitlines
, METH_VARARGS
,
3215 {"startswith", (PyCFunction
)bytes_startswith
, METH_VARARGS
,
3217 {"strip", (PyCFunction
)bytes_strip
, METH_VARARGS
, strip__doc__
},
3218 {"swapcase", (PyCFunction
)stringlib_swapcase
, METH_NOARGS
,
3219 _Py_swapcase__doc__
},
3220 {"title", (PyCFunction
)stringlib_title
, METH_NOARGS
, _Py_title__doc__
},
3221 {"translate", (PyCFunction
)bytes_translate
, METH_VARARGS
,
3223 {"upper", (PyCFunction
)stringlib_upper
, METH_NOARGS
, _Py_upper__doc__
},
3224 {"zfill", (PyCFunction
)stringlib_zfill
, METH_VARARGS
, zfill__doc__
},
/* Docstring of the bytearray type (installed as tp_doc in PyByteArray_Type).
 * It lists the accepted constructor call forms, then describes the data
 * sources and the zero-filled form taking an int length.
 * The text is user-visible via help(bytearray); the article before "mutable"
 * is "a", not "an".
 */
3228 PyDoc_STRVAR(bytes_doc
,
3229 "bytearray(iterable_of_ints) -> bytearray.\n\
3230 bytearray(string, encoding[, errors]) -> bytearray.\n\
3231 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3232 bytearray(memory_view) -> bytearray.\n\
3234 Construct a mutable bytearray object from:\n\
3235 - an iterable yielding integers in range(256)\n\
3236 - a text string encoded using the specified encoding\n\
3237 - a bytes or a bytearray object\n\
3238 - any object implementing the buffer API.\n\
3240 bytearray(int) -> bytearray.\n\
3242 Construct a zero-initialized bytearray of the given length.");
/* Forward declaration: bytes_iter is referenced by the tp_iter slot of
 * PyByteArray_Type but is defined only after the iterator type further down
 * in this file. */
3245 static PyObject
*bytes_iter(PyObject
*seq
);
/* Type object for bytearray.
 *
 * Wires the slot tables and handlers defined in this file into the type:
 * the sequence, mapping and buffer tables, repr/str, rich comparison,
 * iteration, the method table and the initializer.  The trailing comment on
 * each entry names the slot it fills; entries set to 0 leave that slot
 * without a handler in this initializer.
 */
3247 PyTypeObject PyByteArray_Type
= {
3248 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
/* Instance size is that of the PyByteArrayObject struct. */
3250 sizeof(PyByteArrayObject
),
3252 (destructor
)bytes_dealloc
, /* tp_dealloc */
3257 (reprfunc
)bytes_repr
, /* tp_repr */
3258 0, /* tp_as_number */
3259 &bytes_as_sequence
, /* tp_as_sequence */
3260 &bytes_as_mapping
, /* tp_as_mapping */
3263 bytes_str
, /* tp_str */
3264 PyObject_GenericGetAttr
, /* tp_getattro */
3265 0, /* tp_setattro */
3266 &bytes_as_buffer
, /* tp_as_buffer */
/* BASETYPE permits subclassing; HAVE_NEWBUFFER announces that the buffer
 * table carries the Py_buffer get/release entries. */
3267 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
3268 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
3269 bytes_doc
, /* tp_doc */
/* No tp_traverse and no Py_TPFLAGS_HAVE_GC in the flags above: the type does
 * not take part in cyclic garbage collection. */
3270 0, /* tp_traverse */
3272 (richcmpfunc
)bytes_richcompare
, /* tp_richcompare */
3273 0, /* tp_weaklistoffset */
/* Iteration creates a separate iterator object, so tp_iternext stays 0. */
3274 bytes_iter
, /* tp_iter */
3275 0, /* tp_iternext */
3276 bytes_methods
, /* tp_methods */
3281 0, /* tp_descr_get */
3282 0, /* tp_descr_set */
3283 0, /* tp_dictoffset */
/* Allocation and creation use the generic helpers; bytes_init is the
 * type-specific initializer, and PyObject_Del frees the object. */
3284 (initproc
)bytes_init
, /* tp_init */
3285 PyType_GenericAlloc
, /* tp_alloc */
3286 PyType_GenericNew
, /* tp_new */
3287 PyObject_Del
, /* tp_free */
3290 /*********************** Bytes Iterator ****************************/
/* State held by a bytearray iterator. */
/* it_index: position in it_seq->ob_bytes that bytesiter_next reads. */
3294 Py_ssize_t it_index
;
/* it_seq: the bytearray being iterated; released with Py_XDECREF in
 * bytesiter_dealloc, which tolerates the NULL described below. */
3295 PyByteArrayObject
*it_seq
; /* Set to NULL when iterator is exhausted */
/* Destructor for the bytearray iterator (tp_dealloc of
 * PyByteArrayIter_Type). */
3299 bytesiter_dealloc(bytesiterobject
*it
)
/* Remove the iterator from garbage-collector tracking before tearing it
 * down. */
3301 _PyObject_GC_UNTRACK(it
);
/* Drop the reference to the underlying bytearray; the X variant accepts a
 * NULL it_seq. */
3302 Py_XDECREF(it
->it_seq
);
/* Free the object with the GC-aware deallocator, matching PyObject_GC_New
 * in bytes_iter. */
3303 PyObject_GC_Del(it
);
/* GC traversal hook for the bytearray iterator (tp_traverse of
 * PyByteArrayIter_Type).  The only object reference it reports is it_seq. */
3307 bytesiter_traverse(bytesiterobject
*it
, visitproc visit
, void *arg
)
/* Py_VISIT relies on the parameter names visit and arg declared above. */
3309 Py_VISIT(it
->it_seq
);
/* tp_iternext of PyByteArrayIter_Type: produce the next byte of the
 * underlying bytearray as a Python int.
 * NOTE(review): only the in-range branch is visible here; how seq is loaded
 * from the iterator, how it_index advances and what happens at exhaustion
 * are not shown -- confirm against the full function. */
3314 bytesiter_next(bytesiterobject
*it
)
3316 PyByteArrayObject
*seq
;
/* Debug-build sanity check that the iterated object is a bytearray. */
3323 assert(PyByteArray_Check(seq
));
/* The size is re-read on every call rather than cached in the iterator. */
3325 if (it
->it_index
< PyByteArray_GET_SIZE(seq
)) {
/* The cast to unsigned char keeps the resulting int non-negative even where
 * plain char is signed. */
3326 item
= PyInt_FromLong(
3327 (unsigned char)seq
->ob_bytes
[it
->it_index
]);
/* __length_hint__ for the bytearray iterator: estimate how many items are
 * still to come. */
3339 bytesiter_length_hint(bytesiterobject
*it
)
/* Remaining count = current size of the underlying bytearray minus the
 * iterator position.
 * NOTE(review): it_seq is documented as NULL once the iterator is exhausted;
 * whether this computation is guarded against that is not visible here --
 * confirm. */
3343 len
= PyByteArray_GET_SIZE(it
->it_seq
) - it
->it_index
;
/* Returned to Python as an int built from the Py_ssize_t value. */
3344 return PyInt_FromSsize_t(len
);
/* Docstring for the iterator's __length_hint__ method. */
3347 PyDoc_STRVAR(length_hint_doc
,
3348 "Private method returning an estimate of len(list(it)).");
/* Method table of the bytearray iterator (tp_methods of
 * PyByteArrayIter_Type).  It exposes a single no-argument method and ends
 * with the NULL sentinel entry. */
3350 static PyMethodDef bytesiter_methods
[] = {
3351 {"__length_hint__", (PyCFunction
)bytesiter_length_hint
, METH_NOARGS
,
3353 {NULL
, NULL
} /* sentinel */
/* Type object for the iterator returned by bytes_iter.
 *
 * Instances have a fixed size (tp_itemsize 0) and take part in cyclic garbage
 * collection: Py_TPFLAGS_HAVE_GC is set and bytesiter_traverse is supplied.
 * The number, sequence, mapping and buffer protocol slots are all 0.
 */
3356 PyTypeObject PyByteArrayIter_Type
= {
3357 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3358 "bytearray_iterator", /* tp_name */
3359 sizeof(bytesiterobject
), /* tp_basicsize */
3360 0, /* tp_itemsize */
3362 (destructor
)bytesiter_dealloc
, /* tp_dealloc */
3368 0, /* tp_as_number */
3369 0, /* tp_as_sequence */
3370 0, /* tp_as_mapping */
3374 PyObject_GenericGetAttr
, /* tp_getattro */
3375 0, /* tp_setattro */
3376 0, /* tp_as_buffer */
3377 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
, /* tp_flags */
3379 (traverseproc
)bytesiter_traverse
, /* tp_traverse */
3381 0, /* tp_richcompare */
3382 0, /* tp_weaklistoffset */
/* The iterator is its own iterator: tp_iter hands back the same object and
 * tp_iternext does the stepping. */
3383 PyObject_SelfIter
, /* tp_iter */
3384 (iternextfunc
)bytesiter_next
, /* tp_iternext */
3385 bytesiter_methods
, /* tp_methods */
3390 bytes_iter(PyObject
*seq
)
3392 bytesiterobject
*it
;
3394 if (!PyByteArray_Check(seq
)) {
3395 PyErr_BadInternalCall();
3398 it
= PyObject_GC_New(bytesiterobject
, &PyByteArrayIter_Type
);
3403 it
->it_seq
= (PyByteArrayObject
*)seq
;
3404 _PyObject_GC_TRACK(it
);
3405 return (PyObject
*)it
;