1 /* PyByteArray (bytearray) implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "structmember.h"
6 #include "bytes_methods.h"
/* File-local bytearray instance.  It starts out NULL; the PyByteArray_Init
   code below assigns it a freshly allocated object whose ob_bytes is NULL
   and whose size, ob_alloc and ob_exports are all 0.  Further down,
   STRINGLIB_EMPTY is defined to this name.
   NOTE(review): where the object is released is not visible in this view --
   presumably PyByteArray_Fini; confirm. */
8 static PyByteArrayObject
*nullbytes
= NULL
;
11 PyByteArray_Fini(void)
17 PyByteArray_Init(void)
19 nullbytes
= PyObject_New(PyByteArrayObject
, &PyByteArray_Type
);
20 if (nullbytes
== NULL
)
22 nullbytes
->ob_bytes
= NULL
;
23 Py_SIZE(nullbytes
) = nullbytes
->ob_alloc
= 0;
24 nullbytes
->ob_exports
= 0;
28 /* end nullbytes support */
33 _getbytevalue(PyObject
* arg
, int *value
)
37 if (PyBytes_CheckExact(arg
)) {
38 if (Py_SIZE(arg
) != 1) {
39 PyErr_SetString(PyExc_ValueError
, "string must be of size 1");
42 *value
= Py_CHARMASK(((PyBytesObject
*)arg
)->ob_sval
[0]);
45 else if (PyInt_Check(arg
) || PyLong_Check(arg
)) {
46 face_value
= PyLong_AsLong(arg
);
49 PyObject
*index
= PyNumber_Index(arg
);
51 PyErr_Format(PyExc_TypeError
,
52 "an integer or string of size 1 is required");
55 face_value
= PyLong_AsLong(index
);
59 if (face_value
< 0 || face_value
>= 256) {
60 /* this includes the OverflowError in case the long is too large */
61 PyErr_SetString(PyExc_ValueError
, "byte must be in range(0, 256)");
70 bytes_buffer_getreadbuf(PyByteArrayObject
*self
, Py_ssize_t index
, const void **ptr
)
73 PyErr_SetString(PyExc_SystemError
,
74 "accessing non-existent bytes segment");
77 *ptr
= (void *)self
->ob_bytes
;
82 bytes_buffer_getwritebuf(PyByteArrayObject
*self
, Py_ssize_t index
, const void **ptr
)
85 PyErr_SetString(PyExc_SystemError
,
86 "accessing non-existent bytes segment");
89 *ptr
= (void *)self
->ob_bytes
;
94 bytes_buffer_getsegcount(PyByteArrayObject
*self
, Py_ssize_t
*lenp
)
97 *lenp
= Py_SIZE(self
);
102 bytes_buffer_getcharbuf(PyByteArrayObject
*self
, Py_ssize_t index
, const char **ptr
)
105 PyErr_SetString(PyExc_SystemError
,
106 "accessing non-existent bytes segment");
109 *ptr
= self
->ob_bytes
;
110 return Py_SIZE(self
);
114 bytes_getbuffer(PyByteArrayObject
*obj
, Py_buffer
*view
, int flags
)
122 if (obj
->ob_bytes
== NULL
)
126 ret
= PyBuffer_FillInfo(view
, (PyObject
*)obj
, ptr
, Py_SIZE(obj
), 0, flags
);
134 bytes_releasebuffer(PyByteArrayObject
*obj
, Py_buffer
*view
)
140 _getbuffer(PyObject
*obj
, Py_buffer
*view
)
142 PyBufferProcs
*buffer
= Py_TYPE(obj
)->tp_as_buffer
;
144 if (buffer
== NULL
|| buffer
->bf_getbuffer
== NULL
)
146 PyErr_Format(PyExc_TypeError
,
147 "Type %.100s doesn't support the buffer API",
148 Py_TYPE(obj
)->tp_name
);
152 if (buffer
->bf_getbuffer(obj
, view
, PyBUF_SIMPLE
) < 0)
158 _canresize(PyByteArrayObject
*self
)
160 if (self
->ob_exports
> 0) {
161 PyErr_SetString(PyExc_BufferError
,
162 "Existing exports of data: object cannot be re-sized");
168 /* Direct API functions */
171 PyByteArray_FromObject(PyObject
*input
)
173 return PyObject_CallFunctionObjArgs((PyObject
*)&PyByteArray_Type
,
178 PyByteArray_FromStringAndSize(const char *bytes
, Py_ssize_t size
)
180 PyByteArrayObject
*new;
184 PyErr_SetString(PyExc_SystemError
,
185 "Negative size passed to PyByteArray_FromStringAndSize");
189 new = PyObject_New(PyByteArrayObject
, &PyByteArray_Type
);
194 new->ob_bytes
= NULL
;
199 new->ob_bytes
= PyMem_Malloc(alloc
);
200 if (new->ob_bytes
== NULL
) {
202 return PyErr_NoMemory();
205 memcpy(new->ob_bytes
, bytes
, size
);
206 new->ob_bytes
[size
] = '\0'; /* Trailing null byte */
209 new->ob_alloc
= alloc
;
212 return (PyObject
*)new;
216 PyByteArray_Size(PyObject
*self
)
218 assert(self
!= NULL
);
219 assert(PyByteArray_Check(self
));
221 return PyByteArray_GET_SIZE(self
);
225 PyByteArray_AsString(PyObject
*self
)
227 assert(self
!= NULL
);
228 assert(PyByteArray_Check(self
));
230 return PyByteArray_AS_STRING(self
);
234 PyByteArray_Resize(PyObject
*self
, Py_ssize_t size
)
237 Py_ssize_t alloc
= ((PyByteArrayObject
*)self
)->ob_alloc
;
239 assert(self
!= NULL
);
240 assert(PyByteArray_Check(self
));
243 if (size
== Py_SIZE(self
)) {
246 if (!_canresize((PyByteArrayObject
*)self
)) {
250 if (size
< alloc
/ 2) {
251 /* Major downsize; resize down to exact size */
254 else if (size
< alloc
) {
255 /* Within allocated size; quick exit */
256 Py_SIZE(self
) = size
;
257 ((PyByteArrayObject
*)self
)->ob_bytes
[size
] = '\0'; /* Trailing null */
260 else if (size
<= alloc
* 1.125) {
261 /* Moderate upsize; overallocate similar to list_resize() */
262 alloc
= size
+ (size
>> 3) + (size
< 9 ? 3 : 6);
265 /* Major upsize; resize up to exact size */
269 sval
= PyMem_Realloc(((PyByteArrayObject
*)self
)->ob_bytes
, alloc
);
275 ((PyByteArrayObject
*)self
)->ob_bytes
= sval
;
276 Py_SIZE(self
) = size
;
277 ((PyByteArrayObject
*)self
)->ob_alloc
= alloc
;
278 ((PyByteArrayObject
*)self
)->ob_bytes
[size
] = '\0'; /* Trailing null byte */
284 PyByteArray_Concat(PyObject
*a
, PyObject
*b
)
288 PyByteArrayObject
*result
= NULL
;
292 if (_getbuffer(a
, &va
) < 0 ||
293 _getbuffer(b
, &vb
) < 0) {
294 PyErr_Format(PyExc_TypeError
, "can't concat %.100s to %.100s",
295 Py_TYPE(a
)->tp_name
, Py_TYPE(b
)->tp_name
);
299 size
= va
.len
+ vb
.len
;
301 return PyErr_NoMemory();
305 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, size
);
306 if (result
!= NULL
) {
307 memcpy(result
->ob_bytes
, va
.buf
, va
.len
);
308 memcpy(result
->ob_bytes
+ va
.len
, vb
.buf
, vb
.len
);
313 PyBuffer_Release(&va
);
315 PyBuffer_Release(&vb
);
316 return (PyObject
*)result
;
319 /* Functions stuffed into the type object */
322 bytes_length(PyByteArrayObject
*self
)
324 return Py_SIZE(self
);
328 bytes_iconcat(PyByteArrayObject
*self
, PyObject
*other
)
334 if (_getbuffer(other
, &vo
) < 0) {
335 PyErr_Format(PyExc_TypeError
, "can't concat %.100s to %.100s",
336 Py_TYPE(other
)->tp_name
, Py_TYPE(self
)->tp_name
);
340 mysize
= Py_SIZE(self
);
341 size
= mysize
+ vo
.len
;
343 PyBuffer_Release(&vo
);
344 return PyErr_NoMemory();
346 if (size
< self
->ob_alloc
) {
347 Py_SIZE(self
) = size
;
348 self
->ob_bytes
[Py_SIZE(self
)] = '\0'; /* Trailing null byte */
350 else if (PyByteArray_Resize((PyObject
*)self
, size
) < 0) {
351 PyBuffer_Release(&vo
);
354 memcpy(self
->ob_bytes
+ mysize
, vo
.buf
, vo
.len
);
355 PyBuffer_Release(&vo
);
357 return (PyObject
*)self
;
361 bytes_repeat(PyByteArrayObject
*self
, Py_ssize_t count
)
363 PyByteArrayObject
*result
;
369 mysize
= Py_SIZE(self
);
370 size
= mysize
* count
;
371 if (count
!= 0 && size
/ count
!= mysize
)
372 return PyErr_NoMemory();
373 result
= (PyByteArrayObject
*)PyByteArray_FromStringAndSize(NULL
, size
);
374 if (result
!= NULL
&& size
!= 0) {
376 memset(result
->ob_bytes
, self
->ob_bytes
[0], size
);
379 for (i
= 0; i
< count
; i
++)
380 memcpy(result
->ob_bytes
+ i
*mysize
, self
->ob_bytes
, mysize
);
383 return (PyObject
*)result
;
387 bytes_irepeat(PyByteArrayObject
*self
, Py_ssize_t count
)
394 mysize
= Py_SIZE(self
);
395 size
= mysize
* count
;
396 if (count
!= 0 && size
/ count
!= mysize
)
397 return PyErr_NoMemory();
398 if (size
< self
->ob_alloc
) {
399 Py_SIZE(self
) = size
;
400 self
->ob_bytes
[Py_SIZE(self
)] = '\0'; /* Trailing null byte */
402 else if (PyByteArray_Resize((PyObject
*)self
, size
) < 0)
406 memset(self
->ob_bytes
, self
->ob_bytes
[0], size
);
409 for (i
= 1; i
< count
; i
++)
410 memcpy(self
->ob_bytes
+ i
*mysize
, self
->ob_bytes
, mysize
);
414 return (PyObject
*)self
;
418 bytes_getitem(PyByteArrayObject
*self
, Py_ssize_t i
)
422 if (i
< 0 || i
>= Py_SIZE(self
)) {
423 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
426 return PyInt_FromLong((unsigned char)(self
->ob_bytes
[i
]));
430 bytes_subscript(PyByteArrayObject
*self
, PyObject
*index
)
432 if (PyIndex_Check(index
)) {
433 Py_ssize_t i
= PyNumber_AsSsize_t(index
, PyExc_IndexError
);
435 if (i
== -1 && PyErr_Occurred())
439 i
+= PyByteArray_GET_SIZE(self
);
441 if (i
< 0 || i
>= Py_SIZE(self
)) {
442 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
445 return PyInt_FromLong((unsigned char)(self
->ob_bytes
[i
]));
447 else if (PySlice_Check(index
)) {
448 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
449 if (PySlice_GetIndicesEx((PySliceObject
*)index
,
450 PyByteArray_GET_SIZE(self
),
451 &start
, &stop
, &step
, &slicelength
) < 0) {
455 if (slicelength
<= 0)
456 return PyByteArray_FromStringAndSize("", 0);
457 else if (step
== 1) {
458 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ start
,
462 char *source_buf
= PyByteArray_AS_STRING(self
);
463 char *result_buf
= (char *)PyMem_Malloc(slicelength
);
466 if (result_buf
== NULL
)
467 return PyErr_NoMemory();
469 for (cur
= start
, i
= 0; i
< slicelength
;
471 result_buf
[i
] = source_buf
[cur
];
473 result
= PyByteArray_FromStringAndSize(result_buf
, slicelength
);
474 PyMem_Free(result_buf
);
479 PyErr_SetString(PyExc_TypeError
, "bytearray indices must be integers");
485 bytes_setslice(PyByteArrayObject
*self
, Py_ssize_t lo
, Py_ssize_t hi
,
488 Py_ssize_t avail
, needed
;
494 if (values
== (PyObject
*)self
) {
495 /* Make a copy and call this function recursively */
497 values
= PyByteArray_FromObject(values
);
500 err
= bytes_setslice(self
, lo
, hi
, values
);
504 if (values
== NULL
) {
510 if (_getbuffer(values
, &vbytes
) < 0) {
511 PyErr_Format(PyExc_TypeError
,
512 "can't set bytearray slice from %.100s",
513 Py_TYPE(values
)->tp_name
);
524 if (hi
> Py_SIZE(self
))
531 if (avail
!= needed
) {
532 if (avail
> needed
) {
533 if (!_canresize(self
)) {
539 | |<----avail----->|<-----tomove------>|
540 | |<-needed->|<-----tomove------>|
543 memmove(self
->ob_bytes
+ lo
+ needed
, self
->ob_bytes
+ hi
,
546 /* XXX(nnorwitz): need to verify this can't overflow! */
547 if (PyByteArray_Resize((PyObject
*)self
,
548 Py_SIZE(self
) + needed
- avail
) < 0) {
552 if (avail
< needed
) {
555 | |<-avail->|<-----tomove------>|
556 | |<----needed---->|<-----tomove------>|
559 memmove(self
->ob_bytes
+ lo
+ needed
, self
->ob_bytes
+ hi
,
560 Py_SIZE(self
) - lo
- needed
);
565 memcpy(self
->ob_bytes
+ lo
, bytes
, needed
);
569 if (vbytes
.len
!= -1)
570 PyBuffer_Release(&vbytes
);
575 bytes_setitem(PyByteArrayObject
*self
, Py_ssize_t i
, PyObject
*value
)
582 if (i
< 0 || i
>= Py_SIZE(self
)) {
583 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
588 return bytes_setslice(self
, i
, i
+1, NULL
);
590 if (!_getbytevalue(value
, &ival
))
593 self
->ob_bytes
[i
] = ival
;
598 bytes_ass_subscript(PyByteArrayObject
*self
, PyObject
*index
, PyObject
*values
)
600 Py_ssize_t start
, stop
, step
, slicelen
, needed
;
603 if (PyIndex_Check(index
)) {
604 Py_ssize_t i
= PyNumber_AsSsize_t(index
, PyExc_IndexError
);
606 if (i
== -1 && PyErr_Occurred())
610 i
+= PyByteArray_GET_SIZE(self
);
612 if (i
< 0 || i
>= Py_SIZE(self
)) {
613 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
617 if (values
== NULL
) {
618 /* Fall through to slice assignment */
626 if (!_getbytevalue(values
, &ival
))
628 self
->ob_bytes
[i
] = (char)ival
;
632 else if (PySlice_Check(index
)) {
633 if (PySlice_GetIndicesEx((PySliceObject
*)index
,
634 PyByteArray_GET_SIZE(self
),
635 &start
, &stop
, &step
, &slicelen
) < 0) {
640 PyErr_SetString(PyExc_TypeError
, "bytearray indices must be integer");
644 if (values
== NULL
) {
648 else if (values
== (PyObject
*)self
|| !PyByteArray_Check(values
)) {
649 /* Make a copy and call this function recursively */
651 values
= PyByteArray_FromObject(values
);
654 err
= bytes_ass_subscript(self
, index
, values
);
659 assert(PyByteArray_Check(values
));
660 bytes
= ((PyByteArrayObject
*)values
)->ob_bytes
;
661 needed
= Py_SIZE(values
);
663 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
664 if ((step
< 0 && start
< stop
) ||
665 (step
> 0 && start
> stop
))
668 if (slicelen
!= needed
) {
669 if (!_canresize(self
))
671 if (slicelen
> needed
) {
673 0 start stop old_size
674 | |<---slicelen--->|<-----tomove------>|
675 | |<-needed->|<-----tomove------>|
678 memmove(self
->ob_bytes
+ start
+ needed
, self
->ob_bytes
+ stop
,
679 Py_SIZE(self
) - stop
);
681 if (PyByteArray_Resize((PyObject
*)self
,
682 Py_SIZE(self
) + needed
- slicelen
) < 0)
684 if (slicelen
< needed
) {
687 | |<-avail->|<-----tomove------>|
688 | |<----needed---->|<-----tomove------>|
691 memmove(self
->ob_bytes
+ start
+ needed
, self
->ob_bytes
+ stop
,
692 Py_SIZE(self
) - start
- needed
);
697 memcpy(self
->ob_bytes
+ start
, bytes
, needed
);
706 if (!_canresize(self
))
710 start
= stop
+ step
* (slicelen
- 1) - 1;
713 for (cur
= start
, i
= 0;
714 i
< slicelen
; cur
+= step
, i
++) {
715 Py_ssize_t lim
= step
- 1;
717 if (cur
+ step
>= PyByteArray_GET_SIZE(self
))
718 lim
= PyByteArray_GET_SIZE(self
) - cur
- 1;
720 memmove(self
->ob_bytes
+ cur
- i
,
721 self
->ob_bytes
+ cur
+ 1, lim
);
723 /* Move the tail of the bytes, in one chunk */
724 cur
= start
+ slicelen
*step
;
725 if (cur
< PyByteArray_GET_SIZE(self
)) {
726 memmove(self
->ob_bytes
+ cur
- slicelen
,
727 self
->ob_bytes
+ cur
,
728 PyByteArray_GET_SIZE(self
) - cur
);
730 if (PyByteArray_Resize((PyObject
*)self
,
731 PyByteArray_GET_SIZE(self
) - slicelen
) < 0)
740 if (needed
!= slicelen
) {
741 PyErr_Format(PyExc_ValueError
,
742 "attempt to assign bytes of size %zd "
743 "to extended slice of size %zd",
747 for (cur
= start
, i
= 0; i
< slicelen
; cur
+= step
, i
++)
748 self
->ob_bytes
[cur
] = bytes
[i
];
755 bytes_init(PyByteArrayObject
*self
, PyObject
*args
, PyObject
*kwds
)
757 static char *kwlist
[] = {"source", "encoding", "errors", 0};
758 PyObject
*arg
= NULL
;
759 const char *encoding
= NULL
;
760 const char *errors
= NULL
;
763 PyObject
*(*iternext
)(PyObject
*);
765 if (Py_SIZE(self
) != 0) {
766 /* Empty previous contents (yes, do this first of all!) */
767 if (PyByteArray_Resize((PyObject
*)self
, 0) < 0)
771 /* Parse arguments */
772 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|Oss:bytearray", kwlist
,
773 &arg
, &encoding
, &errors
))
776 /* Make a quick exit if no first argument */
778 if (encoding
!= NULL
|| errors
!= NULL
) {
779 PyErr_SetString(PyExc_TypeError
,
780 "encoding or errors without sequence argument");
786 if (PyBytes_Check(arg
)) {
787 PyObject
*new, *encoded
;
788 if (encoding
!= NULL
) {
789 encoded
= PyCodec_Encode(arg
, encoding
, errors
);
792 assert(PyBytes_Check(encoded
));
798 new = bytes_iconcat(self
, arg
);
806 if (PyUnicode_Check(arg
)) {
807 /* Encode via the codec registry */
808 PyObject
*encoded
, *new;
809 if (encoding
== NULL
) {
810 PyErr_SetString(PyExc_TypeError
,
811 "unicode argument without an encoding");
814 encoded
= PyCodec_Encode(arg
, encoding
, errors
);
817 assert(PyBytes_Check(encoded
));
818 new = bytes_iconcat(self
, encoded
);
826 /* If it's not unicode, there can't be encoding or errors */
827 if (encoding
!= NULL
|| errors
!= NULL
) {
828 PyErr_SetString(PyExc_TypeError
,
829 "encoding or errors without a string argument");
834 count
= PyNumber_AsSsize_t(arg
, PyExc_ValueError
);
835 if (count
== -1 && PyErr_Occurred())
839 PyErr_SetString(PyExc_ValueError
, "negative count");
843 if (PyByteArray_Resize((PyObject
*)self
, count
))
845 memset(self
->ob_bytes
, 0, count
);
850 /* Use the buffer API */
851 if (PyObject_CheckBuffer(arg
)) {
854 if (PyObject_GetBuffer(arg
, &view
, PyBUF_FULL_RO
) < 0)
857 if (PyByteArray_Resize((PyObject
*)self
, size
) < 0) goto fail
;
858 if (PyBuffer_ToContiguous(self
->ob_bytes
, &view
, size
, 'C') < 0)
860 PyBuffer_Release(&view
);
863 PyBuffer_Release(&view
);
867 /* XXX Optimize this if the argument is a list or tuple */
869 /* Get the iterator */
870 it
= PyObject_GetIter(arg
);
873 iternext
= *Py_TYPE(it
)->tp_iternext
;
875 /* Run the iterator to exhaustion */
880 /* Get the next item */
883 if (PyErr_Occurred()) {
884 if (!PyErr_ExceptionMatches(PyExc_StopIteration
))
891 /* Interpret it as an int (__index__) */
892 rc
= _getbytevalue(item
, &value
);
897 /* Append the byte */
898 if (Py_SIZE(self
) < self
->ob_alloc
)
900 else if (PyByteArray_Resize((PyObject
*)self
, Py_SIZE(self
)+1) < 0)
902 self
->ob_bytes
[Py_SIZE(self
)-1] = value
;
905 /* Clean up and return success */
910 /* Error handling when it != NULL */
915 /* Mostly copied from string_repr, but without the
916 "smart quote" functionality. */
918 bytes_repr(PyByteArrayObject
*self
)
920 static const char *hexdigits
= "0123456789abcdef";
921 const char *quote_prefix
= "bytearray(b";
922 const char *quote_postfix
= ")";
923 Py_ssize_t length
= Py_SIZE(self
);
924 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
925 size_t newsize
= 14 + 4 * length
;
927 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 - 3 != length
) {
928 PyErr_SetString(PyExc_OverflowError
,
929 "bytearray object is too large to make repr");
932 v
= PyUnicode_FromUnicode(NULL
, newsize
);
937 register Py_ssize_t i
;
938 register Py_UNICODE c
;
939 register Py_UNICODE
*p
;
942 /* Figure out which quote to use; single is preferred */
946 start
= PyByteArray_AS_STRING(self
);
947 for (test
= start
; test
< start
+length
; ++test
) {
949 quote
= '\''; /* back to single */
952 else if (*test
== '\'')
959 p
= PyUnicode_AS_UNICODE(v
);
960 while (*quote_prefix
)
961 *p
++ = *quote_prefix
++;
964 for (i
= 0; i
< length
; i
++) {
965 /* There's at least enough room for a hex escape
966 and a closing quote. */
967 assert(newsize
- (p
- PyUnicode_AS_UNICODE(v
)) >= 5);
968 c
= self
->ob_bytes
[i
];
969 if (c
== '\'' || c
== '\\')
970 *p
++ = '\\', *p
++ = c
;
972 *p
++ = '\\', *p
++ = 't';
974 *p
++ = '\\', *p
++ = 'n';
976 *p
++ = '\\', *p
++ = 'r';
978 *p
++ = '\\', *p
++ = 'x', *p
++ = '0', *p
++ = '0';
979 else if (c
< ' ' || c
>= 0x7f) {
982 *p
++ = hexdigits
[(c
& 0xf0) >> 4];
983 *p
++ = hexdigits
[c
& 0xf];
988 assert(newsize
- (p
- PyUnicode_AS_UNICODE(v
)) >= 1);
990 while (*quote_postfix
) {
991 *p
++ = *quote_postfix
++;
994 if (PyUnicode_Resize(&v
, (p
- PyUnicode_AS_UNICODE(v
)))) {
1003 bytes_str(PyObject
*op
)
1006 if (Py_BytesWarningFlag
) {
1007 if (PyErr_WarnEx(PyExc_BytesWarning
,
1008 "str() on a bytearray instance", 1))
1011 return bytes_repr((PyByteArrayObject
*)op
);
1013 return PyBytes_FromStringAndSize(((PyByteArrayObject
*)op
)->ob_bytes
, Py_SIZE(op
));
1017 bytes_richcompare(PyObject
*self
, PyObject
*other
, int op
)
1019 Py_ssize_t self_size
, other_size
;
1020 Py_buffer self_bytes
, other_bytes
;
1025 /* Bytes can be compared to anything that supports the (binary)
1026 buffer API. Except that a comparison with Unicode is always an
1027 error, even if the comparison is for equality. */
1028 if (PyObject_IsInstance(self
, (PyObject
*)&PyUnicode_Type
) ||
1029 PyObject_IsInstance(other
, (PyObject
*)&PyUnicode_Type
)) {
1030 if (Py_BytesWarningFlag
&& op
== Py_EQ
) {
1031 if (PyErr_WarnEx(PyExc_BytesWarning
,
1032 "Comparsion between bytearray and string", 1))
1036 Py_INCREF(Py_NotImplemented
);
1037 return Py_NotImplemented
;
1040 self_size
= _getbuffer(self
, &self_bytes
);
1041 if (self_size
< 0) {
1043 Py_INCREF(Py_NotImplemented
);
1044 return Py_NotImplemented
;
1047 other_size
= _getbuffer(other
, &other_bytes
);
1048 if (other_size
< 0) {
1050 PyBuffer_Release(&self_bytes
);
1051 Py_INCREF(Py_NotImplemented
);
1052 return Py_NotImplemented
;
1055 if (self_size
!= other_size
&& (op
== Py_EQ
|| op
== Py_NE
)) {
1056 /* Shortcut: if the lengths differ, the objects differ */
1057 cmp
= (op
== Py_NE
);
1060 minsize
= self_size
;
1061 if (other_size
< minsize
)
1062 minsize
= other_size
;
1064 cmp
= memcmp(self_bytes
.buf
, other_bytes
.buf
, minsize
);
1065 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1068 if (self_size
< other_size
)
1070 else if (self_size
> other_size
)
1075 case Py_LT
: cmp
= cmp
< 0; break;
1076 case Py_LE
: cmp
= cmp
<= 0; break;
1077 case Py_EQ
: cmp
= cmp
== 0; break;
1078 case Py_NE
: cmp
= cmp
!= 0; break;
1079 case Py_GT
: cmp
= cmp
> 0; break;
1080 case Py_GE
: cmp
= cmp
>= 0; break;
1084 res
= cmp
? Py_True
: Py_False
;
1085 PyBuffer_Release(&self_bytes
);
1086 PyBuffer_Release(&other_bytes
);
1092 bytes_dealloc(PyByteArrayObject
*self
)
1094 if (self
->ob_exports
> 0) {
1095 PyErr_SetString(PyExc_SystemError
,
1096 "deallocated bytearray object has exported buffers");
1099 if (self
->ob_bytes
!= 0) {
1100 PyMem_Free(self
->ob_bytes
);
1102 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1106 /* -------------------------------------------------------------------- */
/* Parameter macros defined immediately before the stringlib/ headers are
   included.  Going by their names they select, for bytearray: the element
   type (char), the comparison routine (memcmp), the length and data
   accessors, the constructor, the empty object (the file-local nullbytes)
   and the exact-type check, and they flag the type as mutable.
   NOTE(review): how each stringlib header consumes these names is not
   visible here -- confirm against the stringlib sources before changing
   any value. */
1109 #define STRINGLIB_CHAR char
1110 #define STRINGLIB_CMP memcmp
1111 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1112 #define STRINGLIB_STR PyByteArray_AS_STRING
1113 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1114 #define STRINGLIB_EMPTY nullbytes
1115 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1116 #define STRINGLIB_MUTABLE 1
1117 #define FROM_BYTEARRAY 1
1119 #include "stringlib/fastsearch.h"
1120 #include "stringlib/count.h"
1121 #include "stringlib/find.h"
1122 #include "stringlib/partition.h"
1123 #include "stringlib/ctype.h"
1124 #include "stringlib/transmogrify.h"
1127 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1128 were copied from the old char* style string object. */
1130 Py_LOCAL_INLINE(void)
1131 _adjust_indices(Py_ssize_t
*start
, Py_ssize_t
*end
, Py_ssize_t len
)
1146 Py_LOCAL_INLINE(Py_ssize_t
)
1147 bytes_find_internal(PyByteArrayObject
*self
, PyObject
*args
, int dir
)
1151 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1154 if (!PyArg_ParseTuple(args
, "O|O&O&:find/rfind/index/rindex", &subobj
,
1155 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1157 if (_getbuffer(subobj
, &subbuf
) < 0)
1160 res
= stringlib_find_slice(
1161 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
1162 subbuf
.buf
, subbuf
.len
, start
, end
);
1164 res
= stringlib_rfind_slice(
1165 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
1166 subbuf
.buf
, subbuf
.len
, start
, end
);
1167 PyBuffer_Release(&subbuf
);
1171 PyDoc_STRVAR(find__doc__
,
1172 "B.find(sub [,start [,end]]) -> int\n\
1174 Return the lowest index in B where subsection sub is found,\n\
1175 such that sub is contained within s[start,end]. Optional\n\
1176 arguments start and end are interpreted as in slice notation.\n\
1178 Return -1 on failure.");
1181 bytes_find(PyByteArrayObject
*self
, PyObject
*args
)
1183 Py_ssize_t result
= bytes_find_internal(self
, args
, +1);
1186 return PyInt_FromSsize_t(result
);
1189 PyDoc_STRVAR(count__doc__
,
1190 "B.count(sub [,start [,end]]) -> int\n\
1192 Return the number of non-overlapping occurrences of subsection sub in\n\
1193 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1194 as in slice notation.");
1197 bytes_count(PyByteArrayObject
*self
, PyObject
*args
)
1200 const char *str
= PyByteArray_AS_STRING(self
);
1201 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
1203 PyObject
*count_obj
;
1205 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
1206 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1209 if (_getbuffer(sub_obj
, &vsub
) < 0)
1212 _adjust_indices(&start
, &end
, PyByteArray_GET_SIZE(self
));
1214 count_obj
= PyInt_FromSsize_t(
1215 stringlib_count(str
+ start
, end
- start
, vsub
.buf
, vsub
.len
)
1217 PyBuffer_Release(&vsub
);
1222 PyDoc_STRVAR(index__doc__
,
1223 "B.index(sub [,start [,end]]) -> int\n\
1225 Like B.find() but raise ValueError when the subsection is not found.");
1228 bytes_index(PyByteArrayObject
*self
, PyObject
*args
)
1230 Py_ssize_t result
= bytes_find_internal(self
, args
, +1);
1234 PyErr_SetString(PyExc_ValueError
,
1235 "subsection not found");
1238 return PyInt_FromSsize_t(result
);
1242 PyDoc_STRVAR(rfind__doc__
,
1243 "B.rfind(sub [,start [,end]]) -> int\n\
1245 Return the highest index in B where subsection sub is found,\n\
1246 such that sub is contained within s[start,end]. Optional\n\
1247 arguments start and end are interpreted as in slice notation.\n\
1249 Return -1 on failure.");
1252 bytes_rfind(PyByteArrayObject
*self
, PyObject
*args
)
1254 Py_ssize_t result
= bytes_find_internal(self
, args
, -1);
1257 return PyInt_FromSsize_t(result
);
1261 PyDoc_STRVAR(rindex__doc__
,
1262 "B.rindex(sub [,start [,end]]) -> int\n\
1264 Like B.rfind() but raise ValueError when the subsection is not found.");
1267 bytes_rindex(PyByteArrayObject
*self
, PyObject
*args
)
1269 Py_ssize_t result
= bytes_find_internal(self
, args
, -1);
1273 PyErr_SetString(PyExc_ValueError
,
1274 "subsection not found");
1277 return PyInt_FromSsize_t(result
);
1282 bytes_contains(PyObject
*self
, PyObject
*arg
)
1284 Py_ssize_t ival
= PyNumber_AsSsize_t(arg
, PyExc_ValueError
);
1285 if (ival
== -1 && PyErr_Occurred()) {
1289 if (_getbuffer(arg
, &varg
) < 0)
1291 pos
= stringlib_find(PyByteArray_AS_STRING(self
), Py_SIZE(self
),
1292 varg
.buf
, varg
.len
, 0);
1293 PyBuffer_Release(&varg
);
1296 if (ival
< 0 || ival
>= 256) {
1297 PyErr_SetString(PyExc_ValueError
, "byte must be in range(0, 256)");
1301 return memchr(PyByteArray_AS_STRING(self
), ival
, Py_SIZE(self
)) != NULL
;
1305 /* Matches the end (direction >= 0) or start (direction < 0) of self
1306 * against substr, using the start and end arguments. Returns
1307 * -1 on error, 0 if not found and 1 if found.
1310 _bytes_tailmatch(PyByteArrayObject
*self
, PyObject
*substr
, Py_ssize_t start
,
1311 Py_ssize_t end
, int direction
)
1313 Py_ssize_t len
= PyByteArray_GET_SIZE(self
);
1318 str
= PyByteArray_AS_STRING(self
);
1320 if (_getbuffer(substr
, &vsubstr
) < 0)
1323 _adjust_indices(&start
, &end
, len
);
1325 if (direction
< 0) {
1327 if (start
+vsubstr
.len
> len
) {
1332 if (end
-start
< vsubstr
.len
|| start
> len
) {
1336 if (end
-vsubstr
.len
> start
)
1337 start
= end
- vsubstr
.len
;
1339 if (end
-start
>= vsubstr
.len
)
1340 rv
= ! memcmp(str
+start
, vsubstr
.buf
, vsubstr
.len
);
1343 PyBuffer_Release(&vsubstr
);
1348 PyDoc_STRVAR(startswith__doc__
,
1349 "B.startswith(prefix [,start [,end]]) -> bool\n\
1351 Return True if B starts with the specified prefix, False otherwise.\n\
1352 With optional start, test B beginning at that position.\n\
1353 With optional end, stop comparing B at that position.\n\
1354 prefix can also be a tuple of strings to try.");
1357 bytes_startswith(PyByteArrayObject
*self
, PyObject
*args
)
1359 Py_ssize_t start
= 0;
1360 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1364 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
1365 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1367 if (PyTuple_Check(subobj
)) {
1369 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
1370 result
= _bytes_tailmatch(self
,
1371 PyTuple_GET_ITEM(subobj
, i
),
1381 result
= _bytes_tailmatch(self
, subobj
, start
, end
, -1);
1385 return PyBool_FromLong(result
);
1388 PyDoc_STRVAR(endswith__doc__
,
1389 "B.endswith(suffix [,start [,end]]) -> bool\n\
1391 Return True if B ends with the specified suffix, False otherwise.\n\
1392 With optional start, test B beginning at that position.\n\
1393 With optional end, stop comparing B at that position.\n\
1394 suffix can also be a tuple of strings to try.");
1397 bytes_endswith(PyByteArrayObject
*self
, PyObject
*args
)
1399 Py_ssize_t start
= 0;
1400 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1404 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
1405 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1407 if (PyTuple_Check(subobj
)) {
1409 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
1410 result
= _bytes_tailmatch(self
,
1411 PyTuple_GET_ITEM(subobj
, i
),
1421 result
= _bytes_tailmatch(self
, subobj
, start
, end
, +1);
1425 return PyBool_FromLong(result
);
1429 PyDoc_STRVAR(translate__doc__
,
1430 "B.translate(table[, deletechars]) -> bytearray\n\
1432 Return a copy of B, where all characters occurring in the\n\
1433 optional argument deletechars are removed, and the remaining\n\
1434 characters have been mapped through the given translation\n\
1435 table, which must be a bytes object of length 256.");
1438 bytes_translate(PyByteArrayObject
*self
, PyObject
*args
)
1440 register char *input
, *output
;
1441 register const char *table
;
1442 register Py_ssize_t i
, c
;
1443 PyObject
*input_obj
= (PyObject
*)self
;
1444 const char *output_start
;
1447 int trans_table
[256];
1448 PyObject
*tableobj
, *delobj
= NULL
;
1449 Py_buffer vtable
, vdel
;
1451 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
1452 &tableobj
, &delobj
))
1455 if (_getbuffer(tableobj
, &vtable
) < 0)
1458 if (vtable
.len
!= 256) {
1459 PyErr_SetString(PyExc_ValueError
,
1460 "translation table must be 256 characters long");
1465 if (delobj
!= NULL
) {
1466 if (_getbuffer(delobj
, &vdel
) < 0) {
1476 table
= (const char *)vtable
.buf
;
1477 inlen
= PyByteArray_GET_SIZE(input_obj
);
1478 result
= PyByteArray_FromStringAndSize((char *)NULL
, inlen
);
1481 output_start
= output
= PyByteArray_AsString(result
);
1482 input
= PyByteArray_AS_STRING(input_obj
);
1484 if (vdel
.len
== 0) {
1485 /* If no deletions are required, use faster code */
1486 for (i
= inlen
; --i
>= 0; ) {
1487 c
= Py_CHARMASK(*input
++);
1488 *output
++ = table
[c
];
1493 for (i
= 0; i
< 256; i
++)
1494 trans_table
[i
] = Py_CHARMASK(table
[i
]);
1496 for (i
= 0; i
< vdel
.len
; i
++)
1497 trans_table
[(int) Py_CHARMASK( ((unsigned char*)vdel
.buf
)[i
] )] = -1;
1499 for (i
= inlen
; --i
>= 0; ) {
1500 c
= Py_CHARMASK(*input
++);
1501 if (trans_table
[c
] != -1)
1502 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
1505 /* Fix the size of the resulting string */
1507 PyByteArray_Resize(result
, output
- output_start
);
1510 PyBuffer_Release(&vtable
);
1512 PyBuffer_Release(&vdel
);
1520 /* find and count characters and substrings */
/* findchar(target, target_len, c): search the first target_len bytes
   starting at target for the byte c.  Evaluates to a char * pointing at the
   first occurrence, or NULL when the byte is not present.  This is a thin
   wrapper over memchr() whose result is cast to char *.
   Every parameter is parenthesized in the expansion so that the macro stays
   a single well-formed call whatever expressions are passed in. */
#define findchar(target, target_len, c)                                 \
  ((char *)memchr((const void *)(target), (c), (target_len)))
/* Py_STRING_MATCH(target, offset, pattern, length): non-zero when the
   `length` bytes of `target` starting at index `offset` equal the first
   `length` bytes of `pattern`.  The first and last bytes are compared
   directly; memcmp() then checks the bytes in between.

   Don't call if length < 2: the memcmp() size is length-2, which would be
   negative (and converted to a huge size_t) for shorter patterns.

   Each parameter is parenthesized in the expansion so that expression
   arguments (a pointer sum such as `buf + 1`, a conditional length, ...)
   bind with the intended precedence.  Arguments are still evaluated more
   than once, so do not pass expressions with side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
  ((target)[(offset)] == (pattern)[0] &&                                \
   (target)[(offset) + (length) - 1] == (pattern)[(length) - 1] &&      \
   !memcmp((target) + (offset) + 1, (pattern) + 1, (length) - 2))
1532 /* Bytes ops must return a string, create a copy */
1533 Py_LOCAL(PyByteArrayObject
*)
1534 return_self(PyByteArrayObject
*self
)
1536 return (PyByteArrayObject
*)PyByteArray_FromStringAndSize(
1537 PyByteArray_AS_STRING(self
),
1538 PyByteArray_GET_SIZE(self
));
1541 Py_LOCAL_INLINE(Py_ssize_t
)
1542 countchar(const char *target
, Py_ssize_t target_len
, char c
, Py_ssize_t maxcount
)
1545 const char *start
=target
;
1546 const char *end
=target
+target_len
;
1548 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
1550 if (count
>= maxcount
)
1557 Py_LOCAL(Py_ssize_t
)
1558 findstring(const char *target
, Py_ssize_t target_len
,
1559 const char *pattern
, Py_ssize_t pattern_len
,
1565 start
+= target_len
;
1569 if (end
> target_len
) {
1571 } else if (end
< 0) {
1577 /* zero-length substrings always match at the first attempt */
1578 if (pattern_len
== 0)
1579 return (direction
> 0) ? start
: end
;
1583 if (direction
< 0) {
1584 for (; end
>= start
; end
--)
1585 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
))
1588 for (; start
<= end
; start
++)
1589 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
))
1595 Py_LOCAL_INLINE(Py_ssize_t
)
1596 countstring(const char *target
, Py_ssize_t target_len
,
1597 const char *pattern
, Py_ssize_t pattern_len
,
1600 int direction
, Py_ssize_t maxcount
)
1605 start
+= target_len
;
1609 if (end
> target_len
) {
1611 } else if (end
< 0) {
1617 /* zero-length substrings match everywhere */
1618 if (pattern_len
== 0 || maxcount
== 0) {
1619 if (target_len
+1 < maxcount
)
1620 return target_len
+1;
1625 if (direction
< 0) {
1626 for (; (end
>= start
); end
--)
1627 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
)) {
1629 if (--maxcount
<= 0) break;
1630 end
-= pattern_len
-1;
1633 for (; (start
<= end
); start
++)
1634 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
)) {
1636 if (--maxcount
<= 0)
1638 start
+= pattern_len
-1;
1645 /* Algorithms for different cases of string replacement */
1647 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1648 Py_LOCAL(PyByteArrayObject
*)
1649 replace_interleave(PyByteArrayObject
*self
,
1650 const char *to_s
, Py_ssize_t to_len
,
1651 Py_ssize_t maxcount
)
1653 char *self_s
, *result_s
;
1654 Py_ssize_t self_len
, result_len
;
1655 Py_ssize_t count
, i
, product
;
1656 PyByteArrayObject
*result
;
1658 self_len
= PyByteArray_GET_SIZE(self
);
1660 /* 1 at the end plus 1 after every character */
1662 if (maxcount
< count
)
1665 /* Check for overflow */
1666 /* result_len = count * to_len + self_len; */
1667 product
= count
* to_len
;
1668 if (product
/ to_len
!= count
) {
1669 PyErr_SetString(PyExc_OverflowError
,
1670 "replace string is too long");
1673 result_len
= product
+ self_len
;
1674 if (result_len
< 0) {
1675 PyErr_SetString(PyExc_OverflowError
,
1676 "replace string is too long");
1680 if (! (result
= (PyByteArrayObject
*)
1681 PyByteArray_FromStringAndSize(NULL
, result_len
)) )
1684 self_s
= PyByteArray_AS_STRING(self
);
1685 result_s
= PyByteArray_AS_STRING(result
);
1687 /* TODO: special case single character, which doesn't need memcpy */
1689 /* Lay the first one down (guaranteed this will occur) */
1690 Py_MEMCPY(result_s
, to_s
, to_len
);
1694 for (i
=0; i
<count
; i
++) {
1695 *result_s
++ = *self_s
++;
1696 Py_MEMCPY(result_s
, to_s
, to_len
);
1700 /* Copy the rest of the original string */
1701 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
1706 /* Special case for deleting a single character */
1707 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1708 Py_LOCAL(PyByteArrayObject
*)
1709 replace_delete_single_character(PyByteArrayObject
*self
,
1710 char from_c
, Py_ssize_t maxcount
)
1712 char *self_s
, *result_s
;
1713 char *start
, *next
, *end
;
1714 Py_ssize_t self_len
, result_len
;
1716 PyByteArrayObject
*result
;
1718 self_len
= PyByteArray_GET_SIZE(self
);
1719 self_s
= PyByteArray_AS_STRING(self
);
1721 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
1723 return return_self(self
);
1726 result_len
= self_len
- count
; /* from_len == 1 */
1727 assert(result_len
>=0);
1729 if ( (result
= (PyByteArrayObject
*)
1730 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1732 result_s
= PyByteArray_AS_STRING(result
);
1735 end
= self_s
+ self_len
;
1736 while (count
-- > 0) {
1737 next
= findchar(start
, end
-start
, from_c
);
1740 Py_MEMCPY(result_s
, start
, next
-start
);
1741 result_s
+= (next
-start
);
1744 Py_MEMCPY(result_s
, start
, end
-start
);
1749 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1751 Py_LOCAL(PyByteArrayObject
*)
1752 replace_delete_substring(PyByteArrayObject
*self
,
1753 const char *from_s
, Py_ssize_t from_len
,
1754 Py_ssize_t maxcount
)
1756 char *self_s
, *result_s
;
1757 char *start
, *next
, *end
;
1758 Py_ssize_t self_len
, result_len
;
1759 Py_ssize_t count
, offset
;
1760 PyByteArrayObject
*result
;
1762 self_len
= PyByteArray_GET_SIZE(self
);
1763 self_s
= PyByteArray_AS_STRING(self
);
1765 count
= countstring(self_s
, self_len
,
1772 return return_self(self
);
1775 result_len
= self_len
- (count
* from_len
);
1776 assert (result_len
>=0);
1778 if ( (result
= (PyByteArrayObject
*)
1779 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1782 result_s
= PyByteArray_AS_STRING(result
);
1785 end
= self_s
+ self_len
;
1786 while (count
-- > 0) {
1787 offset
= findstring(start
, end
-start
,
1789 0, end
-start
, FORWARD
);
1792 next
= start
+ offset
;
1794 Py_MEMCPY(result_s
, start
, next
-start
);
1796 result_s
+= (next
-start
);
1797 start
= next
+from_len
;
1799 Py_MEMCPY(result_s
, start
, end
-start
);
1803 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1804 Py_LOCAL(PyByteArrayObject
*)
1805 replace_single_character_in_place(PyByteArrayObject
*self
,
1806 char from_c
, char to_c
,
1807 Py_ssize_t maxcount
)
1809 char *self_s
, *result_s
, *start
, *end
, *next
;
1810 Py_ssize_t self_len
;
1811 PyByteArrayObject
*result
;
1813 /* The result string will be the same size */
1814 self_s
= PyByteArray_AS_STRING(self
);
1815 self_len
= PyByteArray_GET_SIZE(self
);
1817 next
= findchar(self_s
, self_len
, from_c
);
1820 /* No matches; return the original bytes */
1821 return return_self(self
);
1824 /* Need to make a new bytes */
1825 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, self_len
);
1828 result_s
= PyByteArray_AS_STRING(result
);
1829 Py_MEMCPY(result_s
, self_s
, self_len
);
1831 /* change everything in-place, starting with this one */
1832 start
= result_s
+ (next
-self_s
);
1835 end
= result_s
+ self_len
;
1837 while (--maxcount
> 0) {
1838 next
= findchar(start
, end
-start
, from_c
);
1848 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1849 Py_LOCAL(PyByteArrayObject
*)
1850 replace_substring_in_place(PyByteArrayObject
*self
,
1851 const char *from_s
, Py_ssize_t from_len
,
1852 const char *to_s
, Py_ssize_t to_len
,
1853 Py_ssize_t maxcount
)
1855 char *result_s
, *start
, *end
;
1857 Py_ssize_t self_len
, offset
;
1858 PyByteArrayObject
*result
;
1860 /* The result bytes will be the same size */
1862 self_s
= PyByteArray_AS_STRING(self
);
1863 self_len
= PyByteArray_GET_SIZE(self
);
1865 offset
= findstring(self_s
, self_len
,
1867 0, self_len
, FORWARD
);
1869 /* No matches; return the original bytes */
1870 return return_self(self
);
1873 /* Need to make a new bytes */
1874 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, self_len
);
1877 result_s
= PyByteArray_AS_STRING(result
);
1878 Py_MEMCPY(result_s
, self_s
, self_len
);
1880 /* change everything in-place, starting with this one */
1881 start
= result_s
+ offset
;
1882 Py_MEMCPY(start
, to_s
, from_len
);
1884 end
= result_s
+ self_len
;
1886 while ( --maxcount
> 0) {
1887 offset
= findstring(start
, end
-start
,
1889 0, end
-start
, FORWARD
);
1892 Py_MEMCPY(start
+offset
, to_s
, from_len
);
1893 start
+= offset
+from_len
;
1899 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1900 Py_LOCAL(PyByteArrayObject
*)
1901 replace_single_character(PyByteArrayObject
*self
,
1903 const char *to_s
, Py_ssize_t to_len
,
1904 Py_ssize_t maxcount
)
1906 char *self_s
, *result_s
;
1907 char *start
, *next
, *end
;
1908 Py_ssize_t self_len
, result_len
;
1909 Py_ssize_t count
, product
;
1910 PyByteArrayObject
*result
;
1912 self_s
= PyByteArray_AS_STRING(self
);
1913 self_len
= PyByteArray_GET_SIZE(self
);
1915 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
1917 /* no matches, return unchanged */
1918 return return_self(self
);
1921 /* use the difference between current and new, hence the "-1" */
1922 /* result_len = self_len + count * (to_len-1) */
1923 product
= count
* (to_len
-1);
1924 if (product
/ (to_len
-1) != count
) {
1925 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1928 result_len
= self_len
+ product
;
1929 if (result_len
< 0) {
1930 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1934 if ( (result
= (PyByteArrayObject
*)
1935 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1937 result_s
= PyByteArray_AS_STRING(result
);
1940 end
= self_s
+ self_len
;
1941 while (count
-- > 0) {
1942 next
= findchar(start
, end
-start
, from_c
);
1946 if (next
== start
) {
1947 /* replace with the 'to' */
1948 Py_MEMCPY(result_s
, to_s
, to_len
);
1952 /* copy the unchanged old then the 'to' */
1953 Py_MEMCPY(result_s
, start
, next
-start
);
1954 result_s
+= (next
-start
);
1955 Py_MEMCPY(result_s
, to_s
, to_len
);
1960 /* Copy the remainder of the remaining bytes */
1961 Py_MEMCPY(result_s
, start
, end
-start
);
1966 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1967 Py_LOCAL(PyByteArrayObject
*)
1968 replace_substring(PyByteArrayObject
*self
,
1969 const char *from_s
, Py_ssize_t from_len
,
1970 const char *to_s
, Py_ssize_t to_len
,
1971 Py_ssize_t maxcount
)
1973 char *self_s
, *result_s
;
1974 char *start
, *next
, *end
;
1975 Py_ssize_t self_len
, result_len
;
1976 Py_ssize_t count
, offset
, product
;
1977 PyByteArrayObject
*result
;
1979 self_s
= PyByteArray_AS_STRING(self
);
1980 self_len
= PyByteArray_GET_SIZE(self
);
1982 count
= countstring(self_s
, self_len
,
1984 0, self_len
, FORWARD
, maxcount
);
1986 /* no matches, return unchanged */
1987 return return_self(self
);
1990 /* Check for overflow */
1991 /* result_len = self_len + count * (to_len-from_len) */
1992 product
= count
* (to_len
-from_len
);
1993 if (product
/ (to_len
-from_len
) != count
) {
1994 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1997 result_len
= self_len
+ product
;
1998 if (result_len
< 0) {
1999 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
2003 if ( (result
= (PyByteArrayObject
*)
2004 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
2006 result_s
= PyByteArray_AS_STRING(result
);
2009 end
= self_s
+ self_len
;
2010 while (count
-- > 0) {
2011 offset
= findstring(start
, end
-start
,
2013 0, end
-start
, FORWARD
);
2016 next
= start
+offset
;
2017 if (next
== start
) {
2018 /* replace with the 'to' */
2019 Py_MEMCPY(result_s
, to_s
, to_len
);
2023 /* copy the unchanged old then the 'to' */
2024 Py_MEMCPY(result_s
, start
, next
-start
);
2025 result_s
+= (next
-start
);
2026 Py_MEMCPY(result_s
, to_s
, to_len
);
2028 start
= next
+from_len
;
2031 /* Copy the remainder of the remaining bytes */
2032 Py_MEMCPY(result_s
, start
, end
-start
);
2038 Py_LOCAL(PyByteArrayObject
*)
2039 replace(PyByteArrayObject
*self
,
2040 const char *from_s
, Py_ssize_t from_len
,
2041 const char *to_s
, Py_ssize_t to_len
,
2042 Py_ssize_t maxcount
)
2045 maxcount
= PY_SSIZE_T_MAX
;
2046 } else if (maxcount
== 0 || PyByteArray_GET_SIZE(self
) == 0) {
2047 /* nothing to do; return the original bytes */
2048 return return_self(self
);
2051 if (maxcount
== 0 ||
2052 (from_len
== 0 && to_len
== 0)) {
2053 /* nothing to do; return the original bytes */
2054 return return_self(self
);
2057 /* Handle zero-length special cases */
2059 if (from_len
== 0) {
2060 /* insert the 'to' bytes everywhere. */
2061 /* >>> "Python".replace("", ".") */
2062 /* '.P.y.t.h.o.n.' */
2063 return replace_interleave(self
, to_s
, to_len
, maxcount
);
2066 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2067 /* point for an empty self bytes to generate a non-empty bytes */
2068 /* Special case so the remaining code always gets a non-empty bytes */
2069 if (PyByteArray_GET_SIZE(self
) == 0) {
2070 return return_self(self
);
2074 /* delete all occurances of 'from' bytes */
2075 if (from_len
== 1) {
2076 return replace_delete_single_character(
2077 self
, from_s
[0], maxcount
);
2079 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
2083 /* Handle special case where both bytes have the same length */
2085 if (from_len
== to_len
) {
2086 if (from_len
== 1) {
2087 return replace_single_character_in_place(
2093 return replace_substring_in_place(
2094 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2098 /* Otherwise use the more generic algorithms */
2099 if (from_len
== 1) {
2100 return replace_single_character(self
, from_s
[0],
2101 to_s
, to_len
, maxcount
);
2103 /* len('from')>=2, len('to')>=1 */
2104 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2109 PyDoc_STRVAR(replace__doc__
,
2110 "B.replace(old, new[, count]) -> bytes\n\
2112 Return a copy of B with all occurrences of subsection\n\
2113 old replaced by new. If the optional argument count is\n\
2114 given, only the first count occurrences are replaced.");
2117 bytes_replace(PyByteArrayObject
*self
, PyObject
*args
)
2119 Py_ssize_t count
= -1;
2120 PyObject
*from
, *to
, *res
;
2121 Py_buffer vfrom
, vto
;
2123 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
2126 if (_getbuffer(from
, &vfrom
) < 0)
2128 if (_getbuffer(to
, &vto
) < 0) {
2129 PyBuffer_Release(&vfrom
);
2133 res
= (PyObject
*)replace((PyByteArrayObject
*) self
,
2134 vfrom
.buf
, vfrom
.len
,
2135 vto
.buf
, vto
.len
, count
);
2137 PyBuffer_Release(&vfrom
);
2138 PyBuffer_Release(&vto
);
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesised so that an expression argument keeps its
   own precedence inside the comparison and the addition. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append the slice data[left:right] to `list` as a new bytearray.
   Relies on the caller's locals `str` and `list` and on an `onError`
   label in the enclosing function. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but fills one of the preallocated slots of `list`
   directly while `count` is below MAX_PREALLOC, and bumps `count`.
   Relies on the caller's locals `str`, `list`, `count` and on an
   `onError` label in the enclosing function. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2189 Py_LOCAL_INLINE(PyObject
*)
2190 split_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
2192 register Py_ssize_t i
, j
, count
= 0;
2194 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2200 while ((j
< len
) && (maxcount
-- > 0)) {
2201 for(; j
< len
; j
++) {
2202 /* I found that using memchr makes no difference */
2211 SPLIT_ADD(s
, i
, len
);
2213 FIX_PREALLOC_SIZE(list
);
2222 Py_LOCAL_INLINE(PyObject
*)
2223 split_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxcount
)
2225 register Py_ssize_t i
, j
, count
= 0;
2227 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2232 for (i
= j
= 0; i
< len
; ) {
2234 while (i
< len
&& ISSPACE(s
[i
]))
2237 while (i
< len
&& !ISSPACE(s
[i
]))
2240 if (maxcount
-- <= 0)
2243 while (i
< len
&& ISSPACE(s
[i
]))
2249 SPLIT_ADD(s
, j
, len
);
2251 FIX_PREALLOC_SIZE(list
);
2259 PyDoc_STRVAR(split__doc__
,
2260 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2262 Return a list of the sections in B, using sep as the delimiter.\n\
2263 If sep is not given, B is split on ASCII whitespace characters\n\
2264 (space, tab, return, newline, formfeed, vertical tab).\n\
2265 If maxsplit is given, at most maxsplit splits are done.");
2268 bytes_split(PyByteArrayObject
*self
, PyObject
*args
)
2270 Py_ssize_t len
= PyByteArray_GET_SIZE(self
), n
, i
, j
;
2271 Py_ssize_t maxsplit
= -1, count
= 0;
2272 const char *s
= PyByteArray_AS_STRING(self
), *sub
;
2273 PyObject
*list
, *str
, *subobj
= Py_None
;
2279 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
2282 maxsplit
= PY_SSIZE_T_MAX
;
2284 if (subobj
== Py_None
)
2285 return split_whitespace(s
, len
, maxsplit
);
2287 if (_getbuffer(subobj
, &vsub
) < 0)
2293 PyErr_SetString(PyExc_ValueError
, "empty separator");
2294 PyBuffer_Release(&vsub
);
2298 list
= split_char(s
, len
, sub
[0], maxsplit
);
2299 PyBuffer_Release(&vsub
);
2303 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
2305 PyBuffer_Release(&vsub
);
2311 while (maxsplit
-- > 0) {
2312 pos
= fastsearch(s
+i
, len
-i
, sub
, n
, FAST_SEARCH
);
2321 while ((j
+n
<= len
) && (maxsplit
-- > 0)) {
2322 for (; j
+n
<= len
; j
++) {
2323 if (Py_STRING_MATCH(s
, j
, sub
, n
)) {
2331 SPLIT_ADD(s
, i
, len
);
2332 FIX_PREALLOC_SIZE(list
);
2333 PyBuffer_Release(&vsub
);
2338 PyBuffer_Release(&vsub
);
2342 /* stringlib's partition shares nullbytes in some cases.
2343 undo this, we don't want the nullbytes to be shared. */
2345 make_nullbytes_unique(PyObject
*result
)
2347 if (result
!= NULL
) {
2349 assert(PyTuple_Check(result
));
2350 assert(PyTuple_GET_SIZE(result
) == 3);
2351 for (i
= 0; i
< 3; i
++) {
2352 if (PyTuple_GET_ITEM(result
, i
) == (PyObject
*)nullbytes
) {
2353 PyObject
*new = PyByteArray_FromStringAndSize(NULL
, 0);
2359 Py_DECREF(nullbytes
);
2360 PyTuple_SET_ITEM(result
, i
, new);
2367 PyDoc_STRVAR(partition__doc__
,
2368 "B.partition(sep) -> (head, sep, tail)\n\
2370 Searches for the separator sep in B, and returns the part before it,\n\
2371 the separator itself, and the part after it. If the separator is not\n\
2372 found, returns B and two empty bytearray objects.");
2375 bytes_partition(PyByteArrayObject
*self
, PyObject
*sep_obj
)
2377 PyObject
*bytesep
, *result
;
2379 bytesep
= PyByteArray_FromObject(sep_obj
);
2383 result
= stringlib_partition(
2385 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
2387 PyByteArray_AS_STRING(bytesep
), PyByteArray_GET_SIZE(bytesep
)
2391 return make_nullbytes_unique(result
);
2394 PyDoc_STRVAR(rpartition__doc__
,
2395 "B.rpartition(sep) -> (tail, sep, head)\n\
2397 Searches for the separator sep in B, starting at the end of B,\n\
2398 and returns the part before it, the separator itself, and the\n\
2399 part after it. If the separator is not found, returns two empty\n\
2400 bytearray objects and B.");
2403 bytes_rpartition(PyByteArrayObject
*self
, PyObject
*sep_obj
)
2405 PyObject
*bytesep
, *result
;
2407 bytesep
= PyByteArray_FromObject(sep_obj
);
2411 result
= stringlib_rpartition(
2413 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
2415 PyByteArray_AS_STRING(bytesep
), PyByteArray_GET_SIZE(bytesep
)
2419 return make_nullbytes_unique(result
);
2422 Py_LOCAL_INLINE(PyObject
*)
2423 rsplit_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
2425 register Py_ssize_t i
, j
, count
=0;
2427 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2433 while ((i
>= 0) && (maxcount
-- > 0)) {
2434 for (; i
>= 0; i
--) {
2436 SPLIT_ADD(s
, i
+ 1, j
+ 1);
2443 SPLIT_ADD(s
, 0, j
+ 1);
2445 FIX_PREALLOC_SIZE(list
);
2446 if (PyList_Reverse(list
) < 0)
2456 Py_LOCAL_INLINE(PyObject
*)
2457 rsplit_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxcount
)
2459 register Py_ssize_t i
, j
, count
= 0;
2461 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2466 for (i
= j
= len
- 1; i
>= 0; ) {
2468 while (i
>= 0 && ISSPACE(s
[i
]))
2471 while (i
>= 0 && !ISSPACE(s
[i
]))
2474 if (maxcount
-- <= 0)
2476 SPLIT_ADD(s
, i
+ 1, j
+ 1);
2477 while (i
>= 0 && ISSPACE(s
[i
]))
2483 SPLIT_ADD(s
, 0, j
+ 1);
2485 FIX_PREALLOC_SIZE(list
);
2486 if (PyList_Reverse(list
) < 0)
2496 PyDoc_STRVAR(rsplit__doc__
,
2497 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2499 Return a list of the sections in B, using sep as the delimiter,\n\
2500 starting at the end of B and working to the front.\n\
2501 If sep is not given, B is split on ASCII whitespace characters\n\
2502 (space, tab, return, newline, formfeed, vertical tab).\n\
2503 If maxsplit is given, at most maxsplit splits are done.");
2506 bytes_rsplit(PyByteArrayObject
*self
, PyObject
*args
)
2508 Py_ssize_t len
= PyByteArray_GET_SIZE(self
), n
, i
, j
;
2509 Py_ssize_t maxsplit
= -1, count
= 0;
2510 const char *s
= PyByteArray_AS_STRING(self
), *sub
;
2511 PyObject
*list
, *str
, *subobj
= Py_None
;
2514 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
2517 maxsplit
= PY_SSIZE_T_MAX
;
2519 if (subobj
== Py_None
)
2520 return rsplit_whitespace(s
, len
, maxsplit
);
2522 if (_getbuffer(subobj
, &vsub
) < 0)
2528 PyErr_SetString(PyExc_ValueError
, "empty separator");
2529 PyBuffer_Release(&vsub
);
2533 list
= rsplit_char(s
, len
, sub
[0], maxsplit
);
2534 PyBuffer_Release(&vsub
);
2538 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
2540 PyBuffer_Release(&vsub
);
2547 while ( (i
>= 0) && (maxsplit
-- > 0) ) {
2549 if (Py_STRING_MATCH(s
, i
, sub
, n
)) {
2550 SPLIT_ADD(s
, i
+ n
, j
);
2558 FIX_PREALLOC_SIZE(list
);
2559 if (PyList_Reverse(list
) < 0)
2561 PyBuffer_Release(&vsub
);
2566 PyBuffer_Release(&vsub
);
2570 PyDoc_STRVAR(reverse__doc__
,
2571 "B.reverse() -> None\n\
2573 Reverse the order of the values in B in place.");
2575 bytes_reverse(PyByteArrayObject
*self
, PyObject
*unused
)
2577 char swap
, *head
, *tail
;
2578 Py_ssize_t i
, j
, n
= Py_SIZE(self
);
2581 head
= self
->ob_bytes
;
2582 tail
= head
+ n
- 1;
2583 for (i
= 0; i
< j
; i
++) {
2592 PyDoc_STRVAR(insert__doc__
,
2593 "B.insert(index, int) -> None\n\
2595 Insert a single item into the bytearray before the given index.");
2597 bytes_insert(PyByteArrayObject
*self
, PyObject
*args
)
2601 Py_ssize_t where
, n
= Py_SIZE(self
);
2603 if (!PyArg_ParseTuple(args
, "nO:insert", &where
, &value
))
2606 if (n
== PY_SSIZE_T_MAX
) {
2607 PyErr_SetString(PyExc_OverflowError
,
2608 "cannot add more objects to bytes");
2611 if (!_getbytevalue(value
, &ival
))
2613 if (PyByteArray_Resize((PyObject
*)self
, n
+ 1) < 0)
2623 memmove(self
->ob_bytes
+ where
+ 1, self
->ob_bytes
+ where
, n
- where
);
2624 self
->ob_bytes
[where
] = ival
;
2629 PyDoc_STRVAR(append__doc__
,
2630 "B.append(int) -> None\n\
2632 Append a single item to the end of B.");
2634 bytes_append(PyByteArrayObject
*self
, PyObject
*arg
)
2637 Py_ssize_t n
= Py_SIZE(self
);
2639 if (! _getbytevalue(arg
, &value
))
2641 if (n
== PY_SSIZE_T_MAX
) {
2642 PyErr_SetString(PyExc_OverflowError
,
2643 "cannot add more objects to bytes");
2646 if (PyByteArray_Resize((PyObject
*)self
, n
+ 1) < 0)
2649 self
->ob_bytes
[n
] = value
;
2654 PyDoc_STRVAR(extend__doc__
,
2655 "B.extend(iterable int) -> None\n\
2657 Append all the elements from the iterator or sequence to the\n\
2660 bytes_extend(PyByteArrayObject
*self
, PyObject
*arg
)
2662 PyObject
*it
, *item
, *bytes_obj
;
2663 Py_ssize_t buf_size
= 0, len
= 0;
2667 /* bytes_setslice code only accepts something supporting PEP 3118. */
2668 if (PyObject_CheckBuffer(arg
)) {
2669 if (bytes_setslice(self
, Py_SIZE(self
), Py_SIZE(self
), arg
) == -1)
2675 it
= PyObject_GetIter(arg
);
2679 /* Try to determine the length of the argument. 32 is abitrary. */
2680 buf_size
= _PyObject_LengthHint(arg
, 32);
2682 bytes_obj
= PyByteArray_FromStringAndSize(NULL
, buf_size
);
2683 if (bytes_obj
== NULL
)
2685 buf
= PyByteArray_AS_STRING(bytes_obj
);
2687 while ((item
= PyIter_Next(it
)) != NULL
) {
2688 if (! _getbytevalue(item
, &value
)) {
2691 Py_DECREF(bytes_obj
);
2697 if (len
>= buf_size
) {
2698 buf_size
= len
+ (len
>> 1) + 1;
2699 if (PyByteArray_Resize((PyObject
*)bytes_obj
, buf_size
) < 0) {
2701 Py_DECREF(bytes_obj
);
2704 /* Recompute the `buf' pointer, since the resizing operation may
2705 have invalidated it. */
2706 buf
= PyByteArray_AS_STRING(bytes_obj
);
2711 /* Resize down to exact size. */
2712 if (PyByteArray_Resize((PyObject
*)bytes_obj
, len
) < 0) {
2713 Py_DECREF(bytes_obj
);
2717 if (bytes_setslice(self
, Py_SIZE(self
), Py_SIZE(self
), bytes_obj
) == -1)
2719 Py_DECREF(bytes_obj
);
2724 PyDoc_STRVAR(pop__doc__
,
2725 "B.pop([index]) -> int\n\
2727 Remove and return a single item from B. If no index\n\
2728 argument is given, will pop the last value.");
2730 bytes_pop(PyByteArrayObject
*self
, PyObject
*args
)
2733 Py_ssize_t where
= -1, n
= Py_SIZE(self
);
2735 if (!PyArg_ParseTuple(args
, "|n:pop", &where
))
2739 PyErr_SetString(PyExc_OverflowError
,
2740 "cannot pop an empty bytes");
2744 where
+= Py_SIZE(self
);
2745 if (where
< 0 || where
>= Py_SIZE(self
)) {
2746 PyErr_SetString(PyExc_IndexError
, "pop index out of range");
2749 if (!_canresize(self
))
2752 value
= self
->ob_bytes
[where
];
2753 memmove(self
->ob_bytes
+ where
, self
->ob_bytes
+ where
+ 1, n
- where
);
2754 if (PyByteArray_Resize((PyObject
*)self
, n
- 1) < 0)
2757 return PyInt_FromLong(value
);
2760 PyDoc_STRVAR(remove__doc__
,
2761 "B.remove(int) -> None\n\
2763 Remove the first occurance of a value in B.");
2765 bytes_remove(PyByteArrayObject
*self
, PyObject
*arg
)
2768 Py_ssize_t where
, n
= Py_SIZE(self
);
2770 if (! _getbytevalue(arg
, &value
))
2773 for (where
= 0; where
< n
; where
++) {
2774 if (self
->ob_bytes
[where
] == value
)
2778 PyErr_SetString(PyExc_ValueError
, "value not found in bytes");
2781 if (!_canresize(self
))
2784 memmove(self
->ob_bytes
+ where
, self
->ob_bytes
+ where
+ 1, n
- where
);
2785 if (PyByteArray_Resize((PyObject
*)self
, n
- 1) < 0)
2791 /* XXX These two helpers could be optimized if argsize == 1 */
2794 lstrip_helper(unsigned char *myptr
, Py_ssize_t mysize
,
2795 void *argptr
, Py_ssize_t argsize
)
2798 while (i
< mysize
&& memchr(argptr
, myptr
[i
], argsize
))
2804 rstrip_helper(unsigned char *myptr
, Py_ssize_t mysize
,
2805 void *argptr
, Py_ssize_t argsize
)
2807 Py_ssize_t i
= mysize
- 1;
2808 while (i
>= 0 && memchr(argptr
, myptr
[i
], argsize
))
2813 PyDoc_STRVAR(strip__doc__
,
2814 "B.strip([bytes]) -> bytearray\n\
2816 Strip leading and trailing bytes contained in the argument.\n\
2817 If the argument is omitted, strip ASCII whitespace.");
2819 bytes_strip(PyByteArrayObject
*self
, PyObject
*args
)
2821 Py_ssize_t left
, right
, mysize
, argsize
;
2822 void *myptr
, *argptr
;
2823 PyObject
*arg
= Py_None
;
2825 if (!PyArg_ParseTuple(args
, "|O:strip", &arg
))
2827 if (arg
== Py_None
) {
2828 argptr
= "\t\n\r\f\v ";
2832 if (_getbuffer(arg
, &varg
) < 0)
2837 myptr
= self
->ob_bytes
;
2838 mysize
= Py_SIZE(self
);
2839 left
= lstrip_helper(myptr
, mysize
, argptr
, argsize
);
2843 right
= rstrip_helper(myptr
, mysize
, argptr
, argsize
);
2845 PyBuffer_Release(&varg
);
2846 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2849 PyDoc_STRVAR(lstrip__doc__
,
2850 "B.lstrip([bytes]) -> bytearray\n\
2852 Strip leading bytes contained in the argument.\n\
2853 If the argument is omitted, strip leading ASCII whitespace.");
2855 bytes_lstrip(PyByteArrayObject
*self
, PyObject
*args
)
2857 Py_ssize_t left
, right
, mysize
, argsize
;
2858 void *myptr
, *argptr
;
2859 PyObject
*arg
= Py_None
;
2861 if (!PyArg_ParseTuple(args
, "|O:lstrip", &arg
))
2863 if (arg
== Py_None
) {
2864 argptr
= "\t\n\r\f\v ";
2868 if (_getbuffer(arg
, &varg
) < 0)
2873 myptr
= self
->ob_bytes
;
2874 mysize
= Py_SIZE(self
);
2875 left
= lstrip_helper(myptr
, mysize
, argptr
, argsize
);
2878 PyBuffer_Release(&varg
);
2879 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2882 PyDoc_STRVAR(rstrip__doc__
,
2883 "B.rstrip([bytes]) -> bytearray\n\
2885 Strip trailing bytes contained in the argument.\n\
2886 If the argument is omitted, strip trailing ASCII whitespace.");
2888 bytes_rstrip(PyByteArrayObject
*self
, PyObject
*args
)
2890 Py_ssize_t left
, right
, mysize
, argsize
;
2891 void *myptr
, *argptr
;
2892 PyObject
*arg
= Py_None
;
2894 if (!PyArg_ParseTuple(args
, "|O:rstrip", &arg
))
2896 if (arg
== Py_None
) {
2897 argptr
= "\t\n\r\f\v ";
2901 if (_getbuffer(arg
, &varg
) < 0)
2906 myptr
= self
->ob_bytes
;
2907 mysize
= Py_SIZE(self
);
2909 right
= rstrip_helper(myptr
, mysize
, argptr
, argsize
);
2911 PyBuffer_Release(&varg
);
2912 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2915 PyDoc_STRVAR(decode_doc
,
2916 "B.decode([encoding[, errors]]) -> unicode object.\n\
2918 Decodes B using the codec registered for encoding. encoding defaults\n\
2919 to the default encoding. errors may be given to set a different error\n\
2920 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2921 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2922 as well as any other name registered with codecs.register_error that is\n\
2923 able to handle UnicodeDecodeErrors.");
2926 bytes_decode(PyObject
*self
, PyObject
*args
)
2928 const char *encoding
= NULL
;
2929 const char *errors
= NULL
;
2931 if (!PyArg_ParseTuple(args
, "|ss:decode", &encoding
, &errors
))
2933 if (encoding
== NULL
)
2934 encoding
= PyUnicode_GetDefaultEncoding();
2935 return PyCodec_Decode(self
, encoding
, errors
);
2938 PyDoc_STRVAR(alloc_doc
,
2939 "B.__alloc__() -> int\n\
2941 Returns the number of bytes actually allocated.");
2944 bytes_alloc(PyByteArrayObject
*self
)
2946 return PyInt_FromSsize_t(self
->ob_alloc
);
2949 PyDoc_STRVAR(join_doc
,
2950 "B.join(iterable_of_bytes) -> bytes\n\
2952 Concatenates any number of bytearray objects, with B in between each pair.");
2955 bytes_join(PyByteArrayObject
*self
, PyObject
*it
)
2958 Py_ssize_t mysize
= Py_SIZE(self
);
2962 Py_ssize_t totalsize
= 0;
2966 seq
= PySequence_Fast(it
, "can only join an iterable");
2969 n
= PySequence_Fast_GET_SIZE(seq
);
2970 items
= PySequence_Fast_ITEMS(seq
);
2972 /* Compute the total size, and check that they are all bytes */
2973 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2974 for (i
= 0; i
< n
; i
++) {
2975 PyObject
*obj
= items
[i
];
2976 if (!PyByteArray_Check(obj
) && !PyBytes_Check(obj
)) {
2977 PyErr_Format(PyExc_TypeError
,
2978 "can only join an iterable of bytes "
2979 "(item %ld has type '%.100s')",
2980 /* XXX %ld isn't right on Win64 */
2981 (long)i
, Py_TYPE(obj
)->tp_name
);
2985 totalsize
+= mysize
;
2986 totalsize
+= Py_SIZE(obj
);
2987 if (totalsize
< 0) {
2993 /* Allocate the result, and copy the bytes */
2994 result
= PyByteArray_FromStringAndSize(NULL
, totalsize
);
2997 dest
= PyByteArray_AS_STRING(result
);
2998 for (i
= 0; i
< n
; i
++) {
2999 PyObject
*obj
= items
[i
];
3000 Py_ssize_t size
= Py_SIZE(obj
);
3002 if (PyByteArray_Check(obj
))
3003 buf
= PyByteArray_AS_STRING(obj
);
3005 buf
= PyBytes_AS_STRING(obj
);
3007 memcpy(dest
, self
->ob_bytes
, mysize
);
3010 memcpy(dest
, buf
, size
);
3018 /* Error handling */
3024 PyDoc_STRVAR(fromhex_doc
,
3025 "bytearray.fromhex(string) -> bytearray\n\
3027 Create a bytearray object from a string of hexadecimal numbers.\n\
3028 Spaces between two numbers are accepted.\n\
3029 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3032 hex_digit_to_int(Py_UNICODE c
)
3041 if (c
>= 'a' && c
<= 'f')
3042 return c
- 'a' + 10;
3048 bytes_fromhex(PyObject
*cls
, PyObject
*args
)
3050 PyObject
*newbytes
, *hexobj
;
3053 Py_ssize_t hexlen
, byteslen
, i
, j
;
3056 if (!PyArg_ParseTuple(args
, "U:fromhex", &hexobj
))
3058 assert(PyUnicode_Check(hexobj
));
3059 hexlen
= PyUnicode_GET_SIZE(hexobj
);
3060 hex
= PyUnicode_AS_UNICODE(hexobj
);
3061 byteslen
= hexlen
/2; /* This overestimates if there are spaces */
3062 newbytes
= PyByteArray_FromStringAndSize(NULL
, byteslen
);
3065 buf
= PyByteArray_AS_STRING(newbytes
);
3066 for (i
= j
= 0; i
< hexlen
; i
+= 2) {
3067 /* skip over spaces in the input */
3068 while (hex
[i
] == ' ')
3072 top
= hex_digit_to_int(hex
[i
]);
3073 bot
= hex_digit_to_int(hex
[i
+1]);
3074 if (top
== -1 || bot
== -1) {
3075 PyErr_Format(PyExc_ValueError
,
3076 "non-hexadecimal number found in "
3077 "fromhex() arg at position %zd", i
);
3080 buf
[j
++] = (top
<< 4) + bot
;
3082 if (PyByteArray_Resize(newbytes
, j
) < 0)
3087 Py_DECREF(newbytes
);
3091 PyDoc_STRVAR(reduce_doc
, "Return state information for pickling.");
3094 bytes_reduce(PyByteArrayObject
*self
)
3096 PyObject
*latin1
, *dict
;
3098 latin1
= PyUnicode_DecodeLatin1(self
->ob_bytes
,
3099 Py_SIZE(self
), NULL
);
3101 latin1
= PyUnicode_FromString("");
3103 dict
= PyObject_GetAttrString((PyObject
*)self
, "__dict__");
3110 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self
), latin1
, "latin-1", dict
);
3113 PyDoc_STRVAR(sizeof_doc
,
3114 "B.__sizeof__() -> int\n\
3116 Returns the size of B in memory, in bytes");
3118 bytes_sizeof(PyByteArrayObject
*self
)
3122 res
= sizeof(PyByteArrayObject
) + self
->ob_alloc
* sizeof(char);
3123 return PyInt_FromSsize_t(res
);
3126 static PySequenceMethods bytes_as_sequence
= {
3127 (lenfunc
)bytes_length
, /* sq_length */
3128 (binaryfunc
)PyByteArray_Concat
, /* sq_concat */
3129 (ssizeargfunc
)bytes_repeat
, /* sq_repeat */
3130 (ssizeargfunc
)bytes_getitem
, /* sq_item */
3132 (ssizeobjargproc
)bytes_setitem
, /* sq_ass_item */
3133 0, /* sq_ass_slice */
3134 (objobjproc
)bytes_contains
, /* sq_contains */
3135 (binaryfunc
)bytes_iconcat
, /* sq_inplace_concat */
3136 (ssizeargfunc
)bytes_irepeat
, /* sq_inplace_repeat */
3139 static PyMappingMethods bytes_as_mapping
= {
3140 (lenfunc
)bytes_length
,
3141 (binaryfunc
)bytes_subscript
,
3142 (objobjargproc
)bytes_ass_subscript
,
/* Buffer protocol slots for bytearray; installed through the tp_as_buffer
   slot of PyByteArray_Type.  The first four entries are the segment-based
   accessors (read, write, segment count, char buffer); the last two are the
   Py_buffer based get/release pair. */
3145 static PyBufferProcs bytes_as_buffer
= {
3146 (readbufferproc
)bytes_buffer_getreadbuf
, /* bf_getreadbuffer */
3147 (writebufferproc
)bytes_buffer_getwritebuf
, /* bf_getwritebuffer */
3148 (segcountproc
)bytes_buffer_getsegcount
, /* bf_getsegcount */
3149 (charbufferproc
)bytes_buffer_getcharbuf
, /* bf_getcharbuffer */
3150 (getbufferproc
)bytes_getbuffer
, /* bf_getbuffer */
3151 (releasebufferproc
)bytes_releasebuffer
, /* bf_releasebuffer */
/* Method table entries for the bytearray type.
 * NOTE(review): assumed to be the body of bytes_methods, the table that
 * PyByteArray_Type installs as tp_methods -- confirm against the array
 * declaration.
 *
 * Each entry is {python name, C handler, calling-convention flags, docstring}.
 * Entries are kept in alphabetical order by Python name.  Flags used here:
 *   METH_NOARGS  - handler takes no arguments besides self
 *   METH_O       - handler takes exactly one object argument
 *   METH_VARARGS - handler receives an argument tuple
 *   METH_CLASS   - handler is bound to the type rather than an instance
 * Handlers are either bytes_* functions or shared stringlib_* functions.
 */
3156 {"__alloc__", (PyCFunction
)bytes_alloc
, METH_NOARGS
, alloc_doc
},
3157 {"__reduce__", (PyCFunction
)bytes_reduce
, METH_NOARGS
, reduce_doc
},
3158 {"__sizeof__", (PyCFunction
)bytes_sizeof
, METH_NOARGS
, sizeof_doc
},
3159 {"append", (PyCFunction
)bytes_append
, METH_O
, append__doc__
},
3160 {"capitalize", (PyCFunction
)stringlib_capitalize
, METH_NOARGS
,
3161 _Py_capitalize__doc__
},
3162 {"center", (PyCFunction
)stringlib_center
, METH_VARARGS
, center__doc__
},
3163 {"count", (PyCFunction
)bytes_count
, METH_VARARGS
, count__doc__
},
3164 {"decode", (PyCFunction
)bytes_decode
, METH_VARARGS
, decode_doc
},
3165 {"endswith", (PyCFunction
)bytes_endswith
, METH_VARARGS
, endswith__doc__
},
3166 {"expandtabs", (PyCFunction
)stringlib_expandtabs
, METH_VARARGS
,
3168 {"extend", (PyCFunction
)bytes_extend
, METH_O
, extend__doc__
},
3169 {"find", (PyCFunction
)bytes_find
, METH_VARARGS
, find__doc__
},
/* fromhex is the only entry flagged METH_CLASS. */
3170 {"fromhex", (PyCFunction
)bytes_fromhex
, METH_VARARGS
|METH_CLASS
,
3172 {"index", (PyCFunction
)bytes_index
, METH_VARARGS
, index__doc__
},
3173 {"insert", (PyCFunction
)bytes_insert
, METH_VARARGS
, insert__doc__
},
3174 {"isalnum", (PyCFunction
)stringlib_isalnum
, METH_NOARGS
,
3175 _Py_isalnum__doc__
},
3176 {"isalpha", (PyCFunction
)stringlib_isalpha
, METH_NOARGS
,
3177 _Py_isalpha__doc__
},
3178 {"isdigit", (PyCFunction
)stringlib_isdigit
, METH_NOARGS
,
3179 _Py_isdigit__doc__
},
3180 {"islower", (PyCFunction
)stringlib_islower
, METH_NOARGS
,
3181 _Py_islower__doc__
},
3182 {"isspace", (PyCFunction
)stringlib_isspace
, METH_NOARGS
,
3183 _Py_isspace__doc__
},
3184 {"istitle", (PyCFunction
)stringlib_istitle
, METH_NOARGS
,
3185 _Py_istitle__doc__
},
3186 {"isupper", (PyCFunction
)stringlib_isupper
, METH_NOARGS
,
3187 _Py_isupper__doc__
},
3188 {"join", (PyCFunction
)bytes_join
, METH_O
, join_doc
},
3189 {"ljust", (PyCFunction
)stringlib_ljust
, METH_VARARGS
, ljust__doc__
},
3190 {"lower", (PyCFunction
)stringlib_lower
, METH_NOARGS
, _Py_lower__doc__
},
3191 {"lstrip", (PyCFunction
)bytes_lstrip
, METH_VARARGS
, lstrip__doc__
},
3192 {"partition", (PyCFunction
)bytes_partition
, METH_O
, partition__doc__
},
3193 {"pop", (PyCFunction
)bytes_pop
, METH_VARARGS
, pop__doc__
},
3194 {"remove", (PyCFunction
)bytes_remove
, METH_O
, remove__doc__
},
3195 {"replace", (PyCFunction
)bytes_replace
, METH_VARARGS
, replace__doc__
},
3196 {"reverse", (PyCFunction
)bytes_reverse
, METH_NOARGS
, reverse__doc__
},
3197 {"rfind", (PyCFunction
)bytes_rfind
, METH_VARARGS
, rfind__doc__
},
3198 {"rindex", (PyCFunction
)bytes_rindex
, METH_VARARGS
, rindex__doc__
},
3199 {"rjust", (PyCFunction
)stringlib_rjust
, METH_VARARGS
, rjust__doc__
},
3200 {"rpartition", (PyCFunction
)bytes_rpartition
, METH_O
, rpartition__doc__
},
3201 {"rsplit", (PyCFunction
)bytes_rsplit
, METH_VARARGS
, rsplit__doc__
},
3202 {"rstrip", (PyCFunction
)bytes_rstrip
, METH_VARARGS
, rstrip__doc__
},
3203 {"split", (PyCFunction
)bytes_split
, METH_VARARGS
, split__doc__
},
3204 {"splitlines", (PyCFunction
)stringlib_splitlines
, METH_VARARGS
,
3206 {"startswith", (PyCFunction
)bytes_startswith
, METH_VARARGS
,
3208 {"strip", (PyCFunction
)bytes_strip
, METH_VARARGS
, strip__doc__
},
3209 {"swapcase", (PyCFunction
)stringlib_swapcase
, METH_NOARGS
,
3210 _Py_swapcase__doc__
},
3211 {"title", (PyCFunction
)stringlib_title
, METH_NOARGS
, _Py_title__doc__
},
3212 {"translate", (PyCFunction
)bytes_translate
, METH_VARARGS
,
3214 {"upper", (PyCFunction
)stringlib_upper
, METH_NOARGS
, _Py_upper__doc__
},
3215 {"zfill", (PyCFunction
)stringlib_zfill
, METH_VARARGS
, zfill__doc__
},
/* Docstring for the bytearray type itself; PyByteArray_Type installs it as
   tp_doc.  It lists the accepted constructor argument forms. */
3219 PyDoc_STRVAR(bytes_doc
,
3220 "bytearray(iterable_of_ints) -> bytearray.\n\
3221 bytearray(string, encoding[, errors]) -> bytearray.\n\
3222 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3223 bytearray(memory_view) -> bytearray.\n\
3225 Construct a mutable bytearray object from:\n\
3226 - an iterable yielding integers in range(256)\n\
3227 - a text string encoded using the specified encoding\n\
3228 - a bytes or a bytearray object\n\
3229 - any object implementing the buffer API.\n\
3231 bytearray(int) -> bytearray.\n\
3233 Construct a zero-initialized bytearray of the given length.");
/* Forward declaration: bytes_iter is defined after the type object but is
   referenced by its tp_iter slot. */
3236 static PyObject
*bytes_iter(PyObject
*seq
);
/* Type object for bytearray.  Wires up the sequence, mapping and buffer
   slot tables, the method table, the docstring and the iterator factory. */
3238 PyTypeObject PyByteArray_Type
= {
3239 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3241 sizeof(PyByteArrayObject
), /* tp_basicsize */
3243 (destructor
)bytes_dealloc
, /* tp_dealloc */
3248 (reprfunc
)bytes_repr
, /* tp_repr */
3249 0, /* tp_as_number */
3250 &bytes_as_sequence
, /* tp_as_sequence */
3251 &bytes_as_mapping
, /* tp_as_mapping */
3254 bytes_str
, /* tp_str */
3255 PyObject_GenericGetAttr
, /* tp_getattro */
3256 0, /* tp_setattro */
3257 &bytes_as_buffer
, /* tp_as_buffer */
/* BASETYPE allows subclassing; HAVE_NEWBUFFER advertises the
   bf_getbuffer/bf_releasebuffer entries of bytes_as_buffer. */
3258 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
3259 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
3260 bytes_doc
, /* tp_doc */
3261 0, /* tp_traverse */
3263 (richcmpfunc
)bytes_richcompare
, /* tp_richcompare */
3264 0, /* tp_weaklistoffset */
3265 bytes_iter
, /* tp_iter */
3266 0, /* tp_iternext */
3267 bytes_methods
, /* tp_methods */
3272 0, /* tp_descr_get */
3273 0, /* tp_descr_set */
3274 0, /* tp_dictoffset */
3275 (initproc
)bytes_init
, /* tp_init */
/* Allocation and construction use the generic helpers; the
   type-specific setup is done by bytes_init in the tp_init slot. */
3276 PyType_GenericAlloc
, /* tp_alloc */
3277 PyType_GenericNew
, /* tp_new */
3278 PyObject_Del
, /* tp_free */
3281 /*********************** Bytes Iterator ****************************/
/* Fields of the bytearray iterator object. */
/* it_index: position in it_seq; bytesiter_next compares it against the
   sequence length and uses it to index ob_bytes. */
3285 Py_ssize_t it_index
;
/* it_seq: the bytearray being iterated. */
3286 PyByteArrayObject
*it_seq
; /* Set to NULL when iterator is exhausted */
/* Destructor for the bytearray iterator (tp_dealloc of
   PyByteArrayIter_Type). */
3290 bytesiter_dealloc(bytesiterobject
*it
)
/* Remove the iterator from the GC's tracked set before tearing it down. */
3292 _PyObject_GC_UNTRACK(it
);
/* XDECREF rather than DECREF: it_seq is NULL once the iterator is
   exhausted. */
3293 Py_XDECREF(it
->it_seq
);
/* Free with the GC-aware deallocator, matching PyObject_GC_New in
   bytes_iter. */
3294 PyObject_GC_Del(it
);
/* GC traversal hook for the bytearray iterator (tp_traverse of
   PyByteArrayIter_Type).  The only object reference held is it_seq. */
3298 bytesiter_traverse(bytesiterobject
*it
, visitproc visit
, void *arg
)
/* Py_VISIT uses the `visit` and `arg` parameters by name. */
3300 Py_VISIT(it
->it_seq
);
/* tp_iternext handler for the bytearray iterator.  While it_index is
   below the current length of the sequence, the byte at that position is
   turned into a Python int. */
3305 bytesiter_next(bytesiterobject
*it
)
3307 PyByteArrayObject
*seq
;
3314 assert(PyByteArray_Check(seq
));
/* The length is re-read from the sequence here rather than cached in the
   iterator. */
3316 if (it
->it_index
< PyByteArray_GET_SIZE(seq
)) {
/* Cast through unsigned char so the resulting int is never negative,
   whatever the signedness of plain char. */
3317 item
= PyInt_FromLong(
3318 (unsigned char)seq
->ob_bytes
[it
->it_index
]);
/* Handler for the iterator's "__length_hint__" method: reports how many
   items are left as a Python int. */
3330 bytesiter_length_hint(bytesiterobject
*it
)
/* Remaining items = current sequence length minus the iterator position. */
3334 len
= PyByteArray_GET_SIZE(it
->it_seq
) - it
->it_index
;
3335 return PyInt_FromSsize_t(len
);
/* Docstring for the iterator's "__length_hint__" method. */
3338 PyDoc_STRVAR(length_hint_doc
,
3339 "Private method returning an estimate of len(list(it)).");
/* Method table for the bytearray iterator; installed as tp_methods of
   PyByteArrayIter_Type.  Its only entry is "__length_hint__". */
3341 static PyMethodDef bytesiter_methods
[] = {
3342 {"__length_hint__", (PyCFunction
)bytesiter_length_hint
, METH_NOARGS
,
3344 {NULL
, NULL
} /* sentinel */
/* Type object for the bytearray iterator ("bytearray_iterator").
   Instances take part in cyclic garbage collection: the type sets
   Py_TPFLAGS_HAVE_GC and supplies a tp_traverse handler. */
3347 PyTypeObject PyByteArrayIter_Type
= {
3348 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3349 "bytearray_iterator", /* tp_name */
3350 sizeof(bytesiterobject
), /* tp_basicsize */
3351 0, /* tp_itemsize */
3353 (destructor
)bytesiter_dealloc
, /* tp_dealloc */
3359 0, /* tp_as_number */
3360 0, /* tp_as_sequence */
3361 0, /* tp_as_mapping */
3365 PyObject_GenericGetAttr
, /* tp_getattro */
3366 0, /* tp_setattro */
3367 0, /* tp_as_buffer */
3368 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
, /* tp_flags */
3370 (traverseproc
)bytesiter_traverse
, /* tp_traverse */
3372 0, /* tp_richcompare */
3373 0, /* tp_weaklistoffset */
/* iter(iterator) returns the iterator itself. */
3374 PyObject_SelfIter
, /* tp_iter */
3375 (iternextfunc
)bytesiter_next
, /* tp_iternext */
3376 bytesiter_methods
, /* tp_methods */
/* tp_iter handler of PyByteArray_Type: creates a bytearray iterator
   over `seq`. */
3381 bytes_iter(PyObject
*seq
)
3383 bytesiterobject
*it
;
/* Anything other than a bytearray (or subclass) is reported as an
   internal-call error. */
3385 if (!PyByteArray_Check(seq
)) {
3386 PyErr_BadInternalCall();
/* Allocate with the GC-aware allocator; paired with PyObject_GC_Del in
   bytesiter_dealloc. */
3389 it
= PyObject_GC_New(bytesiterobject
, &PyByteArrayIter_Type
);
3394 it
->it_seq
= (PyByteArrayObject
*)seq
;
/* Start GC tracking only after it_seq has been assigned. */
3395 _PyObject_GC_TRACK(it
);
3396 return (PyObject
*)it
;