1 /* PyBytes (bytearray) implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "structmember.h"
6 #include "bytes_methods.h"
8 static PyByteArrayObject
*nullbytes
= NULL
;
11 PyByteArray_Fini(void)
17 PyByteArray_Init(void)
19 nullbytes
= PyObject_New(PyByteArrayObject
, &PyByteArray_Type
);
20 if (nullbytes
== NULL
)
22 nullbytes
->ob_bytes
= NULL
;
23 Py_SIZE(nullbytes
) = nullbytes
->ob_alloc
= 0;
24 nullbytes
->ob_exports
= 0;
28 /* end nullbytes support */
33 _getbytevalue(PyObject
* arg
, int *value
)
37 if (PyBytes_CheckExact(arg
)) {
38 if (Py_SIZE(arg
) != 1) {
39 PyErr_SetString(PyExc_ValueError
, "string must be of size 1");
42 *value
= Py_CHARMASK(((PyBytesObject
*)arg
)->ob_sval
[0]);
45 else if (PyInt_Check(arg
) || PyLong_Check(arg
)) {
46 face_value
= PyLong_AsLong(arg
);
49 PyObject
*index
= PyNumber_Index(arg
);
51 PyErr_Format(PyExc_TypeError
,
52 "an integer or string of size 1 is required");
55 face_value
= PyLong_AsLong(index
);
59 if (face_value
< 0 || face_value
>= 256) {
60 /* this includes the OverflowError in case the long is too large */
61 PyErr_SetString(PyExc_ValueError
, "byte must be in range(0, 256)");
70 bytes_buffer_getreadbuf(PyByteArrayObject
*self
, Py_ssize_t index
, const void **ptr
)
73 PyErr_SetString(PyExc_SystemError
,
74 "accessing non-existent bytes segment");
77 *ptr
= (void *)self
->ob_bytes
;
82 bytes_buffer_getwritebuf(PyByteArrayObject
*self
, Py_ssize_t index
, const void **ptr
)
85 PyErr_SetString(PyExc_SystemError
,
86 "accessing non-existent bytes segment");
89 *ptr
= (void *)self
->ob_bytes
;
94 bytes_buffer_getsegcount(PyByteArrayObject
*self
, Py_ssize_t
*lenp
)
97 *lenp
= Py_SIZE(self
);
102 bytes_buffer_getcharbuf(PyByteArrayObject
*self
, Py_ssize_t index
, const char **ptr
)
105 PyErr_SetString(PyExc_SystemError
,
106 "accessing non-existent bytes segment");
109 *ptr
= self
->ob_bytes
;
110 return Py_SIZE(self
);
114 bytes_getbuffer(PyByteArrayObject
*obj
, Py_buffer
*view
, int flags
)
122 if (obj
->ob_bytes
== NULL
)
126 ret
= PyBuffer_FillInfo(view
, ptr
, Py_SIZE(obj
), 0, flags
);
134 bytes_releasebuffer(PyByteArrayObject
*obj
, Py_buffer
*view
)
140 _getbuffer(PyObject
*obj
, Py_buffer
*view
)
142 PyBufferProcs
*buffer
= Py_TYPE(obj
)->tp_as_buffer
;
144 if (buffer
== NULL
|| buffer
->bf_getbuffer
== NULL
)
146 PyErr_Format(PyExc_TypeError
,
147 "Type %.100s doesn't support the buffer API",
148 Py_TYPE(obj
)->tp_name
);
152 if (buffer
->bf_getbuffer(obj
, view
, PyBUF_SIMPLE
) < 0)
157 /* Direct API functions */
160 PyByteArray_FromObject(PyObject
*input
)
162 return PyObject_CallFunctionObjArgs((PyObject
*)&PyByteArray_Type
,
167 PyByteArray_FromStringAndSize(const char *bytes
, Py_ssize_t size
)
169 PyByteArrayObject
*new;
173 PyErr_SetString(PyExc_SystemError
,
174 "Negative size passed to PyByteArray_FromStringAndSize");
178 new = PyObject_New(PyByteArrayObject
, &PyByteArray_Type
);
183 new->ob_bytes
= NULL
;
188 new->ob_bytes
= PyMem_Malloc(alloc
);
189 if (new->ob_bytes
== NULL
) {
191 return PyErr_NoMemory();
194 memcpy(new->ob_bytes
, bytes
, size
);
195 new->ob_bytes
[size
] = '\0'; /* Trailing null byte */
198 new->ob_alloc
= alloc
;
201 return (PyObject
*)new;
205 PyByteArray_Size(PyObject
*self
)
207 assert(self
!= NULL
);
208 assert(PyByteArray_Check(self
));
210 return PyByteArray_GET_SIZE(self
);
214 PyByteArray_AsString(PyObject
*self
)
216 assert(self
!= NULL
);
217 assert(PyByteArray_Check(self
));
219 return PyByteArray_AS_STRING(self
);
223 PyByteArray_Resize(PyObject
*self
, Py_ssize_t size
)
226 Py_ssize_t alloc
= ((PyByteArrayObject
*)self
)->ob_alloc
;
228 assert(self
!= NULL
);
229 assert(PyByteArray_Check(self
));
232 if (size
< alloc
/ 2) {
233 /* Major downsize; resize down to exact size */
236 else if (size
< alloc
) {
237 /* Within allocated size; quick exit */
238 Py_SIZE(self
) = size
;
239 ((PyByteArrayObject
*)self
)->ob_bytes
[size
] = '\0'; /* Trailing null */
242 else if (size
<= alloc
* 1.125) {
243 /* Moderate upsize; overallocate similar to list_resize() */
244 alloc
= size
+ (size
>> 3) + (size
< 9 ? 3 : 6);
247 /* Major upsize; resize up to exact size */
251 if (((PyByteArrayObject
*)self
)->ob_exports
> 0) {
253 fprintf(stderr, "%d: %s", ((PyByteArrayObject *)self)->ob_exports,
254 ((PyByteArrayObject *)self)->ob_bytes);
256 PyErr_SetString(PyExc_BufferError
,
257 "Existing exports of data: object cannot be re-sized");
261 sval
= PyMem_Realloc(((PyByteArrayObject
*)self
)->ob_bytes
, alloc
);
267 ((PyByteArrayObject
*)self
)->ob_bytes
= sval
;
268 Py_SIZE(self
) = size
;
269 ((PyByteArrayObject
*)self
)->ob_alloc
= alloc
;
270 ((PyByteArrayObject
*)self
)->ob_bytes
[size
] = '\0'; /* Trailing null byte */
276 PyByteArray_Concat(PyObject
*a
, PyObject
*b
)
280 PyByteArrayObject
*result
= NULL
;
284 if (_getbuffer(a
, &va
) < 0 ||
285 _getbuffer(b
, &vb
) < 0) {
286 PyErr_Format(PyExc_TypeError
, "can't concat %.100s to %.100s",
287 Py_TYPE(a
)->tp_name
, Py_TYPE(b
)->tp_name
);
291 size
= va
.len
+ vb
.len
;
293 return PyErr_NoMemory();
297 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, size
);
298 if (result
!= NULL
) {
299 memcpy(result
->ob_bytes
, va
.buf
, va
.len
);
300 memcpy(result
->ob_bytes
+ va
.len
, vb
.buf
, vb
.len
);
305 PyObject_ReleaseBuffer(a
, &va
);
307 PyObject_ReleaseBuffer(b
, &vb
);
308 return (PyObject
*)result
;
311 /* Functions stuffed into the type object */
314 bytes_length(PyByteArrayObject
*self
)
316 return Py_SIZE(self
);
320 bytes_iconcat(PyByteArrayObject
*self
, PyObject
*other
)
326 if (_getbuffer(other
, &vo
) < 0) {
327 PyErr_Format(PyExc_TypeError
, "can't concat %.100s to %.100s",
328 Py_TYPE(other
)->tp_name
, Py_TYPE(self
)->tp_name
);
332 mysize
= Py_SIZE(self
);
333 size
= mysize
+ vo
.len
;
335 PyObject_ReleaseBuffer(other
, &vo
);
336 return PyErr_NoMemory();
338 if (size
< self
->ob_alloc
) {
339 Py_SIZE(self
) = size
;
340 self
->ob_bytes
[Py_SIZE(self
)] = '\0'; /* Trailing null byte */
342 else if (PyByteArray_Resize((PyObject
*)self
, size
) < 0) {
343 PyObject_ReleaseBuffer(other
, &vo
);
346 memcpy(self
->ob_bytes
+ mysize
, vo
.buf
, vo
.len
);
347 PyObject_ReleaseBuffer(other
, &vo
);
349 return (PyObject
*)self
;
353 bytes_repeat(PyByteArrayObject
*self
, Py_ssize_t count
)
355 PyByteArrayObject
*result
;
361 mysize
= Py_SIZE(self
);
362 size
= mysize
* count
;
363 if (count
!= 0 && size
/ count
!= mysize
)
364 return PyErr_NoMemory();
365 result
= (PyByteArrayObject
*)PyByteArray_FromStringAndSize(NULL
, size
);
366 if (result
!= NULL
&& size
!= 0) {
368 memset(result
->ob_bytes
, self
->ob_bytes
[0], size
);
371 for (i
= 0; i
< count
; i
++)
372 memcpy(result
->ob_bytes
+ i
*mysize
, self
->ob_bytes
, mysize
);
375 return (PyObject
*)result
;
379 bytes_irepeat(PyByteArrayObject
*self
, Py_ssize_t count
)
386 mysize
= Py_SIZE(self
);
387 size
= mysize
* count
;
388 if (count
!= 0 && size
/ count
!= mysize
)
389 return PyErr_NoMemory();
390 if (size
< self
->ob_alloc
) {
391 Py_SIZE(self
) = size
;
392 self
->ob_bytes
[Py_SIZE(self
)] = '\0'; /* Trailing null byte */
394 else if (PyByteArray_Resize((PyObject
*)self
, size
) < 0)
398 memset(self
->ob_bytes
, self
->ob_bytes
[0], size
);
401 for (i
= 1; i
< count
; i
++)
402 memcpy(self
->ob_bytes
+ i
*mysize
, self
->ob_bytes
, mysize
);
406 return (PyObject
*)self
;
410 bytes_getitem(PyByteArrayObject
*self
, Py_ssize_t i
)
414 if (i
< 0 || i
>= Py_SIZE(self
)) {
415 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
418 return PyInt_FromLong((unsigned char)(self
->ob_bytes
[i
]));
422 bytes_subscript(PyByteArrayObject
*self
, PyObject
*index
)
424 if (PyIndex_Check(index
)) {
425 Py_ssize_t i
= PyNumber_AsSsize_t(index
, PyExc_IndexError
);
427 if (i
== -1 && PyErr_Occurred())
431 i
+= PyByteArray_GET_SIZE(self
);
433 if (i
< 0 || i
>= Py_SIZE(self
)) {
434 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
437 return PyInt_FromLong((unsigned char)(self
->ob_bytes
[i
]));
439 else if (PySlice_Check(index
)) {
440 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
441 if (PySlice_GetIndicesEx((PySliceObject
*)index
,
442 PyByteArray_GET_SIZE(self
),
443 &start
, &stop
, &step
, &slicelength
) < 0) {
447 if (slicelength
<= 0)
448 return PyByteArray_FromStringAndSize("", 0);
449 else if (step
== 1) {
450 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ start
,
454 char *source_buf
= PyByteArray_AS_STRING(self
);
455 char *result_buf
= (char *)PyMem_Malloc(slicelength
);
458 if (result_buf
== NULL
)
459 return PyErr_NoMemory();
461 for (cur
= start
, i
= 0; i
< slicelength
;
463 result_buf
[i
] = source_buf
[cur
];
465 result
= PyByteArray_FromStringAndSize(result_buf
, slicelength
);
466 PyMem_Free(result_buf
);
471 PyErr_SetString(PyExc_TypeError
, "bytearray indices must be integers");
477 bytes_setslice(PyByteArrayObject
*self
, Py_ssize_t lo
, Py_ssize_t hi
,
480 Py_ssize_t avail
, needed
;
486 if (values
== (PyObject
*)self
) {
487 /* Make a copy and call this function recursively */
489 values
= PyByteArray_FromObject(values
);
492 err
= bytes_setslice(self
, lo
, hi
, values
);
496 if (values
== NULL
) {
502 if (_getbuffer(values
, &vbytes
) < 0) {
503 PyErr_Format(PyExc_TypeError
,
504 "can't set bytearray slice from %.100s",
505 Py_TYPE(values
)->tp_name
);
516 if (hi
> Py_SIZE(self
))
523 if (avail
!= needed
) {
524 if (avail
> needed
) {
527 | |<----avail----->|<-----tomove------>|
528 | |<-needed->|<-----tomove------>|
531 memmove(self
->ob_bytes
+ lo
+ needed
, self
->ob_bytes
+ hi
,
534 /* XXX(nnorwitz): need to verify this can't overflow! */
535 if (PyByteArray_Resize((PyObject
*)self
,
536 Py_SIZE(self
) + needed
- avail
) < 0) {
540 if (avail
< needed
) {
543 | |<-avail->|<-----tomove------>|
544 | |<----needed---->|<-----tomove------>|
547 memmove(self
->ob_bytes
+ lo
+ needed
, self
->ob_bytes
+ hi
,
548 Py_SIZE(self
) - lo
- needed
);
553 memcpy(self
->ob_bytes
+ lo
, bytes
, needed
);
557 if (vbytes
.len
!= -1)
558 PyObject_ReleaseBuffer(values
, &vbytes
);
563 bytes_setitem(PyByteArrayObject
*self
, Py_ssize_t i
, PyObject
*value
)
570 if (i
< 0 || i
>= Py_SIZE(self
)) {
571 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
576 return bytes_setslice(self
, i
, i
+1, NULL
);
578 if (!_getbytevalue(value
, &ival
))
581 self
->ob_bytes
[i
] = ival
;
586 bytes_ass_subscript(PyByteArrayObject
*self
, PyObject
*index
, PyObject
*values
)
588 Py_ssize_t start
, stop
, step
, slicelen
, needed
;
591 if (PyIndex_Check(index
)) {
592 Py_ssize_t i
= PyNumber_AsSsize_t(index
, PyExc_IndexError
);
594 if (i
== -1 && PyErr_Occurred())
598 i
+= PyByteArray_GET_SIZE(self
);
600 if (i
< 0 || i
>= Py_SIZE(self
)) {
601 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
605 if (values
== NULL
) {
606 /* Fall through to slice assignment */
614 if (!_getbytevalue(values
, &ival
))
616 self
->ob_bytes
[i
] = (char)ival
;
620 else if (PySlice_Check(index
)) {
621 if (PySlice_GetIndicesEx((PySliceObject
*)index
,
622 PyByteArray_GET_SIZE(self
),
623 &start
, &stop
, &step
, &slicelen
) < 0) {
628 PyErr_SetString(PyExc_TypeError
, "bytearray indices must be integer");
632 if (values
== NULL
) {
636 else if (values
== (PyObject
*)self
|| !PyByteArray_Check(values
)) {
637 /* Make a copy an call this function recursively */
639 values
= PyByteArray_FromObject(values
);
642 err
= bytes_ass_subscript(self
, index
, values
);
647 assert(PyByteArray_Check(values
));
648 bytes
= ((PyByteArrayObject
*)values
)->ob_bytes
;
649 needed
= Py_SIZE(values
);
651 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
652 if ((step
< 0 && start
< stop
) ||
653 (step
> 0 && start
> stop
))
656 if (slicelen
!= needed
) {
657 if (slicelen
> needed
) {
659 0 start stop old_size
660 | |<---slicelen--->|<-----tomove------>|
661 | |<-needed->|<-----tomove------>|
664 memmove(self
->ob_bytes
+ start
+ needed
, self
->ob_bytes
+ stop
,
665 Py_SIZE(self
) - stop
);
667 if (PyByteArray_Resize((PyObject
*)self
,
668 Py_SIZE(self
) + needed
- slicelen
) < 0)
670 if (slicelen
< needed
) {
673 | |<-avail->|<-----tomove------>|
674 | |<----needed---->|<-----tomove------>|
677 memmove(self
->ob_bytes
+ start
+ needed
, self
->ob_bytes
+ stop
,
678 Py_SIZE(self
) - start
- needed
);
683 memcpy(self
->ob_bytes
+ start
, bytes
, needed
);
694 start
= stop
+ step
* (slicelen
- 1) - 1;
697 for (cur
= start
, i
= 0;
698 i
< slicelen
; cur
+= step
, i
++) {
699 Py_ssize_t lim
= step
- 1;
701 if (cur
+ step
>= PyByteArray_GET_SIZE(self
))
702 lim
= PyByteArray_GET_SIZE(self
) - cur
- 1;
704 memmove(self
->ob_bytes
+ cur
- i
,
705 self
->ob_bytes
+ cur
+ 1, lim
);
707 /* Move the tail of the bytes, in one chunk */
708 cur
= start
+ slicelen
*step
;
709 if (cur
< PyByteArray_GET_SIZE(self
)) {
710 memmove(self
->ob_bytes
+ cur
- slicelen
,
711 self
->ob_bytes
+ cur
,
712 PyByteArray_GET_SIZE(self
) - cur
);
714 if (PyByteArray_Resize((PyObject
*)self
,
715 PyByteArray_GET_SIZE(self
) - slicelen
) < 0)
724 if (needed
!= slicelen
) {
725 PyErr_Format(PyExc_ValueError
,
726 "attempt to assign bytes of size %zd "
727 "to extended slice of size %zd",
731 for (cur
= start
, i
= 0; i
< slicelen
; cur
+= step
, i
++)
732 self
->ob_bytes
[cur
] = bytes
[i
];
739 bytes_init(PyByteArrayObject
*self
, PyObject
*args
, PyObject
*kwds
)
741 static char *kwlist
[] = {"source", "encoding", "errors", 0};
742 PyObject
*arg
= NULL
;
743 const char *encoding
= NULL
;
744 const char *errors
= NULL
;
747 PyObject
*(*iternext
)(PyObject
*);
749 if (Py_SIZE(self
) != 0) {
750 /* Empty previous contents (yes, do this first of all!) */
751 if (PyByteArray_Resize((PyObject
*)self
, 0) < 0)
755 /* Parse arguments */
756 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|Oss:bytearray", kwlist
,
757 &arg
, &encoding
, &errors
))
760 /* Make a quick exit if no first argument */
762 if (encoding
!= NULL
|| errors
!= NULL
) {
763 PyErr_SetString(PyExc_TypeError
,
764 "encoding or errors without sequence argument");
770 if (PyBytes_Check(arg
)) {
771 PyObject
*new, *encoded
;
772 if (encoding
!= NULL
) {
773 encoded
= PyCodec_Encode(arg
, encoding
, errors
);
776 assert(PyBytes_Check(encoded
));
782 new = bytes_iconcat(self
, arg
);
790 if (PyUnicode_Check(arg
)) {
791 /* Encode via the codec registry */
792 PyObject
*encoded
, *new;
793 if (encoding
== NULL
) {
794 PyErr_SetString(PyExc_TypeError
,
795 "unicode argument without an encoding");
798 encoded
= PyCodec_Encode(arg
, encoding
, errors
);
801 assert(PyBytes_Check(encoded
));
802 new = bytes_iconcat(self
, encoded
);
810 /* If it's not unicode, there can't be encoding or errors */
811 if (encoding
!= NULL
|| errors
!= NULL
) {
812 PyErr_SetString(PyExc_TypeError
,
813 "encoding or errors without a string argument");
818 count
= PyNumber_AsSsize_t(arg
, PyExc_ValueError
);
819 if (count
== -1 && PyErr_Occurred())
823 PyErr_SetString(PyExc_ValueError
, "negative count");
827 if (PyByteArray_Resize((PyObject
*)self
, count
))
829 memset(self
->ob_bytes
, 0, count
);
834 /* Use the buffer API */
835 if (PyObject_CheckBuffer(arg
)) {
838 if (PyObject_GetBuffer(arg
, &view
, PyBUF_FULL_RO
) < 0)
841 if (PyByteArray_Resize((PyObject
*)self
, size
) < 0) goto fail
;
842 if (PyBuffer_ToContiguous(self
->ob_bytes
, &view
, size
, 'C') < 0)
844 PyObject_ReleaseBuffer(arg
, &view
);
847 PyObject_ReleaseBuffer(arg
, &view
);
851 /* XXX Optimize this if the arguments is a list, tuple */
853 /* Get the iterator */
854 it
= PyObject_GetIter(arg
);
857 iternext
= *Py_TYPE(it
)->tp_iternext
;
859 /* Run the iterator to exhaustion */
864 /* Get the next item */
867 if (PyErr_Occurred()) {
868 if (!PyErr_ExceptionMatches(PyExc_StopIteration
))
875 /* Interpret it as an int (__index__) */
876 rc
= _getbytevalue(item
, &value
);
881 /* Append the byte */
882 if (Py_SIZE(self
) < self
->ob_alloc
)
884 else if (PyByteArray_Resize((PyObject
*)self
, Py_SIZE(self
)+1) < 0)
886 self
->ob_bytes
[Py_SIZE(self
)-1] = value
;
889 /* Clean up and return success */
894 /* Error handling when it != NULL */
899 /* Mostly copied from string_repr, but without the
900 "smart quote" functionality. */
902 bytes_repr(PyByteArrayObject
*self
)
904 static const char *hexdigits
= "0123456789abcdef";
905 const char *quote_prefix
= "bytearray(b";
906 const char *quote_postfix
= ")";
907 Py_ssize_t length
= Py_SIZE(self
);
908 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
909 size_t newsize
= 14 + 4 * length
;
911 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 - 3 != length
) {
912 PyErr_SetString(PyExc_OverflowError
,
913 "bytearray object is too large to make repr");
916 v
= PyUnicode_FromUnicode(NULL
, newsize
);
921 register Py_ssize_t i
;
922 register Py_UNICODE c
;
923 register Py_UNICODE
*p
;
926 /* Figure out which quote to use; single is preferred */
930 start
= PyByteArray_AS_STRING(self
);
931 for (test
= start
; test
< start
+length
; ++test
) {
933 quote
= '\''; /* back to single */
936 else if (*test
== '\'')
943 p
= PyUnicode_AS_UNICODE(v
);
944 while (*quote_prefix
)
945 *p
++ = *quote_prefix
++;
948 for (i
= 0; i
< length
; i
++) {
949 /* There's at least enough room for a hex escape
950 and a closing quote. */
951 assert(newsize
- (p
- PyUnicode_AS_UNICODE(v
)) >= 5);
952 c
= self
->ob_bytes
[i
];
953 if (c
== '\'' || c
== '\\')
954 *p
++ = '\\', *p
++ = c
;
956 *p
++ = '\\', *p
++ = 't';
958 *p
++ = '\\', *p
++ = 'n';
960 *p
++ = '\\', *p
++ = 'r';
962 *p
++ = '\\', *p
++ = 'x', *p
++ = '0', *p
++ = '0';
963 else if (c
< ' ' || c
>= 0x7f) {
966 *p
++ = hexdigits
[(c
& 0xf0) >> 4];
967 *p
++ = hexdigits
[c
& 0xf];
972 assert(newsize
- (p
- PyUnicode_AS_UNICODE(v
)) >= 1);
974 while (*quote_postfix
) {
975 *p
++ = *quote_postfix
++;
978 if (PyUnicode_Resize(&v
, (p
- PyUnicode_AS_UNICODE(v
)))) {
987 bytes_str(PyObject
*op
)
990 if (Py_BytesWarningFlag
) {
991 if (PyErr_WarnEx(PyExc_BytesWarning
,
992 "str() on a bytearray instance", 1))
995 return bytes_repr((PyByteArrayObject
*)op
);
997 return PyBytes_FromStringAndSize(((PyByteArrayObject
*)op
)->ob_bytes
, Py_SIZE(op
));
1001 bytes_richcompare(PyObject
*self
, PyObject
*other
, int op
)
1003 Py_ssize_t self_size
, other_size
;
1004 Py_buffer self_bytes
, other_bytes
;
1009 /* Bytes can be compared to anything that supports the (binary)
1010 buffer API. Except that a comparison with Unicode is always an
1011 error, even if the comparison is for equality. */
1012 if (PyObject_IsInstance(self
, (PyObject
*)&PyUnicode_Type
) ||
1013 PyObject_IsInstance(other
, (PyObject
*)&PyUnicode_Type
)) {
1014 if (Py_BytesWarningFlag
&& op
== Py_EQ
) {
1015 if (PyErr_WarnEx(PyExc_BytesWarning
,
1016 "Comparsion between bytearray and string", 1))
1020 Py_INCREF(Py_NotImplemented
);
1021 return Py_NotImplemented
;
1024 self_size
= _getbuffer(self
, &self_bytes
);
1025 if (self_size
< 0) {
1027 Py_INCREF(Py_NotImplemented
);
1028 return Py_NotImplemented
;
1031 other_size
= _getbuffer(other
, &other_bytes
);
1032 if (other_size
< 0) {
1034 PyObject_ReleaseBuffer(self
, &self_bytes
);
1035 Py_INCREF(Py_NotImplemented
);
1036 return Py_NotImplemented
;
1039 if (self_size
!= other_size
&& (op
== Py_EQ
|| op
== Py_NE
)) {
1040 /* Shortcut: if the lengths differ, the objects differ */
1041 cmp
= (op
== Py_NE
);
1044 minsize
= self_size
;
1045 if (other_size
< minsize
)
1046 minsize
= other_size
;
1048 cmp
= memcmp(self_bytes
.buf
, other_bytes
.buf
, minsize
);
1049 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1052 if (self_size
< other_size
)
1054 else if (self_size
> other_size
)
1059 case Py_LT
: cmp
= cmp
< 0; break;
1060 case Py_LE
: cmp
= cmp
<= 0; break;
1061 case Py_EQ
: cmp
= cmp
== 0; break;
1062 case Py_NE
: cmp
= cmp
!= 0; break;
1063 case Py_GT
: cmp
= cmp
> 0; break;
1064 case Py_GE
: cmp
= cmp
>= 0; break;
1068 res
= cmp
? Py_True
: Py_False
;
1069 PyObject_ReleaseBuffer(self
, &self_bytes
);
1070 PyObject_ReleaseBuffer(other
, &other_bytes
);
1076 bytes_dealloc(PyByteArrayObject
*self
)
1078 if (self
->ob_bytes
!= 0) {
1079 PyMem_Free(self
->ob_bytes
);
1081 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1085 /* -------------------------------------------------------------------- */
1088 #define STRINGLIB_CHAR char
1089 #define STRINGLIB_CMP memcmp
1090 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1091 #define STRINGLIB_STR PyByteArray_AS_STRING
1092 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1093 #define STRINGLIB_EMPTY nullbytes
1094 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1095 #define STRINGLIB_MUTABLE 1
1097 #include "stringlib/fastsearch.h"
1098 #include "stringlib/count.h"
1099 #include "stringlib/find.h"
1100 #include "stringlib/partition.h"
1101 #include "stringlib/ctype.h"
1102 #include "stringlib/transmogrify.h"
1105 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1106 were copied from the old char* style string object. */
1108 Py_LOCAL_INLINE(void)
1109 _adjust_indices(Py_ssize_t
*start
, Py_ssize_t
*end
, Py_ssize_t len
)
1124 Py_LOCAL_INLINE(Py_ssize_t
)
1125 bytes_find_internal(PyByteArrayObject
*self
, PyObject
*args
, int dir
)
1129 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1132 if (!PyArg_ParseTuple(args
, "O|O&O&:find/rfind/index/rindex", &subobj
,
1133 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1135 if (_getbuffer(subobj
, &subbuf
) < 0)
1138 res
= stringlib_find_slice(
1139 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
1140 subbuf
.buf
, subbuf
.len
, start
, end
);
1142 res
= stringlib_rfind_slice(
1143 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
1144 subbuf
.buf
, subbuf
.len
, start
, end
);
1145 PyObject_ReleaseBuffer(subobj
, &subbuf
);
1149 PyDoc_STRVAR(find__doc__
,
1150 "B.find(sub [,start [,end]]) -> int\n\
1152 Return the lowest index in B where subsection sub is found,\n\
1153 such that sub is contained within s[start,end]. Optional\n\
1154 arguments start and end are interpreted as in slice notation.\n\
1156 Return -1 on failure.");
1159 bytes_find(PyByteArrayObject
*self
, PyObject
*args
)
1161 Py_ssize_t result
= bytes_find_internal(self
, args
, +1);
1164 return PyInt_FromSsize_t(result
);
1167 PyDoc_STRVAR(count__doc__
,
1168 "B.count(sub [,start [,end]]) -> int\n\
1170 Return the number of non-overlapping occurrences of subsection sub in\n\
1171 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1172 as in slice notation.");
1175 bytes_count(PyByteArrayObject
*self
, PyObject
*args
)
1178 const char *str
= PyByteArray_AS_STRING(self
);
1179 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
1181 PyObject
*count_obj
;
1183 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
1184 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1187 if (_getbuffer(sub_obj
, &vsub
) < 0)
1190 _adjust_indices(&start
, &end
, PyByteArray_GET_SIZE(self
));
1192 count_obj
= PyInt_FromSsize_t(
1193 stringlib_count(str
+ start
, end
- start
, vsub
.buf
, vsub
.len
)
1195 PyObject_ReleaseBuffer(sub_obj
, &vsub
);
1200 PyDoc_STRVAR(index__doc__
,
1201 "B.index(sub [,start [,end]]) -> int\n\
1203 Like B.find() but raise ValueError when the subsection is not found.");
1206 bytes_index(PyByteArrayObject
*self
, PyObject
*args
)
1208 Py_ssize_t result
= bytes_find_internal(self
, args
, +1);
1212 PyErr_SetString(PyExc_ValueError
,
1213 "subsection not found");
1216 return PyInt_FromSsize_t(result
);
1220 PyDoc_STRVAR(rfind__doc__
,
1221 "B.rfind(sub [,start [,end]]) -> int\n\
1223 Return the highest index in B where subsection sub is found,\n\
1224 such that sub is contained within s[start,end]. Optional\n\
1225 arguments start and end are interpreted as in slice notation.\n\
1227 Return -1 on failure.");
1230 bytes_rfind(PyByteArrayObject
*self
, PyObject
*args
)
1232 Py_ssize_t result
= bytes_find_internal(self
, args
, -1);
1235 return PyInt_FromSsize_t(result
);
1239 PyDoc_STRVAR(rindex__doc__
,
1240 "B.rindex(sub [,start [,end]]) -> int\n\
1242 Like B.rfind() but raise ValueError when the subsection is not found.");
1245 bytes_rindex(PyByteArrayObject
*self
, PyObject
*args
)
1247 Py_ssize_t result
= bytes_find_internal(self
, args
, -1);
1251 PyErr_SetString(PyExc_ValueError
,
1252 "subsection not found");
1255 return PyInt_FromSsize_t(result
);
1260 bytes_contains(PyObject
*self
, PyObject
*arg
)
1262 Py_ssize_t ival
= PyNumber_AsSsize_t(arg
, PyExc_ValueError
);
1263 if (ival
== -1 && PyErr_Occurred()) {
1267 if (_getbuffer(arg
, &varg
) < 0)
1269 pos
= stringlib_find(PyByteArray_AS_STRING(self
), Py_SIZE(self
),
1270 varg
.buf
, varg
.len
, 0);
1271 PyObject_ReleaseBuffer(arg
, &varg
);
1274 if (ival
< 0 || ival
>= 256) {
1275 PyErr_SetString(PyExc_ValueError
, "byte must be in range(0, 256)");
1279 return memchr(PyByteArray_AS_STRING(self
), ival
, Py_SIZE(self
)) != NULL
;
1283 /* Matches the end (direction >= 0) or start (direction < 0) of self
1284 * against substr, using the start and end arguments. Returns
1285 * -1 on error, 0 if not found and 1 if found.
1288 _bytes_tailmatch(PyByteArrayObject
*self
, PyObject
*substr
, Py_ssize_t start
,
1289 Py_ssize_t end
, int direction
)
1291 Py_ssize_t len
= PyByteArray_GET_SIZE(self
);
1296 str
= PyByteArray_AS_STRING(self
);
1298 if (_getbuffer(substr
, &vsubstr
) < 0)
1301 _adjust_indices(&start
, &end
, len
);
1303 if (direction
< 0) {
1305 if (start
+vsubstr
.len
> len
) {
1310 if (end
-start
< vsubstr
.len
|| start
> len
) {
1314 if (end
-vsubstr
.len
> start
)
1315 start
= end
- vsubstr
.len
;
1317 if (end
-start
>= vsubstr
.len
)
1318 rv
= ! memcmp(str
+start
, vsubstr
.buf
, vsubstr
.len
);
1321 PyObject_ReleaseBuffer(substr
, &vsubstr
);
1326 PyDoc_STRVAR(startswith__doc__
,
1327 "B.startswith(prefix [,start [,end]]) -> bool\n\
1329 Return True if B starts with the specified prefix, False otherwise.\n\
1330 With optional start, test B beginning at that position.\n\
1331 With optional end, stop comparing B at that position.\n\
1332 prefix can also be a tuple of strings to try.");
1335 bytes_startswith(PyByteArrayObject
*self
, PyObject
*args
)
1337 Py_ssize_t start
= 0;
1338 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1342 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
1343 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1345 if (PyTuple_Check(subobj
)) {
1347 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
1348 result
= _bytes_tailmatch(self
,
1349 PyTuple_GET_ITEM(subobj
, i
),
1359 result
= _bytes_tailmatch(self
, subobj
, start
, end
, -1);
1363 return PyBool_FromLong(result
);
1366 PyDoc_STRVAR(endswith__doc__
,
1367 "B.endswith(suffix [,start [,end]]) -> bool\n\
1369 Return True if B ends with the specified suffix, False otherwise.\n\
1370 With optional start, test B beginning at that position.\n\
1371 With optional end, stop comparing B at that position.\n\
1372 suffix can also be a tuple of strings to try.");
1375 bytes_endswith(PyByteArrayObject
*self
, PyObject
*args
)
1377 Py_ssize_t start
= 0;
1378 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1382 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
1383 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1385 if (PyTuple_Check(subobj
)) {
1387 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
1388 result
= _bytes_tailmatch(self
,
1389 PyTuple_GET_ITEM(subobj
, i
),
1399 result
= _bytes_tailmatch(self
, subobj
, start
, end
, +1);
1403 return PyBool_FromLong(result
);
1407 PyDoc_STRVAR(translate__doc__
,
1408 "B.translate(table[, deletechars]) -> bytearray\n\
1410 Return a copy of B, where all characters occurring in the\n\
1411 optional argument deletechars are removed, and the remaining\n\
1412 characters have been mapped through the given translation\n\
1413 table, which must be a bytes object of length 256.");
1416 bytes_translate(PyByteArrayObject
*self
, PyObject
*args
)
1418 register char *input
, *output
;
1419 register const char *table
;
1420 register Py_ssize_t i
, c
, changed
= 0;
1421 PyObject
*input_obj
= (PyObject
*)self
;
1422 const char *output_start
;
1425 int trans_table
[256];
1426 PyObject
*tableobj
, *delobj
= NULL
;
1427 Py_buffer vtable
, vdel
;
1429 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
1430 &tableobj
, &delobj
))
1433 if (_getbuffer(tableobj
, &vtable
) < 0)
1436 if (vtable
.len
!= 256) {
1437 PyErr_SetString(PyExc_ValueError
,
1438 "translation table must be 256 characters long");
1443 if (delobj
!= NULL
) {
1444 if (_getbuffer(delobj
, &vdel
) < 0) {
1454 table
= (const char *)vtable
.buf
;
1455 inlen
= PyByteArray_GET_SIZE(input_obj
);
1456 result
= PyByteArray_FromStringAndSize((char *)NULL
, inlen
);
1459 output_start
= output
= PyByteArray_AsString(result
);
1460 input
= PyByteArray_AS_STRING(input_obj
);
1462 if (vdel
.len
== 0) {
1463 /* If no deletions are required, use faster code */
1464 for (i
= inlen
; --i
>= 0; ) {
1465 c
= Py_CHARMASK(*input
++);
1466 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
1469 if (changed
|| !PyByteArray_CheckExact(input_obj
))
1472 Py_INCREF(input_obj
);
1477 for (i
= 0; i
< 256; i
++)
1478 trans_table
[i
] = Py_CHARMASK(table
[i
]);
1480 for (i
= 0; i
< vdel
.len
; i
++)
1481 trans_table
[(int) Py_CHARMASK( ((unsigned char*)vdel
.buf
)[i
] )] = -1;
1483 for (i
= inlen
; --i
>= 0; ) {
1484 c
= Py_CHARMASK(*input
++);
1485 if (trans_table
[c
] != -1)
1486 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
1490 if (!changed
&& PyByteArray_CheckExact(input_obj
)) {
1492 Py_INCREF(input_obj
);
1496 /* Fix the size of the resulting string */
1498 PyByteArray_Resize(result
, output
- output_start
);
1501 PyObject_ReleaseBuffer(tableobj
, &vtable
);
1503 PyObject_ReleaseBuffer(delobj
, &vdel
);
1511 /* find and count characters and substrings */
1513 #define findchar(target, target_len, c) \
1514 ((char *)memchr((const void *)(target), c, target_len))
1516 /* Don't call if length < 2 */
1517 #define Py_STRING_MATCH(target, offset, pattern, length) \
1518 (target[offset] == pattern[0] && \
1519 target[offset+length-1] == pattern[length-1] && \
1520 !memcmp(target+offset+1, pattern+1, length-2) )
1523 /* Bytes ops must return a string. */
1524 /* If the object is subclass of bytes, create a copy */
1525 Py_LOCAL(PyByteArrayObject
*)
1526 return_self(PyByteArrayObject
*self
)
1528 if (PyByteArray_CheckExact(self
)) {
1530 return (PyByteArrayObject
*)self
;
1532 return (PyByteArrayObject
*)PyByteArray_FromStringAndSize(
1533 PyByteArray_AS_STRING(self
),
1534 PyByteArray_GET_SIZE(self
));
1537 Py_LOCAL_INLINE(Py_ssize_t
)
1538 countchar(const char *target
, Py_ssize_t target_len
, char c
, Py_ssize_t maxcount
)
1541 const char *start
=target
;
1542 const char *end
=target
+target_len
;
1544 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
1546 if (count
>= maxcount
)
1553 Py_LOCAL(Py_ssize_t
)
1554 findstring(const char *target
, Py_ssize_t target_len
,
1555 const char *pattern
, Py_ssize_t pattern_len
,
1561 start
+= target_len
;
1565 if (end
> target_len
) {
1567 } else if (end
< 0) {
1573 /* zero-length substrings always match at the first attempt */
1574 if (pattern_len
== 0)
1575 return (direction
> 0) ? start
: end
;
1579 if (direction
< 0) {
1580 for (; end
>= start
; end
--)
1581 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
))
1584 for (; start
<= end
; start
++)
1585 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
))
1591 Py_LOCAL_INLINE(Py_ssize_t
)
1592 countstring(const char *target
, Py_ssize_t target_len
,
1593 const char *pattern
, Py_ssize_t pattern_len
,
1596 int direction
, Py_ssize_t maxcount
)
1601 start
+= target_len
;
1605 if (end
> target_len
) {
1607 } else if (end
< 0) {
1613 /* zero-length substrings match everywhere */
1614 if (pattern_len
== 0 || maxcount
== 0) {
1615 if (target_len
+1 < maxcount
)
1616 return target_len
+1;
1621 if (direction
< 0) {
1622 for (; (end
>= start
); end
--)
1623 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
)) {
1625 if (--maxcount
<= 0) break;
1626 end
-= pattern_len
-1;
1629 for (; (start
<= end
); start
++)
1630 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
)) {
1632 if (--maxcount
<= 0)
1634 start
+= pattern_len
-1;
1641 /* Algorithms for different cases of string replacement */
1643 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1644 Py_LOCAL(PyByteArrayObject
*)
1645 replace_interleave(PyByteArrayObject
*self
,
1646 const char *to_s
, Py_ssize_t to_len
,
1647 Py_ssize_t maxcount
)
1649 char *self_s
, *result_s
;
1650 Py_ssize_t self_len
, result_len
;
1651 Py_ssize_t count
, i
, product
;
1652 PyByteArrayObject
*result
;
1654 self_len
= PyByteArray_GET_SIZE(self
);
1656 /* 1 at the end plus 1 after every character */
1658 if (maxcount
< count
)
1661 /* Check for overflow */
1662 /* result_len = count * to_len + self_len; */
1663 product
= count
* to_len
;
1664 if (product
/ to_len
!= count
) {
1665 PyErr_SetString(PyExc_OverflowError
,
1666 "replace string is too long");
1669 result_len
= product
+ self_len
;
1670 if (result_len
< 0) {
1671 PyErr_SetString(PyExc_OverflowError
,
1672 "replace string is too long");
1676 if (! (result
= (PyByteArrayObject
*)
1677 PyByteArray_FromStringAndSize(NULL
, result_len
)) )
1680 self_s
= PyByteArray_AS_STRING(self
);
1681 result_s
= PyByteArray_AS_STRING(result
);
1683 /* TODO: special case single character, which doesn't need memcpy */
1685 /* Lay the first one down (guaranteed this will occur) */
1686 Py_MEMCPY(result_s
, to_s
, to_len
);
1690 for (i
=0; i
<count
; i
++) {
1691 *result_s
++ = *self_s
++;
1692 Py_MEMCPY(result_s
, to_s
, to_len
);
1696 /* Copy the rest of the original string */
1697 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
1702 /* Special case for deleting a single character */
1703 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1704 Py_LOCAL(PyByteArrayObject
*)
1705 replace_delete_single_character(PyByteArrayObject
*self
,
1706 char from_c
, Py_ssize_t maxcount
)
1708 char *self_s
, *result_s
;
1709 char *start
, *next
, *end
;
1710 Py_ssize_t self_len
, result_len
;
1712 PyByteArrayObject
*result
;
1714 self_len
= PyByteArray_GET_SIZE(self
);
1715 self_s
= PyByteArray_AS_STRING(self
);
1717 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
1719 return return_self(self
);
1722 result_len
= self_len
- count
; /* from_len == 1 */
1723 assert(result_len
>=0);
1725 if ( (result
= (PyByteArrayObject
*)
1726 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1728 result_s
= PyByteArray_AS_STRING(result
);
1731 end
= self_s
+ self_len
;
1732 while (count
-- > 0) {
1733 next
= findchar(start
, end
-start
, from_c
);
1736 Py_MEMCPY(result_s
, start
, next
-start
);
1737 result_s
+= (next
-start
);
1740 Py_MEMCPY(result_s
, start
, end
-start
);
1745 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1747 Py_LOCAL(PyByteArrayObject
*)
1748 replace_delete_substring(PyByteArrayObject
*self
,
1749 const char *from_s
, Py_ssize_t from_len
,
1750 Py_ssize_t maxcount
)
1752 char *self_s
, *result_s
;
1753 char *start
, *next
, *end
;
1754 Py_ssize_t self_len
, result_len
;
1755 Py_ssize_t count
, offset
;
1756 PyByteArrayObject
*result
;
1758 self_len
= PyByteArray_GET_SIZE(self
);
1759 self_s
= PyByteArray_AS_STRING(self
);
1761 count
= countstring(self_s
, self_len
,
1768 return return_self(self
);
1771 result_len
= self_len
- (count
* from_len
);
1772 assert (result_len
>=0);
1774 if ( (result
= (PyByteArrayObject
*)
1775 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1778 result_s
= PyByteArray_AS_STRING(result
);
1781 end
= self_s
+ self_len
;
1782 while (count
-- > 0) {
1783 offset
= findstring(start
, end
-start
,
1785 0, end
-start
, FORWARD
);
1788 next
= start
+ offset
;
1790 Py_MEMCPY(result_s
, start
, next
-start
);
1792 result_s
+= (next
-start
);
1793 start
= next
+from_len
;
1795 Py_MEMCPY(result_s
, start
, end
-start
);
1799 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1800 Py_LOCAL(PyByteArrayObject
*)
1801 replace_single_character_in_place(PyByteArrayObject
*self
,
1802 char from_c
, char to_c
,
1803 Py_ssize_t maxcount
)
1805 char *self_s
, *result_s
, *start
, *end
, *next
;
1806 Py_ssize_t self_len
;
1807 PyByteArrayObject
*result
;
1809 /* The result string will be the same size */
1810 self_s
= PyByteArray_AS_STRING(self
);
1811 self_len
= PyByteArray_GET_SIZE(self
);
1813 next
= findchar(self_s
, self_len
, from_c
);
1816 /* No matches; return the original bytes */
1817 return return_self(self
);
1820 /* Need to make a new bytes */
1821 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, self_len
);
1824 result_s
= PyByteArray_AS_STRING(result
);
1825 Py_MEMCPY(result_s
, self_s
, self_len
);
1827 /* change everything in-place, starting with this one */
1828 start
= result_s
+ (next
-self_s
);
1831 end
= result_s
+ self_len
;
1833 while (--maxcount
> 0) {
1834 next
= findchar(start
, end
-start
, from_c
);
1844 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1845 Py_LOCAL(PyByteArrayObject
*)
1846 replace_substring_in_place(PyByteArrayObject
*self
,
1847 const char *from_s
, Py_ssize_t from_len
,
1848 const char *to_s
, Py_ssize_t to_len
,
1849 Py_ssize_t maxcount
)
1851 char *result_s
, *start
, *end
;
1853 Py_ssize_t self_len
, offset
;
1854 PyByteArrayObject
*result
;
1856 /* The result bytes will be the same size */
1858 self_s
= PyByteArray_AS_STRING(self
);
1859 self_len
= PyByteArray_GET_SIZE(self
);
1861 offset
= findstring(self_s
, self_len
,
1863 0, self_len
, FORWARD
);
1865 /* No matches; return the original bytes */
1866 return return_self(self
);
1869 /* Need to make a new bytes */
1870 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, self_len
);
1873 result_s
= PyByteArray_AS_STRING(result
);
1874 Py_MEMCPY(result_s
, self_s
, self_len
);
1876 /* change everything in-place, starting with this one */
1877 start
= result_s
+ offset
;
1878 Py_MEMCPY(start
, to_s
, from_len
);
1880 end
= result_s
+ self_len
;
1882 while ( --maxcount
> 0) {
1883 offset
= findstring(start
, end
-start
,
1885 0, end
-start
, FORWARD
);
1888 Py_MEMCPY(start
+offset
, to_s
, from_len
);
1889 start
+= offset
+from_len
;
1895 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1896 Py_LOCAL(PyByteArrayObject
*)
1897 replace_single_character(PyByteArrayObject
*self
,
1899 const char *to_s
, Py_ssize_t to_len
,
1900 Py_ssize_t maxcount
)
1902 char *self_s
, *result_s
;
1903 char *start
, *next
, *end
;
1904 Py_ssize_t self_len
, result_len
;
1905 Py_ssize_t count
, product
;
1906 PyByteArrayObject
*result
;
1908 self_s
= PyByteArray_AS_STRING(self
);
1909 self_len
= PyByteArray_GET_SIZE(self
);
1911 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
1913 /* no matches, return unchanged */
1914 return return_self(self
);
1917 /* use the difference between current and new, hence the "-1" */
1918 /* result_len = self_len + count * (to_len-1) */
1919 product
= count
* (to_len
-1);
1920 if (product
/ (to_len
-1) != count
) {
1921 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1924 result_len
= self_len
+ product
;
1925 if (result_len
< 0) {
1926 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1930 if ( (result
= (PyByteArrayObject
*)
1931 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1933 result_s
= PyByteArray_AS_STRING(result
);
1936 end
= self_s
+ self_len
;
1937 while (count
-- > 0) {
1938 next
= findchar(start
, end
-start
, from_c
);
1942 if (next
== start
) {
1943 /* replace with the 'to' */
1944 Py_MEMCPY(result_s
, to_s
, to_len
);
1948 /* copy the unchanged old then the 'to' */
1949 Py_MEMCPY(result_s
, start
, next
-start
);
1950 result_s
+= (next
-start
);
1951 Py_MEMCPY(result_s
, to_s
, to_len
);
1956 /* Copy the remainder of the remaining bytes */
1957 Py_MEMCPY(result_s
, start
, end
-start
);
1962 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1963 Py_LOCAL(PyByteArrayObject
*)
1964 replace_substring(PyByteArrayObject
*self
,
1965 const char *from_s
, Py_ssize_t from_len
,
1966 const char *to_s
, Py_ssize_t to_len
,
1967 Py_ssize_t maxcount
)
1969 char *self_s
, *result_s
;
1970 char *start
, *next
, *end
;
1971 Py_ssize_t self_len
, result_len
;
1972 Py_ssize_t count
, offset
, product
;
1973 PyByteArrayObject
*result
;
1975 self_s
= PyByteArray_AS_STRING(self
);
1976 self_len
= PyByteArray_GET_SIZE(self
);
1978 count
= countstring(self_s
, self_len
,
1980 0, self_len
, FORWARD
, maxcount
);
1982 /* no matches, return unchanged */
1983 return return_self(self
);
1986 /* Check for overflow */
1987 /* result_len = self_len + count * (to_len-from_len) */
1988 product
= count
* (to_len
-from_len
);
1989 if (product
/ (to_len
-from_len
) != count
) {
1990 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1993 result_len
= self_len
+ product
;
1994 if (result_len
< 0) {
1995 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1999 if ( (result
= (PyByteArrayObject
*)
2000 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
2002 result_s
= PyByteArray_AS_STRING(result
);
2005 end
= self_s
+ self_len
;
2006 while (count
-- > 0) {
2007 offset
= findstring(start
, end
-start
,
2009 0, end
-start
, FORWARD
);
2012 next
= start
+offset
;
2013 if (next
== start
) {
2014 /* replace with the 'to' */
2015 Py_MEMCPY(result_s
, to_s
, to_len
);
2019 /* copy the unchanged old then the 'to' */
2020 Py_MEMCPY(result_s
, start
, next
-start
);
2021 result_s
+= (next
-start
);
2022 Py_MEMCPY(result_s
, to_s
, to_len
);
2024 start
= next
+from_len
;
2027 /* Copy the remainder of the remaining bytes */
2028 Py_MEMCPY(result_s
, start
, end
-start
);
2034 Py_LOCAL(PyByteArrayObject
*)
2035 replace(PyByteArrayObject
*self
,
2036 const char *from_s
, Py_ssize_t from_len
,
2037 const char *to_s
, Py_ssize_t to_len
,
2038 Py_ssize_t maxcount
)
2041 maxcount
= PY_SSIZE_T_MAX
;
2042 } else if (maxcount
== 0 || PyByteArray_GET_SIZE(self
) == 0) {
2043 /* nothing to do; return the original bytes */
2044 return return_self(self
);
2047 if (maxcount
== 0 ||
2048 (from_len
== 0 && to_len
== 0)) {
2049 /* nothing to do; return the original bytes */
2050 return return_self(self
);
2053 /* Handle zero-length special cases */
2055 if (from_len
== 0) {
2056 /* insert the 'to' bytes everywhere. */
2057 /* >>> "Python".replace("", ".") */
2058 /* '.P.y.t.h.o.n.' */
2059 return replace_interleave(self
, to_s
, to_len
, maxcount
);
2062 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2063 /* point for an empty self bytes to generate a non-empty bytes */
2064 /* Special case so the remaining code always gets a non-empty bytes */
2065 if (PyByteArray_GET_SIZE(self
) == 0) {
2066 return return_self(self
);
2070 /* delete all occurances of 'from' bytes */
2071 if (from_len
== 1) {
2072 return replace_delete_single_character(
2073 self
, from_s
[0], maxcount
);
2075 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
2079 /* Handle special case where both bytes have the same length */
2081 if (from_len
== to_len
) {
2082 if (from_len
== 1) {
2083 return replace_single_character_in_place(
2089 return replace_substring_in_place(
2090 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2094 /* Otherwise use the more generic algorithms */
2095 if (from_len
== 1) {
2096 return replace_single_character(self
, from_s
[0],
2097 to_s
, to_len
, maxcount
);
2099 /* len('from')>=2, len('to')>=1 */
2100 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2105 PyDoc_STRVAR(replace__doc__
,
2106 "B.replace(old, new[, count]) -> bytes\n\
2108 Return a copy of B with all occurrences of subsection\n\
2109 old replaced by new. If the optional argument count is\n\
2110 given, only the first count occurrences are replaced.");
2113 bytes_replace(PyByteArrayObject
*self
, PyObject
*args
)
2115 Py_ssize_t count
= -1;
2116 PyObject
*from
, *to
, *res
;
2117 Py_buffer vfrom
, vto
;
2119 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
2122 if (_getbuffer(from
, &vfrom
) < 0)
2124 if (_getbuffer(to
, &vto
) < 0) {
2125 PyObject_ReleaseBuffer(from
, &vfrom
);
2129 res
= (PyObject
*)replace((PyByteArrayObject
*) self
,
2130 vfrom
.buf
, vfrom
.len
,
2131 vto
.buf
, vto
.len
, count
);
2133 PyObject_ReleaseBuffer(from
, &vfrom
);
2134 PyObject_ReleaseBuffer(to
, &vto
);
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesised so an expression argument expands safely. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)

/* Append data[left:right] as a new bytearray to `list`.
 * Relies on locals `str` and `list` and on a label `onError` in the
 * expanding function. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] in the next list position: fills a preallocated
 * slot while count < MAX_PREALLOC, appends afterwards.
 * Relies on locals `str`, `list`, `count` and on a label `onError`. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2185 Py_LOCAL_INLINE(PyObject
*)
2186 split_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
2188 register Py_ssize_t i
, j
, count
= 0;
2190 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2196 while ((j
< len
) && (maxcount
-- > 0)) {
2197 for(; j
< len
; j
++) {
2198 /* I found that using memchr makes no difference */
2207 SPLIT_ADD(s
, i
, len
);
2209 FIX_PREALLOC_SIZE(list
);
2218 Py_LOCAL_INLINE(PyObject
*)
2219 split_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxcount
)
2221 register Py_ssize_t i
, j
, count
= 0;
2223 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2228 for (i
= j
= 0; i
< len
; ) {
2230 while (i
< len
&& ISSPACE(s
[i
]))
2233 while (i
< len
&& !ISSPACE(s
[i
]))
2236 if (maxcount
-- <= 0)
2239 while (i
< len
&& ISSPACE(s
[i
]))
2245 SPLIT_ADD(s
, j
, len
);
2247 FIX_PREALLOC_SIZE(list
);
2255 PyDoc_STRVAR(split__doc__
,
2256 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2258 Return a list of the sections in B, using sep as the delimiter.\n\
2259 If sep is not given, B is split on ASCII whitespace characters\n\
2260 (space, tab, return, newline, formfeed, vertical tab).\n\
2261 If maxsplit is given, at most maxsplit splits are done.");
2264 bytes_split(PyByteArrayObject
*self
, PyObject
*args
)
2266 Py_ssize_t len
= PyByteArray_GET_SIZE(self
), n
, i
, j
;
2267 Py_ssize_t maxsplit
= -1, count
= 0;
2268 const char *s
= PyByteArray_AS_STRING(self
), *sub
;
2269 PyObject
*list
, *str
, *subobj
= Py_None
;
2275 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
2278 maxsplit
= PY_SSIZE_T_MAX
;
2280 if (subobj
== Py_None
)
2281 return split_whitespace(s
, len
, maxsplit
);
2283 if (_getbuffer(subobj
, &vsub
) < 0)
2289 PyErr_SetString(PyExc_ValueError
, "empty separator");
2290 PyObject_ReleaseBuffer(subobj
, &vsub
);
2294 return split_char(s
, len
, sub
[0], maxsplit
);
2296 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
2298 PyObject_ReleaseBuffer(subobj
, &vsub
);
2304 while (maxsplit
-- > 0) {
2305 pos
= fastsearch(s
+i
, len
-i
, sub
, n
, FAST_SEARCH
);
2314 while ((j
+n
<= len
) && (maxsplit
-- > 0)) {
2315 for (; j
+n
<= len
; j
++) {
2316 if (Py_STRING_MATCH(s
, j
, sub
, n
)) {
2324 SPLIT_ADD(s
, i
, len
);
2325 FIX_PREALLOC_SIZE(list
);
2326 PyObject_ReleaseBuffer(subobj
, &vsub
);
2331 PyObject_ReleaseBuffer(subobj
, &vsub
);
2335 /* stringlib's partition shares nullbytes in some cases.
2336 undo this, we don't want the nullbytes to be shared. */
2338 make_nullbytes_unique(PyObject
*result
)
2340 if (result
!= NULL
) {
2342 assert(PyTuple_Check(result
));
2343 assert(PyTuple_GET_SIZE(result
) == 3);
2344 for (i
= 0; i
< 3; i
++) {
2345 if (PyTuple_GET_ITEM(result
, i
) == (PyObject
*)nullbytes
) {
2346 PyObject
*new = PyByteArray_FromStringAndSize(NULL
, 0);
2352 Py_DECREF(nullbytes
);
2353 PyTuple_SET_ITEM(result
, i
, new);
2360 PyDoc_STRVAR(partition__doc__
,
2361 "B.partition(sep) -> (head, sep, tail)\n\
2363 Searches for the separator sep in B, and returns the part before it,\n\
2364 the separator itself, and the part after it. If the separator is not\n\
2365 found, returns B and two empty bytearray objects.");
2368 bytes_partition(PyByteArrayObject
*self
, PyObject
*sep_obj
)
2370 PyObject
*bytesep
, *result
;
2372 bytesep
= PyByteArray_FromObject(sep_obj
);
2376 result
= stringlib_partition(
2378 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
2380 PyByteArray_AS_STRING(bytesep
), PyByteArray_GET_SIZE(bytesep
)
2384 return make_nullbytes_unique(result
);
2387 PyDoc_STRVAR(rpartition__doc__
,
2388 "B.rpartition(sep) -> (tail, sep, head)\n\
2390 Searches for the separator sep in B, starting at the end of B,\n\
2391 and returns the part before it, the separator itself, and the\n\
2392 part after it. If the separator is not found, returns two empty\n\
2393 bytearray objects and B.");
2396 bytes_rpartition(PyByteArrayObject
*self
, PyObject
*sep_obj
)
2398 PyObject
*bytesep
, *result
;
2400 bytesep
= PyByteArray_FromObject(sep_obj
);
2404 result
= stringlib_rpartition(
2406 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
2408 PyByteArray_AS_STRING(bytesep
), PyByteArray_GET_SIZE(bytesep
)
2412 return make_nullbytes_unique(result
);
2415 Py_LOCAL_INLINE(PyObject
*)
2416 rsplit_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
2418 register Py_ssize_t i
, j
, count
=0;
2420 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2426 while ((i
>= 0) && (maxcount
-- > 0)) {
2427 for (; i
>= 0; i
--) {
2429 SPLIT_ADD(s
, i
+ 1, j
+ 1);
2436 SPLIT_ADD(s
, 0, j
+ 1);
2438 FIX_PREALLOC_SIZE(list
);
2439 if (PyList_Reverse(list
) < 0)
2449 Py_LOCAL_INLINE(PyObject
*)
2450 rsplit_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxcount
)
2452 register Py_ssize_t i
, j
, count
= 0;
2454 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2459 for (i
= j
= len
- 1; i
>= 0; ) {
2461 while (i
>= 0 && ISSPACE(s
[i
]))
2464 while (i
>= 0 && !ISSPACE(s
[i
]))
2467 if (maxcount
-- <= 0)
2469 SPLIT_ADD(s
, i
+ 1, j
+ 1);
2470 while (i
>= 0 && ISSPACE(s
[i
]))
2476 SPLIT_ADD(s
, 0, j
+ 1);
2478 FIX_PREALLOC_SIZE(list
);
2479 if (PyList_Reverse(list
) < 0)
2489 PyDoc_STRVAR(rsplit__doc__
,
2490 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2492 Return a list of the sections in B, using sep as the delimiter,\n\
2493 starting at the end of B and working to the front.\n\
2494 If sep is not given, B is split on ASCII whitespace characters\n\
2495 (space, tab, return, newline, formfeed, vertical tab).\n\
2496 If maxsplit is given, at most maxsplit splits are done.");
2499 bytes_rsplit(PyByteArrayObject
*self
, PyObject
*args
)
2501 Py_ssize_t len
= PyByteArray_GET_SIZE(self
), n
, i
, j
;
2502 Py_ssize_t maxsplit
= -1, count
= 0;
2503 const char *s
= PyByteArray_AS_STRING(self
), *sub
;
2504 PyObject
*list
, *str
, *subobj
= Py_None
;
2507 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
2510 maxsplit
= PY_SSIZE_T_MAX
;
2512 if (subobj
== Py_None
)
2513 return rsplit_whitespace(s
, len
, maxsplit
);
2515 if (_getbuffer(subobj
, &vsub
) < 0)
2521 PyErr_SetString(PyExc_ValueError
, "empty separator");
2522 PyObject_ReleaseBuffer(subobj
, &vsub
);
2526 return rsplit_char(s
, len
, sub
[0], maxsplit
);
2528 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
2530 PyObject_ReleaseBuffer(subobj
, &vsub
);
2537 while ( (i
>= 0) && (maxsplit
-- > 0) ) {
2539 if (Py_STRING_MATCH(s
, i
, sub
, n
)) {
2540 SPLIT_ADD(s
, i
+ n
, j
);
2548 FIX_PREALLOC_SIZE(list
);
2549 if (PyList_Reverse(list
) < 0)
2551 PyObject_ReleaseBuffer(subobj
, &vsub
);
2556 PyObject_ReleaseBuffer(subobj
, &vsub
);
2560 PyDoc_STRVAR(reverse__doc__
,
2561 "B.reverse() -> None\n\
2563 Reverse the order of the values in B in place.");
2565 bytes_reverse(PyByteArrayObject
*self
, PyObject
*unused
)
2567 char swap
, *head
, *tail
;
2568 Py_ssize_t i
, j
, n
= Py_SIZE(self
);
2571 head
= self
->ob_bytes
;
2572 tail
= head
+ n
- 1;
2573 for (i
= 0; i
< j
; i
++) {
2582 PyDoc_STRVAR(insert__doc__
,
2583 "B.insert(index, int) -> None\n\
2585 Insert a single item into the bytearray before the given index.");
2587 bytes_insert(PyByteArrayObject
*self
, PyObject
*args
)
2591 Py_ssize_t where
, n
= Py_SIZE(self
);
2593 if (!PyArg_ParseTuple(args
, "nO:insert", &where
, &value
))
2596 if (n
== PY_SSIZE_T_MAX
) {
2597 PyErr_SetString(PyExc_OverflowError
,
2598 "cannot add more objects to bytes");
2601 if (!_getbytevalue(value
, &ival
))
2603 if (PyByteArray_Resize((PyObject
*)self
, n
+ 1) < 0)
2613 memmove(self
->ob_bytes
+ where
+ 1, self
->ob_bytes
+ where
, n
- where
);
2614 self
->ob_bytes
[where
] = ival
;
2619 PyDoc_STRVAR(append__doc__
,
2620 "B.append(int) -> None\n\
2622 Append a single item to the end of B.");
2624 bytes_append(PyByteArrayObject
*self
, PyObject
*arg
)
2627 Py_ssize_t n
= Py_SIZE(self
);
2629 if (! _getbytevalue(arg
, &value
))
2631 if (n
== PY_SSIZE_T_MAX
) {
2632 PyErr_SetString(PyExc_OverflowError
,
2633 "cannot add more objects to bytes");
2636 if (PyByteArray_Resize((PyObject
*)self
, n
+ 1) < 0)
2639 self
->ob_bytes
[n
] = value
;
2644 PyDoc_STRVAR(extend__doc__
,
2645 "B.extend(iterable int) -> None\n\
2647 Append all the elements from the iterator or sequence to the\n\
2650 bytes_extend(PyByteArrayObject
*self
, PyObject
*arg
)
2652 PyObject
*it
, *item
, *bytes_obj
;
2653 Py_ssize_t buf_size
= 0, len
= 0;
2657 /* bytes_setslice code only accepts something supporting PEP 3118. */
2658 if (PyObject_CheckBuffer(arg
)) {
2659 if (bytes_setslice(self
, Py_SIZE(self
), Py_SIZE(self
), arg
) == -1)
2665 it
= PyObject_GetIter(arg
);
2669 /* Try to determine the length of the argument. 32 is abitrary. */
2670 buf_size
= _PyObject_LengthHint(arg
, 32);
2672 bytes_obj
= PyByteArray_FromStringAndSize(NULL
, buf_size
);
2673 if (bytes_obj
== NULL
)
2675 buf
= PyByteArray_AS_STRING(bytes_obj
);
2677 while ((item
= PyIter_Next(it
)) != NULL
) {
2678 if (! _getbytevalue(item
, &value
)) {
2681 Py_DECREF(bytes_obj
);
2687 if (len
>= buf_size
) {
2688 buf_size
= len
+ (len
>> 1) + 1;
2689 if (PyByteArray_Resize((PyObject
*)bytes_obj
, buf_size
) < 0) {
2691 Py_DECREF(bytes_obj
);
2694 /* Recompute the `buf' pointer, since the resizing operation may
2695 have invalidated it. */
2696 buf
= PyByteArray_AS_STRING(bytes_obj
);
2701 /* Resize down to exact size. */
2702 if (PyByteArray_Resize((PyObject
*)bytes_obj
, len
) < 0) {
2703 Py_DECREF(bytes_obj
);
2707 if (bytes_setslice(self
, Py_SIZE(self
), Py_SIZE(self
), bytes_obj
) == -1)
2709 Py_DECREF(bytes_obj
);
2714 PyDoc_STRVAR(pop__doc__
,
2715 "B.pop([index]) -> int\n\
2717 Remove and return a single item from B. If no index\n\
2718 argument is given, will pop the last value.");
2720 bytes_pop(PyByteArrayObject
*self
, PyObject
*args
)
2723 Py_ssize_t where
= -1, n
= Py_SIZE(self
);
2725 if (!PyArg_ParseTuple(args
, "|n:pop", &where
))
2729 PyErr_SetString(PyExc_OverflowError
,
2730 "cannot pop an empty bytes");
2734 where
+= Py_SIZE(self
);
2735 if (where
< 0 || where
>= Py_SIZE(self
)) {
2736 PyErr_SetString(PyExc_IndexError
, "pop index out of range");
2740 value
= self
->ob_bytes
[where
];
2741 memmove(self
->ob_bytes
+ where
, self
->ob_bytes
+ where
+ 1, n
- where
);
2742 if (PyByteArray_Resize((PyObject
*)self
, n
- 1) < 0)
2745 return PyInt_FromLong(value
);
2748 PyDoc_STRVAR(remove__doc__
,
2749 "B.remove(int) -> None\n\
2751 Remove the first occurance of a value in B.");
2753 bytes_remove(PyByteArrayObject
*self
, PyObject
*arg
)
2756 Py_ssize_t where
, n
= Py_SIZE(self
);
2758 if (! _getbytevalue(arg
, &value
))
2761 for (where
= 0; where
< n
; where
++) {
2762 if (self
->ob_bytes
[where
] == value
)
2766 PyErr_SetString(PyExc_ValueError
, "value not found in bytes");
2770 memmove(self
->ob_bytes
+ where
, self
->ob_bytes
+ where
+ 1, n
- where
);
2771 if (PyByteArray_Resize((PyObject
*)self
, n
- 1) < 0)
2777 /* XXX These two helpers could be optimized if argsize == 1 */
2780 lstrip_helper(unsigned char *myptr
, Py_ssize_t mysize
,
2781 void *argptr
, Py_ssize_t argsize
)
2784 while (i
< mysize
&& memchr(argptr
, myptr
[i
], argsize
))
2790 rstrip_helper(unsigned char *myptr
, Py_ssize_t mysize
,
2791 void *argptr
, Py_ssize_t argsize
)
2793 Py_ssize_t i
= mysize
- 1;
2794 while (i
>= 0 && memchr(argptr
, myptr
[i
], argsize
))
2799 PyDoc_STRVAR(strip__doc__
,
2800 "B.strip([bytes]) -> bytearray\n\
2802 Strip leading and trailing bytes contained in the argument.\n\
2803 If the argument is omitted, strip ASCII whitespace.");
2805 bytes_strip(PyByteArrayObject
*self
, PyObject
*args
)
2807 Py_ssize_t left
, right
, mysize
, argsize
;
2808 void *myptr
, *argptr
;
2809 PyObject
*arg
= Py_None
;
2811 if (!PyArg_ParseTuple(args
, "|O:strip", &arg
))
2813 if (arg
== Py_None
) {
2814 argptr
= "\t\n\r\f\v ";
2818 if (_getbuffer(arg
, &varg
) < 0)
2823 myptr
= self
->ob_bytes
;
2824 mysize
= Py_SIZE(self
);
2825 left
= lstrip_helper(myptr
, mysize
, argptr
, argsize
);
2829 right
= rstrip_helper(myptr
, mysize
, argptr
, argsize
);
2831 PyObject_ReleaseBuffer(arg
, &varg
);
2832 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2835 PyDoc_STRVAR(lstrip__doc__
,
2836 "B.lstrip([bytes]) -> bytearray\n\
2838 Strip leading bytes contained in the argument.\n\
2839 If the argument is omitted, strip leading ASCII whitespace.");
2841 bytes_lstrip(PyByteArrayObject
*self
, PyObject
*args
)
2843 Py_ssize_t left
, right
, mysize
, argsize
;
2844 void *myptr
, *argptr
;
2845 PyObject
*arg
= Py_None
;
2847 if (!PyArg_ParseTuple(args
, "|O:lstrip", &arg
))
2849 if (arg
== Py_None
) {
2850 argptr
= "\t\n\r\f\v ";
2854 if (_getbuffer(arg
, &varg
) < 0)
2859 myptr
= self
->ob_bytes
;
2860 mysize
= Py_SIZE(self
);
2861 left
= lstrip_helper(myptr
, mysize
, argptr
, argsize
);
2864 PyObject_ReleaseBuffer(arg
, &varg
);
2865 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2868 PyDoc_STRVAR(rstrip__doc__
,
2869 "B.rstrip([bytes]) -> bytearray\n\
2871 Strip trailing bytes contained in the argument.\n\
2872 If the argument is omitted, strip trailing ASCII whitespace.");
2874 bytes_rstrip(PyByteArrayObject
*self
, PyObject
*args
)
2876 Py_ssize_t left
, right
, mysize
, argsize
;
2877 void *myptr
, *argptr
;
2878 PyObject
*arg
= Py_None
;
2880 if (!PyArg_ParseTuple(args
, "|O:rstrip", &arg
))
2882 if (arg
== Py_None
) {
2883 argptr
= "\t\n\r\f\v ";
2887 if (_getbuffer(arg
, &varg
) < 0)
2892 myptr
= self
->ob_bytes
;
2893 mysize
= Py_SIZE(self
);
2895 right
= rstrip_helper(myptr
, mysize
, argptr
, argsize
);
2897 PyObject_ReleaseBuffer(arg
, &varg
);
2898 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2901 PyDoc_STRVAR(decode_doc
,
2902 "B.decode([encoding[, errors]]) -> unicode object.\n\
2904 Decodes B using the codec registered for encoding. encoding defaults\n\
2905 to the default encoding. errors may be given to set a different error\n\
2906 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2907 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2908 as well as any other name registered with codecs.register_error that is\n\
2909 able to handle UnicodeDecodeErrors.");
2912 bytes_decode(PyObject
*self
, PyObject
*args
)
2914 const char *encoding
= NULL
;
2915 const char *errors
= NULL
;
2917 if (!PyArg_ParseTuple(args
, "|ss:decode", &encoding
, &errors
))
2919 if (encoding
== NULL
)
2920 encoding
= PyUnicode_GetDefaultEncoding();
2921 return PyCodec_Decode(self
, encoding
, errors
);
2924 PyDoc_STRVAR(alloc_doc
,
2925 "B.__alloc__() -> int\n\
2927 Returns the number of bytes actually allocated.");
2930 bytes_alloc(PyByteArrayObject
*self
)
2932 return PyInt_FromSsize_t(self
->ob_alloc
);
2935 PyDoc_STRVAR(join_doc
,
2936 "B.join(iterable_of_bytes) -> bytes\n\
2938 Concatenates any number of bytearray objects, with B in between each pair.");
2941 bytes_join(PyByteArrayObject
*self
, PyObject
*it
)
2944 Py_ssize_t mysize
= Py_SIZE(self
);
2948 Py_ssize_t totalsize
= 0;
2952 seq
= PySequence_Fast(it
, "can only join an iterable");
2955 n
= PySequence_Fast_GET_SIZE(seq
);
2956 items
= PySequence_Fast_ITEMS(seq
);
2958 /* Compute the total size, and check that they are all bytes */
2959 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2960 for (i
= 0; i
< n
; i
++) {
2961 PyObject
*obj
= items
[i
];
2962 if (!PyByteArray_Check(obj
) && !PyBytes_Check(obj
)) {
2963 PyErr_Format(PyExc_TypeError
,
2964 "can only join an iterable of bytes "
2965 "(item %ld has type '%.100s')",
2966 /* XXX %ld isn't right on Win64 */
2967 (long)i
, Py_TYPE(obj
)->tp_name
);
2971 totalsize
+= mysize
;
2972 totalsize
+= Py_SIZE(obj
);
2973 if (totalsize
< 0) {
2979 /* Allocate the result, and copy the bytes */
2980 result
= PyByteArray_FromStringAndSize(NULL
, totalsize
);
2983 dest
= PyByteArray_AS_STRING(result
);
2984 for (i
= 0; i
< n
; i
++) {
2985 PyObject
*obj
= items
[i
];
2986 Py_ssize_t size
= Py_SIZE(obj
);
2988 if (PyByteArray_Check(obj
))
2989 buf
= PyByteArray_AS_STRING(obj
);
2991 buf
= PyBytes_AS_STRING(obj
);
2993 memcpy(dest
, self
->ob_bytes
, mysize
);
2996 memcpy(dest
, buf
, size
);
3004 /* Error handling */
3010 PyDoc_STRVAR(fromhex_doc
,
3011 "bytearray.fromhex(string) -> bytearray\n\
3013 Create a bytearray object from a string of hexadecimal numbers.\n\
3014 Spaces between two numbers are accepted.\n\
3015 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3018 hex_digit_to_int(Py_UNICODE c
)
3027 if (c
>= 'a' && c
<= 'f')
3028 return c
- 'a' + 10;
3034 bytes_fromhex(PyObject
*cls
, PyObject
*args
)
3036 PyObject
*newbytes
, *hexobj
;
3039 Py_ssize_t hexlen
, byteslen
, i
, j
;
3042 if (!PyArg_ParseTuple(args
, "U:fromhex", &hexobj
))
3044 assert(PyUnicode_Check(hexobj
));
3045 hexlen
= PyUnicode_GET_SIZE(hexobj
);
3046 hex
= PyUnicode_AS_UNICODE(hexobj
);
3047 byteslen
= hexlen
/2; /* This overestimates if there are spaces */
3048 newbytes
= PyByteArray_FromStringAndSize(NULL
, byteslen
);
3051 buf
= PyByteArray_AS_STRING(newbytes
);
3052 for (i
= j
= 0; i
< hexlen
; i
+= 2) {
3053 /* skip over spaces in the input */
3054 while (hex
[i
] == ' ')
3058 top
= hex_digit_to_int(hex
[i
]);
3059 bot
= hex_digit_to_int(hex
[i
+1]);
3060 if (top
== -1 || bot
== -1) {
3061 PyErr_Format(PyExc_ValueError
,
3062 "non-hexadecimal number found in "
3063 "fromhex() arg at position %zd", i
);
3066 buf
[j
++] = (top
<< 4) + bot
;
3068 if (PyByteArray_Resize(newbytes
, j
) < 0)
3073 Py_DECREF(newbytes
);
3077 PyDoc_STRVAR(reduce_doc
, "Return state information for pickling.");
3080 bytes_reduce(PyByteArrayObject
*self
)
3082 PyObject
*latin1
, *dict
;
3084 latin1
= PyUnicode_DecodeLatin1(self
->ob_bytes
,
3085 Py_SIZE(self
), NULL
);
3087 latin1
= PyUnicode_FromString("");
3089 dict
= PyObject_GetAttrString((PyObject
*)self
, "__dict__");
3096 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self
), latin1
, "latin-1", dict
);
3099 PyDoc_STRVAR(sizeof_doc
,
3100 "B.__sizeof__() -> int\n\
3102 Returns the size of B in memory, in bytes");
3104 bytes_sizeof(PyByteArrayObject
*self
)
3108 res
= sizeof(PyByteArrayObject
) + self
->ob_alloc
* sizeof(char);
3109 return PyInt_FromSsize_t(res
);
3112 static PySequenceMethods bytes_as_sequence
= {
3113 (lenfunc
)bytes_length
, /* sq_length */
3114 (binaryfunc
)PyByteArray_Concat
, /* sq_concat */
3115 (ssizeargfunc
)bytes_repeat
, /* sq_repeat */
3116 (ssizeargfunc
)bytes_getitem
, /* sq_item */
3118 (ssizeobjargproc
)bytes_setitem
, /* sq_ass_item */
3119 0, /* sq_ass_slice */
3120 (objobjproc
)bytes_contains
, /* sq_contains */
3121 (binaryfunc
)bytes_iconcat
, /* sq_inplace_concat */
3122 (ssizeargfunc
)bytes_irepeat
, /* sq_inplace_repeat */
3125 static PyMappingMethods bytes_as_mapping
= {
3126 (lenfunc
)bytes_length
,
3127 (binaryfunc
)bytes_subscript
,
3128 (objobjargproc
)bytes_ass_subscript
,
3131 static PyBufferProcs bytes_as_buffer
= {
3132 (readbufferproc
)bytes_buffer_getreadbuf
,
3133 (writebufferproc
)bytes_buffer_getwritebuf
,
3134 (segcountproc
)bytes_buffer_getsegcount
,
3135 (charbufferproc
)bytes_buffer_getcharbuf
,
3136 (getbufferproc
)bytes_getbuffer
,
3137 (releasebufferproc
)bytes_releasebuffer
,
3142 {"__alloc__", (PyCFunction
)bytes_alloc
, METH_NOARGS
, alloc_doc
},
3143 {"__reduce__", (PyCFunction
)bytes_reduce
, METH_NOARGS
, reduce_doc
},
3144 {"__sizeof__", (PyCFunction
)bytes_sizeof
, METH_NOARGS
, sizeof_doc
},
3145 {"append", (PyCFunction
)bytes_append
, METH_O
, append__doc__
},
3146 {"capitalize", (PyCFunction
)stringlib_capitalize
, METH_NOARGS
,
3147 _Py_capitalize__doc__
},
3148 {"center", (PyCFunction
)stringlib_center
, METH_VARARGS
, center__doc__
},
3149 {"count", (PyCFunction
)bytes_count
, METH_VARARGS
, count__doc__
},
3150 {"decode", (PyCFunction
)bytes_decode
, METH_VARARGS
, decode_doc
},
3151 {"endswith", (PyCFunction
)bytes_endswith
, METH_VARARGS
, endswith__doc__
},
3152 {"expandtabs", (PyCFunction
)stringlib_expandtabs
, METH_VARARGS
,
3154 {"extend", (PyCFunction
)bytes_extend
, METH_O
, extend__doc__
},
3155 {"find", (PyCFunction
)bytes_find
, METH_VARARGS
, find__doc__
},
3156 {"fromhex", (PyCFunction
)bytes_fromhex
, METH_VARARGS
|METH_CLASS
,
3158 {"index", (PyCFunction
)bytes_index
, METH_VARARGS
, index__doc__
},
3159 {"insert", (PyCFunction
)bytes_insert
, METH_VARARGS
, insert__doc__
},
3160 {"isalnum", (PyCFunction
)stringlib_isalnum
, METH_NOARGS
,
3161 _Py_isalnum__doc__
},
3162 {"isalpha", (PyCFunction
)stringlib_isalpha
, METH_NOARGS
,
3163 _Py_isalpha__doc__
},
3164 {"isdigit", (PyCFunction
)stringlib_isdigit
, METH_NOARGS
,
3165 _Py_isdigit__doc__
},
3166 {"islower", (PyCFunction
)stringlib_islower
, METH_NOARGS
,
3167 _Py_islower__doc__
},
3168 {"isspace", (PyCFunction
)stringlib_isspace
, METH_NOARGS
,
3169 _Py_isspace__doc__
},
3170 {"istitle", (PyCFunction
)stringlib_istitle
, METH_NOARGS
,
3171 _Py_istitle__doc__
},
3172 {"isupper", (PyCFunction
)stringlib_isupper
, METH_NOARGS
,
3173 _Py_isupper__doc__
},
3174 {"join", (PyCFunction
)bytes_join
, METH_O
, join_doc
},
3175 {"ljust", (PyCFunction
)stringlib_ljust
, METH_VARARGS
, ljust__doc__
},
3176 {"lower", (PyCFunction
)stringlib_lower
, METH_NOARGS
, _Py_lower__doc__
},
3177 {"lstrip", (PyCFunction
)bytes_lstrip
, METH_VARARGS
, lstrip__doc__
},
3178 {"partition", (PyCFunction
)bytes_partition
, METH_O
, partition__doc__
},
3179 {"pop", (PyCFunction
)bytes_pop
, METH_VARARGS
, pop__doc__
},
3180 {"remove", (PyCFunction
)bytes_remove
, METH_O
, remove__doc__
},
3181 {"replace", (PyCFunction
)bytes_replace
, METH_VARARGS
, replace__doc__
},
3182 {"reverse", (PyCFunction
)bytes_reverse
, METH_NOARGS
, reverse__doc__
},
3183 {"rfind", (PyCFunction
)bytes_rfind
, METH_VARARGS
, rfind__doc__
},
3184 {"rindex", (PyCFunction
)bytes_rindex
, METH_VARARGS
, rindex__doc__
},
3185 {"rjust", (PyCFunction
)stringlib_rjust
, METH_VARARGS
, rjust__doc__
},
3186 {"rpartition", (PyCFunction
)bytes_rpartition
, METH_O
, rpartition__doc__
},
3187 {"rsplit", (PyCFunction
)bytes_rsplit
, METH_VARARGS
, rsplit__doc__
},
3188 {"rstrip", (PyCFunction
)bytes_rstrip
, METH_VARARGS
, rstrip__doc__
},
3189 {"split", (PyCFunction
)bytes_split
, METH_VARARGS
, split__doc__
},
3190 {"splitlines", (PyCFunction
)stringlib_splitlines
, METH_VARARGS
,
3192 {"startswith", (PyCFunction
)bytes_startswith
, METH_VARARGS
,
3194 {"strip", (PyCFunction
)bytes_strip
, METH_VARARGS
, strip__doc__
},
3195 {"swapcase", (PyCFunction
)stringlib_swapcase
, METH_NOARGS
,
3196 _Py_swapcase__doc__
},
3197 {"title", (PyCFunction
)stringlib_title
, METH_NOARGS
, _Py_title__doc__
},
3198 {"translate", (PyCFunction
)bytes_translate
, METH_VARARGS
,
3200 {"upper", (PyCFunction
)stringlib_upper
, METH_NOARGS
, _Py_upper__doc__
},
3201 {"zfill", (PyCFunction
)stringlib_zfill
, METH_VARARGS
, zfill__doc__
},
3205 PyDoc_STRVAR(bytes_doc
,
3206 "bytearray(iterable_of_ints) -> bytearray.\n\
3207 bytearray(string, encoding[, errors]) -> bytearray.\n\
3208 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3209 bytearray(memory_view) -> bytearray.\n\
3211 Construct an mutable bytearray object from:\n\
3212 - an iterable yielding integers in range(256)\n\
3213 - a text string encoded using the specified encoding\n\
3214 - a bytes or a bytearray object\n\
3215 - any object implementing the buffer API.\n\
3217 bytearray(int) -> bytearray.\n\
3219 Construct a zero-initialized bytearray of the given length.");
3222 static PyObject
*bytes_iter(PyObject
*seq
);
3224 PyTypeObject PyByteArray_Type
= {
3225 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3227 sizeof(PyByteArrayObject
),
3229 (destructor
)bytes_dealloc
, /* tp_dealloc */
3234 (reprfunc
)bytes_repr
, /* tp_repr */
3235 0, /* tp_as_number */
3236 &bytes_as_sequence
, /* tp_as_sequence */
3237 &bytes_as_mapping
, /* tp_as_mapping */
3240 bytes_str
, /* tp_str */
3241 PyObject_GenericGetAttr
, /* tp_getattro */
3242 0, /* tp_setattro */
3243 &bytes_as_buffer
, /* tp_as_buffer */
3244 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
3245 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
3246 bytes_doc
, /* tp_doc */
3247 0, /* tp_traverse */
3249 (richcmpfunc
)bytes_richcompare
, /* tp_richcompare */
3250 0, /* tp_weaklistoffset */
3251 bytes_iter
, /* tp_iter */
3252 0, /* tp_iternext */
3253 bytes_methods
, /* tp_methods */
3258 0, /* tp_descr_get */
3259 0, /* tp_descr_set */
3260 0, /* tp_dictoffset */
3261 (initproc
)bytes_init
, /* tp_init */
3262 PyType_GenericAlloc
, /* tp_alloc */
3263 PyType_GenericNew
, /* tp_new */
3264 PyObject_Del
, /* tp_free */
3267 /*********************** Bytes Iterator ****************************/
3271 Py_ssize_t it_index
;
3272 PyByteArrayObject
*it_seq
; /* Set to NULL when iterator is exhausted */
3276 bytesiter_dealloc(bytesiterobject
*it
)
3278 _PyObject_GC_UNTRACK(it
);
3279 Py_XDECREF(it
->it_seq
);
3280 PyObject_GC_Del(it
);
3284 bytesiter_traverse(bytesiterobject
*it
, visitproc visit
, void *arg
)
3286 Py_VISIT(it
->it_seq
);
3291 bytesiter_next(bytesiterobject
*it
)
3293 PyByteArrayObject
*seq
;
3300 assert(PyByteArray_Check(seq
));
3302 if (it
->it_index
< PyByteArray_GET_SIZE(seq
)) {
3303 item
= PyInt_FromLong(
3304 (unsigned char)seq
->ob_bytes
[it
->it_index
]);
3316 bytesiter_length_hint(bytesiterobject
*it
)
3320 len
= PyByteArray_GET_SIZE(it
->it_seq
) - it
->it_index
;
3321 return PyInt_FromSsize_t(len
);
3324 PyDoc_STRVAR(length_hint_doc
,
3325 "Private method returning an estimate of len(list(it)).");
3327 static PyMethodDef bytesiter_methods
[] = {
3328 {"__length_hint__", (PyCFunction
)bytesiter_length_hint
, METH_NOARGS
,
3330 {NULL
, NULL
} /* sentinel */
3333 PyTypeObject PyByteArrayIter_Type
= {
3334 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3335 "bytearray_iterator", /* tp_name */
3336 sizeof(bytesiterobject
), /* tp_basicsize */
3337 0, /* tp_itemsize */
3339 (destructor
)bytesiter_dealloc
, /* tp_dealloc */
3345 0, /* tp_as_number */
3346 0, /* tp_as_sequence */
3347 0, /* tp_as_mapping */
3351 PyObject_GenericGetAttr
, /* tp_getattro */
3352 0, /* tp_setattro */
3353 0, /* tp_as_buffer */
3354 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
, /* tp_flags */
3356 (traverseproc
)bytesiter_traverse
, /* tp_traverse */
3358 0, /* tp_richcompare */
3359 0, /* tp_weaklistoffset */
3360 PyObject_SelfIter
, /* tp_iter */
3361 (iternextfunc
)bytesiter_next
, /* tp_iternext */
3362 bytesiter_methods
, /* tp_methods */
3367 bytes_iter(PyObject
*seq
)
3369 bytesiterobject
*it
;
3371 if (!PyByteArray_Check(seq
)) {
3372 PyErr_BadInternalCall();
3375 it
= PyObject_GC_New(bytesiterobject
, &PyByteArrayIter_Type
);
3380 it
->it_seq
= (PyByteArrayObject
*)seq
;
3381 _PyObject_GC_TRACK(it
);
3382 return (PyObject
*)it
;