1 /* PyBytes (bytearray) implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "structmember.h"
6 #include "bytes_methods.h"
/* File-scope bytearray instance used as the shared "empty" value.
   It starts out NULL; the code following the PyByteArray_Init(void)
   header assigns it from PyObject_New() and sets ob_bytes = NULL with
   zero size, allocation and export count.  It is also what
   STRINGLIB_EMPTY expands to for the stringlib templates. */
8 static PyByteArrayObject
*nullbytes
= NULL
;
11 PyByteArray_Fini(void)
17 PyByteArray_Init(void)
19 nullbytes
= PyObject_New(PyByteArrayObject
, &PyByteArray_Type
);
20 if (nullbytes
== NULL
)
22 nullbytes
->ob_bytes
= NULL
;
23 Py_SIZE(nullbytes
) = nullbytes
->ob_alloc
= 0;
24 nullbytes
->ob_exports
= 0;
28 /* end nullbytes support */
33 _getbytevalue(PyObject
* arg
, int *value
)
37 if (PyBytes_CheckExact(arg
)) {
38 if (Py_SIZE(arg
) != 1) {
39 PyErr_SetString(PyExc_ValueError
, "string must be of size 1");
42 *value
= Py_CHARMASK(((PyBytesObject
*)arg
)->ob_sval
[0]);
45 else if (PyInt_Check(arg
) || PyLong_Check(arg
)) {
46 face_value
= PyLong_AsLong(arg
);
49 PyObject
*index
= PyNumber_Index(arg
);
51 PyErr_Format(PyExc_TypeError
,
52 "an integer or string of size 1 is required");
55 face_value
= PyLong_AsLong(index
);
59 if (face_value
< 0 || face_value
>= 256) {
60 /* this includes the OverflowError in case the long is too large */
61 PyErr_SetString(PyExc_ValueError
, "byte must be in range(0, 256)");
70 bytes_buffer_getreadbuf(PyByteArrayObject
*self
, Py_ssize_t index
, const void **ptr
)
73 PyErr_SetString(PyExc_SystemError
,
74 "accessing non-existent bytes segment");
77 *ptr
= (void *)self
->ob_bytes
;
82 bytes_buffer_getwritebuf(PyByteArrayObject
*self
, Py_ssize_t index
, const void **ptr
)
85 PyErr_SetString(PyExc_SystemError
,
86 "accessing non-existent bytes segment");
89 *ptr
= (void *)self
->ob_bytes
;
94 bytes_buffer_getsegcount(PyByteArrayObject
*self
, Py_ssize_t
*lenp
)
97 *lenp
= Py_SIZE(self
);
102 bytes_buffer_getcharbuf(PyByteArrayObject
*self
, Py_ssize_t index
, const char **ptr
)
105 PyErr_SetString(PyExc_SystemError
,
106 "accessing non-existent bytes segment");
109 *ptr
= self
->ob_bytes
;
110 return Py_SIZE(self
);
114 bytes_getbuffer(PyByteArrayObject
*obj
, Py_buffer
*view
, int flags
)
122 if (obj
->ob_bytes
== NULL
)
126 ret
= PyBuffer_FillInfo(view
, (PyObject
*)obj
, ptr
, Py_SIZE(obj
), 0, flags
);
134 bytes_releasebuffer(PyByteArrayObject
*obj
, Py_buffer
*view
)
140 _getbuffer(PyObject
*obj
, Py_buffer
*view
)
142 PyBufferProcs
*buffer
= Py_TYPE(obj
)->tp_as_buffer
;
144 if (buffer
== NULL
|| buffer
->bf_getbuffer
== NULL
)
146 PyErr_Format(PyExc_TypeError
,
147 "Type %.100s doesn't support the buffer API",
148 Py_TYPE(obj
)->tp_name
);
152 if (buffer
->bf_getbuffer(obj
, view
, PyBUF_SIMPLE
) < 0)
157 /* Direct API functions */
160 PyByteArray_FromObject(PyObject
*input
)
162 return PyObject_CallFunctionObjArgs((PyObject
*)&PyByteArray_Type
,
167 PyByteArray_FromStringAndSize(const char *bytes
, Py_ssize_t size
)
169 PyByteArrayObject
*new;
173 PyErr_SetString(PyExc_SystemError
,
174 "Negative size passed to PyByteArray_FromStringAndSize");
178 new = PyObject_New(PyByteArrayObject
, &PyByteArray_Type
);
183 new->ob_bytes
= NULL
;
188 new->ob_bytes
= PyMem_Malloc(alloc
);
189 if (new->ob_bytes
== NULL
) {
191 return PyErr_NoMemory();
194 memcpy(new->ob_bytes
, bytes
, size
);
195 new->ob_bytes
[size
] = '\0'; /* Trailing null byte */
198 new->ob_alloc
= alloc
;
201 return (PyObject
*)new;
205 PyByteArray_Size(PyObject
*self
)
207 assert(self
!= NULL
);
208 assert(PyByteArray_Check(self
));
210 return PyByteArray_GET_SIZE(self
);
214 PyByteArray_AsString(PyObject
*self
)
216 assert(self
!= NULL
);
217 assert(PyByteArray_Check(self
));
219 return PyByteArray_AS_STRING(self
);
223 PyByteArray_Resize(PyObject
*self
, Py_ssize_t size
)
226 Py_ssize_t alloc
= ((PyByteArrayObject
*)self
)->ob_alloc
;
228 assert(self
!= NULL
);
229 assert(PyByteArray_Check(self
));
232 if (size
< alloc
/ 2) {
233 /* Major downsize; resize down to exact size */
236 else if (size
< alloc
) {
237 /* Within allocated size; quick exit */
238 Py_SIZE(self
) = size
;
239 ((PyByteArrayObject
*)self
)->ob_bytes
[size
] = '\0'; /* Trailing null */
242 else if (size
<= alloc
* 1.125) {
243 /* Moderate upsize; overallocate similar to list_resize() */
244 alloc
= size
+ (size
>> 3) + (size
< 9 ? 3 : 6);
247 /* Major upsize; resize up to exact size */
251 if (((PyByteArrayObject
*)self
)->ob_exports
> 0) {
253 fprintf(stderr, "%d: %s", ((PyByteArrayObject *)self)->ob_exports,
254 ((PyByteArrayObject *)self)->ob_bytes);
256 PyErr_SetString(PyExc_BufferError
,
257 "Existing exports of data: object cannot be re-sized");
261 sval
= PyMem_Realloc(((PyByteArrayObject
*)self
)->ob_bytes
, alloc
);
267 ((PyByteArrayObject
*)self
)->ob_bytes
= sval
;
268 Py_SIZE(self
) = size
;
269 ((PyByteArrayObject
*)self
)->ob_alloc
= alloc
;
270 ((PyByteArrayObject
*)self
)->ob_bytes
[size
] = '\0'; /* Trailing null byte */
276 PyByteArray_Concat(PyObject
*a
, PyObject
*b
)
280 PyByteArrayObject
*result
= NULL
;
284 if (_getbuffer(a
, &va
) < 0 ||
285 _getbuffer(b
, &vb
) < 0) {
286 PyErr_Format(PyExc_TypeError
, "can't concat %.100s to %.100s",
287 Py_TYPE(a
)->tp_name
, Py_TYPE(b
)->tp_name
);
291 size
= va
.len
+ vb
.len
;
293 return PyErr_NoMemory();
297 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, size
);
298 if (result
!= NULL
) {
299 memcpy(result
->ob_bytes
, va
.buf
, va
.len
);
300 memcpy(result
->ob_bytes
+ va
.len
, vb
.buf
, vb
.len
);
305 PyBuffer_Release(&va
);
307 PyBuffer_Release(&vb
);
308 return (PyObject
*)result
;
311 /* Functions stuffed into the type object */
314 bytes_length(PyByteArrayObject
*self
)
316 return Py_SIZE(self
);
320 bytes_iconcat(PyByteArrayObject
*self
, PyObject
*other
)
326 if (_getbuffer(other
, &vo
) < 0) {
327 PyErr_Format(PyExc_TypeError
, "can't concat %.100s to %.100s",
328 Py_TYPE(other
)->tp_name
, Py_TYPE(self
)->tp_name
);
332 mysize
= Py_SIZE(self
);
333 size
= mysize
+ vo
.len
;
335 PyBuffer_Release(&vo
);
336 return PyErr_NoMemory();
338 if (size
< self
->ob_alloc
) {
339 Py_SIZE(self
) = size
;
340 self
->ob_bytes
[Py_SIZE(self
)] = '\0'; /* Trailing null byte */
342 else if (PyByteArray_Resize((PyObject
*)self
, size
) < 0) {
343 PyBuffer_Release(&vo
);
346 memcpy(self
->ob_bytes
+ mysize
, vo
.buf
, vo
.len
);
347 PyBuffer_Release(&vo
);
349 return (PyObject
*)self
;
353 bytes_repeat(PyByteArrayObject
*self
, Py_ssize_t count
)
355 PyByteArrayObject
*result
;
361 mysize
= Py_SIZE(self
);
362 size
= mysize
* count
;
363 if (count
!= 0 && size
/ count
!= mysize
)
364 return PyErr_NoMemory();
365 result
= (PyByteArrayObject
*)PyByteArray_FromStringAndSize(NULL
, size
);
366 if (result
!= NULL
&& size
!= 0) {
368 memset(result
->ob_bytes
, self
->ob_bytes
[0], size
);
371 for (i
= 0; i
< count
; i
++)
372 memcpy(result
->ob_bytes
+ i
*mysize
, self
->ob_bytes
, mysize
);
375 return (PyObject
*)result
;
379 bytes_irepeat(PyByteArrayObject
*self
, Py_ssize_t count
)
386 mysize
= Py_SIZE(self
);
387 size
= mysize
* count
;
388 if (count
!= 0 && size
/ count
!= mysize
)
389 return PyErr_NoMemory();
390 if (size
< self
->ob_alloc
) {
391 Py_SIZE(self
) = size
;
392 self
->ob_bytes
[Py_SIZE(self
)] = '\0'; /* Trailing null byte */
394 else if (PyByteArray_Resize((PyObject
*)self
, size
) < 0)
398 memset(self
->ob_bytes
, self
->ob_bytes
[0], size
);
401 for (i
= 1; i
< count
; i
++)
402 memcpy(self
->ob_bytes
+ i
*mysize
, self
->ob_bytes
, mysize
);
406 return (PyObject
*)self
;
410 bytes_getitem(PyByteArrayObject
*self
, Py_ssize_t i
)
414 if (i
< 0 || i
>= Py_SIZE(self
)) {
415 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
418 return PyInt_FromLong((unsigned char)(self
->ob_bytes
[i
]));
422 bytes_subscript(PyByteArrayObject
*self
, PyObject
*index
)
424 if (PyIndex_Check(index
)) {
425 Py_ssize_t i
= PyNumber_AsSsize_t(index
, PyExc_IndexError
);
427 if (i
== -1 && PyErr_Occurred())
431 i
+= PyByteArray_GET_SIZE(self
);
433 if (i
< 0 || i
>= Py_SIZE(self
)) {
434 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
437 return PyInt_FromLong((unsigned char)(self
->ob_bytes
[i
]));
439 else if (PySlice_Check(index
)) {
440 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
441 if (PySlice_GetIndicesEx((PySliceObject
*)index
,
442 PyByteArray_GET_SIZE(self
),
443 &start
, &stop
, &step
, &slicelength
) < 0) {
447 if (slicelength
<= 0)
448 return PyByteArray_FromStringAndSize("", 0);
449 else if (step
== 1) {
450 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ start
,
454 char *source_buf
= PyByteArray_AS_STRING(self
);
455 char *result_buf
= (char *)PyMem_Malloc(slicelength
);
458 if (result_buf
== NULL
)
459 return PyErr_NoMemory();
461 for (cur
= start
, i
= 0; i
< slicelength
;
463 result_buf
[i
] = source_buf
[cur
];
465 result
= PyByteArray_FromStringAndSize(result_buf
, slicelength
);
466 PyMem_Free(result_buf
);
471 PyErr_SetString(PyExc_TypeError
, "bytearray indices must be integers");
477 bytes_setslice(PyByteArrayObject
*self
, Py_ssize_t lo
, Py_ssize_t hi
,
480 Py_ssize_t avail
, needed
;
486 if (values
== (PyObject
*)self
) {
487 /* Make a copy and call this function recursively */
489 values
= PyByteArray_FromObject(values
);
492 err
= bytes_setslice(self
, lo
, hi
, values
);
496 if (values
== NULL
) {
502 if (_getbuffer(values
, &vbytes
) < 0) {
503 PyErr_Format(PyExc_TypeError
,
504 "can't set bytearray slice from %.100s",
505 Py_TYPE(values
)->tp_name
);
516 if (hi
> Py_SIZE(self
))
523 if (avail
!= needed
) {
524 if (avail
> needed
) {
527 | |<----avail----->|<-----tomove------>|
528 | |<-needed->|<-----tomove------>|
531 memmove(self
->ob_bytes
+ lo
+ needed
, self
->ob_bytes
+ hi
,
534 /* XXX(nnorwitz): need to verify this can't overflow! */
535 if (PyByteArray_Resize((PyObject
*)self
,
536 Py_SIZE(self
) + needed
- avail
) < 0) {
540 if (avail
< needed
) {
543 | |<-avail->|<-----tomove------>|
544 | |<----needed---->|<-----tomove------>|
547 memmove(self
->ob_bytes
+ lo
+ needed
, self
->ob_bytes
+ hi
,
548 Py_SIZE(self
) - lo
- needed
);
553 memcpy(self
->ob_bytes
+ lo
, bytes
, needed
);
557 if (vbytes
.len
!= -1)
558 PyBuffer_Release(&vbytes
);
563 bytes_setitem(PyByteArrayObject
*self
, Py_ssize_t i
, PyObject
*value
)
570 if (i
< 0 || i
>= Py_SIZE(self
)) {
571 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
576 return bytes_setslice(self
, i
, i
+1, NULL
);
578 if (!_getbytevalue(value
, &ival
))
581 self
->ob_bytes
[i
] = ival
;
586 bytes_ass_subscript(PyByteArrayObject
*self
, PyObject
*index
, PyObject
*values
)
588 Py_ssize_t start
, stop
, step
, slicelen
, needed
;
591 if (PyIndex_Check(index
)) {
592 Py_ssize_t i
= PyNumber_AsSsize_t(index
, PyExc_IndexError
);
594 if (i
== -1 && PyErr_Occurred())
598 i
+= PyByteArray_GET_SIZE(self
);
600 if (i
< 0 || i
>= Py_SIZE(self
)) {
601 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
605 if (values
== NULL
) {
606 /* Fall through to slice assignment */
614 if (!_getbytevalue(values
, &ival
))
616 self
->ob_bytes
[i
] = (char)ival
;
620 else if (PySlice_Check(index
)) {
621 if (PySlice_GetIndicesEx((PySliceObject
*)index
,
622 PyByteArray_GET_SIZE(self
),
623 &start
, &stop
, &step
, &slicelen
) < 0) {
628 PyErr_SetString(PyExc_TypeError
, "bytearray indices must be integer");
632 if (values
== NULL
) {
636 else if (values
== (PyObject
*)self
|| !PyByteArray_Check(values
)) {
637 /* Make a copy and call this function recursively */
639 values
= PyByteArray_FromObject(values
);
642 err
= bytes_ass_subscript(self
, index
, values
);
647 assert(PyByteArray_Check(values
));
648 bytes
= ((PyByteArrayObject
*)values
)->ob_bytes
;
649 needed
= Py_SIZE(values
);
651 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
652 if ((step
< 0 && start
< stop
) ||
653 (step
> 0 && start
> stop
))
656 if (slicelen
!= needed
) {
657 if (slicelen
> needed
) {
659 0 start stop old_size
660 | |<---slicelen--->|<-----tomove------>|
661 | |<-needed->|<-----tomove------>|
664 memmove(self
->ob_bytes
+ start
+ needed
, self
->ob_bytes
+ stop
,
665 Py_SIZE(self
) - stop
);
667 if (PyByteArray_Resize((PyObject
*)self
,
668 Py_SIZE(self
) + needed
- slicelen
) < 0)
670 if (slicelen
< needed
) {
673 | |<-avail->|<-----tomove------>|
674 | |<----needed---->|<-----tomove------>|
677 memmove(self
->ob_bytes
+ start
+ needed
, self
->ob_bytes
+ stop
,
678 Py_SIZE(self
) - start
- needed
);
683 memcpy(self
->ob_bytes
+ start
, bytes
, needed
);
694 start
= stop
+ step
* (slicelen
- 1) - 1;
697 for (cur
= start
, i
= 0;
698 i
< slicelen
; cur
+= step
, i
++) {
699 Py_ssize_t lim
= step
- 1;
701 if (cur
+ step
>= PyByteArray_GET_SIZE(self
))
702 lim
= PyByteArray_GET_SIZE(self
) - cur
- 1;
704 memmove(self
->ob_bytes
+ cur
- i
,
705 self
->ob_bytes
+ cur
+ 1, lim
);
707 /* Move the tail of the bytes, in one chunk */
708 cur
= start
+ slicelen
*step
;
709 if (cur
< PyByteArray_GET_SIZE(self
)) {
710 memmove(self
->ob_bytes
+ cur
- slicelen
,
711 self
->ob_bytes
+ cur
,
712 PyByteArray_GET_SIZE(self
) - cur
);
714 if (PyByteArray_Resize((PyObject
*)self
,
715 PyByteArray_GET_SIZE(self
) - slicelen
) < 0)
724 if (needed
!= slicelen
) {
725 PyErr_Format(PyExc_ValueError
,
726 "attempt to assign bytes of size %zd "
727 "to extended slice of size %zd",
731 for (cur
= start
, i
= 0; i
< slicelen
; cur
+= step
, i
++)
732 self
->ob_bytes
[cur
] = bytes
[i
];
739 bytes_init(PyByteArrayObject
*self
, PyObject
*args
, PyObject
*kwds
)
741 static char *kwlist
[] = {"source", "encoding", "errors", 0};
742 PyObject
*arg
= NULL
;
743 const char *encoding
= NULL
;
744 const char *errors
= NULL
;
747 PyObject
*(*iternext
)(PyObject
*);
749 if (Py_SIZE(self
) != 0) {
750 /* Empty previous contents (yes, do this first of all!) */
751 if (PyByteArray_Resize((PyObject
*)self
, 0) < 0)
755 /* Parse arguments */
756 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|Oss:bytearray", kwlist
,
757 &arg
, &encoding
, &errors
))
760 /* Make a quick exit if no first argument */
762 if (encoding
!= NULL
|| errors
!= NULL
) {
763 PyErr_SetString(PyExc_TypeError
,
764 "encoding or errors without sequence argument");
770 if (PyBytes_Check(arg
)) {
771 PyObject
*new, *encoded
;
772 if (encoding
!= NULL
) {
773 encoded
= PyCodec_Encode(arg
, encoding
, errors
);
776 assert(PyBytes_Check(encoded
));
782 new = bytes_iconcat(self
, arg
);
790 if (PyUnicode_Check(arg
)) {
791 /* Encode via the codec registry */
792 PyObject
*encoded
, *new;
793 if (encoding
== NULL
) {
794 PyErr_SetString(PyExc_TypeError
,
795 "unicode argument without an encoding");
798 encoded
= PyCodec_Encode(arg
, encoding
, errors
);
801 assert(PyBytes_Check(encoded
));
802 new = bytes_iconcat(self
, encoded
);
810 /* If it's not unicode, there can't be encoding or errors */
811 if (encoding
!= NULL
|| errors
!= NULL
) {
812 PyErr_SetString(PyExc_TypeError
,
813 "encoding or errors without a string argument");
818 count
= PyNumber_AsSsize_t(arg
, PyExc_ValueError
);
819 if (count
== -1 && PyErr_Occurred())
823 PyErr_SetString(PyExc_ValueError
, "negative count");
827 if (PyByteArray_Resize((PyObject
*)self
, count
))
829 memset(self
->ob_bytes
, 0, count
);
834 /* Use the buffer API */
835 if (PyObject_CheckBuffer(arg
)) {
838 if (PyObject_GetBuffer(arg
, &view
, PyBUF_FULL_RO
) < 0)
841 if (PyByteArray_Resize((PyObject
*)self
, size
) < 0) goto fail
;
842 if (PyBuffer_ToContiguous(self
->ob_bytes
, &view
, size
, 'C') < 0)
844 PyBuffer_Release(&view
);
847 PyBuffer_Release(&view
);
851 /* XXX Optimize this if the argument is a list or tuple */
853 /* Get the iterator */
854 it
= PyObject_GetIter(arg
);
857 iternext
= *Py_TYPE(it
)->tp_iternext
;
859 /* Run the iterator to exhaustion */
864 /* Get the next item */
867 if (PyErr_Occurred()) {
868 if (!PyErr_ExceptionMatches(PyExc_StopIteration
))
875 /* Interpret it as an int (__index__) */
876 rc
= _getbytevalue(item
, &value
);
881 /* Append the byte */
882 if (Py_SIZE(self
) < self
->ob_alloc
)
884 else if (PyByteArray_Resize((PyObject
*)self
, Py_SIZE(self
)+1) < 0)
886 self
->ob_bytes
[Py_SIZE(self
)-1] = value
;
889 /* Clean up and return success */
894 /* Error handling when it != NULL */
899 /* Mostly copied from string_repr, but without the
900 "smart quote" functionality. */
902 bytes_repr(PyByteArrayObject
*self
)
904 static const char *hexdigits
= "0123456789abcdef";
905 const char *quote_prefix
= "bytearray(b";
906 const char *quote_postfix
= ")";
907 Py_ssize_t length
= Py_SIZE(self
);
908 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
909 size_t newsize
= 14 + 4 * length
;
911 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 - 3 != length
) {
912 PyErr_SetString(PyExc_OverflowError
,
913 "bytearray object is too large to make repr");
916 v
= PyUnicode_FromUnicode(NULL
, newsize
);
921 register Py_ssize_t i
;
922 register Py_UNICODE c
;
923 register Py_UNICODE
*p
;
926 /* Figure out which quote to use; single is preferred */
930 start
= PyByteArray_AS_STRING(self
);
931 for (test
= start
; test
< start
+length
; ++test
) {
933 quote
= '\''; /* back to single */
936 else if (*test
== '\'')
943 p
= PyUnicode_AS_UNICODE(v
);
944 while (*quote_prefix
)
945 *p
++ = *quote_prefix
++;
948 for (i
= 0; i
< length
; i
++) {
949 /* There's at least enough room for a hex escape
950 and a closing quote. */
951 assert(newsize
- (p
- PyUnicode_AS_UNICODE(v
)) >= 5);
952 c
= self
->ob_bytes
[i
];
953 if (c
== '\'' || c
== '\\')
954 *p
++ = '\\', *p
++ = c
;
956 *p
++ = '\\', *p
++ = 't';
958 *p
++ = '\\', *p
++ = 'n';
960 *p
++ = '\\', *p
++ = 'r';
962 *p
++ = '\\', *p
++ = 'x', *p
++ = '0', *p
++ = '0';
963 else if (c
< ' ' || c
>= 0x7f) {
966 *p
++ = hexdigits
[(c
& 0xf0) >> 4];
967 *p
++ = hexdigits
[c
& 0xf];
972 assert(newsize
- (p
- PyUnicode_AS_UNICODE(v
)) >= 1);
974 while (*quote_postfix
) {
975 *p
++ = *quote_postfix
++;
978 if (PyUnicode_Resize(&v
, (p
- PyUnicode_AS_UNICODE(v
)))) {
987 bytes_str(PyObject
*op
)
990 if (Py_BytesWarningFlag
) {
991 if (PyErr_WarnEx(PyExc_BytesWarning
,
992 "str() on a bytearray instance", 1))
995 return bytes_repr((PyByteArrayObject
*)op
);
997 return PyBytes_FromStringAndSize(((PyByteArrayObject
*)op
)->ob_bytes
, Py_SIZE(op
));
1001 bytes_richcompare(PyObject
*self
, PyObject
*other
, int op
)
1003 Py_ssize_t self_size
, other_size
;
1004 Py_buffer self_bytes
, other_bytes
;
1009 /* Bytes can be compared to anything that supports the (binary)
1010 buffer API. Except that a comparison with Unicode is always an
1011 error, even if the comparison is for equality. */
1012 if (PyObject_IsInstance(self
, (PyObject
*)&PyUnicode_Type
) ||
1013 PyObject_IsInstance(other
, (PyObject
*)&PyUnicode_Type
)) {
1014 if (Py_BytesWarningFlag
&& op
== Py_EQ
) {
1015 if (PyErr_WarnEx(PyExc_BytesWarning
,
1016 "Comparsion between bytearray and string", 1))
1020 Py_INCREF(Py_NotImplemented
);
1021 return Py_NotImplemented
;
1024 self_size
= _getbuffer(self
, &self_bytes
);
1025 if (self_size
< 0) {
1027 Py_INCREF(Py_NotImplemented
);
1028 return Py_NotImplemented
;
1031 other_size
= _getbuffer(other
, &other_bytes
);
1032 if (other_size
< 0) {
1034 PyBuffer_Release(&self_bytes
);
1035 Py_INCREF(Py_NotImplemented
);
1036 return Py_NotImplemented
;
1039 if (self_size
!= other_size
&& (op
== Py_EQ
|| op
== Py_NE
)) {
1040 /* Shortcut: if the lengths differ, the objects differ */
1041 cmp
= (op
== Py_NE
);
1044 minsize
= self_size
;
1045 if (other_size
< minsize
)
1046 minsize
= other_size
;
1048 cmp
= memcmp(self_bytes
.buf
, other_bytes
.buf
, minsize
);
1049 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1052 if (self_size
< other_size
)
1054 else if (self_size
> other_size
)
1059 case Py_LT
: cmp
= cmp
< 0; break;
1060 case Py_LE
: cmp
= cmp
<= 0; break;
1061 case Py_EQ
: cmp
= cmp
== 0; break;
1062 case Py_NE
: cmp
= cmp
!= 0; break;
1063 case Py_GT
: cmp
= cmp
> 0; break;
1064 case Py_GE
: cmp
= cmp
>= 0; break;
1068 res
= cmp
? Py_True
: Py_False
;
1069 PyBuffer_Release(&self_bytes
);
1070 PyBuffer_Release(&other_bytes
);
1076 bytes_dealloc(PyByteArrayObject
*self
)
1078 if (self
->ob_exports
> 0) {
1079 PyErr_SetString(PyExc_SystemError
,
1080 "deallocated bytearray object has exported buffers");
1083 if (self
->ob_bytes
!= 0) {
1084 PyMem_Free(self
->ob_bytes
);
1086 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1090 /* -------------------------------------------------------------------- */
/* Configuration consumed by the stringlib template headers included
   right after these defines: the element type is char, comparison is
   memcmp, length/data access and construction go through the
   PyByteArray_* macros and PyByteArray_FromStringAndSize, and the empty
   value is the file-scope nullbytes object.  STRINGLIB_MUTABLE and
   FROM_BYTEARRAY are set to 1 to tell the templates they are being
   instantiated for bytearray. */
1093 #define STRINGLIB_CHAR char
1094 #define STRINGLIB_CMP memcmp
1095 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1096 #define STRINGLIB_STR PyByteArray_AS_STRING
1097 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1098 #define STRINGLIB_EMPTY nullbytes
1099 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1100 #define STRINGLIB_MUTABLE 1
1101 #define FROM_BYTEARRAY 1
1103 #include "stringlib/fastsearch.h"
1104 #include "stringlib/count.h"
1105 #include "stringlib/find.h"
1106 #include "stringlib/partition.h"
1107 #include "stringlib/ctype.h"
1108 #include "stringlib/transmogrify.h"
1111 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1112 were copied from the old char* style string object. */
1114 Py_LOCAL_INLINE(void)
1115 _adjust_indices(Py_ssize_t
*start
, Py_ssize_t
*end
, Py_ssize_t len
)
1130 Py_LOCAL_INLINE(Py_ssize_t
)
1131 bytes_find_internal(PyByteArrayObject
*self
, PyObject
*args
, int dir
)
1135 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1138 if (!PyArg_ParseTuple(args
, "O|O&O&:find/rfind/index/rindex", &subobj
,
1139 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1141 if (_getbuffer(subobj
, &subbuf
) < 0)
1144 res
= stringlib_find_slice(
1145 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
1146 subbuf
.buf
, subbuf
.len
, start
, end
);
1148 res
= stringlib_rfind_slice(
1149 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
1150 subbuf
.buf
, subbuf
.len
, start
, end
);
1151 PyBuffer_Release(&subbuf
);
1155 PyDoc_STRVAR(find__doc__
,
1156 "B.find(sub [,start [,end]]) -> int\n\
1158 Return the lowest index in B where subsection sub is found,\n\
1159 such that sub is contained within s[start,end]. Optional\n\
1160 arguments start and end are interpreted as in slice notation.\n\
1162 Return -1 on failure.");
1165 bytes_find(PyByteArrayObject
*self
, PyObject
*args
)
1167 Py_ssize_t result
= bytes_find_internal(self
, args
, +1);
1170 return PyInt_FromSsize_t(result
);
1173 PyDoc_STRVAR(count__doc__
,
1174 "B.count(sub [,start [,end]]) -> int\n\
1176 Return the number of non-overlapping occurrences of subsection sub in\n\
1177 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1178 as in slice notation.");
1181 bytes_count(PyByteArrayObject
*self
, PyObject
*args
)
1184 const char *str
= PyByteArray_AS_STRING(self
);
1185 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
1187 PyObject
*count_obj
;
1189 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
1190 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1193 if (_getbuffer(sub_obj
, &vsub
) < 0)
1196 _adjust_indices(&start
, &end
, PyByteArray_GET_SIZE(self
));
1198 count_obj
= PyInt_FromSsize_t(
1199 stringlib_count(str
+ start
, end
- start
, vsub
.buf
, vsub
.len
)
1201 PyBuffer_Release(&vsub
);
1206 PyDoc_STRVAR(index__doc__
,
1207 "B.index(sub [,start [,end]]) -> int\n\
1209 Like B.find() but raise ValueError when the subsection is not found.");
1212 bytes_index(PyByteArrayObject
*self
, PyObject
*args
)
1214 Py_ssize_t result
= bytes_find_internal(self
, args
, +1);
1218 PyErr_SetString(PyExc_ValueError
,
1219 "subsection not found");
1222 return PyInt_FromSsize_t(result
);
1226 PyDoc_STRVAR(rfind__doc__
,
1227 "B.rfind(sub [,start [,end]]) -> int\n\
1229 Return the highest index in B where subsection sub is found,\n\
1230 such that sub is contained within s[start,end]. Optional\n\
1231 arguments start and end are interpreted as in slice notation.\n\
1233 Return -1 on failure.");
1236 bytes_rfind(PyByteArrayObject
*self
, PyObject
*args
)
1238 Py_ssize_t result
= bytes_find_internal(self
, args
, -1);
1241 return PyInt_FromSsize_t(result
);
1245 PyDoc_STRVAR(rindex__doc__
,
1246 "B.rindex(sub [,start [,end]]) -> int\n\
1248 Like B.rfind() but raise ValueError when the subsection is not found.");
1251 bytes_rindex(PyByteArrayObject
*self
, PyObject
*args
)
1253 Py_ssize_t result
= bytes_find_internal(self
, args
, -1);
1257 PyErr_SetString(PyExc_ValueError
,
1258 "subsection not found");
1261 return PyInt_FromSsize_t(result
);
1266 bytes_contains(PyObject
*self
, PyObject
*arg
)
1268 Py_ssize_t ival
= PyNumber_AsSsize_t(arg
, PyExc_ValueError
);
1269 if (ival
== -1 && PyErr_Occurred()) {
1273 if (_getbuffer(arg
, &varg
) < 0)
1275 pos
= stringlib_find(PyByteArray_AS_STRING(self
), Py_SIZE(self
),
1276 varg
.buf
, varg
.len
, 0);
1277 PyBuffer_Release(&varg
);
1280 if (ival
< 0 || ival
>= 256) {
1281 PyErr_SetString(PyExc_ValueError
, "byte must be in range(0, 256)");
1285 return memchr(PyByteArray_AS_STRING(self
), ival
, Py_SIZE(self
)) != NULL
;
1289 /* Matches the end (direction >= 0) or start (direction < 0) of self
1290 * against substr, using the start and end arguments. Returns
1291 * -1 on error, 0 if not found and 1 if found.
1294 _bytes_tailmatch(PyByteArrayObject
*self
, PyObject
*substr
, Py_ssize_t start
,
1295 Py_ssize_t end
, int direction
)
1297 Py_ssize_t len
= PyByteArray_GET_SIZE(self
);
1302 str
= PyByteArray_AS_STRING(self
);
1304 if (_getbuffer(substr
, &vsubstr
) < 0)
1307 _adjust_indices(&start
, &end
, len
);
1309 if (direction
< 0) {
1311 if (start
+vsubstr
.len
> len
) {
1316 if (end
-start
< vsubstr
.len
|| start
> len
) {
1320 if (end
-vsubstr
.len
> start
)
1321 start
= end
- vsubstr
.len
;
1323 if (end
-start
>= vsubstr
.len
)
1324 rv
= ! memcmp(str
+start
, vsubstr
.buf
, vsubstr
.len
);
1327 PyBuffer_Release(&vsubstr
);
1332 PyDoc_STRVAR(startswith__doc__
,
1333 "B.startswith(prefix [,start [,end]]) -> bool\n\
1335 Return True if B starts with the specified prefix, False otherwise.\n\
1336 With optional start, test B beginning at that position.\n\
1337 With optional end, stop comparing B at that position.\n\
1338 prefix can also be a tuple of strings to try.");
1341 bytes_startswith(PyByteArrayObject
*self
, PyObject
*args
)
1343 Py_ssize_t start
= 0;
1344 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1348 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
1349 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1351 if (PyTuple_Check(subobj
)) {
1353 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
1354 result
= _bytes_tailmatch(self
,
1355 PyTuple_GET_ITEM(subobj
, i
),
1365 result
= _bytes_tailmatch(self
, subobj
, start
, end
, -1);
1369 return PyBool_FromLong(result
);
1372 PyDoc_STRVAR(endswith__doc__
,
1373 "B.endswith(suffix [,start [,end]]) -> bool\n\
1375 Return True if B ends with the specified suffix, False otherwise.\n\
1376 With optional start, test B beginning at that position.\n\
1377 With optional end, stop comparing B at that position.\n\
1378 suffix can also be a tuple of strings to try.");
1381 bytes_endswith(PyByteArrayObject
*self
, PyObject
*args
)
1383 Py_ssize_t start
= 0;
1384 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1388 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
1389 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1391 if (PyTuple_Check(subobj
)) {
1393 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
1394 result
= _bytes_tailmatch(self
,
1395 PyTuple_GET_ITEM(subobj
, i
),
1405 result
= _bytes_tailmatch(self
, subobj
, start
, end
, +1);
1409 return PyBool_FromLong(result
);
1413 PyDoc_STRVAR(translate__doc__
,
1414 "B.translate(table[, deletechars]) -> bytearray\n\
1416 Return a copy of B, where all characters occurring in the\n\
1417 optional argument deletechars are removed, and the remaining\n\
1418 characters have been mapped through the given translation\n\
1419 table, which must be a bytes object of length 256.");
1422 bytes_translate(PyByteArrayObject
*self
, PyObject
*args
)
1424 register char *input
, *output
;
1425 register const char *table
;
1426 register Py_ssize_t i
, c
, changed
= 0;
1427 PyObject
*input_obj
= (PyObject
*)self
;
1428 const char *output_start
;
1431 int trans_table
[256];
1432 PyObject
*tableobj
, *delobj
= NULL
;
1433 Py_buffer vtable
, vdel
;
1435 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
1436 &tableobj
, &delobj
))
1439 if (_getbuffer(tableobj
, &vtable
) < 0)
1442 if (vtable
.len
!= 256) {
1443 PyErr_SetString(PyExc_ValueError
,
1444 "translation table must be 256 characters long");
1449 if (delobj
!= NULL
) {
1450 if (_getbuffer(delobj
, &vdel
) < 0) {
1460 table
= (const char *)vtable
.buf
;
1461 inlen
= PyByteArray_GET_SIZE(input_obj
);
1462 result
= PyByteArray_FromStringAndSize((char *)NULL
, inlen
);
1465 output_start
= output
= PyByteArray_AsString(result
);
1466 input
= PyByteArray_AS_STRING(input_obj
);
1468 if (vdel
.len
== 0) {
1469 /* If no deletions are required, use faster code */
1470 for (i
= inlen
; --i
>= 0; ) {
1471 c
= Py_CHARMASK(*input
++);
1472 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
1475 if (changed
|| !PyByteArray_CheckExact(input_obj
))
1478 Py_INCREF(input_obj
);
1483 for (i
= 0; i
< 256; i
++)
1484 trans_table
[i
] = Py_CHARMASK(table
[i
]);
1486 for (i
= 0; i
< vdel
.len
; i
++)
1487 trans_table
[(int) Py_CHARMASK( ((unsigned char*)vdel
.buf
)[i
] )] = -1;
1489 for (i
= inlen
; --i
>= 0; ) {
1490 c
= Py_CHARMASK(*input
++);
1491 if (trans_table
[c
] != -1)
1492 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
1496 if (!changed
&& PyByteArray_CheckExact(input_obj
)) {
1498 Py_INCREF(input_obj
);
1502 /* Fix the size of the resulting string */
1504 PyByteArray_Resize(result
, output
- output_start
);
1507 PyBuffer_Release(&vtable
);
1509 PyBuffer_Release(&vdel
);
1517 /* find and count characters and substrings */
/* findchar(target, target_len, c): thin wrapper around memchr() that
   searches the first target_len bytes of target for the byte c and
   casts the result to char *; the result is NULL when c is absent. */
1519 #define findchar(target, target_len, c) \
1520 ((char *)memchr((const void *)(target), c, target_len))
1522 /* Py_STRING_MATCH(target, offset, pattern, length): non-zero when the
   `length` bytes starting at target[offset] equal pattern.  The first
   and last bytes are compared directly and memcmp() then checks the
   bytes in between.
   Don't call if length < 2: the count handed to memcmp() is length-2.
   The arguments are expanded more than once and without parentheses,
   so pass only simple, side-effect-free expressions. */
1523 #define Py_STRING_MATCH(target, offset, pattern, length) \
1524 (target[offset] == pattern[0] && \
1525 target[offset+length-1] == pattern[length-1] && \
1526 !memcmp(target+offset+1, pattern+1, length-2) )
1529 /* Bytes ops must return a string. */
1530 /* If the object is a subclass of bytearray, create a copy */
1531 Py_LOCAL(PyByteArrayObject
*)
1532 return_self(PyByteArrayObject
*self
)
1534 if (PyByteArray_CheckExact(self
)) {
1536 return (PyByteArrayObject
*)self
;
1538 return (PyByteArrayObject
*)PyByteArray_FromStringAndSize(
1539 PyByteArray_AS_STRING(self
),
1540 PyByteArray_GET_SIZE(self
));
1543 Py_LOCAL_INLINE(Py_ssize_t
)
1544 countchar(const char *target
, Py_ssize_t target_len
, char c
, Py_ssize_t maxcount
)
1547 const char *start
=target
;
1548 const char *end
=target
+target_len
;
1550 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
1552 if (count
>= maxcount
)
1559 Py_LOCAL(Py_ssize_t
)
1560 findstring(const char *target
, Py_ssize_t target_len
,
1561 const char *pattern
, Py_ssize_t pattern_len
,
1567 start
+= target_len
;
1571 if (end
> target_len
) {
1573 } else if (end
< 0) {
1579 /* zero-length substrings always match at the first attempt */
1580 if (pattern_len
== 0)
1581 return (direction
> 0) ? start
: end
;
1585 if (direction
< 0) {
1586 for (; end
>= start
; end
--)
1587 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
))
1590 for (; start
<= end
; start
++)
1591 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
))
1597 Py_LOCAL_INLINE(Py_ssize_t
)
1598 countstring(const char *target
, Py_ssize_t target_len
,
1599 const char *pattern
, Py_ssize_t pattern_len
,
1602 int direction
, Py_ssize_t maxcount
)
1607 start
+= target_len
;
1611 if (end
> target_len
) {
1613 } else if (end
< 0) {
1619 /* zero-length substrings match everywhere */
1620 if (pattern_len
== 0 || maxcount
== 0) {
1621 if (target_len
+1 < maxcount
)
1622 return target_len
+1;
1627 if (direction
< 0) {
1628 for (; (end
>= start
); end
--)
1629 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
)) {
1631 if (--maxcount
<= 0) break;
1632 end
-= pattern_len
-1;
1635 for (; (start
<= end
); start
++)
1636 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
)) {
1638 if (--maxcount
<= 0)
1640 start
+= pattern_len
-1;
1647 /* Algorithms for different cases of string replacement */
1649 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1650 Py_LOCAL(PyByteArrayObject
*)
1651 replace_interleave(PyByteArrayObject
*self
,
1652 const char *to_s
, Py_ssize_t to_len
,
1653 Py_ssize_t maxcount
)
1655 char *self_s
, *result_s
;
1656 Py_ssize_t self_len
, result_len
;
1657 Py_ssize_t count
, i
, product
;
1658 PyByteArrayObject
*result
;
1660 self_len
= PyByteArray_GET_SIZE(self
);
1662 /* 1 at the end plus 1 after every character */
1664 if (maxcount
< count
)
1667 /* Check for overflow */
1668 /* result_len = count * to_len + self_len; */
1669 product
= count
* to_len
;
1670 if (product
/ to_len
!= count
) {
1671 PyErr_SetString(PyExc_OverflowError
,
1672 "replace string is too long");
1675 result_len
= product
+ self_len
;
1676 if (result_len
< 0) {
1677 PyErr_SetString(PyExc_OverflowError
,
1678 "replace string is too long");
1682 if (! (result
= (PyByteArrayObject
*)
1683 PyByteArray_FromStringAndSize(NULL
, result_len
)) )
1686 self_s
= PyByteArray_AS_STRING(self
);
1687 result_s
= PyByteArray_AS_STRING(result
);
1689 /* TODO: special case single character, which doesn't need memcpy */
1691 /* Lay the first one down (guaranteed this will occur) */
1692 Py_MEMCPY(result_s
, to_s
, to_len
);
1696 for (i
=0; i
<count
; i
++) {
1697 *result_s
++ = *self_s
++;
1698 Py_MEMCPY(result_s
, to_s
, to_len
);
1702 /* Copy the rest of the original string */
1703 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
1708 /* Special case for deleting a single character */
1709 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1710 Py_LOCAL(PyByteArrayObject
*)
1711 replace_delete_single_character(PyByteArrayObject
*self
,
1712 char from_c
, Py_ssize_t maxcount
)
1714 char *self_s
, *result_s
;
1715 char *start
, *next
, *end
;
1716 Py_ssize_t self_len
, result_len
;
1718 PyByteArrayObject
*result
;
1720 self_len
= PyByteArray_GET_SIZE(self
);
1721 self_s
= PyByteArray_AS_STRING(self
);
1723 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
1725 return return_self(self
);
1728 result_len
= self_len
- count
; /* from_len == 1 */
1729 assert(result_len
>=0);
1731 if ( (result
= (PyByteArrayObject
*)
1732 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1734 result_s
= PyByteArray_AS_STRING(result
);
1737 end
= self_s
+ self_len
;
1738 while (count
-- > 0) {
1739 next
= findchar(start
, end
-start
, from_c
);
1742 Py_MEMCPY(result_s
, start
, next
-start
);
1743 result_s
+= (next
-start
);
1746 Py_MEMCPY(result_s
, start
, end
-start
);
1751 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1753 Py_LOCAL(PyByteArrayObject
*)
1754 replace_delete_substring(PyByteArrayObject
*self
,
1755 const char *from_s
, Py_ssize_t from_len
,
1756 Py_ssize_t maxcount
)
1758 char *self_s
, *result_s
;
1759 char *start
, *next
, *end
;
1760 Py_ssize_t self_len
, result_len
;
1761 Py_ssize_t count
, offset
;
1762 PyByteArrayObject
*result
;
1764 self_len
= PyByteArray_GET_SIZE(self
);
1765 self_s
= PyByteArray_AS_STRING(self
);
1767 count
= countstring(self_s
, self_len
,
1774 return return_self(self
);
1777 result_len
= self_len
- (count
* from_len
);
1778 assert (result_len
>=0);
1780 if ( (result
= (PyByteArrayObject
*)
1781 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1784 result_s
= PyByteArray_AS_STRING(result
);
1787 end
= self_s
+ self_len
;
1788 while (count
-- > 0) {
1789 offset
= findstring(start
, end
-start
,
1791 0, end
-start
, FORWARD
);
1794 next
= start
+ offset
;
1796 Py_MEMCPY(result_s
, start
, next
-start
);
1798 result_s
+= (next
-start
);
1799 start
= next
+from_len
;
1801 Py_MEMCPY(result_s
, start
, end
-start
);
1805 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1806 Py_LOCAL(PyByteArrayObject
*)
1807 replace_single_character_in_place(PyByteArrayObject
*self
,
1808 char from_c
, char to_c
,
1809 Py_ssize_t maxcount
)
1811 char *self_s
, *result_s
, *start
, *end
, *next
;
1812 Py_ssize_t self_len
;
1813 PyByteArrayObject
*result
;
1815 /* The result string will be the same size */
1816 self_s
= PyByteArray_AS_STRING(self
);
1817 self_len
= PyByteArray_GET_SIZE(self
);
1819 next
= findchar(self_s
, self_len
, from_c
);
1822 /* No matches; return the original bytes */
1823 return return_self(self
);
1826 /* Need to make a new bytes */
1827 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, self_len
);
1830 result_s
= PyByteArray_AS_STRING(result
);
1831 Py_MEMCPY(result_s
, self_s
, self_len
);
1833 /* change everything in-place, starting with this one */
1834 start
= result_s
+ (next
-self_s
);
1837 end
= result_s
+ self_len
;
1839 while (--maxcount
> 0) {
1840 next
= findchar(start
, end
-start
, from_c
);
1850 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1851 Py_LOCAL(PyByteArrayObject
*)
1852 replace_substring_in_place(PyByteArrayObject
*self
,
1853 const char *from_s
, Py_ssize_t from_len
,
1854 const char *to_s
, Py_ssize_t to_len
,
1855 Py_ssize_t maxcount
)
1857 char *result_s
, *start
, *end
;
1859 Py_ssize_t self_len
, offset
;
1860 PyByteArrayObject
*result
;
1862 /* The result bytes will be the same size */
1864 self_s
= PyByteArray_AS_STRING(self
);
1865 self_len
= PyByteArray_GET_SIZE(self
);
1867 offset
= findstring(self_s
, self_len
,
1869 0, self_len
, FORWARD
);
1871 /* No matches; return the original bytes */
1872 return return_self(self
);
1875 /* Need to make a new bytes */
1876 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, self_len
);
1879 result_s
= PyByteArray_AS_STRING(result
);
1880 Py_MEMCPY(result_s
, self_s
, self_len
);
1882 /* change everything in-place, starting with this one */
1883 start
= result_s
+ offset
;
1884 Py_MEMCPY(start
, to_s
, from_len
);
1886 end
= result_s
+ self_len
;
1888 while ( --maxcount
> 0) {
1889 offset
= findstring(start
, end
-start
,
1891 0, end
-start
, FORWARD
);
1894 Py_MEMCPY(start
+offset
, to_s
, from_len
);
1895 start
+= offset
+from_len
;
1901 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1902 Py_LOCAL(PyByteArrayObject
*)
1903 replace_single_character(PyByteArrayObject
*self
,
1905 const char *to_s
, Py_ssize_t to_len
,
1906 Py_ssize_t maxcount
)
1908 char *self_s
, *result_s
;
1909 char *start
, *next
, *end
;
1910 Py_ssize_t self_len
, result_len
;
1911 Py_ssize_t count
, product
;
1912 PyByteArrayObject
*result
;
1914 self_s
= PyByteArray_AS_STRING(self
);
1915 self_len
= PyByteArray_GET_SIZE(self
);
1917 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
1919 /* no matches, return unchanged */
1920 return return_self(self
);
1923 /* use the difference between current and new, hence the "-1" */
1924 /* result_len = self_len + count * (to_len-1) */
1925 product
= count
* (to_len
-1);
1926 if (product
/ (to_len
-1) != count
) {
1927 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1930 result_len
= self_len
+ product
;
1931 if (result_len
< 0) {
1932 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1936 if ( (result
= (PyByteArrayObject
*)
1937 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1939 result_s
= PyByteArray_AS_STRING(result
);
1942 end
= self_s
+ self_len
;
1943 while (count
-- > 0) {
1944 next
= findchar(start
, end
-start
, from_c
);
1948 if (next
== start
) {
1949 /* replace with the 'to' */
1950 Py_MEMCPY(result_s
, to_s
, to_len
);
1954 /* copy the unchanged old then the 'to' */
1955 Py_MEMCPY(result_s
, start
, next
-start
);
1956 result_s
+= (next
-start
);
1957 Py_MEMCPY(result_s
, to_s
, to_len
);
1962 /* Copy the remainder of the remaining bytes */
1963 Py_MEMCPY(result_s
, start
, end
-start
);
1968 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1969 Py_LOCAL(PyByteArrayObject
*)
1970 replace_substring(PyByteArrayObject
*self
,
1971 const char *from_s
, Py_ssize_t from_len
,
1972 const char *to_s
, Py_ssize_t to_len
,
1973 Py_ssize_t maxcount
)
1975 char *self_s
, *result_s
;
1976 char *start
, *next
, *end
;
1977 Py_ssize_t self_len
, result_len
;
1978 Py_ssize_t count
, offset
, product
;
1979 PyByteArrayObject
*result
;
1981 self_s
= PyByteArray_AS_STRING(self
);
1982 self_len
= PyByteArray_GET_SIZE(self
);
1984 count
= countstring(self_s
, self_len
,
1986 0, self_len
, FORWARD
, maxcount
);
1988 /* no matches, return unchanged */
1989 return return_self(self
);
1992 /* Check for overflow */
1993 /* result_len = self_len + count * (to_len-from_len) */
1994 product
= count
* (to_len
-from_len
);
1995 if (product
/ (to_len
-from_len
) != count
) {
1996 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1999 result_len
= self_len
+ product
;
2000 if (result_len
< 0) {
2001 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
2005 if ( (result
= (PyByteArrayObject
*)
2006 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
2008 result_s
= PyByteArray_AS_STRING(result
);
2011 end
= self_s
+ self_len
;
2012 while (count
-- > 0) {
2013 offset
= findstring(start
, end
-start
,
2015 0, end
-start
, FORWARD
);
2018 next
= start
+offset
;
2019 if (next
== start
) {
2020 /* replace with the 'to' */
2021 Py_MEMCPY(result_s
, to_s
, to_len
);
2025 /* copy the unchanged old then the 'to' */
2026 Py_MEMCPY(result_s
, start
, next
-start
);
2027 result_s
+= (next
-start
);
2028 Py_MEMCPY(result_s
, to_s
, to_len
);
2030 start
= next
+from_len
;
2033 /* Copy the remainder of the remaining bytes */
2034 Py_MEMCPY(result_s
, start
, end
-start
);
2040 Py_LOCAL(PyByteArrayObject
*)
2041 replace(PyByteArrayObject
*self
,
2042 const char *from_s
, Py_ssize_t from_len
,
2043 const char *to_s
, Py_ssize_t to_len
,
2044 Py_ssize_t maxcount
)
2047 maxcount
= PY_SSIZE_T_MAX
;
2048 } else if (maxcount
== 0 || PyByteArray_GET_SIZE(self
) == 0) {
2049 /* nothing to do; return the original bytes */
2050 return return_self(self
);
2053 if (maxcount
== 0 ||
2054 (from_len
== 0 && to_len
== 0)) {
2055 /* nothing to do; return the original bytes */
2056 return return_self(self
);
2059 /* Handle zero-length special cases */
2061 if (from_len
== 0) {
2062 /* insert the 'to' bytes everywhere. */
2063 /* >>> "Python".replace("", ".") */
2064 /* '.P.y.t.h.o.n.' */
2065 return replace_interleave(self
, to_s
, to_len
, maxcount
);
2068 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2069 /* point for an empty self bytes to generate a non-empty bytes */
2070 /* Special case so the remaining code always gets a non-empty bytes */
2071 if (PyByteArray_GET_SIZE(self
) == 0) {
2072 return return_self(self
);
2076 /* delete all occurrences of 'from' bytes */
2077 if (from_len
== 1) {
2078 return replace_delete_single_character(
2079 self
, from_s
[0], maxcount
);
2081 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
2085 /* Handle special case where both bytes have the same length */
2087 if (from_len
== to_len
) {
2088 if (from_len
== 1) {
2089 return replace_single_character_in_place(
2095 return replace_substring_in_place(
2096 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2100 /* Otherwise use the more generic algorithms */
2101 if (from_len
== 1) {
2102 return replace_single_character(self
, from_s
[0],
2103 to_s
, to_len
, maxcount
);
2105 /* len('from')>=2, len('to')>=1 */
2106 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2111 PyDoc_STRVAR(replace__doc__
,
2112 "B.replace(old, new[, count]) -> bytes\n\
2114 Return a copy of B with all occurrences of subsection\n\
2115 old replaced by new. If the optional argument count is\n\
2116 given, only the first count occurrences are replaced.");
2119 bytes_replace(PyByteArrayObject
*self
, PyObject
*args
)
2121 Py_ssize_t count
= -1;
2122 PyObject
*from
, *to
, *res
;
2123 Py_buffer vfrom
, vto
;
2125 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
2128 if (_getbuffer(from
, &vfrom
) < 0)
2130 if (_getbuffer(to
, &vto
) < 0) {
2131 PyBuffer_Release(&vfrom
);
2135 res
= (PyObject
*)replace((PyByteArrayObject
*) self
,
2136 vfrom
.buf
, vfrom
.len
,
2137 vto
.buf
, vto
.len
, count
);
2139 PyBuffer_Release(&vfrom
);
2140 PyBuffer_Release(&vto
);
2145 /* Overallocate the initial list to reduce the number of reallocs for small
2146 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
2147 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
2148 text (roughly 11 words per line) and field delimited data (usually 1-10
2149 fields). For large strings the split algorithms are bandwidth limited
2150 so increasing the preallocation likely will not improve things.*/
2152 #define MAX_PREALLOC 12
2154 /* 5 splits gives 6 elements */
2155 #define PREALLOC_SIZE(maxsplit) \
2156 (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
2158 #define SPLIT_APPEND(data, left, right) \
2159 str = PyByteArray_FromStringAndSize((data) + (left), \
2160 (right) - (left)); \
2163 if (PyList_Append(list, str)) { \
2170 #define SPLIT_ADD(data, left, right) { \
2171 str = PyByteArray_FromStringAndSize((data) + (left), \
2172 (right) - (left)); \
2175 if (count < MAX_PREALLOC) { \
2176 PyList_SET_ITEM(list, count, str); \
2178 if (PyList_Append(list, str)) { \
2187 /* Always force the list to the expected size. */
2188 #define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2191 Py_LOCAL_INLINE(PyObject
*)
2192 split_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
2194 register Py_ssize_t i
, j
, count
= 0;
2196 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2202 while ((j
< len
) && (maxcount
-- > 0)) {
2203 for(; j
< len
; j
++) {
2204 /* I found that using memchr makes no difference */
2213 SPLIT_ADD(s
, i
, len
);
2215 FIX_PREALLOC_SIZE(list
);
2224 Py_LOCAL_INLINE(PyObject
*)
2225 split_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxcount
)
2227 register Py_ssize_t i
, j
, count
= 0;
2229 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2234 for (i
= j
= 0; i
< len
; ) {
2236 while (i
< len
&& ISSPACE(s
[i
]))
2239 while (i
< len
&& !ISSPACE(s
[i
]))
2242 if (maxcount
-- <= 0)
2245 while (i
< len
&& ISSPACE(s
[i
]))
2251 SPLIT_ADD(s
, j
, len
);
2253 FIX_PREALLOC_SIZE(list
);
2261 PyDoc_STRVAR(split__doc__
,
2262 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2264 Return a list of the sections in B, using sep as the delimiter.\n\
2265 If sep is not given, B is split on ASCII whitespace characters\n\
2266 (space, tab, return, newline, formfeed, vertical tab).\n\
2267 If maxsplit is given, at most maxsplit splits are done.");
2270 bytes_split(PyByteArrayObject
*self
, PyObject
*args
)
2272 Py_ssize_t len
= PyByteArray_GET_SIZE(self
), n
, i
, j
;
2273 Py_ssize_t maxsplit
= -1, count
= 0;
2274 const char *s
= PyByteArray_AS_STRING(self
), *sub
;
2275 PyObject
*list
, *str
, *subobj
= Py_None
;
2281 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
2284 maxsplit
= PY_SSIZE_T_MAX
;
2286 if (subobj
== Py_None
)
2287 return split_whitespace(s
, len
, maxsplit
);
2289 if (_getbuffer(subobj
, &vsub
) < 0)
2295 PyErr_SetString(PyExc_ValueError
, "empty separator");
2296 PyBuffer_Release(&vsub
);
2300 list
= split_char(s
, len
, sub
[0], maxsplit
);
2301 PyBuffer_Release(&vsub
);
2305 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
2307 PyBuffer_Release(&vsub
);
2313 while (maxsplit
-- > 0) {
2314 pos
= fastsearch(s
+i
, len
-i
, sub
, n
, FAST_SEARCH
);
2323 while ((j
+n
<= len
) && (maxsplit
-- > 0)) {
2324 for (; j
+n
<= len
; j
++) {
2325 if (Py_STRING_MATCH(s
, j
, sub
, n
)) {
2333 SPLIT_ADD(s
, i
, len
);
2334 FIX_PREALLOC_SIZE(list
);
2335 PyBuffer_Release(&vsub
);
2340 PyBuffer_Release(&vsub
);
2344 /* stringlib's partition shares nullbytes in some cases.
2345 undo this, we don't want the nullbytes to be shared. */
2347 make_nullbytes_unique(PyObject
*result
)
2349 if (result
!= NULL
) {
2351 assert(PyTuple_Check(result
));
2352 assert(PyTuple_GET_SIZE(result
) == 3);
2353 for (i
= 0; i
< 3; i
++) {
2354 if (PyTuple_GET_ITEM(result
, i
) == (PyObject
*)nullbytes
) {
2355 PyObject
*new = PyByteArray_FromStringAndSize(NULL
, 0);
2361 Py_DECREF(nullbytes
);
2362 PyTuple_SET_ITEM(result
, i
, new);
2369 PyDoc_STRVAR(partition__doc__
,
2370 "B.partition(sep) -> (head, sep, tail)\n\
2372 Searches for the separator sep in B, and returns the part before it,\n\
2373 the separator itself, and the part after it. If the separator is not\n\
2374 found, returns B and two empty bytearray objects.");
2377 bytes_partition(PyByteArrayObject
*self
, PyObject
*sep_obj
)
2379 PyObject
*bytesep
, *result
;
2381 bytesep
= PyByteArray_FromObject(sep_obj
);
2385 result
= stringlib_partition(
2387 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
2389 PyByteArray_AS_STRING(bytesep
), PyByteArray_GET_SIZE(bytesep
)
2393 return make_nullbytes_unique(result
);
2396 PyDoc_STRVAR(rpartition__doc__
,
2397 "B.rpartition(sep) -> (tail, sep, head)\n\
2399 Searches for the separator sep in B, starting at the end of B,\n\
2400 and returns the part before it, the separator itself, and the\n\
2401 part after it. If the separator is not found, returns two empty\n\
2402 bytearray objects and B.");
2405 bytes_rpartition(PyByteArrayObject
*self
, PyObject
*sep_obj
)
2407 PyObject
*bytesep
, *result
;
2409 bytesep
= PyByteArray_FromObject(sep_obj
);
2413 result
= stringlib_rpartition(
2415 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
2417 PyByteArray_AS_STRING(bytesep
), PyByteArray_GET_SIZE(bytesep
)
2421 return make_nullbytes_unique(result
);
2424 Py_LOCAL_INLINE(PyObject
*)
2425 rsplit_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
2427 register Py_ssize_t i
, j
, count
=0;
2429 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2435 while ((i
>= 0) && (maxcount
-- > 0)) {
2436 for (; i
>= 0; i
--) {
2438 SPLIT_ADD(s
, i
+ 1, j
+ 1);
2445 SPLIT_ADD(s
, 0, j
+ 1);
2447 FIX_PREALLOC_SIZE(list
);
2448 if (PyList_Reverse(list
) < 0)
2458 Py_LOCAL_INLINE(PyObject
*)
2459 rsplit_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxcount
)
2461 register Py_ssize_t i
, j
, count
= 0;
2463 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2468 for (i
= j
= len
- 1; i
>= 0; ) {
2470 while (i
>= 0 && ISSPACE(s
[i
]))
2473 while (i
>= 0 && !ISSPACE(s
[i
]))
2476 if (maxcount
-- <= 0)
2478 SPLIT_ADD(s
, i
+ 1, j
+ 1);
2479 while (i
>= 0 && ISSPACE(s
[i
]))
2485 SPLIT_ADD(s
, 0, j
+ 1);
2487 FIX_PREALLOC_SIZE(list
);
2488 if (PyList_Reverse(list
) < 0)
2498 PyDoc_STRVAR(rsplit__doc__
,
2499 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2501 Return a list of the sections in B, using sep as the delimiter,\n\
2502 starting at the end of B and working to the front.\n\
2503 If sep is not given, B is split on ASCII whitespace characters\n\
2504 (space, tab, return, newline, formfeed, vertical tab).\n\
2505 If maxsplit is given, at most maxsplit splits are done.");
2508 bytes_rsplit(PyByteArrayObject
*self
, PyObject
*args
)
2510 Py_ssize_t len
= PyByteArray_GET_SIZE(self
), n
, i
, j
;
2511 Py_ssize_t maxsplit
= -1, count
= 0;
2512 const char *s
= PyByteArray_AS_STRING(self
), *sub
;
2513 PyObject
*list
, *str
, *subobj
= Py_None
;
2516 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
2519 maxsplit
= PY_SSIZE_T_MAX
;
2521 if (subobj
== Py_None
)
2522 return rsplit_whitespace(s
, len
, maxsplit
);
2524 if (_getbuffer(subobj
, &vsub
) < 0)
2530 PyErr_SetString(PyExc_ValueError
, "empty separator");
2531 PyBuffer_Release(&vsub
);
2535 list
= rsplit_char(s
, len
, sub
[0], maxsplit
);
2536 PyBuffer_Release(&vsub
);
2540 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
2542 PyBuffer_Release(&vsub
);
2549 while ( (i
>= 0) && (maxsplit
-- > 0) ) {
2551 if (Py_STRING_MATCH(s
, i
, sub
, n
)) {
2552 SPLIT_ADD(s
, i
+ n
, j
);
2560 FIX_PREALLOC_SIZE(list
);
2561 if (PyList_Reverse(list
) < 0)
2563 PyBuffer_Release(&vsub
);
2568 PyBuffer_Release(&vsub
);
2572 PyDoc_STRVAR(reverse__doc__
,
2573 "B.reverse() -> None\n\
2575 Reverse the order of the values in B in place.");
2577 bytes_reverse(PyByteArrayObject
*self
, PyObject
*unused
)
2579 char swap
, *head
, *tail
;
2580 Py_ssize_t i
, j
, n
= Py_SIZE(self
);
2583 head
= self
->ob_bytes
;
2584 tail
= head
+ n
- 1;
2585 for (i
= 0; i
< j
; i
++) {
2594 PyDoc_STRVAR(insert__doc__
,
2595 "B.insert(index, int) -> None\n\
2597 Insert a single item into the bytearray before the given index.");
2599 bytes_insert(PyByteArrayObject
*self
, PyObject
*args
)
2603 Py_ssize_t where
, n
= Py_SIZE(self
);
2605 if (!PyArg_ParseTuple(args
, "nO:insert", &where
, &value
))
2608 if (n
== PY_SSIZE_T_MAX
) {
2609 PyErr_SetString(PyExc_OverflowError
,
2610 "cannot add more objects to bytes");
2613 if (!_getbytevalue(value
, &ival
))
2615 if (PyByteArray_Resize((PyObject
*)self
, n
+ 1) < 0)
2625 memmove(self
->ob_bytes
+ where
+ 1, self
->ob_bytes
+ where
, n
- where
);
2626 self
->ob_bytes
[where
] = ival
;
2631 PyDoc_STRVAR(append__doc__
,
2632 "B.append(int) -> None\n\
2634 Append a single item to the end of B.");
2636 bytes_append(PyByteArrayObject
*self
, PyObject
*arg
)
2639 Py_ssize_t n
= Py_SIZE(self
);
2641 if (! _getbytevalue(arg
, &value
))
2643 if (n
== PY_SSIZE_T_MAX
) {
2644 PyErr_SetString(PyExc_OverflowError
,
2645 "cannot add more objects to bytes");
2648 if (PyByteArray_Resize((PyObject
*)self
, n
+ 1) < 0)
2651 self
->ob_bytes
[n
] = value
;
2656 PyDoc_STRVAR(extend__doc__
,
2657 "B.extend(iterable int) -> None\n\
2659 Append all the elements from the iterator or sequence to the\n\
2662 bytes_extend(PyByteArrayObject
*self
, PyObject
*arg
)
2664 PyObject
*it
, *item
, *bytes_obj
;
2665 Py_ssize_t buf_size
= 0, len
= 0;
2669 /* bytes_setslice code only accepts something supporting PEP 3118. */
2670 if (PyObject_CheckBuffer(arg
)) {
2671 if (bytes_setslice(self
, Py_SIZE(self
), Py_SIZE(self
), arg
) == -1)
2677 it
= PyObject_GetIter(arg
);
2681 /* Try to determine the length of the argument. 32 is arbitrary. */
2682 buf_size
= _PyObject_LengthHint(arg
, 32);
2684 bytes_obj
= PyByteArray_FromStringAndSize(NULL
, buf_size
);
2685 if (bytes_obj
== NULL
)
2687 buf
= PyByteArray_AS_STRING(bytes_obj
);
2689 while ((item
= PyIter_Next(it
)) != NULL
) {
2690 if (! _getbytevalue(item
, &value
)) {
2693 Py_DECREF(bytes_obj
);
2699 if (len
>= buf_size
) {
2700 buf_size
= len
+ (len
>> 1) + 1;
2701 if (PyByteArray_Resize((PyObject
*)bytes_obj
, buf_size
) < 0) {
2703 Py_DECREF(bytes_obj
);
2706 /* Recompute the `buf' pointer, since the resizing operation may
2707 have invalidated it. */
2708 buf
= PyByteArray_AS_STRING(bytes_obj
);
2713 /* Resize down to exact size. */
2714 if (PyByteArray_Resize((PyObject
*)bytes_obj
, len
) < 0) {
2715 Py_DECREF(bytes_obj
);
2719 if (bytes_setslice(self
, Py_SIZE(self
), Py_SIZE(self
), bytes_obj
) == -1)
2721 Py_DECREF(bytes_obj
);
2726 PyDoc_STRVAR(pop__doc__
,
2727 "B.pop([index]) -> int\n\
2729 Remove and return a single item from B. If no index\n\
2730 argument is given, will pop the last value.");
2732 bytes_pop(PyByteArrayObject
*self
, PyObject
*args
)
2735 Py_ssize_t where
= -1, n
= Py_SIZE(self
);
2737 if (!PyArg_ParseTuple(args
, "|n:pop", &where
))
2741 PyErr_SetString(PyExc_OverflowError
,
2742 "cannot pop an empty bytes");
2746 where
+= Py_SIZE(self
);
2747 if (where
< 0 || where
>= Py_SIZE(self
)) {
2748 PyErr_SetString(PyExc_IndexError
, "pop index out of range");
2752 value
= self
->ob_bytes
[where
];
2753 memmove(self
->ob_bytes
+ where
, self
->ob_bytes
+ where
+ 1, n
- where
);
2754 if (PyByteArray_Resize((PyObject
*)self
, n
- 1) < 0)
2757 return PyInt_FromLong(value
);
2760 PyDoc_STRVAR(remove__doc__
,
2761 "B.remove(int) -> None\n\
2763 Remove the first occurance of a value in B.");
2765 bytes_remove(PyByteArrayObject
*self
, PyObject
*arg
)
2768 Py_ssize_t where
, n
= Py_SIZE(self
);
2770 if (! _getbytevalue(arg
, &value
))
2773 for (where
= 0; where
< n
; where
++) {
2774 if (self
->ob_bytes
[where
] == value
)
2778 PyErr_SetString(PyExc_ValueError
, "value not found in bytes");
2782 memmove(self
->ob_bytes
+ where
, self
->ob_bytes
+ where
+ 1, n
- where
);
2783 if (PyByteArray_Resize((PyObject
*)self
, n
- 1) < 0)
2789 /* XXX These two helpers could be optimized if argsize == 1 */
2792 lstrip_helper(unsigned char *myptr
, Py_ssize_t mysize
,
2793 void *argptr
, Py_ssize_t argsize
)
2796 while (i
< mysize
&& memchr(argptr
, myptr
[i
], argsize
))
2802 rstrip_helper(unsigned char *myptr
, Py_ssize_t mysize
,
2803 void *argptr
, Py_ssize_t argsize
)
2805 Py_ssize_t i
= mysize
- 1;
2806 while (i
>= 0 && memchr(argptr
, myptr
[i
], argsize
))
2811 PyDoc_STRVAR(strip__doc__
,
2812 "B.strip([bytes]) -> bytearray\n\
2814 Strip leading and trailing bytes contained in the argument.\n\
2815 If the argument is omitted, strip ASCII whitespace.");
2817 bytes_strip(PyByteArrayObject
*self
, PyObject
*args
)
2819 Py_ssize_t left
, right
, mysize
, argsize
;
2820 void *myptr
, *argptr
;
2821 PyObject
*arg
= Py_None
;
2823 if (!PyArg_ParseTuple(args
, "|O:strip", &arg
))
2825 if (arg
== Py_None
) {
2826 argptr
= "\t\n\r\f\v ";
2830 if (_getbuffer(arg
, &varg
) < 0)
2835 myptr
= self
->ob_bytes
;
2836 mysize
= Py_SIZE(self
);
2837 left
= lstrip_helper(myptr
, mysize
, argptr
, argsize
);
2841 right
= rstrip_helper(myptr
, mysize
, argptr
, argsize
);
2843 PyBuffer_Release(&varg
);
2844 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2847 PyDoc_STRVAR(lstrip__doc__
,
2848 "B.lstrip([bytes]) -> bytearray\n\
2850 Strip leading bytes contained in the argument.\n\
2851 If the argument is omitted, strip leading ASCII whitespace.");
2853 bytes_lstrip(PyByteArrayObject
*self
, PyObject
*args
)
2855 Py_ssize_t left
, right
, mysize
, argsize
;
2856 void *myptr
, *argptr
;
2857 PyObject
*arg
= Py_None
;
2859 if (!PyArg_ParseTuple(args
, "|O:lstrip", &arg
))
2861 if (arg
== Py_None
) {
2862 argptr
= "\t\n\r\f\v ";
2866 if (_getbuffer(arg
, &varg
) < 0)
2871 myptr
= self
->ob_bytes
;
2872 mysize
= Py_SIZE(self
);
2873 left
= lstrip_helper(myptr
, mysize
, argptr
, argsize
);
2876 PyBuffer_Release(&varg
);
2877 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2880 PyDoc_STRVAR(rstrip__doc__
,
2881 "B.rstrip([bytes]) -> bytearray\n\
2883 Strip trailing bytes contained in the argument.\n\
2884 If the argument is omitted, strip trailing ASCII whitespace.");
2886 bytes_rstrip(PyByteArrayObject
*self
, PyObject
*args
)
2888 Py_ssize_t left
, right
, mysize
, argsize
;
2889 void *myptr
, *argptr
;
2890 PyObject
*arg
= Py_None
;
2892 if (!PyArg_ParseTuple(args
, "|O:rstrip", &arg
))
2894 if (arg
== Py_None
) {
2895 argptr
= "\t\n\r\f\v ";
2899 if (_getbuffer(arg
, &varg
) < 0)
2904 myptr
= self
->ob_bytes
;
2905 mysize
= Py_SIZE(self
);
2907 right
= rstrip_helper(myptr
, mysize
, argptr
, argsize
);
2909 PyBuffer_Release(&varg
);
2910 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2913 PyDoc_STRVAR(decode_doc
,
2914 "B.decode([encoding[, errors]]) -> unicode object.\n\
2916 Decodes B using the codec registered for encoding. encoding defaults\n\
2917 to the default encoding. errors may be given to set a different error\n\
2918 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2919 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2920 as well as any other name registered with codecs.register_error that is\n\
2921 able to handle UnicodeDecodeErrors.");
2924 bytes_decode(PyObject
*self
, PyObject
*args
)
2926 const char *encoding
= NULL
;
2927 const char *errors
= NULL
;
2929 if (!PyArg_ParseTuple(args
, "|ss:decode", &encoding
, &errors
))
2931 if (encoding
== NULL
)
2932 encoding
= PyUnicode_GetDefaultEncoding();
2933 return PyCodec_Decode(self
, encoding
, errors
);
2936 PyDoc_STRVAR(alloc_doc
,
2937 "B.__alloc__() -> int\n\
2939 Returns the number of bytes actually allocated.");
2942 bytes_alloc(PyByteArrayObject
*self
)
2944 return PyInt_FromSsize_t(self
->ob_alloc
);
2947 PyDoc_STRVAR(join_doc
,
2948 "B.join(iterable_of_bytes) -> bytes\n\
2950 Concatenates any number of bytearray objects, with B in between each pair.");
2953 bytes_join(PyByteArrayObject
*self
, PyObject
*it
)
2956 Py_ssize_t mysize
= Py_SIZE(self
);
2960 Py_ssize_t totalsize
= 0;
2964 seq
= PySequence_Fast(it
, "can only join an iterable");
2967 n
= PySequence_Fast_GET_SIZE(seq
);
2968 items
= PySequence_Fast_ITEMS(seq
);
2970 /* Compute the total size, and check that they are all bytes */
2971 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2972 for (i
= 0; i
< n
; i
++) {
2973 PyObject
*obj
= items
[i
];
2974 if (!PyByteArray_Check(obj
) && !PyBytes_Check(obj
)) {
2975 PyErr_Format(PyExc_TypeError
,
2976 "can only join an iterable of bytes "
2977 "(item %ld has type '%.100s')",
2978 /* XXX %ld isn't right on Win64 */
2979 (long)i
, Py_TYPE(obj
)->tp_name
);
2983 totalsize
+= mysize
;
2984 totalsize
+= Py_SIZE(obj
);
2985 if (totalsize
< 0) {
2991 /* Allocate the result, and copy the bytes */
2992 result
= PyByteArray_FromStringAndSize(NULL
, totalsize
);
2995 dest
= PyByteArray_AS_STRING(result
);
2996 for (i
= 0; i
< n
; i
++) {
2997 PyObject
*obj
= items
[i
];
2998 Py_ssize_t size
= Py_SIZE(obj
);
3000 if (PyByteArray_Check(obj
))
3001 buf
= PyByteArray_AS_STRING(obj
);
3003 buf
= PyBytes_AS_STRING(obj
);
3005 memcpy(dest
, self
->ob_bytes
, mysize
);
3008 memcpy(dest
, buf
, size
);
3016 /* Error handling */
3022 PyDoc_STRVAR(fromhex_doc
,
3023 "bytearray.fromhex(string) -> bytearray\n\
3025 Create a bytearray object from a string of hexadecimal numbers.\n\
3026 Spaces between two numbers are accepted.\n\
3027 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3030 hex_digit_to_int(Py_UNICODE c
)
3039 if (c
>= 'a' && c
<= 'f')
3040 return c
- 'a' + 10;
3046 bytes_fromhex(PyObject
*cls
, PyObject
*args
)
3048 PyObject
*newbytes
, *hexobj
;
3051 Py_ssize_t hexlen
, byteslen
, i
, j
;
3054 if (!PyArg_ParseTuple(args
, "U:fromhex", &hexobj
))
3056 assert(PyUnicode_Check(hexobj
));
3057 hexlen
= PyUnicode_GET_SIZE(hexobj
);
3058 hex
= PyUnicode_AS_UNICODE(hexobj
);
3059 byteslen
= hexlen
/2; /* This overestimates if there are spaces */
3060 newbytes
= PyByteArray_FromStringAndSize(NULL
, byteslen
);
3063 buf
= PyByteArray_AS_STRING(newbytes
);
3064 for (i
= j
= 0; i
< hexlen
; i
+= 2) {
3065 /* skip over spaces in the input */
3066 while (hex
[i
] == ' ')
3070 top
= hex_digit_to_int(hex
[i
]);
3071 bot
= hex_digit_to_int(hex
[i
+1]);
3072 if (top
== -1 || bot
== -1) {
3073 PyErr_Format(PyExc_ValueError
,
3074 "non-hexadecimal number found in "
3075 "fromhex() arg at position %zd", i
);
3078 buf
[j
++] = (top
<< 4) + bot
;
3080 if (PyByteArray_Resize(newbytes
, j
) < 0)
3085 Py_DECREF(newbytes
);
3089 PyDoc_STRVAR(reduce_doc
, "Return state information for pickling.");
3092 bytes_reduce(PyByteArrayObject
*self
)
3094 PyObject
*latin1
, *dict
;
3096 latin1
= PyUnicode_DecodeLatin1(self
->ob_bytes
,
3097 Py_SIZE(self
), NULL
);
3099 latin1
= PyUnicode_FromString("");
3101 dict
= PyObject_GetAttrString((PyObject
*)self
, "__dict__");
3108 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self
), latin1
, "latin-1", dict
);
3111 PyDoc_STRVAR(sizeof_doc
,
3112 "B.__sizeof__() -> int\n\
3114 Returns the size of B in memory, in bytes");
3116 bytes_sizeof(PyByteArrayObject
*self
)
3120 res
= sizeof(PyByteArrayObject
) + self
->ob_alloc
* sizeof(char);
3121 return PyInt_FromSsize_t(res
);
/*
 * Sequence-protocol slot table for bytearray.  Each visible slot is labelled
 * by its trailing comment; sq_ass_slice is left empty (0).
 *
 * NOTE(review): numbered line 3129 (between sq_item and sq_ass_item,
 * presumably the sq_slice slot) and the closing line of the initializer are
 * absent from this listing.
 */
3124 static PySequenceMethods bytes_as_sequence
= {
3125 (lenfunc
)bytes_length
, /* sq_length */
3126 (binaryfunc
)PyByteArray_Concat
, /* sq_concat */
3127 (ssizeargfunc
)bytes_repeat
, /* sq_repeat */
3128 (ssizeargfunc
)bytes_getitem
, /* sq_item */
3130 (ssizeobjargproc
)bytes_setitem
, /* sq_ass_item */
3131 0, /* sq_ass_slice */
3132 (objobjproc
)bytes_contains
, /* sq_contains */
3133 (binaryfunc
)bytes_iconcat
, /* sq_inplace_concat */
3134 (ssizeargfunc
)bytes_irepeat
, /* sq_inplace_repeat */
/*
 * Mapping-protocol slot table for bytearray, in PyMappingMethods order:
 * length, subscript read (bytes_subscript), and subscript
 * assignment (bytes_ass_subscript).  The length handler is the same
 * bytes_length used by the sequence table.
 */
3137 static PyMappingMethods bytes_as_mapping
= {
3138 (lenfunc
)bytes_length
,
3139 (binaryfunc
)bytes_subscript
,
3140 (objobjargproc
)bytes_ass_subscript
,
/*
 * Buffer-protocol slot table for bytearray.  The first four entries are the
 * old-style handlers (read buffer, write buffer, segment count, char
 * buffer); the last two are the getbuffer/releasebuffer pair of the newer
 * Py_buffer interface.
 */
3143 static PyBufferProcs bytes_as_buffer
= {
3144 (readbufferproc
)bytes_buffer_getreadbuf
,
3145 (writebufferproc
)bytes_buffer_getwritebuf
,
3146 (segcountproc
)bytes_buffer_getsegcount
,
3147 (charbufferproc
)bytes_buffer_getcharbuf
,
3148 (getbufferproc
)bytes_getbuffer
,
3149 (releasebufferproc
)bytes_releasebuffer
,
/*
 * Entries of the bytearray method table (referenced as `bytes_methods` by
 * the tp_methods slot of PyByteArray_Type).  Each entry is
 *   {python_name, C implementation, calling-convention flags, docstring}.
 * Entries implemented by stringlib_* functions are shared helpers; the
 * bytes_* functions are specific to this file.  Entries are listed in
 * alphabetical order of the Python-visible name.
 *
 * NOTE(review): the line declaring the array and the terminating sentinel
 * are absent from this listing, as are the docstring lines of a few
 * entries (flagged individually below).
 */
3154 {"__alloc__", (PyCFunction
)bytes_alloc
, METH_NOARGS
, alloc_doc
},
3155 {"__reduce__", (PyCFunction
)bytes_reduce
, METH_NOARGS
, reduce_doc
},
3156 {"__sizeof__", (PyCFunction
)bytes_sizeof
, METH_NOARGS
, sizeof_doc
},
3157 {"append", (PyCFunction
)bytes_append
, METH_O
, append__doc__
},
3158 {"capitalize", (PyCFunction
)stringlib_capitalize
, METH_NOARGS
,
3159 _Py_capitalize__doc__
},
3160 {"center", (PyCFunction
)stringlib_center
, METH_VARARGS
, center__doc__
},
3161 {"count", (PyCFunction
)bytes_count
, METH_VARARGS
, count__doc__
},
3162 {"decode", (PyCFunction
)bytes_decode
, METH_VARARGS
, decode_doc
},
3163 {"endswith", (PyCFunction
)bytes_endswith
, METH_VARARGS
, endswith__doc__
},
3164 {"expandtabs", (PyCFunction
)stringlib_expandtabs
, METH_VARARGS
,
/* NOTE(review): numbered line 3165 (the rest of the "expandtabs" entry) is absent from this listing. */
3166 {"extend", (PyCFunction
)bytes_extend
, METH_O
, extend__doc__
},
3167 {"find", (PyCFunction
)bytes_find
, METH_VARARGS
, find__doc__
},
/* fromhex is the only entry flagged METH_CLASS: it is called on the type, not an instance. */
3168 {"fromhex", (PyCFunction
)bytes_fromhex
, METH_VARARGS
|METH_CLASS
,
/* NOTE(review): numbered line 3169 (the rest of the "fromhex" entry) is absent from this listing. */
3170 {"index", (PyCFunction
)bytes_index
, METH_VARARGS
, index__doc__
},
3171 {"insert", (PyCFunction
)bytes_insert
, METH_VARARGS
, insert__doc__
},
3172 {"isalnum", (PyCFunction
)stringlib_isalnum
, METH_NOARGS
,
3173 _Py_isalnum__doc__
},
3174 {"isalpha", (PyCFunction
)stringlib_isalpha
, METH_NOARGS
,
3175 _Py_isalpha__doc__
},
3176 {"isdigit", (PyCFunction
)stringlib_isdigit
, METH_NOARGS
,
3177 _Py_isdigit__doc__
},
3178 {"islower", (PyCFunction
)stringlib_islower
, METH_NOARGS
,
3179 _Py_islower__doc__
},
3180 {"isspace", (PyCFunction
)stringlib_isspace
, METH_NOARGS
,
3181 _Py_isspace__doc__
},
3182 {"istitle", (PyCFunction
)stringlib_istitle
, METH_NOARGS
,
3183 _Py_istitle__doc__
},
3184 {"isupper", (PyCFunction
)stringlib_isupper
, METH_NOARGS
,
3185 _Py_isupper__doc__
},
3186 {"join", (PyCFunction
)bytes_join
, METH_O
, join_doc
},
3187 {"ljust", (PyCFunction
)stringlib_ljust
, METH_VARARGS
, ljust__doc__
},
3188 {"lower", (PyCFunction
)stringlib_lower
, METH_NOARGS
, _Py_lower__doc__
},
3189 {"lstrip", (PyCFunction
)bytes_lstrip
, METH_VARARGS
, lstrip__doc__
},
3190 {"partition", (PyCFunction
)bytes_partition
, METH_O
, partition__doc__
},
3191 {"pop", (PyCFunction
)bytes_pop
, METH_VARARGS
, pop__doc__
},
3192 {"remove", (PyCFunction
)bytes_remove
, METH_O
, remove__doc__
},
3193 {"replace", (PyCFunction
)bytes_replace
, METH_VARARGS
, replace__doc__
},
3194 {"reverse", (PyCFunction
)bytes_reverse
, METH_NOARGS
, reverse__doc__
},
3195 {"rfind", (PyCFunction
)bytes_rfind
, METH_VARARGS
, rfind__doc__
},
3196 {"rindex", (PyCFunction
)bytes_rindex
, METH_VARARGS
, rindex__doc__
},
3197 {"rjust", (PyCFunction
)stringlib_rjust
, METH_VARARGS
, rjust__doc__
},
3198 {"rpartition", (PyCFunction
)bytes_rpartition
, METH_O
, rpartition__doc__
},
3199 {"rsplit", (PyCFunction
)bytes_rsplit
, METH_VARARGS
, rsplit__doc__
},
3200 {"rstrip", (PyCFunction
)bytes_rstrip
, METH_VARARGS
, rstrip__doc__
},
3201 {"split", (PyCFunction
)bytes_split
, METH_VARARGS
, split__doc__
},
3202 {"splitlines", (PyCFunction
)stringlib_splitlines
, METH_VARARGS
,
/* NOTE(review): numbered line 3203 (the rest of the "splitlines" entry) is absent from this listing. */
3204 {"startswith", (PyCFunction
)bytes_startswith
, METH_VARARGS
,
/* NOTE(review): numbered line 3205 (the rest of the "startswith" entry) is absent from this listing. */
3206 {"strip", (PyCFunction
)bytes_strip
, METH_VARARGS
, strip__doc__
},
3207 {"swapcase", (PyCFunction
)stringlib_swapcase
, METH_NOARGS
,
3208 _Py_swapcase__doc__
},
3209 {"title", (PyCFunction
)stringlib_title
, METH_NOARGS
, _Py_title__doc__
},
3210 {"translate", (PyCFunction
)bytes_translate
, METH_VARARGS
,
/* NOTE(review): numbered line 3211 (the rest of the "translate" entry) is absent from this listing. */
3212 {"upper", (PyCFunction
)stringlib_upper
, METH_NOARGS
, _Py_upper__doc__
},
3213 {"zfill", (PyCFunction
)stringlib_zfill
, METH_VARARGS
, zfill__doc__
},
/*
 * Type-level docstring for bytearray (installed in the tp_doc slot of
 * PyByteArray_Type).  It lists the accepted constructor forms.
 *
 * NOTE(review): the text reads "Construct an mutable bytearray"; the article
 * should be "a".  Left untouched here because it is user-visible runtime
 * text.  Numbered lines 3222, 3228 and 3230 are absent from this listing
 * (presumably blank separator lines inside the string -- confirm).
 */
3217 PyDoc_STRVAR(bytes_doc
,
3218 "bytearray(iterable_of_ints) -> bytearray.\n\
3219 bytearray(string, encoding[, errors]) -> bytearray.\n\
3220 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3221 bytearray(memory_view) -> bytearray.\n\
3223 Construct an mutable bytearray object from:\n\
3224 - an iterable yielding integers in range(256)\n\
3225 - a text string encoded using the specified encoding\n\
3226 - a bytes or a bytearray object\n\
3227 - any object implementing the buffer API.\n\
3229 bytearray(int) -> bytearray.\n\
3231 Construct a zero-initialized bytearray of the given length.");
/* Forward declaration: bytes_iter is installed in the tp_iter slot of
   PyByteArray_Type below, but is defined only after the iterator type. */
3234 static PyObject
*bytes_iter(PyObject
*seq
);
/*
 * Type object for bytearray.
 *
 * Visible slots wire up: deallocation, repr/str, the sequence, mapping and
 * buffer tables defined above, generic attribute lookup, rich comparison,
 * iteration (bytes_iter), the method table, bytes_init as tp_init, and the
 * generic alloc/new pair with PyObject_Del as tp_free.  No number protocol,
 * weak-reference list, descriptor hooks or instance-dict offset is set
 * (those slots are 0).
 *
 * NOTE(review): several numbered lines are absent from this listing (for
 * example 3238, 3240, 3242-3245, 3250-3251, 3260, 3266-3269), so the type
 * name, item size and some other slots cannot be read from here.
 */
3236 PyTypeObject PyByteArray_Type
= {
3237 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3239 sizeof(PyByteArrayObject
),
3241 (destructor
)bytes_dealloc
, /* tp_dealloc */
3246 (reprfunc
)bytes_repr
, /* tp_repr */
3247 0, /* tp_as_number */
3248 &bytes_as_sequence
, /* tp_as_sequence */
3249 &bytes_as_mapping
, /* tp_as_mapping */
3252 bytes_str
, /* tp_str */
3253 PyObject_GenericGetAttr
, /* tp_getattro */
3254 0, /* tp_setattro */
3255 &bytes_as_buffer
, /* tp_as_buffer */
/* BASETYPE allows subclassing; HAVE_NEWBUFFER advertises the
   getbuffer/releasebuffer entries of bytes_as_buffer. */
3256 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
3257 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
3258 bytes_doc
, /* tp_doc */
3259 0, /* tp_traverse */
3261 (richcmpfunc
)bytes_richcompare
, /* tp_richcompare */
3262 0, /* tp_weaklistoffset */
3263 bytes_iter
, /* tp_iter */
3264 0, /* tp_iternext */
3265 bytes_methods
, /* tp_methods */
3270 0, /* tp_descr_get */
3271 0, /* tp_descr_set */
3272 0, /* tp_dictoffset */
3273 (initproc
)bytes_init
, /* tp_init */
3274 PyType_GenericAlloc
, /* tp_alloc */
3275 PyType_GenericNew
, /* tp_new */
3276 PyObject_Del
, /* tp_free */
3279 /*********************** Bytes Iterator ****************************/
/*
 * Fields of the bytearray iterator state (the functions below receive it as
 * a bytesiterobject pointer).  it_index is the position compared against the
 * sequence size in bytesiter_next; it_seq is the bytearray being iterated.
 *
 * NOTE(review): the struct header and closing typedef line are absent from
 * this listing.
 */
3283 Py_ssize_t it_index
;
3284 PyByteArrayObject
*it_seq
; /* Set to NULL when iterator is exhausted */
/*
 * bytesiter_dealloc(it) -- destructor for the bytearray iterator.
 *
 * Order matters: the object is removed from GC tracking first, then its
 * reference to the sequence is dropped, then the memory is released with the
 * GC-aware deallocator.
 */
3288 bytesiter_dealloc(bytesiterobject
*it
)
3290 _PyObject_GC_UNTRACK(it
);
/* XDECREF because it_seq may already be NULL (see the field comment). */
3291 Py_XDECREF(it
->it_seq
);
3292 PyObject_GC_Del(it
);
/*
 * bytesiter_traverse(it, visit, arg) -- GC traversal hook for the iterator.
 *
 * Reports it->it_seq to the collector through Py_VISIT, which uses the
 * `visit` and `arg` parameters by name.
 *
 * NOTE(review): the return statement is on a numbered line absent from this
 * listing.
 */
3296 bytesiter_traverse(bytesiterobject
*it
, visitproc visit
, void *arg
)
3298 Py_VISIT(it
->it_seq
);
/*
 * bytesiter_next(it) -- tp_iternext implementation for the bytearray
 * iterator.
 *
 * While it_index is below the current size of the sequence, the byte at that
 * position is converted to a Python int.
 *
 * NOTE(review): the lines that load `seq` from the iterator, advance
 * it_index, return the item, and handle exhaustion are absent from this
 * listing -- confirm against the complete file.
 */
3303 bytesiter_next(bytesiterobject
*it
)
3305 PyByteArrayObject
*seq
;
3312 assert(PyByteArray_Check(seq
));
/* The size is re-read on every call, so the bound follows the
   bytearray's current length. */
3314 if (it
->it_index
< PyByteArray_GET_SIZE(seq
)) {
/* Cast through unsigned char so the resulting int is non-negative
   regardless of the signedness of plain char. */
3315 item
= PyInt_FromLong(
3316 (unsigned char)seq
->ob_bytes
[it
->it_index
]);
/*
 * bytesiter_length_hint(it) -- implementation of __length_hint__.
 *
 * Returns, as a Python int, the number of items left: the current size of
 * the sequence minus the iterator position.
 *
 * NOTE(review): numbered lines 3329-3331 are absent from this listing, so
 * the declaration of `len` and any guard for it_seq == NULL are not visible.
 */
3328 bytesiter_length_hint(bytesiterobject
*it
)
3332 len
= PyByteArray_GET_SIZE(it
->it_seq
) - it
->it_index
;
3333 return PyInt_FromSsize_t(len
);
/* Docstring text for the iterator's __length_hint__ method. */
3336 PyDoc_STRVAR(length_hint_doc
,
3337 "Private method returning an estimate of len(list(it)).");
/*
 * Method table of the bytearray iterator: a single "__length_hint__" entry
 * followed by the NULL sentinel that terminates the array.
 *
 * NOTE(review): numbered line 3341 (the rest of the "__length_hint__" entry)
 * and the closing line of the initializer are absent from this listing.
 */
3339 static PyMethodDef bytesiter_methods
[] = {
3340 {"__length_hint__", (PyCFunction
)bytesiter_length_hint
, METH_NOARGS
,
3342 {NULL
, NULL
} /* sentinel */
/*
 * Type object for the bytearray iterator ("bytearray_iterator").
 *
 * Instances are fixed-size (tp_itemsize 0) and take part in cyclic GC
 * (Py_TPFLAGS_HAVE_GC with bytesiter_traverse).  Iterating the iterator
 * returns itself (PyObject_SelfIter) and items come from bytesiter_next.
 *
 * NOTE(review): several numbered lines are absent from this listing, so not
 * every slot is visible here.
 */
3345 PyTypeObject PyByteArrayIter_Type
= {
3346 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3347 "bytearray_iterator", /* tp_name */
3348 sizeof(bytesiterobject
), /* tp_basicsize */
3349 0, /* tp_itemsize */
3351 (destructor
)bytesiter_dealloc
, /* tp_dealloc */
3357 0, /* tp_as_number */
3358 0, /* tp_as_sequence */
3359 0, /* tp_as_mapping */
3363 PyObject_GenericGetAttr
, /* tp_getattro */
3364 0, /* tp_setattro */
3365 0, /* tp_as_buffer */
3366 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
, /* tp_flags */
3368 (traverseproc
)bytesiter_traverse
, /* tp_traverse */
3370 0, /* tp_richcompare */
3371 0, /* tp_weaklistoffset */
3372 PyObject_SelfIter
, /* tp_iter */
3373 (iternextfunc
)bytesiter_next
, /* tp_iternext */
3374 bytesiter_methods
, /* tp_methods */
/*
 * bytes_iter(seq) -- tp_iter implementation for bytearray; creates a new
 * bytearray iterator over `seq`.
 *
 * A non-bytearray argument is reported with PyErr_BadInternalCall().  The
 * iterator is allocated with the GC allocator, pointed at `seq`, registered
 * with the collector, and returned as a generic PyObject pointer.
 *
 * NOTE(review): the return statements of the error paths, the allocation
 * failure check, the it_index initialisation and the Py_INCREF of `seq` are
 * on numbered lines absent from this listing -- confirm they exist in the
 * complete file (bytesiter_dealloc releases a reference to it_seq).
 */
3379 bytes_iter(PyObject
*seq
)
3381 bytesiterobject
*it
;
3383 if (!PyByteArray_Check(seq
)) {
3384 PyErr_BadInternalCall();
3387 it
= PyObject_GC_New(bytesiterobject
, &PyByteArrayIter_Type
);
3392 it
->it_seq
= (PyByteArrayObject
*)seq
;
/* Start GC tracking only once the object's fields have been filled in. */
3393 _PyObject_GC_TRACK(it
);
3394 return (PyObject
*)it
;