1 /* PyBytes (bytearray) implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "structmember.h"
6 #include "bytes_methods.h"
8 static PyByteArrayObject
*nullbytes
= NULL
;
11 PyByteArray_Fini(void)
17 PyByteArray_Init(void)
19 nullbytes
= PyObject_New(PyByteArrayObject
, &PyByteArray_Type
);
20 if (nullbytes
== NULL
)
22 nullbytes
->ob_bytes
= NULL
;
23 Py_SIZE(nullbytes
) = nullbytes
->ob_alloc
= 0;
24 nullbytes
->ob_exports
= 0;
28 /* end nullbytes support */
33 _getbytevalue(PyObject
* arg
, int *value
)
37 if (PyInt_Check(arg
)) {
38 face_value
= PyInt_AsLong(arg
);
39 if (face_value
< 0 || face_value
>= 256) {
40 PyErr_SetString(PyExc_ValueError
, "byte must be in range(0, 256)");
44 else if (PyBytes_CheckExact(arg
)) {
45 if (Py_SIZE(arg
) != 1) {
46 PyErr_SetString(PyExc_ValueError
, "string must be of size 1");
49 face_value
= Py_CHARMASK(((PyBytesObject
*)arg
)->ob_sval
[0]);
52 PyErr_Format(PyExc_TypeError
, "an integer or string of size 1 is required");
61 bytes_buffer_getreadbuf(PyByteArrayObject
*self
, Py_ssize_t index
, const void **ptr
)
64 PyErr_SetString(PyExc_SystemError
,
65 "accessing non-existent bytes segment");
68 *ptr
= (void *)self
->ob_bytes
;
73 bytes_buffer_getwritebuf(PyByteArrayObject
*self
, Py_ssize_t index
, const void **ptr
)
76 PyErr_SetString(PyExc_SystemError
,
77 "accessing non-existent bytes segment");
80 *ptr
= (void *)self
->ob_bytes
;
85 bytes_buffer_getsegcount(PyByteArrayObject
*self
, Py_ssize_t
*lenp
)
88 *lenp
= Py_SIZE(self
);
93 bytes_buffer_getcharbuf(PyByteArrayObject
*self
, Py_ssize_t index
, const char **ptr
)
96 PyErr_SetString(PyExc_SystemError
,
97 "accessing non-existent bytes segment");
100 *ptr
= self
->ob_bytes
;
101 return Py_SIZE(self
);
105 bytes_getbuffer(PyByteArrayObject
*obj
, Py_buffer
*view
, int flags
)
113 if (obj
->ob_bytes
== NULL
)
117 ret
= PyBuffer_FillInfo(view
, ptr
, Py_SIZE(obj
), 0, flags
);
125 bytes_releasebuffer(PyByteArrayObject
*obj
, Py_buffer
*view
)
131 _getbuffer(PyObject
*obj
, Py_buffer
*view
)
133 PyBufferProcs
*buffer
= Py_TYPE(obj
)->tp_as_buffer
;
135 if (buffer
== NULL
|| buffer
->bf_getbuffer
== NULL
)
137 PyErr_Format(PyExc_TypeError
,
138 "Type %.100s doesn't support the buffer API",
139 Py_TYPE(obj
)->tp_name
);
143 if (buffer
->bf_getbuffer(obj
, view
, PyBUF_SIMPLE
) < 0)
148 /* Direct API functions */
151 PyByteArray_FromObject(PyObject
*input
)
153 return PyObject_CallFunctionObjArgs((PyObject
*)&PyByteArray_Type
,
158 PyByteArray_FromStringAndSize(const char *bytes
, Py_ssize_t size
)
160 PyByteArrayObject
*new;
164 PyErr_SetString(PyExc_SystemError
,
165 "Negative size passed to PyByteArray_FromStringAndSize");
169 new = PyObject_New(PyByteArrayObject
, &PyByteArray_Type
);
174 new->ob_bytes
= NULL
;
179 new->ob_bytes
= PyMem_Malloc(alloc
);
180 if (new->ob_bytes
== NULL
) {
182 return PyErr_NoMemory();
185 memcpy(new->ob_bytes
, bytes
, size
);
186 new->ob_bytes
[size
] = '\0'; /* Trailing null byte */
189 new->ob_alloc
= alloc
;
192 return (PyObject
*)new;
196 PyByteArray_Size(PyObject
*self
)
198 assert(self
!= NULL
);
199 assert(PyByteArray_Check(self
));
201 return PyByteArray_GET_SIZE(self
);
205 PyByteArray_AsString(PyObject
*self
)
207 assert(self
!= NULL
);
208 assert(PyByteArray_Check(self
));
210 return PyByteArray_AS_STRING(self
);
214 PyByteArray_Resize(PyObject
*self
, Py_ssize_t size
)
217 Py_ssize_t alloc
= ((PyByteArrayObject
*)self
)->ob_alloc
;
219 assert(self
!= NULL
);
220 assert(PyByteArray_Check(self
));
223 if (size
< alloc
/ 2) {
224 /* Major downsize; resize down to exact size */
227 else if (size
< alloc
) {
228 /* Within allocated size; quick exit */
229 Py_SIZE(self
) = size
;
230 ((PyByteArrayObject
*)self
)->ob_bytes
[size
] = '\0'; /* Trailing null */
233 else if (size
<= alloc
* 1.125) {
234 /* Moderate upsize; overallocate similar to list_resize() */
235 alloc
= size
+ (size
>> 3) + (size
< 9 ? 3 : 6);
238 /* Major upsize; resize up to exact size */
242 if (((PyByteArrayObject
*)self
)->ob_exports
> 0) {
244 fprintf(stderr, "%d: %s", ((PyByteArrayObject *)self)->ob_exports,
245 ((PyByteArrayObject *)self)->ob_bytes);
247 PyErr_SetString(PyExc_BufferError
,
248 "Existing exports of data: object cannot be re-sized");
252 sval
= PyMem_Realloc(((PyByteArrayObject
*)self
)->ob_bytes
, alloc
);
258 ((PyByteArrayObject
*)self
)->ob_bytes
= sval
;
259 Py_SIZE(self
) = size
;
260 ((PyByteArrayObject
*)self
)->ob_alloc
= alloc
;
261 ((PyByteArrayObject
*)self
)->ob_bytes
[size
] = '\0'; /* Trailing null byte */
267 PyByteArray_Concat(PyObject
*a
, PyObject
*b
)
271 PyByteArrayObject
*result
= NULL
;
275 if (_getbuffer(a
, &va
) < 0 ||
276 _getbuffer(b
, &vb
) < 0) {
277 PyErr_Format(PyExc_TypeError
, "can't concat %.100s to %.100s",
278 Py_TYPE(a
)->tp_name
, Py_TYPE(b
)->tp_name
);
282 size
= va
.len
+ vb
.len
;
284 return PyErr_NoMemory();
288 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, size
);
289 if (result
!= NULL
) {
290 memcpy(result
->ob_bytes
, va
.buf
, va
.len
);
291 memcpy(result
->ob_bytes
+ va
.len
, vb
.buf
, vb
.len
);
296 PyObject_ReleaseBuffer(a
, &va
);
298 PyObject_ReleaseBuffer(b
, &vb
);
299 return (PyObject
*)result
;
302 /* Functions stuffed into the type object */
305 bytes_length(PyByteArrayObject
*self
)
307 return Py_SIZE(self
);
311 bytes_iconcat(PyByteArrayObject
*self
, PyObject
*other
)
317 if (_getbuffer(other
, &vo
) < 0) {
318 PyErr_Format(PyExc_TypeError
, "can't concat %.100s to %.100s",
319 Py_TYPE(other
)->tp_name
, Py_TYPE(self
)->tp_name
);
323 mysize
= Py_SIZE(self
);
324 size
= mysize
+ vo
.len
;
326 PyObject_ReleaseBuffer(other
, &vo
);
327 return PyErr_NoMemory();
329 if (size
< self
->ob_alloc
) {
330 Py_SIZE(self
) = size
;
331 self
->ob_bytes
[Py_SIZE(self
)] = '\0'; /* Trailing null byte */
333 else if (PyByteArray_Resize((PyObject
*)self
, size
) < 0) {
334 PyObject_ReleaseBuffer(other
, &vo
);
337 memcpy(self
->ob_bytes
+ mysize
, vo
.buf
, vo
.len
);
338 PyObject_ReleaseBuffer(other
, &vo
);
340 return (PyObject
*)self
;
344 bytes_repeat(PyByteArrayObject
*self
, Py_ssize_t count
)
346 PyByteArrayObject
*result
;
352 mysize
= Py_SIZE(self
);
353 size
= mysize
* count
;
354 if (count
!= 0 && size
/ count
!= mysize
)
355 return PyErr_NoMemory();
356 result
= (PyByteArrayObject
*)PyByteArray_FromStringAndSize(NULL
, size
);
357 if (result
!= NULL
&& size
!= 0) {
359 memset(result
->ob_bytes
, self
->ob_bytes
[0], size
);
362 for (i
= 0; i
< count
; i
++)
363 memcpy(result
->ob_bytes
+ i
*mysize
, self
->ob_bytes
, mysize
);
366 return (PyObject
*)result
;
370 bytes_irepeat(PyByteArrayObject
*self
, Py_ssize_t count
)
377 mysize
= Py_SIZE(self
);
378 size
= mysize
* count
;
379 if (count
!= 0 && size
/ count
!= mysize
)
380 return PyErr_NoMemory();
381 if (size
< self
->ob_alloc
) {
382 Py_SIZE(self
) = size
;
383 self
->ob_bytes
[Py_SIZE(self
)] = '\0'; /* Trailing null byte */
385 else if (PyByteArray_Resize((PyObject
*)self
, size
) < 0)
389 memset(self
->ob_bytes
, self
->ob_bytes
[0], size
);
392 for (i
= 1; i
< count
; i
++)
393 memcpy(self
->ob_bytes
+ i
*mysize
, self
->ob_bytes
, mysize
);
397 return (PyObject
*)self
;
401 bytes_getitem(PyByteArrayObject
*self
, Py_ssize_t i
)
405 if (i
< 0 || i
>= Py_SIZE(self
)) {
406 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
409 return PyInt_FromLong((unsigned char)(self
->ob_bytes
[i
]));
413 bytes_subscript(PyByteArrayObject
*self
, PyObject
*item
)
415 if (PyIndex_Check(item
)) {
416 Py_ssize_t i
= PyNumber_AsSsize_t(item
, PyExc_IndexError
);
418 if (i
== -1 && PyErr_Occurred())
422 i
+= PyByteArray_GET_SIZE(self
);
424 if (i
< 0 || i
>= Py_SIZE(self
)) {
425 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
428 return PyInt_FromLong((unsigned char)(self
->ob_bytes
[i
]));
430 else if (PySlice_Check(item
)) {
431 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
432 if (PySlice_GetIndicesEx((PySliceObject
*)item
,
433 PyByteArray_GET_SIZE(self
),
434 &start
, &stop
, &step
, &slicelength
) < 0) {
438 if (slicelength
<= 0)
439 return PyByteArray_FromStringAndSize("", 0);
440 else if (step
== 1) {
441 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ start
,
445 char *source_buf
= PyByteArray_AS_STRING(self
);
446 char *result_buf
= (char *)PyMem_Malloc(slicelength
);
449 if (result_buf
== NULL
)
450 return PyErr_NoMemory();
452 for (cur
= start
, i
= 0; i
< slicelength
;
454 result_buf
[i
] = source_buf
[cur
];
456 result
= PyByteArray_FromStringAndSize(result_buf
, slicelength
);
457 PyMem_Free(result_buf
);
462 PyErr_SetString(PyExc_TypeError
, "bytearray indices must be integers");
468 bytes_setslice(PyByteArrayObject
*self
, Py_ssize_t lo
, Py_ssize_t hi
,
471 Py_ssize_t avail
, needed
;
477 if (values
== (PyObject
*)self
) {
478 /* Make a copy and call this function recursively */
480 values
= PyByteArray_FromObject(values
);
483 err
= bytes_setslice(self
, lo
, hi
, values
);
487 if (values
== NULL
) {
493 if (_getbuffer(values
, &vbytes
) < 0) {
494 PyErr_Format(PyExc_TypeError
,
495 "can't set bytes slice from %.100s",
496 Py_TYPE(values
)->tp_name
);
507 if (hi
> Py_SIZE(self
))
514 if (avail
!= needed
) {
515 if (avail
> needed
) {
518 | |<----avail----->|<-----tomove------>|
519 | |<-needed->|<-----tomove------>|
522 memmove(self
->ob_bytes
+ lo
+ needed
, self
->ob_bytes
+ hi
,
525 /* XXX(nnorwitz): need to verify this can't overflow! */
526 if (PyByteArray_Resize((PyObject
*)self
,
527 Py_SIZE(self
) + needed
- avail
) < 0) {
531 if (avail
< needed
) {
534 | |<-avail->|<-----tomove------>|
535 | |<----needed---->|<-----tomove------>|
538 memmove(self
->ob_bytes
+ lo
+ needed
, self
->ob_bytes
+ hi
,
539 Py_SIZE(self
) - lo
- needed
);
544 memcpy(self
->ob_bytes
+ lo
, bytes
, needed
);
548 if (vbytes
.len
!= -1)
549 PyObject_ReleaseBuffer(values
, &vbytes
);
554 bytes_setitem(PyByteArrayObject
*self
, Py_ssize_t i
, PyObject
*value
)
561 if (i
< 0 || i
>= Py_SIZE(self
)) {
562 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
567 return bytes_setslice(self
, i
, i
+1, NULL
);
569 if (!_getbytevalue(value
, &ival
))
572 self
->ob_bytes
[i
] = ival
;
577 bytes_ass_subscript(PyByteArrayObject
*self
, PyObject
*item
, PyObject
*values
)
579 Py_ssize_t start
, stop
, step
, slicelen
, needed
;
582 if (PyIndex_Check(item
)) {
583 Py_ssize_t i
= PyNumber_AsSsize_t(item
, PyExc_IndexError
);
585 if (i
== -1 && PyErr_Occurred())
589 i
+= PyByteArray_GET_SIZE(self
);
591 if (i
< 0 || i
>= Py_SIZE(self
)) {
592 PyErr_SetString(PyExc_IndexError
, "bytearray index out of range");
596 if (values
== NULL
) {
597 /* Fall through to slice assignment */
604 Py_ssize_t ival
= PyNumber_AsSsize_t(values
, PyExc_ValueError
);
605 if (ival
== -1 && PyErr_Occurred()) {
607 /* Also accept str of size 1 in 2.x */
609 if (!_getbytevalue(values
, &int_value
))
611 ival
= (int) int_value
;
612 } else if (ival
< 0 || ival
>= 256) {
613 PyErr_SetString(PyExc_ValueError
,
614 "byte must be in range(0, 256)");
617 self
->ob_bytes
[i
] = (char)ival
;
621 else if (PySlice_Check(item
)) {
622 if (PySlice_GetIndicesEx((PySliceObject
*)item
,
623 PyByteArray_GET_SIZE(self
),
624 &start
, &stop
, &step
, &slicelen
) < 0) {
629 PyErr_SetString(PyExc_TypeError
, "bytearray indices must be integer");
633 if (values
== NULL
) {
637 else if (values
== (PyObject
*)self
|| !PyByteArray_Check(values
)) {
638 /* Make a copy an call this function recursively */
640 values
= PyByteArray_FromObject(values
);
643 err
= bytes_ass_subscript(self
, item
, values
);
648 assert(PyByteArray_Check(values
));
649 bytes
= ((PyByteArrayObject
*)values
)->ob_bytes
;
650 needed
= Py_SIZE(values
);
652 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
653 if ((step
< 0 && start
< stop
) ||
654 (step
> 0 && start
> stop
))
657 if (slicelen
!= needed
) {
658 if (slicelen
> needed
) {
660 0 start stop old_size
661 | |<---slicelen--->|<-----tomove------>|
662 | |<-needed->|<-----tomove------>|
665 memmove(self
->ob_bytes
+ start
+ needed
, self
->ob_bytes
+ stop
,
666 Py_SIZE(self
) - stop
);
668 if (PyByteArray_Resize((PyObject
*)self
,
669 Py_SIZE(self
) + needed
- slicelen
) < 0)
671 if (slicelen
< needed
) {
674 | |<-avail->|<-----tomove------>|
675 | |<----needed---->|<-----tomove------>|
678 memmove(self
->ob_bytes
+ start
+ needed
, self
->ob_bytes
+ stop
,
679 Py_SIZE(self
) - start
- needed
);
684 memcpy(self
->ob_bytes
+ start
, bytes
, needed
);
695 start
= stop
+ step
* (slicelen
- 1) - 1;
698 for (cur
= start
, i
= 0;
699 i
< slicelen
; cur
+= step
, i
++) {
700 Py_ssize_t lim
= step
- 1;
702 if (cur
+ step
>= PyByteArray_GET_SIZE(self
))
703 lim
= PyByteArray_GET_SIZE(self
) - cur
- 1;
705 memmove(self
->ob_bytes
+ cur
- i
,
706 self
->ob_bytes
+ cur
+ 1, lim
);
708 /* Move the tail of the bytes, in one chunk */
709 cur
= start
+ slicelen
*step
;
710 if (cur
< PyByteArray_GET_SIZE(self
)) {
711 memmove(self
->ob_bytes
+ cur
- slicelen
,
712 self
->ob_bytes
+ cur
,
713 PyByteArray_GET_SIZE(self
) - cur
);
715 if (PyByteArray_Resize((PyObject
*)self
,
716 PyByteArray_GET_SIZE(self
) - slicelen
) < 0)
725 if (needed
!= slicelen
) {
726 PyErr_Format(PyExc_ValueError
,
727 "attempt to assign bytes of size %zd "
728 "to extended slice of size %zd",
732 for (cur
= start
, i
= 0; i
< slicelen
; cur
+= step
, i
++)
733 self
->ob_bytes
[cur
] = bytes
[i
];
740 bytes_init(PyByteArrayObject
*self
, PyObject
*args
, PyObject
*kwds
)
742 static char *kwlist
[] = {"source", "encoding", "errors", 0};
743 PyObject
*arg
= NULL
;
744 const char *encoding
= NULL
;
745 const char *errors
= NULL
;
748 PyObject
*(*iternext
)(PyObject
*);
750 if (Py_SIZE(self
) != 0) {
751 /* Empty previous contents (yes, do this first of all!) */
752 if (PyByteArray_Resize((PyObject
*)self
, 0) < 0)
756 /* Parse arguments */
757 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|Oss:bytes", kwlist
,
758 &arg
, &encoding
, &errors
))
761 /* Make a quick exit if no first argument */
763 if (encoding
!= NULL
|| errors
!= NULL
) {
764 PyErr_SetString(PyExc_TypeError
,
765 "encoding or errors without sequence argument");
771 if (PyBytes_Check(arg
)) {
772 PyObject
*new, *encoded
;
773 if (encoding
!= NULL
) {
774 encoded
= PyCodec_Encode(arg
, encoding
, errors
);
777 assert(PyBytes_Check(encoded
));
783 new = bytes_iconcat(self
, arg
);
791 if (PyUnicode_Check(arg
)) {
792 /* Encode via the codec registry */
793 PyObject
*encoded
, *new;
794 if (encoding
== NULL
) {
795 PyErr_SetString(PyExc_TypeError
,
796 "unicode argument without an encoding");
799 encoded
= PyCodec_Encode(arg
, encoding
, errors
);
802 assert(PyBytes_Check(encoded
));
803 new = bytes_iconcat(self
, encoded
);
811 /* If it's not unicode, there can't be encoding or errors */
812 if (encoding
!= NULL
|| errors
!= NULL
) {
813 PyErr_SetString(PyExc_TypeError
,
814 "encoding or errors without a string argument");
819 count
= PyNumber_AsSsize_t(arg
, PyExc_ValueError
);
820 if (count
== -1 && PyErr_Occurred())
824 PyErr_SetString(PyExc_ValueError
, "negative count");
828 if (PyByteArray_Resize((PyObject
*)self
, count
))
830 memset(self
->ob_bytes
, 0, count
);
835 /* Use the buffer API */
836 if (PyObject_CheckBuffer(arg
)) {
839 if (PyObject_GetBuffer(arg
, &view
, PyBUF_FULL_RO
) < 0)
842 if (PyByteArray_Resize((PyObject
*)self
, size
) < 0) goto fail
;
843 if (PyBuffer_ToContiguous(self
->ob_bytes
, &view
, size
, 'C') < 0)
845 PyObject_ReleaseBuffer(arg
, &view
);
848 PyObject_ReleaseBuffer(arg
, &view
);
852 /* XXX Optimize this if the arguments is a list, tuple */
854 /* Get the iterator */
855 it
= PyObject_GetIter(arg
);
858 iternext
= *Py_TYPE(it
)->tp_iternext
;
860 /* Run the iterator to exhaustion */
865 /* Get the next item */
868 if (PyErr_Occurred()) {
869 if (!PyErr_ExceptionMatches(PyExc_StopIteration
))
876 /* Interpret it as an int (__index__) */
877 value
= PyNumber_AsSsize_t(item
, PyExc_ValueError
);
879 if (value
== -1 && PyErr_Occurred())
883 if (value
< 0 || value
>= 256) {
884 PyErr_SetString(PyExc_ValueError
,
885 "bytes must be in range(0, 256)");
889 /* Append the byte */
890 if (Py_SIZE(self
) < self
->ob_alloc
)
892 else if (PyByteArray_Resize((PyObject
*)self
, Py_SIZE(self
)+1) < 0)
894 self
->ob_bytes
[Py_SIZE(self
)-1] = value
;
897 /* Clean up and return success */
902 /* Error handling when it != NULL */
907 /* Mostly copied from string_repr, but without the
908 "smart quote" functionality. */
910 bytes_repr(PyByteArrayObject
*self
)
912 static const char *hexdigits
= "0123456789abcdef";
913 const char *quote_prefix
= "bytearray(b";
914 const char *quote_postfix
= ")";
915 Py_ssize_t length
= Py_SIZE(self
);
916 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
917 size_t newsize
= 14 + 4 * length
;
919 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 - 3 != length
) {
920 PyErr_SetString(PyExc_OverflowError
,
921 "bytearray object is too large to make repr");
924 v
= PyUnicode_FromUnicode(NULL
, newsize
);
929 register Py_ssize_t i
;
930 register Py_UNICODE c
;
931 register Py_UNICODE
*p
;
934 /* Figure out which quote to use; single is preferred */
938 start
= PyByteArray_AS_STRING(self
);
939 for (test
= start
; test
< start
+length
; ++test
) {
941 quote
= '\''; /* back to single */
944 else if (*test
== '\'')
951 p
= PyUnicode_AS_UNICODE(v
);
952 while (*quote_prefix
)
953 *p
++ = *quote_prefix
++;
956 for (i
= 0; i
< length
; i
++) {
957 /* There's at least enough room for a hex escape
958 and a closing quote. */
959 assert(newsize
- (p
- PyUnicode_AS_UNICODE(v
)) >= 5);
960 c
= self
->ob_bytes
[i
];
961 if (c
== '\'' || c
== '\\')
962 *p
++ = '\\', *p
++ = c
;
964 *p
++ = '\\', *p
++ = 't';
966 *p
++ = '\\', *p
++ = 'n';
968 *p
++ = '\\', *p
++ = 'r';
970 *p
++ = '\\', *p
++ = 'x', *p
++ = '0', *p
++ = '0';
971 else if (c
< ' ' || c
>= 0x7f) {
974 *p
++ = hexdigits
[(c
& 0xf0) >> 4];
975 *p
++ = hexdigits
[c
& 0xf];
980 assert(newsize
- (p
- PyUnicode_AS_UNICODE(v
)) >= 1);
982 while (*quote_postfix
) {
983 *p
++ = *quote_postfix
++;
986 if (PyUnicode_Resize(&v
, (p
- PyUnicode_AS_UNICODE(v
)))) {
995 bytes_str(PyObject
*op
)
998 if (Py_BytesWarningFlag
) {
999 if (PyErr_WarnEx(PyExc_BytesWarning
,
1000 "str() on a bytearray instance", 1))
1003 return bytes_repr((PyByteArrayObject
*)op
);
1005 return PyBytes_FromStringAndSize(((PyByteArrayObject
*)op
)->ob_bytes
, Py_SIZE(op
));
1009 bytes_richcompare(PyObject
*self
, PyObject
*other
, int op
)
1011 Py_ssize_t self_size
, other_size
;
1012 Py_buffer self_bytes
, other_bytes
;
1017 /* Bytes can be compared to anything that supports the (binary)
1018 buffer API. Except that a comparison with Unicode is always an
1019 error, even if the comparison is for equality. */
1020 if (PyObject_IsInstance(self
, (PyObject
*)&PyUnicode_Type
) ||
1021 PyObject_IsInstance(other
, (PyObject
*)&PyUnicode_Type
)) {
1022 if (Py_BytesWarningFlag
&& op
== Py_EQ
) {
1023 if (PyErr_WarnEx(PyExc_BytesWarning
,
1024 "Comparsion between bytearray and string", 1))
1028 Py_INCREF(Py_NotImplemented
);
1029 return Py_NotImplemented
;
1032 self_size
= _getbuffer(self
, &self_bytes
);
1033 if (self_size
< 0) {
1035 Py_INCREF(Py_NotImplemented
);
1036 return Py_NotImplemented
;
1039 other_size
= _getbuffer(other
, &other_bytes
);
1040 if (other_size
< 0) {
1042 PyObject_ReleaseBuffer(self
, &self_bytes
);
1043 Py_INCREF(Py_NotImplemented
);
1044 return Py_NotImplemented
;
1047 if (self_size
!= other_size
&& (op
== Py_EQ
|| op
== Py_NE
)) {
1048 /* Shortcut: if the lengths differ, the objects differ */
1049 cmp
= (op
== Py_NE
);
1052 minsize
= self_size
;
1053 if (other_size
< minsize
)
1054 minsize
= other_size
;
1056 cmp
= memcmp(self_bytes
.buf
, other_bytes
.buf
, minsize
);
1057 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1060 if (self_size
< other_size
)
1062 else if (self_size
> other_size
)
1067 case Py_LT
: cmp
= cmp
< 0; break;
1068 case Py_LE
: cmp
= cmp
<= 0; break;
1069 case Py_EQ
: cmp
= cmp
== 0; break;
1070 case Py_NE
: cmp
= cmp
!= 0; break;
1071 case Py_GT
: cmp
= cmp
> 0; break;
1072 case Py_GE
: cmp
= cmp
>= 0; break;
1076 res
= cmp
? Py_True
: Py_False
;
1077 PyObject_ReleaseBuffer(self
, &self_bytes
);
1078 PyObject_ReleaseBuffer(other
, &other_bytes
);
1084 bytes_dealloc(PyByteArrayObject
*self
)
1086 if (self
->ob_bytes
!= 0) {
1087 PyMem_Free(self
->ob_bytes
);
1089 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1093 /* -------------------------------------------------------------------- */
1096 #define STRINGLIB_CHAR char
1097 #define STRINGLIB_CMP memcmp
1098 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1099 #define STRINGLIB_STR PyByteArray_AS_STRING
1100 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1101 #define STRINGLIB_EMPTY nullbytes
1102 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1103 #define STRINGLIB_MUTABLE 1
1105 #include "stringlib/fastsearch.h"
1106 #include "stringlib/count.h"
1107 #include "stringlib/find.h"
1108 #include "stringlib/partition.h"
1109 #include "stringlib/ctype.h"
1110 #include "stringlib/transmogrify.h"
1113 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1114 were copied from the old char* style string object. */
1116 Py_LOCAL_INLINE(void)
1117 _adjust_indices(Py_ssize_t
*start
, Py_ssize_t
*end
, Py_ssize_t len
)
1132 Py_LOCAL_INLINE(Py_ssize_t
)
1133 bytes_find_internal(PyByteArrayObject
*self
, PyObject
*args
, int dir
)
1137 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1140 if (!PyArg_ParseTuple(args
, "O|O&O&:find/rfind/index/rindex", &subobj
,
1141 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1143 if (_getbuffer(subobj
, &subbuf
) < 0)
1146 res
= stringlib_find_slice(
1147 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
1148 subbuf
.buf
, subbuf
.len
, start
, end
);
1150 res
= stringlib_rfind_slice(
1151 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
1152 subbuf
.buf
, subbuf
.len
, start
, end
);
1153 PyObject_ReleaseBuffer(subobj
, &subbuf
);
1157 PyDoc_STRVAR(find__doc__
,
1158 "B.find(sub [,start [,end]]) -> int\n\
1160 Return the lowest index in B where subsection sub is found,\n\
1161 such that sub is contained within s[start,end]. Optional\n\
1162 arguments start and end are interpreted as in slice notation.\n\
1164 Return -1 on failure.");
1167 bytes_find(PyByteArrayObject
*self
, PyObject
*args
)
1169 Py_ssize_t result
= bytes_find_internal(self
, args
, +1);
1172 return PyInt_FromSsize_t(result
);
1175 PyDoc_STRVAR(count__doc__
,
1176 "B.count(sub [,start [,end]]) -> int\n\
1178 Return the number of non-overlapping occurrences of subsection sub in\n\
1179 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1180 as in slice notation.");
1183 bytes_count(PyByteArrayObject
*self
, PyObject
*args
)
1186 const char *str
= PyByteArray_AS_STRING(self
);
1187 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
1189 PyObject
*count_obj
;
1191 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
1192 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1195 if (_getbuffer(sub_obj
, &vsub
) < 0)
1198 _adjust_indices(&start
, &end
, PyByteArray_GET_SIZE(self
));
1200 count_obj
= PyInt_FromSsize_t(
1201 stringlib_count(str
+ start
, end
- start
, vsub
.buf
, vsub
.len
)
1203 PyObject_ReleaseBuffer(sub_obj
, &vsub
);
1208 PyDoc_STRVAR(index__doc__
,
1209 "B.index(sub [,start [,end]]) -> int\n\
1211 Like B.find() but raise ValueError when the subsection is not found.");
1214 bytes_index(PyByteArrayObject
*self
, PyObject
*args
)
1216 Py_ssize_t result
= bytes_find_internal(self
, args
, +1);
1220 PyErr_SetString(PyExc_ValueError
,
1221 "subsection not found");
1224 return PyInt_FromSsize_t(result
);
1228 PyDoc_STRVAR(rfind__doc__
,
1229 "B.rfind(sub [,start [,end]]) -> int\n\
1231 Return the highest index in B where subsection sub is found,\n\
1232 such that sub is contained within s[start,end]. Optional\n\
1233 arguments start and end are interpreted as in slice notation.\n\
1235 Return -1 on failure.");
1238 bytes_rfind(PyByteArrayObject
*self
, PyObject
*args
)
1240 Py_ssize_t result
= bytes_find_internal(self
, args
, -1);
1243 return PyInt_FromSsize_t(result
);
1247 PyDoc_STRVAR(rindex__doc__
,
1248 "B.rindex(sub [,start [,end]]) -> int\n\
1250 Like B.rfind() but raise ValueError when the subsection is not found.");
1253 bytes_rindex(PyByteArrayObject
*self
, PyObject
*args
)
1255 Py_ssize_t result
= bytes_find_internal(self
, args
, -1);
1259 PyErr_SetString(PyExc_ValueError
,
1260 "subsection not found");
1263 return PyInt_FromSsize_t(result
);
1268 bytes_contains(PyObject
*self
, PyObject
*arg
)
1270 Py_ssize_t ival
= PyNumber_AsSsize_t(arg
, PyExc_ValueError
);
1271 if (ival
== -1 && PyErr_Occurred()) {
1275 if (_getbuffer(arg
, &varg
) < 0)
1277 pos
= stringlib_find(PyByteArray_AS_STRING(self
), Py_SIZE(self
),
1278 varg
.buf
, varg
.len
, 0);
1279 PyObject_ReleaseBuffer(arg
, &varg
);
1282 if (ival
< 0 || ival
>= 256) {
1283 PyErr_SetString(PyExc_ValueError
, "byte must be in range(0, 256)");
1287 return memchr(PyByteArray_AS_STRING(self
), ival
, Py_SIZE(self
)) != NULL
;
1291 /* Matches the end (direction >= 0) or start (direction < 0) of self
1292 * against substr, using the start and end arguments. Returns
1293 * -1 on error, 0 if not found and 1 if found.
1296 _bytes_tailmatch(PyByteArrayObject
*self
, PyObject
*substr
, Py_ssize_t start
,
1297 Py_ssize_t end
, int direction
)
1299 Py_ssize_t len
= PyByteArray_GET_SIZE(self
);
1304 str
= PyByteArray_AS_STRING(self
);
1306 if (_getbuffer(substr
, &vsubstr
) < 0)
1309 _adjust_indices(&start
, &end
, len
);
1311 if (direction
< 0) {
1313 if (start
+vsubstr
.len
> len
) {
1318 if (end
-start
< vsubstr
.len
|| start
> len
) {
1322 if (end
-vsubstr
.len
> start
)
1323 start
= end
- vsubstr
.len
;
1325 if (end
-start
>= vsubstr
.len
)
1326 rv
= ! memcmp(str
+start
, vsubstr
.buf
, vsubstr
.len
);
1329 PyObject_ReleaseBuffer(substr
, &vsubstr
);
1334 PyDoc_STRVAR(startswith__doc__
,
1335 "B.startswith(prefix [,start [,end]]) -> bool\n\
1337 Return True if B starts with the specified prefix, False otherwise.\n\
1338 With optional start, test B beginning at that position.\n\
1339 With optional end, stop comparing B at that position.\n\
1340 prefix can also be a tuple of strings to try.");
1343 bytes_startswith(PyByteArrayObject
*self
, PyObject
*args
)
1345 Py_ssize_t start
= 0;
1346 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1350 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
1351 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1353 if (PyTuple_Check(subobj
)) {
1355 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
1356 result
= _bytes_tailmatch(self
,
1357 PyTuple_GET_ITEM(subobj
, i
),
1367 result
= _bytes_tailmatch(self
, subobj
, start
, end
, -1);
1371 return PyBool_FromLong(result
);
1374 PyDoc_STRVAR(endswith__doc__
,
1375 "B.endswith(suffix [,start [,end]]) -> bool\n\
1377 Return True if B ends with the specified suffix, False otherwise.\n\
1378 With optional start, test B beginning at that position.\n\
1379 With optional end, stop comparing B at that position.\n\
1380 suffix can also be a tuple of strings to try.");
1383 bytes_endswith(PyByteArrayObject
*self
, PyObject
*args
)
1385 Py_ssize_t start
= 0;
1386 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1390 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
1391 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1393 if (PyTuple_Check(subobj
)) {
1395 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
1396 result
= _bytes_tailmatch(self
,
1397 PyTuple_GET_ITEM(subobj
, i
),
1407 result
= _bytes_tailmatch(self
, subobj
, start
, end
, +1);
1411 return PyBool_FromLong(result
);
1415 PyDoc_STRVAR(translate__doc__
,
1416 "B.translate(table[, deletechars]) -> bytearray\n\
1418 Return a copy of B, where all characters occurring in the\n\
1419 optional argument deletechars are removed, and the remaining\n\
1420 characters have been mapped through the given translation\n\
1421 table, which must be a bytes object of length 256.");
1424 bytes_translate(PyByteArrayObject
*self
, PyObject
*args
)
1426 register char *input
, *output
;
1427 register const char *table
;
1428 register Py_ssize_t i
, c
, changed
= 0;
1429 PyObject
*input_obj
= (PyObject
*)self
;
1430 const char *output_start
;
1433 int trans_table
[256];
1434 PyObject
*tableobj
, *delobj
= NULL
;
1435 Py_buffer vtable
, vdel
;
1437 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
1438 &tableobj
, &delobj
))
1441 if (_getbuffer(tableobj
, &vtable
) < 0)
1444 if (vtable
.len
!= 256) {
1445 PyErr_SetString(PyExc_ValueError
,
1446 "translation table must be 256 characters long");
1451 if (delobj
!= NULL
) {
1452 if (_getbuffer(delobj
, &vdel
) < 0) {
1462 table
= (const char *)vtable
.buf
;
1463 inlen
= PyByteArray_GET_SIZE(input_obj
);
1464 result
= PyByteArray_FromStringAndSize((char *)NULL
, inlen
);
1467 output_start
= output
= PyByteArray_AsString(result
);
1468 input
= PyByteArray_AS_STRING(input_obj
);
1470 if (vdel
.len
== 0) {
1471 /* If no deletions are required, use faster code */
1472 for (i
= inlen
; --i
>= 0; ) {
1473 c
= Py_CHARMASK(*input
++);
1474 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
1477 if (changed
|| !PyByteArray_CheckExact(input_obj
))
1480 Py_INCREF(input_obj
);
1485 for (i
= 0; i
< 256; i
++)
1486 trans_table
[i
] = Py_CHARMASK(table
[i
]);
1488 for (i
= 0; i
< vdel
.len
; i
++)
1489 trans_table
[(int) Py_CHARMASK( ((unsigned char*)vdel
.buf
)[i
] )] = -1;
1491 for (i
= inlen
; --i
>= 0; ) {
1492 c
= Py_CHARMASK(*input
++);
1493 if (trans_table
[c
] != -1)
1494 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
1498 if (!changed
&& PyByteArray_CheckExact(input_obj
)) {
1500 Py_INCREF(input_obj
);
1504 /* Fix the size of the resulting string */
1506 PyByteArray_Resize(result
, output
- output_start
);
1509 PyObject_ReleaseBuffer(tableobj
, &vtable
);
1511 PyObject_ReleaseBuffer(delobj
, &vdel
);
1519 /* find and count characters and substrings */
1521 #define findchar(target, target_len, c) \
1522 ((char *)memchr((const void *)(target), c, target_len))
1524 /* Don't call if length < 2 */
1525 #define Py_STRING_MATCH(target, offset, pattern, length) \
1526 (target[offset] == pattern[0] && \
1527 target[offset+length-1] == pattern[length-1] && \
1528 !memcmp(target+offset+1, pattern+1, length-2) )
1531 /* Bytes ops must return a string. */
1532 /* If the object is subclass of bytes, create a copy */
1533 Py_LOCAL(PyByteArrayObject
*)
1534 return_self(PyByteArrayObject
*self
)
1536 if (PyByteArray_CheckExact(self
)) {
1538 return (PyByteArrayObject
*)self
;
1540 return (PyByteArrayObject
*)PyByteArray_FromStringAndSize(
1541 PyByteArray_AS_STRING(self
),
1542 PyByteArray_GET_SIZE(self
));
1545 Py_LOCAL_INLINE(Py_ssize_t
)
1546 countchar(const char *target
, Py_ssize_t target_len
, char c
, Py_ssize_t maxcount
)
1549 const char *start
=target
;
1550 const char *end
=target
+target_len
;
1552 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
1554 if (count
>= maxcount
)
1561 Py_LOCAL(Py_ssize_t
)
1562 findstring(const char *target
, Py_ssize_t target_len
,
1563 const char *pattern
, Py_ssize_t pattern_len
,
1569 start
+= target_len
;
1573 if (end
> target_len
) {
1575 } else if (end
< 0) {
1581 /* zero-length substrings always match at the first attempt */
1582 if (pattern_len
== 0)
1583 return (direction
> 0) ? start
: end
;
1587 if (direction
< 0) {
1588 for (; end
>= start
; end
--)
1589 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
))
1592 for (; start
<= end
; start
++)
1593 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
))
1599 Py_LOCAL_INLINE(Py_ssize_t
)
1600 countstring(const char *target
, Py_ssize_t target_len
,
1601 const char *pattern
, Py_ssize_t pattern_len
,
1604 int direction
, Py_ssize_t maxcount
)
1609 start
+= target_len
;
1613 if (end
> target_len
) {
1615 } else if (end
< 0) {
1621 /* zero-length substrings match everywhere */
1622 if (pattern_len
== 0 || maxcount
== 0) {
1623 if (target_len
+1 < maxcount
)
1624 return target_len
+1;
1629 if (direction
< 0) {
1630 for (; (end
>= start
); end
--)
1631 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
)) {
1633 if (--maxcount
<= 0) break;
1634 end
-= pattern_len
-1;
1637 for (; (start
<= end
); start
++)
1638 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
)) {
1640 if (--maxcount
<= 0)
1642 start
+= pattern_len
-1;
1649 /* Algorithms for different cases of string replacement */
1651 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1652 Py_LOCAL(PyByteArrayObject
*)
1653 replace_interleave(PyByteArrayObject
*self
,
1654 const char *to_s
, Py_ssize_t to_len
,
1655 Py_ssize_t maxcount
)
1657 char *self_s
, *result_s
;
1658 Py_ssize_t self_len
, result_len
;
1659 Py_ssize_t count
, i
, product
;
1660 PyByteArrayObject
*result
;
1662 self_len
= PyByteArray_GET_SIZE(self
);
1664 /* 1 at the end plus 1 after every character */
1666 if (maxcount
< count
)
1669 /* Check for overflow */
1670 /* result_len = count * to_len + self_len; */
1671 product
= count
* to_len
;
1672 if (product
/ to_len
!= count
) {
1673 PyErr_SetString(PyExc_OverflowError
,
1674 "replace string is too long");
1677 result_len
= product
+ self_len
;
1678 if (result_len
< 0) {
1679 PyErr_SetString(PyExc_OverflowError
,
1680 "replace string is too long");
1684 if (! (result
= (PyByteArrayObject
*)
1685 PyByteArray_FromStringAndSize(NULL
, result_len
)) )
1688 self_s
= PyByteArray_AS_STRING(self
);
1689 result_s
= PyByteArray_AS_STRING(result
);
1691 /* TODO: special case single character, which doesn't need memcpy */
1693 /* Lay the first one down (guaranteed this will occur) */
1694 Py_MEMCPY(result_s
, to_s
, to_len
);
1698 for (i
=0; i
<count
; i
++) {
1699 *result_s
++ = *self_s
++;
1700 Py_MEMCPY(result_s
, to_s
, to_len
);
1704 /* Copy the rest of the original string */
1705 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
1710 /* Special case for deleting a single character */
1711 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1712 Py_LOCAL(PyByteArrayObject
*)
1713 replace_delete_single_character(PyByteArrayObject
*self
,
1714 char from_c
, Py_ssize_t maxcount
)
1716 char *self_s
, *result_s
;
1717 char *start
, *next
, *end
;
1718 Py_ssize_t self_len
, result_len
;
1720 PyByteArrayObject
*result
;
1722 self_len
= PyByteArray_GET_SIZE(self
);
1723 self_s
= PyByteArray_AS_STRING(self
);
1725 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
1727 return return_self(self
);
1730 result_len
= self_len
- count
; /* from_len == 1 */
1731 assert(result_len
>=0);
1733 if ( (result
= (PyByteArrayObject
*)
1734 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1736 result_s
= PyByteArray_AS_STRING(result
);
1739 end
= self_s
+ self_len
;
1740 while (count
-- > 0) {
1741 next
= findchar(start
, end
-start
, from_c
);
1744 Py_MEMCPY(result_s
, start
, next
-start
);
1745 result_s
+= (next
-start
);
1748 Py_MEMCPY(result_s
, start
, end
-start
);
1753 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1755 Py_LOCAL(PyByteArrayObject
*)
1756 replace_delete_substring(PyByteArrayObject
*self
,
1757 const char *from_s
, Py_ssize_t from_len
,
1758 Py_ssize_t maxcount
)
1760 char *self_s
, *result_s
;
1761 char *start
, *next
, *end
;
1762 Py_ssize_t self_len
, result_len
;
1763 Py_ssize_t count
, offset
;
1764 PyByteArrayObject
*result
;
1766 self_len
= PyByteArray_GET_SIZE(self
);
1767 self_s
= PyByteArray_AS_STRING(self
);
1769 count
= countstring(self_s
, self_len
,
1776 return return_self(self
);
1779 result_len
= self_len
- (count
* from_len
);
1780 assert (result_len
>=0);
1782 if ( (result
= (PyByteArrayObject
*)
1783 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1786 result_s
= PyByteArray_AS_STRING(result
);
1789 end
= self_s
+ self_len
;
1790 while (count
-- > 0) {
1791 offset
= findstring(start
, end
-start
,
1793 0, end
-start
, FORWARD
);
1796 next
= start
+ offset
;
1798 Py_MEMCPY(result_s
, start
, next
-start
);
1800 result_s
+= (next
-start
);
1801 start
= next
+from_len
;
1803 Py_MEMCPY(result_s
, start
, end
-start
);
1807 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1808 Py_LOCAL(PyByteArrayObject
*)
1809 replace_single_character_in_place(PyByteArrayObject
*self
,
1810 char from_c
, char to_c
,
1811 Py_ssize_t maxcount
)
1813 char *self_s
, *result_s
, *start
, *end
, *next
;
1814 Py_ssize_t self_len
;
1815 PyByteArrayObject
*result
;
1817 /* The result string will be the same size */
1818 self_s
= PyByteArray_AS_STRING(self
);
1819 self_len
= PyByteArray_GET_SIZE(self
);
1821 next
= findchar(self_s
, self_len
, from_c
);
1824 /* No matches; return the original bytes */
1825 return return_self(self
);
1828 /* Need to make a new bytes */
1829 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, self_len
);
1832 result_s
= PyByteArray_AS_STRING(result
);
1833 Py_MEMCPY(result_s
, self_s
, self_len
);
1835 /* change everything in-place, starting with this one */
1836 start
= result_s
+ (next
-self_s
);
1839 end
= result_s
+ self_len
;
1841 while (--maxcount
> 0) {
1842 next
= findchar(start
, end
-start
, from_c
);
1852 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1853 Py_LOCAL(PyByteArrayObject
*)
1854 replace_substring_in_place(PyByteArrayObject
*self
,
1855 const char *from_s
, Py_ssize_t from_len
,
1856 const char *to_s
, Py_ssize_t to_len
,
1857 Py_ssize_t maxcount
)
1859 char *result_s
, *start
, *end
;
1861 Py_ssize_t self_len
, offset
;
1862 PyByteArrayObject
*result
;
1864 /* The result bytes will be the same size */
1866 self_s
= PyByteArray_AS_STRING(self
);
1867 self_len
= PyByteArray_GET_SIZE(self
);
1869 offset
= findstring(self_s
, self_len
,
1871 0, self_len
, FORWARD
);
1873 /* No matches; return the original bytes */
1874 return return_self(self
);
1877 /* Need to make a new bytes */
1878 result
= (PyByteArrayObject
*) PyByteArray_FromStringAndSize(NULL
, self_len
);
1881 result_s
= PyByteArray_AS_STRING(result
);
1882 Py_MEMCPY(result_s
, self_s
, self_len
);
1884 /* change everything in-place, starting with this one */
1885 start
= result_s
+ offset
;
1886 Py_MEMCPY(start
, to_s
, from_len
);
1888 end
= result_s
+ self_len
;
1890 while ( --maxcount
> 0) {
1891 offset
= findstring(start
, end
-start
,
1893 0, end
-start
, FORWARD
);
1896 Py_MEMCPY(start
+offset
, to_s
, from_len
);
1897 start
+= offset
+from_len
;
1903 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1904 Py_LOCAL(PyByteArrayObject
*)
1905 replace_single_character(PyByteArrayObject
*self
,
1907 const char *to_s
, Py_ssize_t to_len
,
1908 Py_ssize_t maxcount
)
1910 char *self_s
, *result_s
;
1911 char *start
, *next
, *end
;
1912 Py_ssize_t self_len
, result_len
;
1913 Py_ssize_t count
, product
;
1914 PyByteArrayObject
*result
;
1916 self_s
= PyByteArray_AS_STRING(self
);
1917 self_len
= PyByteArray_GET_SIZE(self
);
1919 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
1921 /* no matches, return unchanged */
1922 return return_self(self
);
1925 /* use the difference between current and new, hence the "-1" */
1926 /* result_len = self_len + count * (to_len-1) */
1927 product
= count
* (to_len
-1);
1928 if (product
/ (to_len
-1) != count
) {
1929 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1932 result_len
= self_len
+ product
;
1933 if (result_len
< 0) {
1934 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
1938 if ( (result
= (PyByteArrayObject
*)
1939 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
1941 result_s
= PyByteArray_AS_STRING(result
);
1944 end
= self_s
+ self_len
;
1945 while (count
-- > 0) {
1946 next
= findchar(start
, end
-start
, from_c
);
1950 if (next
== start
) {
1951 /* replace with the 'to' */
1952 Py_MEMCPY(result_s
, to_s
, to_len
);
1956 /* copy the unchanged old then the 'to' */
1957 Py_MEMCPY(result_s
, start
, next
-start
);
1958 result_s
+= (next
-start
);
1959 Py_MEMCPY(result_s
, to_s
, to_len
);
1964 /* Copy the remainder of the remaining bytes */
1965 Py_MEMCPY(result_s
, start
, end
-start
);
1970 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1971 Py_LOCAL(PyByteArrayObject
*)
1972 replace_substring(PyByteArrayObject
*self
,
1973 const char *from_s
, Py_ssize_t from_len
,
1974 const char *to_s
, Py_ssize_t to_len
,
1975 Py_ssize_t maxcount
)
1977 char *self_s
, *result_s
;
1978 char *start
, *next
, *end
;
1979 Py_ssize_t self_len
, result_len
;
1980 Py_ssize_t count
, offset
, product
;
1981 PyByteArrayObject
*result
;
1983 self_s
= PyByteArray_AS_STRING(self
);
1984 self_len
= PyByteArray_GET_SIZE(self
);
1986 count
= countstring(self_s
, self_len
,
1988 0, self_len
, FORWARD
, maxcount
);
1990 /* no matches, return unchanged */
1991 return return_self(self
);
1994 /* Check for overflow */
1995 /* result_len = self_len + count * (to_len-from_len) */
1996 product
= count
* (to_len
-from_len
);
1997 if (product
/ (to_len
-from_len
) != count
) {
1998 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
2001 result_len
= self_len
+ product
;
2002 if (result_len
< 0) {
2003 PyErr_SetString(PyExc_OverflowError
, "replace bytes is too long");
2007 if ( (result
= (PyByteArrayObject
*)
2008 PyByteArray_FromStringAndSize(NULL
, result_len
)) == NULL
)
2010 result_s
= PyByteArray_AS_STRING(result
);
2013 end
= self_s
+ self_len
;
2014 while (count
-- > 0) {
2015 offset
= findstring(start
, end
-start
,
2017 0, end
-start
, FORWARD
);
2020 next
= start
+offset
;
2021 if (next
== start
) {
2022 /* replace with the 'to' */
2023 Py_MEMCPY(result_s
, to_s
, to_len
);
2027 /* copy the unchanged old then the 'to' */
2028 Py_MEMCPY(result_s
, start
, next
-start
);
2029 result_s
+= (next
-start
);
2030 Py_MEMCPY(result_s
, to_s
, to_len
);
2032 start
= next
+from_len
;
2035 /* Copy the remainder of the remaining bytes */
2036 Py_MEMCPY(result_s
, start
, end
-start
);
2042 Py_LOCAL(PyByteArrayObject
*)
2043 replace(PyByteArrayObject
*self
,
2044 const char *from_s
, Py_ssize_t from_len
,
2045 const char *to_s
, Py_ssize_t to_len
,
2046 Py_ssize_t maxcount
)
2049 maxcount
= PY_SSIZE_T_MAX
;
2050 } else if (maxcount
== 0 || PyByteArray_GET_SIZE(self
) == 0) {
2051 /* nothing to do; return the original bytes */
2052 return return_self(self
);
2055 if (maxcount
== 0 ||
2056 (from_len
== 0 && to_len
== 0)) {
2057 /* nothing to do; return the original bytes */
2058 return return_self(self
);
2061 /* Handle zero-length special cases */
2063 if (from_len
== 0) {
2064 /* insert the 'to' bytes everywhere. */
2065 /* >>> "Python".replace("", ".") */
2066 /* '.P.y.t.h.o.n.' */
2067 return replace_interleave(self
, to_s
, to_len
, maxcount
);
2070 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2071 /* point for an empty self bytes to generate a non-empty bytes */
2072 /* Special case so the remaining code always gets a non-empty bytes */
2073 if (PyByteArray_GET_SIZE(self
) == 0) {
2074 return return_self(self
);
2078 /* delete all occurrences of 'from' bytes */
2079 if (from_len
== 1) {
2080 return replace_delete_single_character(
2081 self
, from_s
[0], maxcount
);
2083 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
2087 /* Handle special case where both bytes have the same length */
2089 if (from_len
== to_len
) {
2090 if (from_len
== 1) {
2091 return replace_single_character_in_place(
2097 return replace_substring_in_place(
2098 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2102 /* Otherwise use the more generic algorithms */
2103 if (from_len
== 1) {
2104 return replace_single_character(self
, from_s
[0],
2105 to_s
, to_len
, maxcount
);
2107 /* len('from')>=2, len('to')>=1 */
2108 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2113 PyDoc_STRVAR(replace__doc__
,
2114 "B.replace(old, new[, count]) -> bytes\n\
2116 Return a copy of B with all occurrences of subsection\n\
2117 old replaced by new. If the optional argument count is\n\
2118 given, only the first count occurrences are replaced.");
2121 bytes_replace(PyByteArrayObject
*self
, PyObject
*args
)
2123 Py_ssize_t count
= -1;
2124 PyObject
*from
, *to
, *res
;
2125 Py_buffer vfrom
, vto
;
2127 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
2130 if (_getbuffer(from
, &vfrom
) < 0)
2132 if (_getbuffer(to
, &vto
) < 0) {
2133 PyObject_ReleaseBuffer(from
, &vfrom
);
2137 res
= (PyObject
*)replace((PyByteArrayObject
*) self
,
2138 vfrom
.buf
, vfrom
.len
,
2139 vto
.buf
, vto
.len
, count
);
2141 PyObject_ReleaseBuffer(from
, &vfrom
);
2142 PyObject_ReleaseBuffer(to
, &vto
);
2147 /* Overallocate the initial list to reduce the number of reallocs for small
2148 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
2149 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
2150 text (roughly 11 words per line) and field delimited data (usually 1-10
2151 fields). For large strings the split algorithms are bandwidth limited
2152 so increasing the preallocation likely will not improve things.*/
2154 #define MAX_PREALLOC 12
2156 /* 5 splits gives 6 elements */
2157 #define PREALLOC_SIZE(maxsplit) \
2158 (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
2160 #define SPLIT_APPEND(data, left, right) \
2161 str = PyByteArray_FromStringAndSize((data) + (left), \
2162 (right) - (left)); \
2165 if (PyList_Append(list, str)) { \
2172 #define SPLIT_ADD(data, left, right) { \
2173 str = PyByteArray_FromStringAndSize((data) + (left), \
2174 (right) - (left)); \
2177 if (count < MAX_PREALLOC) { \
2178 PyList_SET_ITEM(list, count, str); \
2180 if (PyList_Append(list, str)) { \
2189 /* Always force the list to the expected size. */
2190 #define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2193 Py_LOCAL_INLINE(PyObject
*)
2194 split_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
2196 register Py_ssize_t i
, j
, count
= 0;
2198 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2204 while ((j
< len
) && (maxcount
-- > 0)) {
2205 for(; j
< len
; j
++) {
2206 /* I found that using memchr makes no difference */
2215 SPLIT_ADD(s
, i
, len
);
2217 FIX_PREALLOC_SIZE(list
);
2226 Py_LOCAL_INLINE(PyObject
*)
2227 split_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxcount
)
2229 register Py_ssize_t i
, j
, count
= 0;
2231 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2236 for (i
= j
= 0; i
< len
; ) {
2238 while (i
< len
&& ISSPACE(s
[i
]))
2241 while (i
< len
&& !ISSPACE(s
[i
]))
2244 if (maxcount
-- <= 0)
2247 while (i
< len
&& ISSPACE(s
[i
]))
2253 SPLIT_ADD(s
, j
, len
);
2255 FIX_PREALLOC_SIZE(list
);
2263 PyDoc_STRVAR(split__doc__
,
2264 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2266 Return a list of the sections in B, using sep as the delimiter.\n\
2267 If sep is not given, B is split on ASCII whitespace characters\n\
2268 (space, tab, return, newline, formfeed, vertical tab).\n\
2269 If maxsplit is given, at most maxsplit splits are done.");
2272 bytes_split(PyByteArrayObject
*self
, PyObject
*args
)
2274 Py_ssize_t len
= PyByteArray_GET_SIZE(self
), n
, i
, j
;
2275 Py_ssize_t maxsplit
= -1, count
= 0;
2276 const char *s
= PyByteArray_AS_STRING(self
), *sub
;
2277 PyObject
*list
, *str
, *subobj
= Py_None
;
2283 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
2286 maxsplit
= PY_SSIZE_T_MAX
;
2288 if (subobj
== Py_None
)
2289 return split_whitespace(s
, len
, maxsplit
);
2291 if (_getbuffer(subobj
, &vsub
) < 0)
2297 PyErr_SetString(PyExc_ValueError
, "empty separator");
2298 PyObject_ReleaseBuffer(subobj
, &vsub
);
2302 return split_char(s
, len
, sub
[0], maxsplit
);
2304 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
2306 PyObject_ReleaseBuffer(subobj
, &vsub
);
2312 while (maxsplit
-- > 0) {
2313 pos
= fastsearch(s
+i
, len
-i
, sub
, n
, FAST_SEARCH
);
2322 while ((j
+n
<= len
) && (maxsplit
-- > 0)) {
2323 for (; j
+n
<= len
; j
++) {
2324 if (Py_STRING_MATCH(s
, j
, sub
, n
)) {
2332 SPLIT_ADD(s
, i
, len
);
2333 FIX_PREALLOC_SIZE(list
);
2334 PyObject_ReleaseBuffer(subobj
, &vsub
);
2339 PyObject_ReleaseBuffer(subobj
, &vsub
);
2343 /* stringlib's partition shares nullbytes in some cases.
2344 undo this, we don't want the nullbytes to be shared. */
2346 make_nullbytes_unique(PyObject
*result
)
2348 if (result
!= NULL
) {
2350 assert(PyTuple_Check(result
));
2351 assert(PyTuple_GET_SIZE(result
) == 3);
2352 for (i
= 0; i
< 3; i
++) {
2353 if (PyTuple_GET_ITEM(result
, i
) == (PyObject
*)nullbytes
) {
2354 PyObject
*new = PyByteArray_FromStringAndSize(NULL
, 0);
2360 Py_DECREF(nullbytes
);
2361 PyTuple_SET_ITEM(result
, i
, new);
2368 PyDoc_STRVAR(partition__doc__
,
2369 "B.partition(sep) -> (head, sep, tail)\n\
2371 Searches for the separator sep in B, and returns the part before it,\n\
2372 the separator itself, and the part after it. If the separator is not\n\
2373 found, returns B and two empty bytearray objects.");
2376 bytes_partition(PyByteArrayObject
*self
, PyObject
*sep_obj
)
2378 PyObject
*bytesep
, *result
;
2380 bytesep
= PyByteArray_FromObject(sep_obj
);
2384 result
= stringlib_partition(
2386 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
2388 PyByteArray_AS_STRING(bytesep
), PyByteArray_GET_SIZE(bytesep
)
2392 return make_nullbytes_unique(result
);
2395 PyDoc_STRVAR(rpartition__doc__
,
2396 "B.rpartition(sep) -> (tail, sep, head)\n\
2398 Searches for the separator sep in B, starting at the end of B,\n\
2399 and returns the part before it, the separator itself, and the\n\
2400 part after it. If the separator is not found, returns two empty\n\
2401 bytearray objects and B.");
2404 bytes_rpartition(PyByteArrayObject
*self
, PyObject
*sep_obj
)
2406 PyObject
*bytesep
, *result
;
2408 bytesep
= PyByteArray_FromObject(sep_obj
);
2412 result
= stringlib_rpartition(
2414 PyByteArray_AS_STRING(self
), PyByteArray_GET_SIZE(self
),
2416 PyByteArray_AS_STRING(bytesep
), PyByteArray_GET_SIZE(bytesep
)
2420 return make_nullbytes_unique(result
);
2423 Py_LOCAL_INLINE(PyObject
*)
2424 rsplit_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
2426 register Py_ssize_t i
, j
, count
=0;
2428 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2434 while ((i
>= 0) && (maxcount
-- > 0)) {
2435 for (; i
>= 0; i
--) {
2437 SPLIT_ADD(s
, i
+ 1, j
+ 1);
2444 SPLIT_ADD(s
, 0, j
+ 1);
2446 FIX_PREALLOC_SIZE(list
);
2447 if (PyList_Reverse(list
) < 0)
2457 Py_LOCAL_INLINE(PyObject
*)
2458 rsplit_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxcount
)
2460 register Py_ssize_t i
, j
, count
= 0;
2462 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
2467 for (i
= j
= len
- 1; i
>= 0; ) {
2469 while (i
>= 0 && ISSPACE(s
[i
]))
2472 while (i
>= 0 && !ISSPACE(s
[i
]))
2475 if (maxcount
-- <= 0)
2477 SPLIT_ADD(s
, i
+ 1, j
+ 1);
2478 while (i
>= 0 && ISSPACE(s
[i
]))
2484 SPLIT_ADD(s
, 0, j
+ 1);
2486 FIX_PREALLOC_SIZE(list
);
2487 if (PyList_Reverse(list
) < 0)
2497 PyDoc_STRVAR(rsplit__doc__
,
2498 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2500 Return a list of the sections in B, using sep as the delimiter,\n\
2501 starting at the end of B and working to the front.\n\
2502 If sep is not given, B is split on ASCII whitespace characters\n\
2503 (space, tab, return, newline, formfeed, vertical tab).\n\
2504 If maxsplit is given, at most maxsplit splits are done.");
2507 bytes_rsplit(PyByteArrayObject
*self
, PyObject
*args
)
2509 Py_ssize_t len
= PyByteArray_GET_SIZE(self
), n
, i
, j
;
2510 Py_ssize_t maxsplit
= -1, count
= 0;
2511 const char *s
= PyByteArray_AS_STRING(self
), *sub
;
2512 PyObject
*list
, *str
, *subobj
= Py_None
;
2515 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
2518 maxsplit
= PY_SSIZE_T_MAX
;
2520 if (subobj
== Py_None
)
2521 return rsplit_whitespace(s
, len
, maxsplit
);
2523 if (_getbuffer(subobj
, &vsub
) < 0)
2529 PyErr_SetString(PyExc_ValueError
, "empty separator");
2530 PyObject_ReleaseBuffer(subobj
, &vsub
);
2534 return rsplit_char(s
, len
, sub
[0], maxsplit
);
2536 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
2538 PyObject_ReleaseBuffer(subobj
, &vsub
);
2545 while ( (i
>= 0) && (maxsplit
-- > 0) ) {
2547 if (Py_STRING_MATCH(s
, i
, sub
, n
)) {
2548 SPLIT_ADD(s
, i
+ n
, j
);
2556 FIX_PREALLOC_SIZE(list
);
2557 if (PyList_Reverse(list
) < 0)
2559 PyObject_ReleaseBuffer(subobj
, &vsub
);
2564 PyObject_ReleaseBuffer(subobj
, &vsub
);
2568 PyDoc_STRVAR(reverse__doc__
,
2569 "B.reverse() -> None\n\
2571 Reverse the order of the values in B in place.");
2573 bytes_reverse(PyByteArrayObject
*self
, PyObject
*unused
)
2575 char swap
, *head
, *tail
;
2576 Py_ssize_t i
, j
, n
= Py_SIZE(self
);
2579 head
= self
->ob_bytes
;
2580 tail
= head
+ n
- 1;
2581 for (i
= 0; i
< j
; i
++) {
2590 PyDoc_STRVAR(insert__doc__
,
2591 "B.insert(index, int) -> None\n\
2593 Insert a single item into the bytearray before the given index.");
2595 bytes_insert(PyByteArrayObject
*self
, PyObject
*args
)
2598 Py_ssize_t where
, n
= Py_SIZE(self
);
2600 if (!PyArg_ParseTuple(args
, "ni:insert", &where
, &value
))
2603 if (n
== PY_SSIZE_T_MAX
) {
2604 PyErr_SetString(PyExc_OverflowError
,
2605 "cannot add more objects to bytes");
2608 if (value
< 0 || value
>= 256) {
2609 PyErr_SetString(PyExc_ValueError
,
2610 "byte must be in range(0, 256)");
2613 if (PyByteArray_Resize((PyObject
*)self
, n
+ 1) < 0)
2623 memmove(self
->ob_bytes
+ where
+ 1, self
->ob_bytes
+ where
, n
- where
);
2624 self
->ob_bytes
[where
] = value
;
2629 PyDoc_STRVAR(append__doc__
,
2630 "B.append(int) -> None\n\
2632 Append a single item to the end of B.");
2634 bytes_append(PyByteArrayObject
*self
, PyObject
*arg
)
2637 Py_ssize_t n
= Py_SIZE(self
);
2639 if (! _getbytevalue(arg
, &value
))
2641 if (n
== PY_SSIZE_T_MAX
) {
2642 PyErr_SetString(PyExc_OverflowError
,
2643 "cannot add more objects to bytes");
2646 if (PyByteArray_Resize((PyObject
*)self
, n
+ 1) < 0)
2649 self
->ob_bytes
[n
] = value
;
2654 PyDoc_STRVAR(extend__doc__
,
2655 "B.extend(iterable int) -> None\n\
2657 Append all the elements from the iterator or sequence to the\n\
2660 bytes_extend(PyByteArrayObject
*self
, PyObject
*arg
)
2662 PyObject
*it
, *item
, *bytes_obj
;
2663 Py_ssize_t buf_size
= 0, len
= 0;
2667 /* bytes_setslice code only accepts something supporting PEP 3118. */
2668 if (PyObject_CheckBuffer(arg
)) {
2669 if (bytes_setslice(self
, Py_SIZE(self
), Py_SIZE(self
), arg
) == -1)
2675 it
= PyObject_GetIter(arg
);
2679 /* Try to determine the length of the argument. 32 is abitrary. */
2680 buf_size
= _PyObject_LengthHint(arg
, 32);
2682 bytes_obj
= PyByteArray_FromStringAndSize(NULL
, buf_size
);
2683 if (bytes_obj
== NULL
)
2685 buf
= PyByteArray_AS_STRING(bytes_obj
);
2687 while ((item
= PyIter_Next(it
)) != NULL
) {
2688 if (! _getbytevalue(item
, &value
)) {
2691 Py_DECREF(bytes_obj
);
2697 if (len
>= buf_size
) {
2698 buf_size
= len
+ (len
>> 1) + 1;
2699 if (PyByteArray_Resize((PyObject
*)bytes_obj
, buf_size
) < 0) {
2701 Py_DECREF(bytes_obj
);
2704 /* Recompute the `buf' pointer, since the resizing operation may
2705 have invalidated it. */
2706 buf
= PyByteArray_AS_STRING(bytes_obj
);
2711 /* Resize down to exact size. */
2712 if (PyByteArray_Resize((PyObject
*)bytes_obj
, len
) < 0) {
2713 Py_DECREF(bytes_obj
);
2717 if (bytes_setslice(self
, Py_SIZE(self
), Py_SIZE(self
), bytes_obj
) == -1)
2719 Py_DECREF(bytes_obj
);
2724 PyDoc_STRVAR(pop__doc__
,
2725 "B.pop([index]) -> int\n\
2727 Remove and return a single item from B. If no index\n\
2728 argument is give, will pop the last value.");
2730 bytes_pop(PyByteArrayObject
*self
, PyObject
*args
)
2733 Py_ssize_t where
= -1, n
= Py_SIZE(self
);
2735 if (!PyArg_ParseTuple(args
, "|n:pop", &where
))
2739 PyErr_SetString(PyExc_OverflowError
,
2740 "cannot pop an empty bytes");
2744 where
+= Py_SIZE(self
);
2745 if (where
< 0 || where
>= Py_SIZE(self
)) {
2746 PyErr_SetString(PyExc_IndexError
, "pop index out of range");
2750 value
= self
->ob_bytes
[where
];
2751 memmove(self
->ob_bytes
+ where
, self
->ob_bytes
+ where
+ 1, n
- where
);
2752 if (PyByteArray_Resize((PyObject
*)self
, n
- 1) < 0)
2755 return PyInt_FromLong(value
);
2758 PyDoc_STRVAR(remove__doc__
,
2759 "B.remove(int) -> None\n\
2761 Remove the first occurance of a value in B.");
2763 bytes_remove(PyByteArrayObject
*self
, PyObject
*arg
)
2766 Py_ssize_t where
, n
= Py_SIZE(self
);
2768 if (! _getbytevalue(arg
, &value
))
2771 for (where
= 0; where
< n
; where
++) {
2772 if (self
->ob_bytes
[where
] == value
)
2776 PyErr_SetString(PyExc_ValueError
, "value not found in bytes");
2780 memmove(self
->ob_bytes
+ where
, self
->ob_bytes
+ where
+ 1, n
- where
);
2781 if (PyByteArray_Resize((PyObject
*)self
, n
- 1) < 0)
2787 /* XXX These two helpers could be optimized if argsize == 1 */
2790 lstrip_helper(unsigned char *myptr
, Py_ssize_t mysize
,
2791 void *argptr
, Py_ssize_t argsize
)
2794 while (i
< mysize
&& memchr(argptr
, myptr
[i
], argsize
))
2800 rstrip_helper(unsigned char *myptr
, Py_ssize_t mysize
,
2801 void *argptr
, Py_ssize_t argsize
)
2803 Py_ssize_t i
= mysize
- 1;
2804 while (i
>= 0 && memchr(argptr
, myptr
[i
], argsize
))
2809 PyDoc_STRVAR(strip__doc__
,
2810 "B.strip([bytes]) -> bytearray\n\
2812 Strip leading and trailing bytes contained in the argument.\n\
2813 If the argument is omitted, strip ASCII whitespace.");
2815 bytes_strip(PyByteArrayObject
*self
, PyObject
*args
)
2817 Py_ssize_t left
, right
, mysize
, argsize
;
2818 void *myptr
, *argptr
;
2819 PyObject
*arg
= Py_None
;
2821 if (!PyArg_ParseTuple(args
, "|O:strip", &arg
))
2823 if (arg
== Py_None
) {
2824 argptr
= "\t\n\r\f\v ";
2828 if (_getbuffer(arg
, &varg
) < 0)
2833 myptr
= self
->ob_bytes
;
2834 mysize
= Py_SIZE(self
);
2835 left
= lstrip_helper(myptr
, mysize
, argptr
, argsize
);
2839 right
= rstrip_helper(myptr
, mysize
, argptr
, argsize
);
2841 PyObject_ReleaseBuffer(arg
, &varg
);
2842 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2845 PyDoc_STRVAR(lstrip__doc__
,
2846 "B.lstrip([bytes]) -> bytearray\n\
2848 Strip leading bytes contained in the argument.\n\
2849 If the argument is omitted, strip leading ASCII whitespace.");
2851 bytes_lstrip(PyByteArrayObject
*self
, PyObject
*args
)
2853 Py_ssize_t left
, right
, mysize
, argsize
;
2854 void *myptr
, *argptr
;
2855 PyObject
*arg
= Py_None
;
2857 if (!PyArg_ParseTuple(args
, "|O:lstrip", &arg
))
2859 if (arg
== Py_None
) {
2860 argptr
= "\t\n\r\f\v ";
2864 if (_getbuffer(arg
, &varg
) < 0)
2869 myptr
= self
->ob_bytes
;
2870 mysize
= Py_SIZE(self
);
2871 left
= lstrip_helper(myptr
, mysize
, argptr
, argsize
);
2874 PyObject_ReleaseBuffer(arg
, &varg
);
2875 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2878 PyDoc_STRVAR(rstrip__doc__
,
2879 "B.rstrip([bytes]) -> bytearray\n\
2881 Strip trailing bytes contained in the argument.\n\
2882 If the argument is omitted, strip trailing ASCII whitespace.");
2884 bytes_rstrip(PyByteArrayObject
*self
, PyObject
*args
)
2886 Py_ssize_t left
, right
, mysize
, argsize
;
2887 void *myptr
, *argptr
;
2888 PyObject
*arg
= Py_None
;
2890 if (!PyArg_ParseTuple(args
, "|O:rstrip", &arg
))
2892 if (arg
== Py_None
) {
2893 argptr
= "\t\n\r\f\v ";
2897 if (_getbuffer(arg
, &varg
) < 0)
2902 myptr
= self
->ob_bytes
;
2903 mysize
= Py_SIZE(self
);
2905 right
= rstrip_helper(myptr
, mysize
, argptr
, argsize
);
2907 PyObject_ReleaseBuffer(arg
, &varg
);
2908 return PyByteArray_FromStringAndSize(self
->ob_bytes
+ left
, right
- left
);
2911 PyDoc_STRVAR(decode_doc
,
2912 "B.decode([encoding[, errors]]) -> unicode object.\n\
2914 Decodes B using the codec registered for encoding. encoding defaults\n\
2915 to the default encoding. errors may be given to set a different error\n\
2916 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2917 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2918 as well as any other name registered with codecs.register_error that is\n\
2919 able to handle UnicodeDecodeErrors.");
2922 bytes_decode(PyObject
*self
, PyObject
*args
)
2924 const char *encoding
= NULL
;
2925 const char *errors
= NULL
;
2927 if (!PyArg_ParseTuple(args
, "|ss:decode", &encoding
, &errors
))
2929 if (encoding
== NULL
)
2930 encoding
= PyUnicode_GetDefaultEncoding();
2931 return PyCodec_Decode(self
, encoding
, errors
);
2934 PyDoc_STRVAR(alloc_doc
,
2935 "B.__alloc__() -> int\n\
2937 Returns the number of bytes actually allocated.");
2940 bytes_alloc(PyByteArrayObject
*self
)
2942 return PyInt_FromSsize_t(self
->ob_alloc
);
2945 PyDoc_STRVAR(join_doc
,
2946 "B.join(iterable_of_bytes) -> bytes\n\
2948 Concatenates any number of bytearray objects, with B in between each pair.");
2951 bytes_join(PyByteArrayObject
*self
, PyObject
*it
)
2954 Py_ssize_t mysize
= Py_SIZE(self
);
2958 Py_ssize_t totalsize
= 0;
2962 seq
= PySequence_Fast(it
, "can only join an iterable");
2965 n
= PySequence_Fast_GET_SIZE(seq
);
2966 items
= PySequence_Fast_ITEMS(seq
);
2968 /* Compute the total size, and check that they are all bytes */
2969 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2970 for (i
= 0; i
< n
; i
++) {
2971 PyObject
*obj
= items
[i
];
2972 if (!PyByteArray_Check(obj
) && !PyBytes_Check(obj
)) {
2973 PyErr_Format(PyExc_TypeError
,
2974 "can only join an iterable of bytes "
2975 "(item %ld has type '%.100s')",
2976 /* XXX %ld isn't right on Win64 */
2977 (long)i
, Py_TYPE(obj
)->tp_name
);
2981 totalsize
+= mysize
;
2982 totalsize
+= Py_SIZE(obj
);
2983 if (totalsize
< 0) {
2989 /* Allocate the result, and copy the bytes */
2990 result
= PyByteArray_FromStringAndSize(NULL
, totalsize
);
2993 dest
= PyByteArray_AS_STRING(result
);
2994 for (i
= 0; i
< n
; i
++) {
2995 PyObject
*obj
= items
[i
];
2996 Py_ssize_t size
= Py_SIZE(obj
);
2998 if (PyByteArray_Check(obj
))
2999 buf
= PyByteArray_AS_STRING(obj
);
3001 buf
= PyBytes_AS_STRING(obj
);
3003 memcpy(dest
, self
->ob_bytes
, mysize
);
3006 memcpy(dest
, buf
, size
);
3014 /* Error handling */
3020 PyDoc_STRVAR(fromhex_doc
,
3021 "bytearray.fromhex(string) -> bytearray\n\
3023 Create a bytearray object from a string of hexadecimal numbers.\n\
3024 Spaces between two numbers are accepted.\n\
3025 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3028 hex_digit_to_int(Py_UNICODE c
)
3037 if (c
>= 'a' && c
<= 'f')
3038 return c
- 'a' + 10;
3044 bytes_fromhex(PyObject
*cls
, PyObject
*args
)
3046 PyObject
*newbytes
, *hexobj
;
3049 Py_ssize_t hexlen
, byteslen
, i
, j
;
3052 if (!PyArg_ParseTuple(args
, "U:fromhex", &hexobj
))
3054 assert(PyUnicode_Check(hexobj
));
3055 hexlen
= PyUnicode_GET_SIZE(hexobj
);
3056 hex
= PyUnicode_AS_UNICODE(hexobj
);
3057 byteslen
= hexlen
/2; /* This overestimates if there are spaces */
3058 newbytes
= PyByteArray_FromStringAndSize(NULL
, byteslen
);
3061 buf
= PyByteArray_AS_STRING(newbytes
);
3062 for (i
= j
= 0; i
< hexlen
; i
+= 2) {
3063 /* skip over spaces in the input */
3064 while (hex
[i
] == ' ')
3068 top
= hex_digit_to_int(hex
[i
]);
3069 bot
= hex_digit_to_int(hex
[i
+1]);
3070 if (top
== -1 || bot
== -1) {
3071 PyErr_Format(PyExc_ValueError
,
3072 "non-hexadecimal number found in "
3073 "fromhex() arg at position %zd", i
);
3076 buf
[j
++] = (top
<< 4) + bot
;
3078 if (PyByteArray_Resize(newbytes
, j
) < 0)
3083 Py_DECREF(newbytes
);
3087 PyDoc_STRVAR(reduce_doc
, "Return state information for pickling.");
3090 bytes_reduce(PyByteArrayObject
*self
)
3092 PyObject
*latin1
, *dict
;
3094 latin1
= PyUnicode_DecodeLatin1(self
->ob_bytes
,
3095 Py_SIZE(self
), NULL
);
3097 latin1
= PyUnicode_FromString("");
3099 dict
= PyObject_GetAttrString((PyObject
*)self
, "__dict__");
3106 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self
), latin1
, "latin-1", dict
);
/* Sequence-protocol slot table for bytearray; installed as tp_as_sequence
 * in PyByteArray_Type.  Each entry is labelled with its slot name.
 * NOTE(review): original line 3114 (between sq_item and sq_ass_item) is
 * not visible in this listing. */
3109 static PySequenceMethods bytes_as_sequence
= {
3110 (lenfunc
)bytes_length
, /* sq_length */
3111 (binaryfunc
)PyByteArray_Concat
, /* sq_concat */
3112 (ssizeargfunc
)bytes_repeat
, /* sq_repeat */
3113 (ssizeargfunc
)bytes_getitem
, /* sq_item */
3115 (ssizeobjargproc
)bytes_setitem
, /* sq_ass_item */
3116 0, /* sq_ass_slice */
3117 (objobjproc
)bytes_contains
, /* sq_contains */
3118 (binaryfunc
)bytes_iconcat
, /* sq_inplace_concat */
3119 (ssizeargfunc
)bytes_irepeat
, /* sq_inplace_repeat */
/* Mapping-protocol slot table for bytearray; installed as tp_as_mapping in
 * PyByteArray_Type.  The three entries are, in order: length, subscript
 * read, and subscript assignment (presumably mp_length, mp_subscript and
 * mp_ass_subscript, judging by the cast types). */
3122 static PyMappingMethods bytes_as_mapping
= {
3123 (lenfunc
)bytes_length
,
3124 (binaryfunc
)bytes_subscript
,
3125 (objobjargproc
)bytes_ass_subscript
,
/* Buffer-protocol slot table for bytearray; installed as tp_as_buffer in
 * PyByteArray_Type.  The first four entries are the segment-based
 * callbacks (read buffer, write buffer, segment count, char buffer); the
 * last two are the Py_buffer-based get/release callbacks. */
3128 static PyBufferProcs bytes_as_buffer
= {
3129 (readbufferproc
)bytes_buffer_getreadbuf
,
3130 (writebufferproc
)bytes_buffer_getwritebuf
,
3131 (segcountproc
)bytes_buffer_getsegcount
,
3132 (charbufferproc
)bytes_buffer_getcharbuf
,
3133 (getbufferproc
)bytes_getbuffer
,
3134 (releasebufferproc
)bytes_releasebuffer
,
/* Entries of the bytearray method table (referenced as bytes_methods by
 * PyByteArray_Type.tp_methods).  Each entry is
 *   {python_name, C implementation, calling-convention flags, docstring}.
 * METH_NOARGS entries take no arguments, METH_O entries take exactly one
 * object, METH_VARARGS entries take an argument tuple; "fromhex" is also
 * METH_CLASS, so it is bound to the class.
 * NOTE(review): the table's declaration line, its terminating sentinel and
 * the doc-pointer continuation lines of "expandtabs", "fromhex",
 * "splitlines", "startswith" and "translate" are not visible in this
 * listing. */
3139 {"__alloc__", (PyCFunction
)bytes_alloc
, METH_NOARGS
, alloc_doc
},
3140 {"__reduce__", (PyCFunction
)bytes_reduce
, METH_NOARGS
, reduce_doc
},
3141 {"append", (PyCFunction
)bytes_append
, METH_O
, append__doc__
},
3142 {"capitalize", (PyCFunction
)stringlib_capitalize
, METH_NOARGS
,
3143 _Py_capitalize__doc__
},
3144 {"center", (PyCFunction
)stringlib_center
, METH_VARARGS
, center__doc__
},
3145 {"count", (PyCFunction
)bytes_count
, METH_VARARGS
, count__doc__
},
3146 {"decode", (PyCFunction
)bytes_decode
, METH_VARARGS
, decode_doc
},
3147 {"endswith", (PyCFunction
)bytes_endswith
, METH_VARARGS
, endswith__doc__
},
3148 {"expandtabs", (PyCFunction
)stringlib_expandtabs
, METH_VARARGS
,
3150 {"extend", (PyCFunction
)bytes_extend
, METH_O
, extend__doc__
},
3151 {"find", (PyCFunction
)bytes_find
, METH_VARARGS
, find__doc__
},
3152 {"fromhex", (PyCFunction
)bytes_fromhex
, METH_VARARGS
|METH_CLASS
,
3154 {"index", (PyCFunction
)bytes_index
, METH_VARARGS
, index__doc__
},
3155 {"insert", (PyCFunction
)bytes_insert
, METH_VARARGS
, insert__doc__
},
3156 {"isalnum", (PyCFunction
)stringlib_isalnum
, METH_NOARGS
,
3157 _Py_isalnum__doc__
},
3158 {"isalpha", (PyCFunction
)stringlib_isalpha
, METH_NOARGS
,
3159 _Py_isalpha__doc__
},
3160 {"isdigit", (PyCFunction
)stringlib_isdigit
, METH_NOARGS
,
3161 _Py_isdigit__doc__
},
3162 {"islower", (PyCFunction
)stringlib_islower
, METH_NOARGS
,
3163 _Py_islower__doc__
},
3164 {"isspace", (PyCFunction
)stringlib_isspace
, METH_NOARGS
,
3165 _Py_isspace__doc__
},
3166 {"istitle", (PyCFunction
)stringlib_istitle
, METH_NOARGS
,
3167 _Py_istitle__doc__
},
3168 {"isupper", (PyCFunction
)stringlib_isupper
, METH_NOARGS
,
3169 _Py_isupper__doc__
},
3170 {"join", (PyCFunction
)bytes_join
, METH_O
, join_doc
},
3171 {"ljust", (PyCFunction
)stringlib_ljust
, METH_VARARGS
, ljust__doc__
},
3172 {"lower", (PyCFunction
)stringlib_lower
, METH_NOARGS
, _Py_lower__doc__
},
3173 {"lstrip", (PyCFunction
)bytes_lstrip
, METH_VARARGS
, lstrip__doc__
},
3174 {"partition", (PyCFunction
)bytes_partition
, METH_O
, partition__doc__
},
3175 {"pop", (PyCFunction
)bytes_pop
, METH_VARARGS
, pop__doc__
},
3176 {"remove", (PyCFunction
)bytes_remove
, METH_O
, remove__doc__
},
3177 {"replace", (PyCFunction
)bytes_replace
, METH_VARARGS
, replace__doc__
},
3178 {"reverse", (PyCFunction
)bytes_reverse
, METH_NOARGS
, reverse__doc__
},
3179 {"rfind", (PyCFunction
)bytes_rfind
, METH_VARARGS
, rfind__doc__
},
3180 {"rindex", (PyCFunction
)bytes_rindex
, METH_VARARGS
, rindex__doc__
},
3181 {"rjust", (PyCFunction
)stringlib_rjust
, METH_VARARGS
, rjust__doc__
},
3182 {"rpartition", (PyCFunction
)bytes_rpartition
, METH_O
, rpartition__doc__
},
3183 {"rsplit", (PyCFunction
)bytes_rsplit
, METH_VARARGS
, rsplit__doc__
},
3184 {"rstrip", (PyCFunction
)bytes_rstrip
, METH_VARARGS
, rstrip__doc__
},
3185 {"split", (PyCFunction
)bytes_split
, METH_VARARGS
, split__doc__
},
3186 {"splitlines", (PyCFunction
)stringlib_splitlines
, METH_VARARGS
,
3188 {"startswith", (PyCFunction
)bytes_startswith
, METH_VARARGS
,
3190 {"strip", (PyCFunction
)bytes_strip
, METH_VARARGS
, strip__doc__
},
3191 {"swapcase", (PyCFunction
)stringlib_swapcase
, METH_NOARGS
,
3192 _Py_swapcase__doc__
},
3193 {"title", (PyCFunction
)stringlib_title
, METH_NOARGS
, _Py_title__doc__
},
3194 {"translate", (PyCFunction
)bytes_translate
, METH_VARARGS
,
3196 {"upper", (PyCFunction
)stringlib_upper
, METH_NOARGS
, _Py_upper__doc__
},
3197 {"zfill", (PyCFunction
)stringlib_zfill
, METH_VARARGS
, zfill__doc__
},
/* Type-level docstring for bytearray (installed as tp_doc in
 * PyByteArray_Type).  It lists the accepted constructor forms: an iterable
 * of ints, a text string plus encoding, a bytes/bytearray object, any
 * buffer-API object, or an int giving the length of a zero-filled array. */
3201 PyDoc_STRVAR(bytes_doc
,
3202 "bytearray(iterable_of_ints) -> bytearray.\n\
3203 bytearray(string, encoding[, errors]) -> bytearray.\n\
3204 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3205 bytearray(memory_view) -> bytearray.\n\
3207 Construct a mutable bytearray object from:\n\
3208 - an iterable yielding integers in range(256)\n\
3209 - a text string encoded using the specified encoding\n\
3210 - a bytes or a bytearray object\n\
3211 - any object implementing the buffer API.\n\
3213 bytearray(int) -> bytearray.\n\
3215 Construct a zero-initialized bytearray of the given length.");
/* Forward declaration: PyByteArray_Type below installs bytes_iter as its
 * tp_iter slot, but the function is only defined after the iterator type. */
3218 static PyObject
*bytes_iter(PyObject
*seq
);
/* Type object for bytearray.
 *
 * Wires up the sequence, mapping and buffer slot tables, the method table,
 * rich comparison, repr/str, iteration (bytes_iter) and initialisation
 * (bytes_init).  The flags mark the type as subclassable
 * (Py_TPFLAGS_BASETYPE) and as supporting the new buffer interface
 * (Py_TPFLAGS_HAVE_NEWBUFFER).  Allocation and construction use the
 * generic PyType_GenericAlloc / PyType_GenericNew, and instances are freed
 * with PyObject_Del.
 * NOTE(review): several slot lines (e.g. tp_name and the slots between
 * tp_dealloc and tp_repr) are not visible in this listing. */
3220 PyTypeObject PyByteArray_Type
= {
3221 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3223 sizeof(PyByteArrayObject
),
3225 (destructor
)bytes_dealloc
, /* tp_dealloc */
3230 (reprfunc
)bytes_repr
, /* tp_repr */
3231 0, /* tp_as_number */
3232 &bytes_as_sequence
, /* tp_as_sequence */
3233 &bytes_as_mapping
, /* tp_as_mapping */
3236 bytes_str
, /* tp_str */
3237 PyObject_GenericGetAttr
, /* tp_getattro */
3238 0, /* tp_setattro */
3239 &bytes_as_buffer
, /* tp_as_buffer */
3240 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
|
3241 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
3242 bytes_doc
, /* tp_doc */
3243 0, /* tp_traverse */
3245 (richcmpfunc
)bytes_richcompare
, /* tp_richcompare */
3246 0, /* tp_weaklistoffset */
3247 bytes_iter
, /* tp_iter */
3248 0, /* tp_iternext */
3249 bytes_methods
, /* tp_methods */
3254 0, /* tp_descr_get */
3255 0, /* tp_descr_set */
3256 0, /* tp_dictoffset */
3257 (initproc
)bytes_init
, /* tp_init */
3258 PyType_GenericAlloc
, /* tp_alloc */
3259 PyType_GenericNew
, /* tp_new */
3260 PyObject_Del
, /* tp_free */
3263 /*********************** Bytes Iterator ****************************/
/* Fields of the bytearray iterator object (used below through the
 * bytesiterobject type; the struct header lines are not visible here).
 * it_index is the position of the next byte to yield; it_seq is the
 * bytearray being iterated. */
3267 Py_ssize_t it_index
;
3268 PyByteArrayObject
*it_seq
; /* Set to NULL when iterator is exhausted */
/* Destructor for the bytearray iterator.
 * Removes the object from GC tracking first, then drops the (possibly
 * NULL) reference to the underlying bytearray and frees the GC-allocated
 * iterator memory. */
3272 bytesiter_dealloc(bytesiterobject
*it
)
3274 _PyObject_GC_UNTRACK(it
);
3275 Py_XDECREF(it
->it_seq
);
3276 PyObject_GC_Del(it
);
/* GC traversal callback for the bytearray iterator: reports the one object
 * reference it holds, it_seq, to the visit function.
 * (The return statement is on a line not visible in this listing.) */
3280 bytesiter_traverse(bytesiterobject
*it
, visitproc visit
, void *arg
)
3282 Py_VISIT(it
->it_seq
);
/* tp_iternext for the bytearray iterator.
 * While it_index is inside the bytearray, the byte at that position is
 * converted to a Python int; the (unsigned char) cast keeps the value in
 * 0..255 regardless of the signedness of char.
 * NOTE(review): the lines that load seq from the iterator, advance
 * it_index, return the item and handle exhaustion are not visible in this
 * listing. */
3287 bytesiter_next(bytesiterobject
*it
)
3289 PyByteArrayObject
*seq
;
3296 assert(PyByteArray_Check(seq
));
3298 if (it
->it_index
< PyByteArray_GET_SIZE(seq
)) {
3299 item
= PyInt_FromLong(
3300 (unsigned char)seq
->ob_bytes
[it
->it_index
]);
/* __length_hint__ implementation: report how many items remain, computed
 * as the current size of the bytearray minus the iterator position, and
 * returned as a Python int.
 * NOTE(review): if the bytearray was shrunk below it_index during
 * iteration this difference is negative; no clamp is visible here --
 * confirm whether the lines not shown (3313-3315) guard against that or
 * against it_seq being NULL. */
3312 bytesiter_length_hint(bytesiterobject
*it
)
3316 len
= PyByteArray_GET_SIZE(it
->it_seq
) - it
->it_index
;
3317 return PyInt_FromSsize_t(len
);
/* Docstring for the iterator's __length_hint__ method. */
3320 PyDoc_STRVAR(length_hint_doc
,
3321 "Private method returning an estimate of len(list(it)).");
/* Method table of the bytearray iterator: only __length_hint__, which
 * takes no arguments, followed by the NULL sentinel that ends the table.
 * (The continuation line of the first entry is not visible in this
 * listing.) */
3323 static PyMethodDef bytesiter_methods
[] = {
3324 {"__length_hint__", (PyCFunction
)bytesiter_length_hint
, METH_NOARGS
,
3326 {NULL
, NULL
} /* sentinel */
/* Type object for the iterator returned by iterating a bytearray
 * ("bytearray_iterator").
 * Instances are fixed-size (tp_itemsize 0) and take part in cyclic GC
 * (Py_TPFLAGS_HAVE_GC with bytesiter_traverse).  The type is its own
 * iterator (PyObject_SelfIter) and produces items via bytesiter_next.
 * NOTE(review): some slot lines are not visible in this listing. */
3329 PyTypeObject PyByteArrayIter_Type
= {
3330 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3331 "bytearray_iterator", /* tp_name */
3332 sizeof(bytesiterobject
), /* tp_basicsize */
3333 0, /* tp_itemsize */
3335 (destructor
)bytesiter_dealloc
, /* tp_dealloc */
3341 0, /* tp_as_number */
3342 0, /* tp_as_sequence */
3343 0, /* tp_as_mapping */
3347 PyObject_GenericGetAttr
, /* tp_getattro */
3348 0, /* tp_setattro */
3349 0, /* tp_as_buffer */
3350 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
, /* tp_flags */
3352 (traverseproc
)bytesiter_traverse
, /* tp_traverse */
3354 0, /* tp_richcompare */
3355 0, /* tp_weaklistoffset */
3356 PyObject_SelfIter
, /* tp_iter */
3357 (iternextfunc
)bytesiter_next
, /* tp_iternext */
3358 bytesiter_methods
, /* tp_methods */
/* tp_iter for bytearray: create a new iterator over seq.
 *
 * seq: must be a bytearray; otherwise PyErr_BadInternalCall() is raised.
 *
 * The iterator is allocated as a GC object, stores seq in it_seq, is
 * registered with the collector and returned as a PyObject pointer.
 * NOTE(review): the lines that check the allocation result, initialise
 * it_index and take a reference to seq are not visible in this listing --
 * confirm. */
3363 bytes_iter(PyObject
*seq
)
3365 bytesiterobject
*it
;
3367 if (!PyByteArray_Check(seq
)) {
3368 PyErr_BadInternalCall();
3371 it
= PyObject_GC_New(bytesiterobject
, &PyByteArrayIter_Type
);
3376 it
->it_seq
= (PyByteArrayObject
*)seq
;
/* Start GC tracking only once the fields above have been filled in. */
3377 _PyObject_GC_TRACK(it
);
3378 return (PyObject
*)it
;