1 /* String (str/bytes) object implementation */
3 #define PY_SSIZE_T_CLEAN
10 Py_ssize_t null_strings
, one_strings
;
13 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
14 static PyStringObject
*nullstring
;
16 /* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
21 Another way to look at this is to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
24 static PyObject
*interned
;
26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
35 For both PyString_FromString() and PyString_FromStringAndSize(), the
36 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
39 For PyString_FromString(), the parameter `str' points to a null-terminated
40 string containing exactly `size' bytes.
42 For PyString_FromStringAndSize(), the parameter `str' is
43 either NULL or else points to a string containing at least `size' bytes.
44 For PyString_FromStringAndSize(), the string in the `str' parameter does
45 not have to be null-terminated. (Therefore it is safe to construct a
46 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
48 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
50 PyString object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character. It
56 is therefore equal to the `size' parameter (for
57 PyString_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyString_FromString()).
61 PyString_FromStringAndSize(const char *str
, Py_ssize_t size
)
63 register PyStringObject
*op
;
65 PyErr_SetString(PyExc_SystemError
,
66 "Negative size passed to PyString_FromStringAndSize");
69 if (size
== 0 && (op
= nullstring
) != NULL
) {
74 return (PyObject
*)op
;
76 if (size
== 1 && str
!= NULL
&&
77 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
83 return (PyObject
*)op
;
86 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
87 PyErr_SetString(PyExc_OverflowError
, "string is too large");
91 /* Inline PyObject_NewVar */
92 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
94 return PyErr_NoMemory();
95 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
97 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
99 Py_MEMCPY(op
->ob_sval
, str
, size
);
100 op
->ob_sval
[size
] = '\0';
101 /* share short strings */
103 PyObject
*t
= (PyObject
*)op
;
104 PyString_InternInPlace(&t
);
105 op
= (PyStringObject
*)t
;
108 } else if (size
== 1 && str
!= NULL
) {
109 PyObject
*t
= (PyObject
*)op
;
110 PyString_InternInPlace(&t
);
111 op
= (PyStringObject
*)t
;
112 characters
[*str
& UCHAR_MAX
] = op
;
115 return (PyObject
*) op
;
119 PyString_FromString(const char *str
)
121 register size_t size
;
122 register PyStringObject
*op
;
126 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
127 PyErr_SetString(PyExc_OverflowError
,
128 "string is too long for a Python string");
131 if (size
== 0 && (op
= nullstring
) != NULL
) {
136 return (PyObject
*)op
;
138 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
143 return (PyObject
*)op
;
146 /* Inline PyObject_NewVar */
147 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
149 return PyErr_NoMemory();
150 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
152 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
153 Py_MEMCPY(op
->ob_sval
, str
, size
+1);
154 /* share short strings */
156 PyObject
*t
= (PyObject
*)op
;
157 PyString_InternInPlace(&t
);
158 op
= (PyStringObject
*)t
;
161 } else if (size
== 1) {
162 PyObject
*t
= (PyObject
*)op
;
163 PyString_InternInPlace(&t
);
164 op
= (PyStringObject
*)t
;
165 characters
[*str
& UCHAR_MAX
] = op
;
168 return (PyObject
*) op
;
172 PyString_FromFormatV(const char *format
, va_list vargs
)
180 #ifdef VA_LIST_IS_ARRAY
181 Py_MEMCPY(count
, vargs
, sizeof(va_list));
184 __va_copy(count
, vargs
);
189 /* step 1: figure out how large a buffer we need */
190 for (f
= format
; *f
; f
++) {
192 #ifdef HAVE_LONG_LONG
193 int longlongflag
= 0;
196 while (*++f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
199 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
200 * they don't affect the amount of space we reserve.
203 if (f
[1] == 'd' || f
[1] == 'u') {
206 #ifdef HAVE_LONG_LONG
207 else if (f
[1] == 'l' &&
208 (f
[2] == 'd' || f
[2] == 'u')) {
214 else if (*f
== 'z' && (f
[1] == 'd' || f
[1] == 'u')) {
220 (void)va_arg(count
, int);
221 /* fall through... */
225 case 'd': case 'u': case 'i': case 'x':
226 (void) va_arg(count
, int);
227 #ifdef HAVE_LONG_LONG
229 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
230 plus 1 for the sign. 53/22 is an upper
231 bound for log10(256). */
233 n
+= 2 + (SIZEOF_LONG_LONG
*53-1) / 22;
236 /* 20 bytes is enough to hold a 64-bit
237 integer. Decimal takes the most
238 space. This isn't enough for
244 s
= va_arg(count
, char*);
248 (void) va_arg(count
, int);
249 /* maximum 64-bit pointer representation:
251 * so 19 characters is enough.
252 * XXX I count 18 -- what's the extra for?
257 /* if we stumble upon an unknown
258 formatting code, copy the rest of
259 the format string to the output
260 string. (we cannot just skip the
261 code, since there's no way to know
262 what's in the argument list) */
270 /* step 2: fill the buffer */
271 /* Since we've analyzed how much space we need for the worst case,
272 use sprintf directly instead of the slower PyOS_snprintf. */
273 string
= PyString_FromStringAndSize(NULL
, n
);
277 s
= PyString_AsString(string
);
279 for (f
= format
; *f
; f
++) {
284 #ifdef HAVE_LONG_LONG
285 int longlongflag
= 0;
288 /* parse the width.precision part (we're only
289 interested in the precision value, if any) */
291 while (isdigit(Py_CHARMASK(*f
)))
292 n
= (n
*10) + *f
++ - '0';
296 while (isdigit(Py_CHARMASK(*f
)))
297 n
= (n
*10) + *f
++ - '0';
299 while (*f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
301 /* Handle %ld, %lu, %lld and %llu. */
303 if (f
[1] == 'd' || f
[1] == 'u') {
307 #ifdef HAVE_LONG_LONG
308 else if (f
[1] == 'l' &&
309 (f
[2] == 'd' || f
[2] == 'u')) {
315 /* handle the size_t flag. */
316 else if (*f
== 'z' && (f
[1] == 'd' || f
[1] == 'u')) {
323 *s
++ = va_arg(vargs
, int);
327 sprintf(s
, "%ld", va_arg(vargs
, long));
328 #ifdef HAVE_LONG_LONG
329 else if (longlongflag
)
330 sprintf(s
, "%" PY_FORMAT_LONG_LONG
"d",
331 va_arg(vargs
, PY_LONG_LONG
));
334 sprintf(s
, "%" PY_FORMAT_SIZE_T
"d",
335 va_arg(vargs
, Py_ssize_t
));
337 sprintf(s
, "%d", va_arg(vargs
, int));
343 va_arg(vargs
, unsigned long));
344 #ifdef HAVE_LONG_LONG
345 else if (longlongflag
)
346 sprintf(s
, "%" PY_FORMAT_LONG_LONG
"u",
347 va_arg(vargs
, PY_LONG_LONG
));
350 sprintf(s
, "%" PY_FORMAT_SIZE_T
"u",
351 va_arg(vargs
, size_t));
354 va_arg(vargs
, unsigned int));
358 sprintf(s
, "%i", va_arg(vargs
, int));
362 sprintf(s
, "%x", va_arg(vargs
, int));
366 p
= va_arg(vargs
, char*);
374 sprintf(s
, "%p", va_arg(vargs
, void*));
375 /* %p is ill-defined: ensure leading 0x. */
378 else if (s
[1] != 'x') {
379 memmove(s
+2, s
, strlen(s
)+1);
398 _PyString_Resize(&string
, s
- PyString_AS_STRING(string
));
403 PyString_FromFormat(const char *format
, ...)
408 #ifdef HAVE_STDARG_PROTOTYPES
409 va_start(vargs
, format
);
413 ret
= PyString_FromFormatV(format
, vargs
);
419 PyObject
*PyString_Decode(const char *s
,
421 const char *encoding
,
426 str
= PyString_FromStringAndSize(s
, size
);
429 v
= PyString_AsDecodedString(str
, encoding
, errors
);
434 PyObject
*PyString_AsDecodedObject(PyObject
*str
,
435 const char *encoding
,
440 if (!PyString_Check(str
)) {
445 if (encoding
== NULL
) {
446 #ifdef Py_USING_UNICODE
447 encoding
= PyUnicode_GetDefaultEncoding();
449 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
454 /* Decode via the codec registry */
455 v
= PyCodec_Decode(str
, encoding
, errors
);
465 PyObject
*PyString_AsDecodedString(PyObject
*str
,
466 const char *encoding
,
471 v
= PyString_AsDecodedObject(str
, encoding
, errors
);
475 #ifdef Py_USING_UNICODE
476 /* Convert Unicode to a string using the default encoding */
477 if (PyUnicode_Check(v
)) {
479 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
485 if (!PyString_Check(v
)) {
486 PyErr_Format(PyExc_TypeError
,
487 "decoder did not return a string object (type=%.400s)",
488 Py_TYPE(v
)->tp_name
);
499 PyObject
*PyString_Encode(const char *s
,
501 const char *encoding
,
506 str
= PyString_FromStringAndSize(s
, size
);
509 v
= PyString_AsEncodedString(str
, encoding
, errors
);
514 PyObject
*PyString_AsEncodedObject(PyObject
*str
,
515 const char *encoding
,
520 if (!PyString_Check(str
)) {
525 if (encoding
== NULL
) {
526 #ifdef Py_USING_UNICODE
527 encoding
= PyUnicode_GetDefaultEncoding();
529 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
534 /* Encode via the codec registry */
535 v
= PyCodec_Encode(str
, encoding
, errors
);
545 PyObject
*PyString_AsEncodedString(PyObject
*str
,
546 const char *encoding
,
551 v
= PyString_AsEncodedObject(str
, encoding
, errors
);
555 #ifdef Py_USING_UNICODE
556 /* Convert Unicode to a string using the default encoding */
557 if (PyUnicode_Check(v
)) {
559 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
565 if (!PyString_Check(v
)) {
566 PyErr_Format(PyExc_TypeError
,
567 "encoder did not return a string object (type=%.400s)",
568 Py_TYPE(v
)->tp_name
);
580 string_dealloc(PyObject
*op
)
582 switch (PyString_CHECK_INTERNED(op
)) {
583 case SSTATE_NOT_INTERNED
:
586 case SSTATE_INTERNED_MORTAL
:
587 /* revive dead object temporarily for DelItem */
589 if (PyDict_DelItem(interned
, op
) != 0)
591 "deletion of interned string failed");
594 case SSTATE_INTERNED_IMMORTAL
:
595 Py_FatalError("Immortal interned string died.");
598 Py_FatalError("Inconsistent interned string state.");
600 Py_TYPE(op
)->tp_free(op
);
603 /* Unescape a backslash-escaped string. If unicode is non-zero,
604 the string is a u-literal. If recode_encoding is non-zero,
605 the string is UTF-8 encoded and should be re-encoded in the
606 specified encoding. */
608 PyObject
*PyString_DecodeEscape(const char *s
,
612 const char *recode_encoding
)
618 Py_ssize_t newlen
= recode_encoding
? 4*len
:len
;
619 v
= PyString_FromStringAndSize((char *)NULL
, newlen
);
622 p
= buf
= PyString_AsString(v
);
627 #ifdef Py_USING_UNICODE
628 if (recode_encoding
&& (*s
& 0x80)) {
634 /* Decode non-ASCII bytes as UTF-8. */
635 while (t
< end
&& (*t
& 0x80)) t
++;
636 u
= PyUnicode_DecodeUTF8(s
, t
- s
, errors
);
639 /* Recode them in target encoding. */
640 w
= PyUnicode_AsEncodedString(
641 u
, recode_encoding
, errors
);
645 /* Append bytes to output buffer. */
646 assert(PyString_Check(w
));
647 r
= PyString_AS_STRING(w
);
648 rn
= PyString_GET_SIZE(w
);
663 PyErr_SetString(PyExc_ValueError
,
664 "Trailing \\ in string");
668 /* XXX This assumes ASCII! */
670 case '\\': *p
++ = '\\'; break;
671 case '\'': *p
++ = '\''; break;
672 case '\"': *p
++ = '\"'; break;
673 case 'b': *p
++ = '\b'; break;
674 case 'f': *p
++ = '\014'; break; /* FF */
675 case 't': *p
++ = '\t'; break;
676 case 'n': *p
++ = '\n'; break;
677 case 'r': *p
++ = '\r'; break;
678 case 'v': *p
++ = '\013'; break; /* VT */
679 case 'a': *p
++ = '\007'; break; /* BEL, not classic C */
680 case '0': case '1': case '2': case '3':
681 case '4': case '5': case '6': case '7':
683 if (s
< end
&& '0' <= *s
&& *s
<= '7') {
684 c
= (c
<<3) + *s
++ - '0';
685 if (s
< end
&& '0' <= *s
&& *s
<= '7')
686 c
= (c
<<3) + *s
++ - '0';
692 isxdigit(Py_CHARMASK(s
[0])) &&
693 isxdigit(Py_CHARMASK(s
[1])))
716 if (!errors
|| strcmp(errors
, "strict") == 0) {
717 PyErr_SetString(PyExc_ValueError
,
718 "invalid \\x escape");
721 if (strcmp(errors
, "replace") == 0) {
723 } else if (strcmp(errors
, "ignore") == 0)
726 PyErr_Format(PyExc_ValueError
,
728 "unknown error handling code: %.400s",
732 #ifndef Py_USING_UNICODE
737 PyErr_SetString(PyExc_ValueError
,
738 "Unicode escapes not legal "
739 "when Unicode disabled");
746 goto non_esc
; /* an arbitrary number of unescaped
747 UTF-8 bytes may follow. */
751 _PyString_Resize(&v
, p
- buf
);
758 /* -------------------------------------------------------------------- */
762 string_getsize(register PyObject
*op
)
766 if (PyString_AsStringAndSize(op
, &s
, &len
))
771 static /*const*/ char *
772 string_getbuffer(register PyObject
*op
)
776 if (PyString_AsStringAndSize(op
, &s
, &len
))
782 PyString_Size(register PyObject
*op
)
784 if (!PyString_Check(op
))
785 return string_getsize(op
);
790 PyString_AsString(register PyObject
*op
)
792 if (!PyString_Check(op
))
793 return string_getbuffer(op
);
794 return ((PyStringObject
*)op
) -> ob_sval
;
798 PyString_AsStringAndSize(register PyObject
*obj
,
800 register Py_ssize_t
*len
)
803 PyErr_BadInternalCall();
807 if (!PyString_Check(obj
)) {
808 #ifdef Py_USING_UNICODE
809 if (PyUnicode_Check(obj
)) {
810 obj
= _PyUnicode_AsDefaultEncodedString(obj
, NULL
);
817 PyErr_Format(PyExc_TypeError
,
818 "expected string or Unicode object, "
819 "%.200s found", Py_TYPE(obj
)->tp_name
);
824 *s
= PyString_AS_STRING(obj
);
826 *len
= PyString_GET_SIZE(obj
);
827 else if (strlen(*s
) != (size_t)PyString_GET_SIZE(obj
)) {
828 PyErr_SetString(PyExc_TypeError
,
829 "expected string without null bytes");
835 /* -------------------------------------------------------------------- */
838 #include "stringlib/stringdefs.h"
839 #include "stringlib/fastsearch.h"
841 #include "stringlib/count.h"
842 #include "stringlib/find.h"
843 #include "stringlib/partition.h"
845 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
846 #include "stringlib/localeutil.h"
851 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
853 Py_ssize_t i
, str_len
;
857 /* XXX Ought to check for interrupts when writing long strings */
858 if (! PyString_CheckExact(op
)) {
860 /* A str subclass may have its own __str__ method. */
861 op
= (PyStringObject
*) PyObject_Str((PyObject
*)op
);
864 ret
= string_print(op
, fp
, flags
);
868 if (flags
& Py_PRINT_RAW
) {
869 char *data
= op
->ob_sval
;
870 Py_ssize_t size
= Py_SIZE(op
);
871 Py_BEGIN_ALLOW_THREADS
872 while (size
> INT_MAX
) {
873 /* Very long strings cannot be written atomically.
874 * But don't write exactly INT_MAX bytes at a time
875 * to avoid memory alignment issues.
877 const int chunk_size
= INT_MAX
& ~0x3FFF;
878 fwrite(data
, 1, chunk_size
, fp
);
883 if (size
) fwrite(data
, (int)size
, 1, fp
);
885 fwrite(data
, 1, (int)size
, fp
);
891 /* figure out which quote to use; single is preferred */
893 if (memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
894 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
897 str_len
= Py_SIZE(op
);
898 Py_BEGIN_ALLOW_THREADS
900 for (i
= 0; i
< str_len
; i
++) {
901 /* Since strings are immutable and the caller should have a
902 reference, accessing the internal buffer should not be an issue
903 with the GIL released. */
905 if (c
== quote
|| c
== '\\')
906 fprintf(fp
, "\\%c", c
);
913 else if (c
< ' ' || c
>= 0x7f)
914 fprintf(fp
, "\\x%02x", c
& 0xff);
924 PyString_Repr(PyObject
*obj
, int smartquotes
)
926 register PyStringObject
* op
= (PyStringObject
*) obj
;
927 size_t newsize
= 2 + 4 * Py_SIZE(op
);
929 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 != Py_SIZE(op
)) {
930 PyErr_SetString(PyExc_OverflowError
,
931 "string is too large to make repr");
934 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
939 register Py_ssize_t i
;
944 /* figure out which quote to use; single is preferred */
947 memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
948 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
951 p
= PyString_AS_STRING(v
);
953 for (i
= 0; i
< Py_SIZE(op
); i
++) {
954 /* There's at least enough room for a hex escape
955 and a closing quote. */
956 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 5);
958 if (c
== quote
|| c
== '\\')
959 *p
++ = '\\', *p
++ = c
;
961 *p
++ = '\\', *p
++ = 't';
963 *p
++ = '\\', *p
++ = 'n';
965 *p
++ = '\\', *p
++ = 'r';
966 else if (c
< ' ' || c
>= 0x7f) {
967 /* For performance, we don't want to call
968 PyOS_snprintf here (extra layers of
970 sprintf(p
, "\\x%02x", c
& 0xff);
976 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 1);
980 &v
, (p
- PyString_AS_STRING(v
)));
986 string_repr(PyObject
*op
)
988 return PyString_Repr(op
, 1);
992 string_str(PyObject
*s
)
994 assert(PyString_Check(s
));
995 if (PyString_CheckExact(s
)) {
1000 /* Subtype -- return genuine string with the same value. */
1001 PyStringObject
*t
= (PyStringObject
*) s
;
1002 return PyString_FromStringAndSize(t
->ob_sval
, Py_SIZE(t
));
1007 string_length(PyStringObject
*a
)
1013 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
1015 register Py_ssize_t size
;
1016 register PyStringObject
*op
;
1017 if (!PyString_Check(bb
)) {
1018 #ifdef Py_USING_UNICODE
1019 if (PyUnicode_Check(bb
))
1020 return PyUnicode_Concat((PyObject
*)a
, bb
);
1022 if (PyByteArray_Check(bb
))
1023 return PyByteArray_Concat((PyObject
*)a
, bb
);
1024 PyErr_Format(PyExc_TypeError
,
1025 "cannot concatenate 'str' and '%.200s' objects",
1026 Py_TYPE(bb
)->tp_name
);
1029 #define b ((PyStringObject *)bb)
1030 /* Optimize cases with empty left or right operand */
1031 if ((Py_SIZE(a
) == 0 || Py_SIZE(b
) == 0) &&
1032 PyString_CheckExact(a
) && PyString_CheckExact(b
)) {
1033 if (Py_SIZE(a
) == 0) {
1038 return (PyObject
*)a
;
1040 size
= Py_SIZE(a
) + Py_SIZE(b
);
1041 /* Check that string sizes are not negative, to prevent an
1042 overflow in cases where we are passed incorrectly-created
1043 strings with negative lengths (due to a bug in other code).
1045 if (Py_SIZE(a
) < 0 || Py_SIZE(b
) < 0 ||
1046 Py_SIZE(a
) > PY_SSIZE_T_MAX
- Py_SIZE(b
)) {
1047 PyErr_SetString(PyExc_OverflowError
,
1048 "strings are too large to concat");
1052 /* Inline PyObject_NewVar */
1053 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
1054 PyErr_SetString(PyExc_OverflowError
,
1055 "strings are too large to concat");
1058 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
1060 return PyErr_NoMemory();
1061 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1063 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1064 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1065 Py_MEMCPY(op
->ob_sval
+ Py_SIZE(a
), b
->ob_sval
, Py_SIZE(b
));
1066 op
->ob_sval
[size
] = '\0';
1067 return (PyObject
*) op
;
1072 string_repeat(register PyStringObject
*a
, register Py_ssize_t n
)
1074 register Py_ssize_t i
;
1075 register Py_ssize_t j
;
1076 register Py_ssize_t size
;
1077 register PyStringObject
*op
;
1081 /* watch out for overflows: the size can overflow int,
1082 * and the # of bytes needed can overflow size_t
1084 size
= Py_SIZE(a
) * n
;
1085 if (n
&& size
/ n
!= Py_SIZE(a
)) {
1086 PyErr_SetString(PyExc_OverflowError
,
1087 "repeated string is too long");
1090 if (size
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1092 return (PyObject
*)a
;
1094 nbytes
= (size_t)size
;
1095 if (nbytes
+ PyStringObject_SIZE
<= nbytes
) {
1096 PyErr_SetString(PyExc_OverflowError
,
1097 "repeated string is too long");
1100 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ nbytes
);
1102 return PyErr_NoMemory();
1103 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1105 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1106 op
->ob_sval
[size
] = '\0';
1107 if (Py_SIZE(a
) == 1 && n
> 0) {
1108 memset(op
->ob_sval
, a
->ob_sval
[0] , n
);
1109 return (PyObject
*) op
;
1113 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1117 j
= (i
<= size
-i
) ? i
: size
-i
;
1118 Py_MEMCPY(op
->ob_sval
+i
, op
->ob_sval
, j
);
1121 return (PyObject
*) op
;
1124 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1127 string_slice(register PyStringObject
*a
, register Py_ssize_t i
,
1128 register Py_ssize_t j
)
1129 /* j -- may be negative! */
1134 j
= 0; /* Avoid signed/unsigned bug in next line */
1137 if (i
== 0 && j
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1138 /* It's the same as a */
1140 return (PyObject
*)a
;
1144 return PyString_FromStringAndSize(a
->ob_sval
+ i
, j
-i
);
1148 string_contains(PyObject
*str_obj
, PyObject
*sub_obj
)
1150 if (!PyString_CheckExact(sub_obj
)) {
1151 #ifdef Py_USING_UNICODE
1152 if (PyUnicode_Check(sub_obj
))
1153 return PyUnicode_Contains(str_obj
, sub_obj
);
1155 if (!PyString_Check(sub_obj
)) {
1156 PyErr_Format(PyExc_TypeError
,
1157 "'in <string>' requires string as left operand, "
1158 "not %.200s", Py_TYPE(sub_obj
)->tp_name
);
1163 return stringlib_contains_obj(str_obj
, sub_obj
);
1167 string_item(PyStringObject
*a
, register Py_ssize_t i
)
1171 if (i
< 0 || i
>= Py_SIZE(a
)) {
1172 PyErr_SetString(PyExc_IndexError
, "string index out of range");
1175 pchar
= a
->ob_sval
[i
];
1176 v
= (PyObject
*)characters
[pchar
& UCHAR_MAX
];
1178 v
= PyString_FromStringAndSize(&pchar
, 1);
1189 string_richcompare(PyStringObject
*a
, PyStringObject
*b
, int op
)
1192 Py_ssize_t len_a
, len_b
;
1196 /* Make sure both arguments are strings. */
1197 if (!(PyString_Check(a
) && PyString_Check(b
))) {
1198 result
= Py_NotImplemented
;
1203 case Py_EQ
:case Py_LE
:case Py_GE
:
1206 case Py_NE
:case Py_LT
:case Py_GT
:
1212 /* Supporting Py_NE here as well does not save
1213 much time, since Py_NE is rarely used. */
1214 if (Py_SIZE(a
) == Py_SIZE(b
)
1215 && (a
->ob_sval
[0] == b
->ob_sval
[0]
1216 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0)) {
1223 len_a
= Py_SIZE(a
); len_b
= Py_SIZE(b
);
1224 min_len
= (len_a
< len_b
) ? len_a
: len_b
;
1226 c
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
1228 c
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
1232 c
= (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
1234 case Py_LT
: c
= c
< 0; break;
1235 case Py_LE
: c
= c
<= 0; break;
1236 case Py_EQ
: assert(0); break; /* unreachable */
1237 case Py_NE
: c
= c
!= 0; break;
1238 case Py_GT
: c
= c
> 0; break;
1239 case Py_GE
: c
= c
>= 0; break;
1241 result
= Py_NotImplemented
;
1244 result
= c
? Py_True
: Py_False
;
1251 _PyString_Eq(PyObject
*o1
, PyObject
*o2
)
1253 PyStringObject
*a
= (PyStringObject
*) o1
;
1254 PyStringObject
*b
= (PyStringObject
*) o2
;
1255 return Py_SIZE(a
) == Py_SIZE(b
)
1256 && *a
->ob_sval
== *b
->ob_sval
1257 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0;
1261 string_hash(PyStringObject
*a
)
1263 register Py_ssize_t len
;
1264 register unsigned char *p
;
1267 if (a
->ob_shash
!= -1)
1270 p
= (unsigned char *) a
->ob_sval
;
1273 x
= (1000003*x
) ^ *p
++;
1282 string_subscript(PyStringObject
* self
, PyObject
* item
)
1284 if (PyIndex_Check(item
)) {
1285 Py_ssize_t i
= PyNumber_AsSsize_t(item
, PyExc_IndexError
);
1286 if (i
== -1 && PyErr_Occurred())
1289 i
+= PyString_GET_SIZE(self
);
1290 return string_item(self
, i
);
1292 else if (PySlice_Check(item
)) {
1293 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
1298 if (PySlice_GetIndicesEx((PySliceObject
*)item
,
1299 PyString_GET_SIZE(self
),
1300 &start
, &stop
, &step
, &slicelength
) < 0) {
1304 if (slicelength
<= 0) {
1305 return PyString_FromStringAndSize("", 0);
1307 else if (start
== 0 && step
== 1 &&
1308 slicelength
== PyString_GET_SIZE(self
) &&
1309 PyString_CheckExact(self
)) {
1311 return (PyObject
*)self
;
1313 else if (step
== 1) {
1314 return PyString_FromStringAndSize(
1315 PyString_AS_STRING(self
) + start
,
1319 source_buf
= PyString_AsString((PyObject
*)self
);
1320 result_buf
= (char *)PyMem_Malloc(slicelength
);
1321 if (result_buf
== NULL
)
1322 return PyErr_NoMemory();
1324 for (cur
= start
, i
= 0; i
< slicelength
;
1326 result_buf
[i
] = source_buf
[cur
];
1329 result
= PyString_FromStringAndSize(result_buf
,
1331 PyMem_Free(result_buf
);
1336 PyErr_Format(PyExc_TypeError
,
1337 "string indices must be integers, not %.200s",
1338 Py_TYPE(item
)->tp_name
);
1344 string_buffer_getreadbuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1347 PyErr_SetString(PyExc_SystemError
,
1348 "accessing non-existent string segment");
1351 *ptr
= (void *)self
->ob_sval
;
1352 return Py_SIZE(self
);
1356 string_buffer_getwritebuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1358 PyErr_SetString(PyExc_TypeError
,
1359 "Cannot use string as modifiable buffer");
1364 string_buffer_getsegcount(PyStringObject
*self
, Py_ssize_t
*lenp
)
1367 *lenp
= Py_SIZE(self
);
1372 string_buffer_getcharbuf(PyStringObject
*self
, Py_ssize_t index
, const char **ptr
)
1375 PyErr_SetString(PyExc_SystemError
,
1376 "accessing non-existent string segment");
1379 *ptr
= self
->ob_sval
;
1380 return Py_SIZE(self
);
1384 string_buffer_getbuffer(PyStringObject
*self
, Py_buffer
*view
, int flags
)
1386 return PyBuffer_FillInfo(view
, (PyObject
*)self
,
1387 (void *)self
->ob_sval
, Py_SIZE(self
),
1391 static PySequenceMethods string_as_sequence
= {
1392 (lenfunc
)string_length
, /*sq_length*/
1393 (binaryfunc
)string_concat
, /*sq_concat*/
1394 (ssizeargfunc
)string_repeat
, /*sq_repeat*/
1395 (ssizeargfunc
)string_item
, /*sq_item*/
1396 (ssizessizeargfunc
)string_slice
, /*sq_slice*/
1399 (objobjproc
)string_contains
/*sq_contains*/
1402 static PyMappingMethods string_as_mapping
= {
1403 (lenfunc
)string_length
,
1404 (binaryfunc
)string_subscript
,
1408 static PyBufferProcs string_as_buffer
= {
1409 (readbufferproc
)string_buffer_getreadbuf
,
1410 (writebufferproc
)string_buffer_getwritebuf
,
1411 (segcountproc
)string_buffer_getsegcount
,
1412 (charbufferproc
)string_buffer_getcharbuf
,
1413 (getbufferproc
)string_buffer_getbuffer
,
1420 #define RIGHTSTRIP 1
/* Argument-format strings, one per strip variant.  Index 1 matches the
   RIGHTSTRIP constant ("|O:rstrip"); indices 0 and 2 presumably match the
   sibling left/both strip constants -- confirm against their definitions. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Length of the "|O:" prefix that precedes the method name in each entry. */
#define STRIPFORMAT_PREFIX_LEN 3

/* Bare method name ("lstrip", "rstrip" or "strip") for strip variant i,
   obtained by skipping the "|O:" prefix of the format string. */
#define STRIPNAME(i) (stripformat[(i)] + STRIPFORMAT_PREFIX_LEN)
/* Evaluates to non-zero when the `length` bytes of `target` starting at
   `offset` equal the first `length` bytes of `pattern`.  The first and last
   bytes are compared directly; memcmp then checks the bytes in between.

   Don't call if length < 2: `length - 2` is used as the memcmp byte count.

   Every parameter is parenthesized so that expression arguments expand with
   the intended precedence.  Arguments are still evaluated more than once,
   so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[(offset)] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1436 /* Overallocate the initial list to reduce the number of reallocs for small
1437 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1438 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1439 text (roughly 11 words per line) and field delimited data (usually 1-10
1440 fields). For large strings the split algorithms are bandwidth limited
1441 so increasing the preallocation likely will not improve things.*/
/* Upper bound on the number of list slots reserved up front for a split. */
#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: maxsplit+1
   elements (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The parameter is parenthesized so that expression arguments expand with
   the intended precedence; it is evaluated twice, so avoid side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1449 #define SPLIT_APPEND(data, left, right) \
1450 str = PyString_FromStringAndSize((data) + (left), \
1451 (right) - (left)); \
1454 if (PyList_Append(list, str)) { \
1461 #define SPLIT_ADD(data, left, right) { \
1462 str = PyString_FromStringAndSize((data) + (left), \
1463 (right) - (left)); \
1466 if (count < MAX_PREALLOC) { \
1467 PyList_SET_ITEM(list, count, str); \
1469 if (PyList_Append(list, str)) { \
1478 /* Always force the list to the expected size. */
/* Expands to an assignment of `count` to Py_SIZE(list).  `count` is not a
   macro parameter: it is taken from the scope in which the macro is used,
   so the macro is only usable where a suitable `count` variable exists. */
1479 #define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
/* Whitespace scanning helpers.  Each one moves the index `i` over bytes of
   `s`, classifying every byte with isspace(Py_CHARMASK(...)):
     SKIP_SPACE     advances i while i < len and s[i] is whitespace;
     SKIP_NONSPACE  advances i while i < len and s[i] is not whitespace;
     RSKIP_SPACE    decrements i while i >= 0 and s[i] is whitespace;
     RSKIP_NONSPACE decrements i while i >= 0 and s[i] is not whitespace.
   `i` must be a modifiable lvalue.  All parameters are parenthesized so that
   expression arguments (e.g. `buf + off`) expand with the intended
   precedence.  The brace-block form is kept as in the original, so use
   these as standalone statements. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1486 Py_LOCAL_INLINE(PyObject
*)
1487 split_whitespace(PyStringObject
*self
, Py_ssize_t len
, Py_ssize_t maxsplit
)
1489 const char *s
= PyString_AS_STRING(self
);
1490 Py_ssize_t i
, j
, count
=0;
1492 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1499 while (maxsplit
-- > 0) {
1500 SKIP_SPACE(s
, i
, len
);
1503 SKIP_NONSPACE(s
, i
, len
);
1504 if (j
== 0 && i
== len
&& PyString_CheckExact(self
)) {
1505 /* No whitespace in self, so just use it as list[0] */
1507 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1515 /* Only occurs when maxsplit was reached */
1516 /* Skip any remaining whitespace and copy to end of string */
1517 SKIP_SPACE(s
, i
, len
);
1519 SPLIT_ADD(s
, i
, len
);
1521 FIX_PREALLOC_SIZE(list
);
1528 Py_LOCAL_INLINE(PyObject
*)
1529 split_char(PyStringObject
*self
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
1531 const char *s
= PyString_AS_STRING(self
);
1532 register Py_ssize_t i
, j
, count
=0;
1534 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
1540 while ((j
< len
) && (maxcount
-- > 0)) {
1542 /* I found that using memchr makes no difference */
1550 if (i
== 0 && count
== 0 && PyString_CheckExact(self
)) {
1551 /* ch not in self, so just use self as list[0] */
1553 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1556 else if (i
<= len
) {
1557 SPLIT_ADD(s
, i
, len
);
1559 FIX_PREALLOC_SIZE(list
);
1567 PyDoc_STRVAR(split__doc__
,
1568 "S.split([sep [,maxsplit]]) -> list of strings\n\
1570 Return a list of the words in the string S, using sep as the\n\
1571 delimiter string. If maxsplit is given, at most maxsplit\n\
1572 splits are done. If sep is not specified or is None, any\n\
1573 whitespace string is a separator and empty strings are removed\n\
1577 string_split(PyStringObject
*self
, PyObject
*args
)
1579 Py_ssize_t len
= PyString_GET_SIZE(self
), n
, i
, j
;
1580 Py_ssize_t maxsplit
= -1, count
=0;
1581 const char *s
= PyString_AS_STRING(self
), *sub
;
1582 PyObject
*list
, *str
, *subobj
= Py_None
;
1587 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
1590 maxsplit
= PY_SSIZE_T_MAX
;
1591 if (subobj
== Py_None
)
1592 return split_whitespace(self
, len
, maxsplit
);
1593 if (PyString_Check(subobj
)) {
1594 sub
= PyString_AS_STRING(subobj
);
1595 n
= PyString_GET_SIZE(subobj
);
1597 #ifdef Py_USING_UNICODE
1598 else if (PyUnicode_Check(subobj
))
1599 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
1601 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1605 PyErr_SetString(PyExc_ValueError
, "empty separator");
1609 return split_char(self
, len
, sub
[0], maxsplit
);
1611 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1617 while (maxsplit
-- > 0) {
1618 pos
= fastsearch(s
+i
, len
-i
, sub
, n
, FAST_SEARCH
);
1627 while ((j
+n
<= len
) && (maxsplit
-- > 0)) {
1628 for (; j
+n
<= len
; j
++) {
1629 if (Py_STRING_MATCH(s
, j
, sub
, n
)) {
1637 SPLIT_ADD(s
, i
, len
);
1638 FIX_PREALLOC_SIZE(list
);
1646 PyDoc_STRVAR(partition__doc__
,
1647 "S.partition(sep) -> (head, sep, tail)\n\
1649 Search for the separator sep in S, and return the part before it,\n\
1650 the separator itself, and the part after it. If the separator is not\n\
1651 found, return S and two empty strings.");
1654 string_partition(PyStringObject
*self
, PyObject
*sep_obj
)
1659 if (PyString_Check(sep_obj
)) {
1660 sep
= PyString_AS_STRING(sep_obj
);
1661 sep_len
= PyString_GET_SIZE(sep_obj
);
1663 #ifdef Py_USING_UNICODE
1664 else if (PyUnicode_Check(sep_obj
))
1665 return PyUnicode_Partition((PyObject
*) self
, sep_obj
);
1667 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1670 return stringlib_partition(
1672 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1673 sep_obj
, sep
, sep_len
/* Docstring for S.rpartition().  The result tuple is named in the same
   before / separator / after order that the description text uses. */
1677 PyDoc_STRVAR(rpartition__doc__
,
1678 "S.rpartition(sep) -> (head, sep, tail)\n\
1680 Search for the separator sep in S, starting at the end of S, and return\n\
1681 the part before it, the separator itself, and the part after it. If the\n\
1682 separator is not found, return two empty strings and S.");
1685 string_rpartition(PyStringObject
*self
, PyObject
*sep_obj
)
1690 if (PyString_Check(sep_obj
)) {
1691 sep
= PyString_AS_STRING(sep_obj
);
1692 sep_len
= PyString_GET_SIZE(sep_obj
);
1694 #ifdef Py_USING_UNICODE
1695 else if (PyUnicode_Check(sep_obj
))
1696 return PyUnicode_RPartition((PyObject
*) self
, sep_obj
);
1698 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1701 return stringlib_rpartition(
1703 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1704 sep_obj
, sep
, sep_len
1708 Py_LOCAL_INLINE(PyObject
*)
1709 rsplit_whitespace(PyStringObject
*self
, Py_ssize_t len
, Py_ssize_t maxsplit
)
1711 const char *s
= PyString_AS_STRING(self
);
1712 Py_ssize_t i
, j
, count
=0;
1714 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1721 while (maxsplit
-- > 0) {
1725 RSKIP_NONSPACE(s
, i
);
1726 if (j
== len
-1 && i
< 0 && PyString_CheckExact(self
)) {
1727 /* No whitespace in self, so just use it as list[0] */
1729 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1733 SPLIT_ADD(s
, i
+ 1, j
+ 1);
1736 /* Only occurs when maxsplit was reached */
1737 /* Skip any remaining whitespace and copy to beginning of string */
1740 SPLIT_ADD(s
, 0, i
+ 1);
1743 FIX_PREALLOC_SIZE(list
);
1744 if (PyList_Reverse(list
) < 0)
1752 Py_LOCAL_INLINE(PyObject
*)
1753 rsplit_char(PyStringObject
*self
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
1755 const char *s
= PyString_AS_STRING(self
);
1756 register Py_ssize_t i
, j
, count
=0;
1758 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
1764 while ((i
>= 0) && (maxcount
-- > 0)) {
1765 for (; i
>= 0; i
--) {
1767 SPLIT_ADD(s
, i
+ 1, j
+ 1);
1773 if (i
< 0 && count
== 0 && PyString_CheckExact(self
)) {
1774 /* ch not in self, so just use self as list[0] */
1776 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1780 SPLIT_ADD(s
, 0, j
+ 1);
1782 FIX_PREALLOC_SIZE(list
);
1783 if (PyList_Reverse(list
) < 0)
1792 PyDoc_STRVAR(rsplit__doc__
,
1793 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1795 Return a list of the words in the string S, using sep as the\n\
1796 delimiter string, starting at the end of the string and working\n\
1797 to the front. If maxsplit is given, at most maxsplit splits are\n\
1798 done. If sep is not specified or is None, any whitespace string\n\
1802 string_rsplit(PyStringObject
*self
, PyObject
*args
)
1804 Py_ssize_t len
= PyString_GET_SIZE(self
), n
, i
, j
;
1805 Py_ssize_t maxsplit
= -1, count
=0;
1806 const char *s
, *sub
;
1807 PyObject
*list
, *str
, *subobj
= Py_None
;
1809 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
1812 maxsplit
= PY_SSIZE_T_MAX
;
1813 if (subobj
== Py_None
)
1814 return rsplit_whitespace(self
, len
, maxsplit
);
1815 if (PyString_Check(subobj
)) {
1816 sub
= PyString_AS_STRING(subobj
);
1817 n
= PyString_GET_SIZE(subobj
);
1819 #ifdef Py_USING_UNICODE
1820 else if (PyUnicode_Check(subobj
))
1821 return PyUnicode_RSplit((PyObject
*)self
, subobj
, maxsplit
);
1823 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1827 PyErr_SetString(PyExc_ValueError
, "empty separator");
1831 return rsplit_char(self
, len
, sub
[0], maxsplit
);
1833 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1840 s
= PyString_AS_STRING(self
);
1841 while ( (i
>= 0) && (maxsplit
-- > 0) ) {
1843 if (Py_STRING_MATCH(s
, i
, sub
, n
)) {
1844 SPLIT_ADD(s
, i
+ n
, j
);
1852 FIX_PREALLOC_SIZE(list
);
1853 if (PyList_Reverse(list
) < 0)
1863 PyDoc_STRVAR(join__doc__
,
1864 "S.join(iterable) -> string\n\
1866 Return a string which is the concatenation of the strings in the\n\
1867 iterable. The separator between elements is S.");
1870 string_join(PyStringObject
*self
, PyObject
*orig
)
1872 char *sep
= PyString_AS_STRING(self
);
1873 const Py_ssize_t seplen
= PyString_GET_SIZE(self
);
1874 PyObject
*res
= NULL
;
1876 Py_ssize_t seqlen
= 0;
1879 PyObject
*seq
, *item
;
1881 seq
= PySequence_Fast(orig
, "");
1886 seqlen
= PySequence_Size(seq
);
1889 return PyString_FromString("");
1892 item
= PySequence_Fast_GET_ITEM(seq
, 0);
1893 if (PyString_CheckExact(item
) || PyUnicode_CheckExact(item
)) {
1900 /* There are at least two things to join, or else we have a subclass
1901 * of the builtin types in the sequence.
1902 * Do a pre-pass to figure out the total amount of space we'll
1903 * need (sz), see whether any argument is absurd, and defer to
1904 * the Unicode join if appropriate.
1906 for (i
= 0; i
< seqlen
; i
++) {
1907 const size_t old_sz
= sz
;
1908 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1909 if (!PyString_Check(item
)){
1910 #ifdef Py_USING_UNICODE
1911 if (PyUnicode_Check(item
)) {
1912 /* Defer to Unicode join.
1913 * CAUTION: There's no guarantee that the
1914 * original sequence can be iterated over
1915 * again, so we must pass seq here.
1918 result
= PyUnicode_Join((PyObject
*)self
, seq
);
1923 PyErr_Format(PyExc_TypeError
,
1924 "sequence item %zd: expected string,"
1926 i
, Py_TYPE(item
)->tp_name
);
1930 sz
+= PyString_GET_SIZE(item
);
1933 if (sz
< old_sz
|| sz
> PY_SSIZE_T_MAX
) {
1934 PyErr_SetString(PyExc_OverflowError
,
1935 "join() result is too long for a Python string");
1941 /* Allocate result space. */
1942 res
= PyString_FromStringAndSize((char*)NULL
, sz
);
1948 /* Catenate everything. */
1949 p
= PyString_AS_STRING(res
);
1950 for (i
= 0; i
< seqlen
; ++i
) {
1952 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1953 n
= PyString_GET_SIZE(item
);
1954 Py_MEMCPY(p
, PyString_AS_STRING(item
), n
);
1956 if (i
< seqlen
- 1) {
1957 Py_MEMCPY(p
, sep
, seplen
);
1967 _PyString_Join(PyObject
*sep
, PyObject
*x
)
1969 assert(sep
!= NULL
&& PyString_Check(sep
));
1971 return string_join((PyStringObject
*)sep
, x
);
1974 Py_LOCAL_INLINE(void)
1975 string_adjust_indices(Py_ssize_t
*start
, Py_ssize_t
*end
, Py_ssize_t len
)
1989 Py_LOCAL_INLINE(Py_ssize_t
)
1990 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
1995 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1996 PyObject
*obj_start
=Py_None
, *obj_end
=Py_None
;
1998 if (!PyArg_ParseTuple(args
, "O|OO:find/rfind/index/rindex", &subobj
,
1999 &obj_start
, &obj_end
))
2001 /* To support None in "start" and "end" arguments, meaning
2002 the same as if they were not passed.
2004 if (obj_start
!= Py_None
)
2005 if (!_PyEval_SliceIndex(obj_start
, &start
))
2007 if (obj_end
!= Py_None
)
2008 if (!_PyEval_SliceIndex(obj_end
, &end
))
2011 if (PyString_Check(subobj
)) {
2012 sub
= PyString_AS_STRING(subobj
);
2013 sub_len
= PyString_GET_SIZE(subobj
);
2015 #ifdef Py_USING_UNICODE
2016 else if (PyUnicode_Check(subobj
))
2017 return PyUnicode_Find(
2018 (PyObject
*)self
, subobj
, start
, end
, dir
);
2020 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
2021 /* XXX - the "expected a character buffer object" is pretty
2022 confusing for a non-expert. remap to something else ? */
2026 return stringlib_find_slice(
2027 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
2028 sub
, sub_len
, start
, end
);
2030 return stringlib_rfind_slice(
2031 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
2032 sub
, sub_len
, start
, end
);
2036 PyDoc_STRVAR(find__doc__
,
2037 "S.find(sub [,start [,end]]) -> int\n\
2039 Return the lowest index in S where substring sub is found,\n\
2040 such that sub is contained within s[start:end]. Optional\n\
2041 arguments start and end are interpreted as in slice notation.\n\
2043 Return -1 on failure.");
2046 string_find(PyStringObject
*self
, PyObject
*args
)
2048 Py_ssize_t result
= string_find_internal(self
, args
, +1);
2051 return PyInt_FromSsize_t(result
);
2055 PyDoc_STRVAR(index__doc__
,
2056 "S.index(sub [,start [,end]]) -> int\n\
2058 Like S.find() but raise ValueError when the substring is not found.");
2061 string_index(PyStringObject
*self
, PyObject
*args
)
2063 Py_ssize_t result
= string_find_internal(self
, args
, +1);
2067 PyErr_SetString(PyExc_ValueError
,
2068 "substring not found");
2071 return PyInt_FromSsize_t(result
);
2075 PyDoc_STRVAR(rfind__doc__
,
2076 "S.rfind(sub [,start [,end]]) -> int\n\
2078 Return the highest index in S where substring sub is found,\n\
2079 such that sub is contained within s[start:end]. Optional\n\
2080 arguments start and end are interpreted as in slice notation.\n\
2082 Return -1 on failure.");
2085 string_rfind(PyStringObject
*self
, PyObject
*args
)
2087 Py_ssize_t result
= string_find_internal(self
, args
, -1);
2090 return PyInt_FromSsize_t(result
);
2094 PyDoc_STRVAR(rindex__doc__
,
2095 "S.rindex(sub [,start [,end]]) -> int\n\
2097 Like S.rfind() but raise ValueError when the substring is not found.");
2100 string_rindex(PyStringObject
*self
, PyObject
*args
)
2102 Py_ssize_t result
= string_find_internal(self
, args
, -1);
2106 PyErr_SetString(PyExc_ValueError
,
2107 "substring not found");
2110 return PyInt_FromSsize_t(result
);
2114 Py_LOCAL_INLINE(PyObject
*)
2115 do_xstrip(PyStringObject
*self
, int striptype
, PyObject
*sepobj
)
2117 char *s
= PyString_AS_STRING(self
);
2118 Py_ssize_t len
= PyString_GET_SIZE(self
);
2119 char *sep
= PyString_AS_STRING(sepobj
);
2120 Py_ssize_t seplen
= PyString_GET_SIZE(sepobj
);
2124 if (striptype
!= RIGHTSTRIP
) {
2125 while (i
< len
&& memchr(sep
, Py_CHARMASK(s
[i
]), seplen
)) {
2131 if (striptype
!= LEFTSTRIP
) {
2134 } while (j
>= i
&& memchr(sep
, Py_CHARMASK(s
[j
]), seplen
));
2138 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
2140 return (PyObject
*)self
;
2143 return PyString_FromStringAndSize(s
+i
, j
-i
);
2147 Py_LOCAL_INLINE(PyObject
*)
2148 do_strip(PyStringObject
*self
, int striptype
)
2150 char *s
= PyString_AS_STRING(self
);
2151 Py_ssize_t len
= PyString_GET_SIZE(self
), i
, j
;
2154 if (striptype
!= RIGHTSTRIP
) {
2155 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
2161 if (striptype
!= LEFTSTRIP
) {
2164 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
2168 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
2170 return (PyObject
*)self
;
2173 return PyString_FromStringAndSize(s
+i
, j
-i
);
2177 Py_LOCAL_INLINE(PyObject
*)
2178 do_argstrip(PyStringObject
*self
, int striptype
, PyObject
*args
)
2180 PyObject
*sep
= NULL
;
2182 if (!PyArg_ParseTuple(args
, (char *)stripformat
[striptype
], &sep
))
2185 if (sep
!= NULL
&& sep
!= Py_None
) {
2186 if (PyString_Check(sep
))
2187 return do_xstrip(self
, striptype
, sep
);
2188 #ifdef Py_USING_UNICODE
2189 else if (PyUnicode_Check(sep
)) {
2190 PyObject
*uniself
= PyUnicode_FromObject((PyObject
*)self
);
2194 res
= _PyUnicode_XStrip((PyUnicodeObject
*)uniself
,
2200 PyErr_Format(PyExc_TypeError
,
2201 #ifdef Py_USING_UNICODE
2202 "%s arg must be None, str or unicode",
2204 "%s arg must be None or str",
2206 STRIPNAME(striptype
));
2210 return do_strip(self
, striptype
);
2214 PyDoc_STRVAR(strip__doc__
,
2215 "S.strip([chars]) -> string or unicode\n\
2217 Return a copy of the string S with leading and trailing\n\
2218 whitespace removed.\n\
2219 If chars is given and not None, remove characters in chars instead.\n\
2220 If chars is unicode, S will be converted to unicode before stripping");
2223 string_strip(PyStringObject
*self
, PyObject
*args
)
2225 if (PyTuple_GET_SIZE(args
) == 0)
2226 return do_strip(self
, BOTHSTRIP
); /* Common case */
2228 return do_argstrip(self
, BOTHSTRIP
, args
);
2232 PyDoc_STRVAR(lstrip__doc__
,
2233 "S.lstrip([chars]) -> string or unicode\n\
2235 Return a copy of the string S with leading whitespace removed.\n\
2236 If chars is given and not None, remove characters in chars instead.\n\
2237 If chars is unicode, S will be converted to unicode before stripping");
2240 string_lstrip(PyStringObject
*self
, PyObject
*args
)
2242 if (PyTuple_GET_SIZE(args
) == 0)
2243 return do_strip(self
, LEFTSTRIP
); /* Common case */
2245 return do_argstrip(self
, LEFTSTRIP
, args
);
2249 PyDoc_STRVAR(rstrip__doc__
,
2250 "S.rstrip([chars]) -> string or unicode\n\
2252 Return a copy of the string S with trailing whitespace removed.\n\
2253 If chars is given and not None, remove characters in chars instead.\n\
2254 If chars is unicode, S will be converted to unicode before stripping");
2257 string_rstrip(PyStringObject
*self
, PyObject
*args
)
2259 if (PyTuple_GET_SIZE(args
) == 0)
2260 return do_strip(self
, RIGHTSTRIP
); /* Common case */
2262 return do_argstrip(self
, RIGHTSTRIP
, args
);
2266 PyDoc_STRVAR(lower__doc__
,
2267 "S.lower() -> string\n\
2269 Return a copy of the string S converted to lowercase.");
2271 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2273 #define _tolower tolower
2277 string_lower(PyStringObject
*self
)
2280 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2283 newobj
= PyString_FromStringAndSize(NULL
, n
);
2287 s
= PyString_AS_STRING(newobj
);
2289 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2291 for (i
= 0; i
< n
; i
++) {
2292 int c
= Py_CHARMASK(s
[i
]);
2300 PyDoc_STRVAR(upper__doc__
,
2301 "S.upper() -> string\n\
2303 Return a copy of the string S converted to uppercase.");
2306 #define _toupper toupper
2310 string_upper(PyStringObject
*self
)
2313 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2316 newobj
= PyString_FromStringAndSize(NULL
, n
);
2320 s
= PyString_AS_STRING(newobj
);
2322 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2324 for (i
= 0; i
< n
; i
++) {
2325 int c
= Py_CHARMASK(s
[i
]);
2333 PyDoc_STRVAR(title__doc__
,
2334 "S.title() -> string\n\
2336 Return a titlecased version of S, i.e. words start with uppercase\n\
2337 characters, all remaining cased characters have lowercase.");
2340 string_title(PyStringObject
*self
)
2342 char *s
= PyString_AS_STRING(self
), *s_new
;
2343 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2344 int previous_is_cased
= 0;
2347 newobj
= PyString_FromStringAndSize(NULL
, n
);
2350 s_new
= PyString_AsString(newobj
);
2351 for (i
= 0; i
< n
; i
++) {
2352 int c
= Py_CHARMASK(*s
++);
2354 if (!previous_is_cased
)
2356 previous_is_cased
= 1;
2357 } else if (isupper(c
)) {
2358 if (previous_is_cased
)
2360 previous_is_cased
= 1;
2362 previous_is_cased
= 0;
2368 PyDoc_STRVAR(capitalize__doc__
,
2369 "S.capitalize() -> string\n\
2371 Return a copy of the string S with only its first character\n\
2375 string_capitalize(PyStringObject
*self
)
2377 char *s
= PyString_AS_STRING(self
), *s_new
;
2378 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2381 newobj
= PyString_FromStringAndSize(NULL
, n
);
2384 s_new
= PyString_AsString(newobj
);
2386 int c
= Py_CHARMASK(*s
++);
2388 *s_new
= toupper(c
);
2393 for (i
= 1; i
< n
; i
++) {
2394 int c
= Py_CHARMASK(*s
++);
2396 *s_new
= tolower(c
);
2405 PyDoc_STRVAR(count__doc__
,
2406 "S.count(sub[, start[, end]]) -> int\n\
2408 Return the number of non-overlapping occurrences of substring sub in\n\
2409 string S[start:end]. Optional arguments start and end are interpreted\n\
2410 as in slice notation.");
2413 string_count(PyStringObject
*self
, PyObject
*args
)
2416 const char *str
= PyString_AS_STRING(self
), *sub
;
2418 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
2420 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
2421 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2424 if (PyString_Check(sub_obj
)) {
2425 sub
= PyString_AS_STRING(sub_obj
);
2426 sub_len
= PyString_GET_SIZE(sub_obj
);
2428 #ifdef Py_USING_UNICODE
2429 else if (PyUnicode_Check(sub_obj
)) {
2431 count
= PyUnicode_Count((PyObject
*)self
, sub_obj
, start
, end
);
2435 return PyInt_FromSsize_t(count
);
2438 else if (PyObject_AsCharBuffer(sub_obj
, &sub
, &sub_len
))
2441 string_adjust_indices(&start
, &end
, PyString_GET_SIZE(self
));
2443 return PyInt_FromSsize_t(
2444 stringlib_count(str
+ start
, end
- start
, sub
, sub_len
)
2448 PyDoc_STRVAR(swapcase__doc__
,
2449 "S.swapcase() -> string\n\
2451 Return a copy of the string S with uppercase characters\n\
2452 converted to lowercase and vice versa.");
2455 string_swapcase(PyStringObject
*self
)
2457 char *s
= PyString_AS_STRING(self
), *s_new
;
2458 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2461 newobj
= PyString_FromStringAndSize(NULL
, n
);
2464 s_new
= PyString_AsString(newobj
);
2465 for (i
= 0; i
< n
; i
++) {
2466 int c
= Py_CHARMASK(*s
++);
2468 *s_new
= toupper(c
);
2470 else if (isupper(c
)) {
2471 *s_new
= tolower(c
);
2481 PyDoc_STRVAR(translate__doc__
,
2482 "S.translate(table [,deletechars]) -> string\n\
2484 Return a copy of the string S, where all characters occurring\n\
2485 in the optional argument deletechars are removed, and the\n\
2486 remaining characters have been mapped through the given\n\
2487 translation table, which must be a string of length 256.");
2490 string_translate(PyStringObject
*self
, PyObject
*args
)
2492 register char *input
, *output
;
2494 register Py_ssize_t i
, c
, changed
= 0;
2495 PyObject
*input_obj
= (PyObject
*)self
;
2496 const char *output_start
, *del_table
=NULL
;
2497 Py_ssize_t inlen
, tablen
, dellen
= 0;
2499 int trans_table
[256];
2500 PyObject
*tableobj
, *delobj
= NULL
;
2502 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
2503 &tableobj
, &delobj
))
2506 if (PyString_Check(tableobj
)) {
2507 table
= PyString_AS_STRING(tableobj
);
2508 tablen
= PyString_GET_SIZE(tableobj
);
2510 else if (tableobj
== Py_None
) {
2514 #ifdef Py_USING_UNICODE
2515 else if (PyUnicode_Check(tableobj
)) {
2516 /* Unicode .translate() does not support the deletechars
2517 parameter; instead a mapping to None will cause characters
2519 if (delobj
!= NULL
) {
2520 PyErr_SetString(PyExc_TypeError
,
2521 "deletions are implemented differently for unicode");
2524 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
2527 else if (PyObject_AsCharBuffer(tableobj
, &table
, &tablen
))
2530 if (tablen
!= 256) {
2531 PyErr_SetString(PyExc_ValueError
,
2532 "translation table must be 256 characters long");
2536 if (delobj
!= NULL
) {
2537 if (PyString_Check(delobj
)) {
2538 del_table
= PyString_AS_STRING(delobj
);
2539 dellen
= PyString_GET_SIZE(delobj
);
2541 #ifdef Py_USING_UNICODE
2542 else if (PyUnicode_Check(delobj
)) {
2543 PyErr_SetString(PyExc_TypeError
,
2544 "deletions are implemented differently for unicode");
2548 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
2556 inlen
= PyString_GET_SIZE(input_obj
);
2557 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
2560 output_start
= output
= PyString_AsString(result
);
2561 input
= PyString_AS_STRING(input_obj
);
2563 if (dellen
== 0 && table
!= NULL
) {
2564 /* If no deletions are required, use faster code */
2565 for (i
= inlen
; --i
>= 0; ) {
2566 c
= Py_CHARMASK(*input
++);
2567 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
2570 if (changed
|| !PyString_CheckExact(input_obj
))
2573 Py_INCREF(input_obj
);
2577 if (table
== NULL
) {
2578 for (i
= 0; i
< 256; i
++)
2579 trans_table
[i
] = Py_CHARMASK(i
);
2581 for (i
= 0; i
< 256; i
++)
2582 trans_table
[i
] = Py_CHARMASK(table
[i
]);
2585 for (i
= 0; i
< dellen
; i
++)
2586 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
2588 for (i
= inlen
; --i
>= 0; ) {
2589 c
= Py_CHARMASK(*input
++);
2590 if (trans_table
[c
] != -1)
2591 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
2595 if (!changed
&& PyString_CheckExact(input_obj
)) {
2597 Py_INCREF(input_obj
);
2600 /* Fix the size of the resulting string */
2602 _PyString_Resize(&result
, output
- output_start
);
2610 /* find and count characters and substrings */
2612 #define findchar(target, target_len, c) \
2613 ((char *)memchr((const void *)(target), c, target_len))
2615 /* String ops must return a string. */
2616 /* If the object is subclass of string, create a copy */
2617 Py_LOCAL(PyStringObject
*)
2618 return_self(PyStringObject
*self
)
2620 if (PyString_CheckExact(self
)) {
2624 return (PyStringObject
*)PyString_FromStringAndSize(
2625 PyString_AS_STRING(self
),
2626 PyString_GET_SIZE(self
));
/* Walk target[0:target_len] with findchar() looking for byte c, comparing the
   running count against maxcount.  target_len is Py_ssize_t (it was int) so
   that lengths passed in as Py_ssize_t are not truncated on 64-bit builds. */
2629 Py_LOCAL_INLINE(Py_ssize_t
)
2630 countchar(const char *target
, Py_ssize_t target_len
, char c
, Py_ssize_t maxcount
)
2633 const char *start
=target
;
2634 const char *end
=target
+target_len
;
2636 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
2638 if (count
>= maxcount
)
2645 Py_LOCAL(Py_ssize_t
)
2646 findstring(const char *target
, Py_ssize_t target_len
,
2647 const char *pattern
, Py_ssize_t pattern_len
,
2653 start
+= target_len
;
2657 if (end
> target_len
) {
2659 } else if (end
< 0) {
2665 /* zero-length substrings always match at the first attempt */
2666 if (pattern_len
== 0)
2667 return (direction
> 0) ? start
: end
;
2671 if (direction
< 0) {
2672 for (; end
>= start
; end
--)
2673 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
))
2676 for (; start
<= end
; start
++)
2677 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
))
2683 Py_LOCAL_INLINE(Py_ssize_t
)
2684 countstring(const char *target
, Py_ssize_t target_len
,
2685 const char *pattern
, Py_ssize_t pattern_len
,
2688 int direction
, Py_ssize_t maxcount
)
2693 start
+= target_len
;
2697 if (end
> target_len
) {
2699 } else if (end
< 0) {
2705 /* zero-length substrings match everywhere */
2706 if (pattern_len
== 0 || maxcount
== 0) {
2707 if (target_len
+1 < maxcount
)
2708 return target_len
+1;
2713 if (direction
< 0) {
2714 for (; (end
>= start
); end
--)
2715 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
)) {
2717 if (--maxcount
<= 0) break;
2718 end
-= pattern_len
-1;
2721 for (; (start
<= end
); start
++)
2722 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
)) {
2724 if (--maxcount
<= 0)
2726 start
+= pattern_len
-1;
2733 /* Algorithms for different cases of string replacement */
2735 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2736 Py_LOCAL(PyStringObject
*)
2737 replace_interleave(PyStringObject
*self
,
2738 const char *to_s
, Py_ssize_t to_len
,
2739 Py_ssize_t maxcount
)
2741 char *self_s
, *result_s
;
2742 Py_ssize_t self_len
, result_len
;
2743 Py_ssize_t count
, i
, product
;
2744 PyStringObject
*result
;
2746 self_len
= PyString_GET_SIZE(self
);
2748 /* 1 at the end plus 1 after every character */
2750 if (maxcount
< count
)
2753 /* Check for overflow */
2754 /* result_len = count * to_len + self_len; */
2755 product
= count
* to_len
;
2756 if (product
/ to_len
!= count
) {
2757 PyErr_SetString(PyExc_OverflowError
,
2758 "replace string is too long");
2761 result_len
= product
+ self_len
;
2762 if (result_len
< 0) {
2763 PyErr_SetString(PyExc_OverflowError
,
2764 "replace string is too long");
2768 if (! (result
= (PyStringObject
*)
2769 PyString_FromStringAndSize(NULL
, result_len
)) )
2772 self_s
= PyString_AS_STRING(self
);
2773 result_s
= PyString_AS_STRING(result
);
2775 /* TODO: special case single character, which doesn't need memcpy */
2777 /* Lay the first one down (guaranteed this will occur) */
2778 Py_MEMCPY(result_s
, to_s
, to_len
);
2782 for (i
=0; i
<count
; i
++) {
2783 *result_s
++ = *self_s
++;
2784 Py_MEMCPY(result_s
, to_s
, to_len
);
2788 /* Copy the rest of the original string */
2789 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
2794 /* Special case for deleting a single character */
2795 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2796 Py_LOCAL(PyStringObject
*)
2797 replace_delete_single_character(PyStringObject
*self
,
2798 char from_c
, Py_ssize_t maxcount
)
2800 char *self_s
, *result_s
;
2801 char *start
, *next
, *end
;
2802 Py_ssize_t self_len
, result_len
;
2804 PyStringObject
*result
;
2806 self_len
= PyString_GET_SIZE(self
);
2807 self_s
= PyString_AS_STRING(self
);
2809 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2811 return return_self(self
);
2814 result_len
= self_len
- count
; /* from_len == 1 */
2815 assert(result_len
>=0);
2817 if ( (result
= (PyStringObject
*)
2818 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2820 result_s
= PyString_AS_STRING(result
);
2823 end
= self_s
+ self_len
;
2824 while (count
-- > 0) {
2825 next
= findchar(start
, end
-start
, from_c
);
2828 Py_MEMCPY(result_s
, start
, next
-start
);
2829 result_s
+= (next
-start
);
2832 Py_MEMCPY(result_s
, start
, end
-start
);
2837 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2839 Py_LOCAL(PyStringObject
*)
2840 replace_delete_substring(PyStringObject
*self
,
2841 const char *from_s
, Py_ssize_t from_len
,
2842 Py_ssize_t maxcount
) {
2843 char *self_s
, *result_s
;
2844 char *start
, *next
, *end
;
2845 Py_ssize_t self_len
, result_len
;
2846 Py_ssize_t count
, offset
;
2847 PyStringObject
*result
;
2849 self_len
= PyString_GET_SIZE(self
);
2850 self_s
= PyString_AS_STRING(self
);
2852 count
= countstring(self_s
, self_len
,
2859 return return_self(self
);
2862 result_len
= self_len
- (count
* from_len
);
2863 assert (result_len
>=0);
2865 if ( (result
= (PyStringObject
*)
2866 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2869 result_s
= PyString_AS_STRING(result
);
2872 end
= self_s
+ self_len
;
2873 while (count
-- > 0) {
2874 offset
= findstring(start
, end
-start
,
2876 0, end
-start
, FORWARD
);
2879 next
= start
+ offset
;
2881 Py_MEMCPY(result_s
, start
, next
-start
);
2883 result_s
+= (next
-start
);
2884 start
= next
+from_len
;
2886 Py_MEMCPY(result_s
, start
, end
-start
);
2890 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2891 Py_LOCAL(PyStringObject
*)
2892 replace_single_character_in_place(PyStringObject
*self
,
2893 char from_c
, char to_c
,
2894 Py_ssize_t maxcount
)
2896 char *self_s
, *result_s
, *start
, *end
, *next
;
2897 Py_ssize_t self_len
;
2898 PyStringObject
*result
;
2900 /* The result string will be the same size */
2901 self_s
= PyString_AS_STRING(self
);
2902 self_len
= PyString_GET_SIZE(self
);
2904 next
= findchar(self_s
, self_len
, from_c
);
2907 /* No matches; return the original string */
2908 return return_self(self
);
2911 /* Need to make a new string */
2912 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2915 result_s
= PyString_AS_STRING(result
);
2916 Py_MEMCPY(result_s
, self_s
, self_len
);
2918 /* change everything in-place, starting with this one */
2919 start
= result_s
+ (next
-self_s
);
2922 end
= result_s
+ self_len
;
2924 while (--maxcount
> 0) {
2925 next
= findchar(start
, end
-start
, from_c
);
2935 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2936 Py_LOCAL(PyStringObject
*)
2937 replace_substring_in_place(PyStringObject
*self
,
2938 const char *from_s
, Py_ssize_t from_len
,
2939 const char *to_s
, Py_ssize_t to_len
,
2940 Py_ssize_t maxcount
)
2942 char *result_s
, *start
, *end
;
2944 Py_ssize_t self_len
, offset
;
2945 PyStringObject
*result
;
2947 /* The result string will be the same size */
2949 self_s
= PyString_AS_STRING(self
);
2950 self_len
= PyString_GET_SIZE(self
);
2952 offset
= findstring(self_s
, self_len
,
2954 0, self_len
, FORWARD
);
2956 /* No matches; return the original string */
2957 return return_self(self
);
2960 /* Need to make a new string */
2961 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2964 result_s
= PyString_AS_STRING(result
);
2965 Py_MEMCPY(result_s
, self_s
, self_len
);
2967 /* change everything in-place, starting with this one */
2968 start
= result_s
+ offset
;
2969 Py_MEMCPY(start
, to_s
, from_len
);
2971 end
= result_s
+ self_len
;
2973 while ( --maxcount
> 0) {
2974 offset
= findstring(start
, end
-start
,
2976 0, end
-start
, FORWARD
);
2979 Py_MEMCPY(start
+offset
, to_s
, from_len
);
2980 start
+= offset
+from_len
;
2986 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2987 Py_LOCAL(PyStringObject
*)
2988 replace_single_character(PyStringObject
*self
,
2990 const char *to_s
, Py_ssize_t to_len
,
2991 Py_ssize_t maxcount
)
2993 char *self_s
, *result_s
;
2994 char *start
, *next
, *end
;
2995 Py_ssize_t self_len
, result_len
;
2996 Py_ssize_t count
, product
;
2997 PyStringObject
*result
;
2999 self_s
= PyString_AS_STRING(self
);
3000 self_len
= PyString_GET_SIZE(self
);
3002 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
3004 /* no matches, return unchanged */
3005 return return_self(self
);
3008 /* use the difference between current and new, hence the "-1" */
3009 /* result_len = self_len + count * (to_len-1) */
3010 product
= count
* (to_len
-1);
3011 if (product
/ (to_len
-1) != count
) {
3012 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
3015 result_len
= self_len
+ product
;
3016 if (result_len
< 0) {
3017 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
3021 if ( (result
= (PyStringObject
*)
3022 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
3024 result_s
= PyString_AS_STRING(result
);
3027 end
= self_s
+ self_len
;
3028 while (count
-- > 0) {
3029 next
= findchar(start
, end
-start
, from_c
);
3033 if (next
== start
) {
3034 /* replace with the 'to' */
3035 Py_MEMCPY(result_s
, to_s
, to_len
);
3039 /* copy the unchanged old then the 'to' */
3040 Py_MEMCPY(result_s
, start
, next
-start
);
3041 result_s
+= (next
-start
);
3042 Py_MEMCPY(result_s
, to_s
, to_len
);
3047 /* Copy the remainder of the remaining string */
3048 Py_MEMCPY(result_s
, start
, end
-start
);
3053 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
3054 Py_LOCAL(PyStringObject
*)
3055 replace_substring(PyStringObject
*self
,
3056 const char *from_s
, Py_ssize_t from_len
,
3057 const char *to_s
, Py_ssize_t to_len
,
3058 Py_ssize_t maxcount
) {
3059 char *self_s
, *result_s
;
3060 char *start
, *next
, *end
;
3061 Py_ssize_t self_len
, result_len
;
3062 Py_ssize_t count
, offset
, product
;
3063 PyStringObject
*result
;
3065 self_s
= PyString_AS_STRING(self
);
3066 self_len
= PyString_GET_SIZE(self
);
3068 count
= countstring(self_s
, self_len
,
3070 0, self_len
, FORWARD
, maxcount
);
3072 /* no matches, return unchanged */
3073 return return_self(self
);
3076 /* Check for overflow */
3077 /* result_len = self_len + count * (to_len-from_len) */
3078 product
= count
* (to_len
-from_len
);
3079 if (product
/ (to_len
-from_len
) != count
) {
3080 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
3083 result_len
= self_len
+ product
;
3084 if (result_len
< 0) {
3085 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
3089 if ( (result
= (PyStringObject
*)
3090 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
3092 result_s
= PyString_AS_STRING(result
);
3095 end
= self_s
+ self_len
;
3096 while (count
-- > 0) {
3097 offset
= findstring(start
, end
-start
,
3099 0, end
-start
, FORWARD
);
3102 next
= start
+offset
;
3103 if (next
== start
) {
3104 /* replace with the 'to' */
3105 Py_MEMCPY(result_s
, to_s
, to_len
);
3109 /* copy the unchanged old then the 'to' */
3110 Py_MEMCPY(result_s
, start
, next
-start
);
3111 result_s
+= (next
-start
);
3112 Py_MEMCPY(result_s
, to_s
, to_len
);
3114 start
= next
+from_len
;
3117 /* Copy the remainder of the remaining string */
3118 Py_MEMCPY(result_s
, start
, end
-start
);
3124 Py_LOCAL(PyStringObject
*)
3125 replace(PyStringObject
*self
,
3126 const char *from_s
, Py_ssize_t from_len
,
3127 const char *to_s
, Py_ssize_t to_len
,
3128 Py_ssize_t maxcount
)
3131 maxcount
= PY_SSIZE_T_MAX
;
3132 } else if (maxcount
== 0 || PyString_GET_SIZE(self
) == 0) {
3133 /* nothing to do; return the original string */
3134 return return_self(self
);
3137 if (maxcount
== 0 ||
3138 (from_len
== 0 && to_len
== 0)) {
3139 /* nothing to do; return the original string */
3140 return return_self(self
);
3143 /* Handle zero-length special cases */
3145 if (from_len
== 0) {
3146 /* insert the 'to' string everywhere. */
3147 /* >>> "Python".replace("", ".") */
3148 /* '.P.y.t.h.o.n.' */
3149 return replace_interleave(self
, to_s
, to_len
, maxcount
);
3152 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3153 /* point for an empty self string to generate a non-empty string */
3154 /* Special case so the remaining code always gets a non-empty string */
3155 if (PyString_GET_SIZE(self
) == 0) {
3156 return return_self(self
);
3160 /* delete all occurrences of 'from' string */
3161 if (from_len
== 1) {
3162 return replace_delete_single_character(
3163 self
, from_s
[0], maxcount
);
3165 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
3169 /* Handle special case where both strings have the same length */
3171 if (from_len
== to_len
) {
3172 if (from_len
== 1) {
3173 return replace_single_character_in_place(
3179 return replace_substring_in_place(
3180 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
3184 /* Otherwise use the more generic algorithms */
3185 if (from_len
== 1) {
3186 return replace_single_character(self
, from_s
[0],
3187 to_s
, to_len
, maxcount
);
3189 /* len('from')>=2, len('to')>=1 */
3190 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
3194 PyDoc_STRVAR(replace__doc__
,
3195 "S.replace (old, new[, count]) -> string\n\
3197 Return a copy of string S with all occurrences of substring\n\
3198 old replaced by new. If the optional argument count is\n\
3199 given, only the first count occurrences are replaced.");
3202 string_replace(PyStringObject
*self
, PyObject
*args
)
3204 Py_ssize_t count
= -1;
3205 PyObject
*from
, *to
;
3206 const char *from_s
, *to_s
;
3207 Py_ssize_t from_len
, to_len
;
3209 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
3212 if (PyString_Check(from
)) {
3213 from_s
= PyString_AS_STRING(from
);
3214 from_len
= PyString_GET_SIZE(from
);
3216 #ifdef Py_USING_UNICODE
3217 if (PyUnicode_Check(from
))
3218 return PyUnicode_Replace((PyObject
*)self
,
3221 else if (PyObject_AsCharBuffer(from
, &from_s
, &from_len
))
3224 if (PyString_Check(to
)) {
3225 to_s
= PyString_AS_STRING(to
);
3226 to_len
= PyString_GET_SIZE(to
);
3228 #ifdef Py_USING_UNICODE
3229 else if (PyUnicode_Check(to
))
3230 return PyUnicode_Replace((PyObject
*)self
,
3233 else if (PyObject_AsCharBuffer(to
, &to_s
, &to_len
))
3236 return (PyObject
*)replace((PyStringObject
*) self
,
3238 to_s
, to_len
, count
);
3243 /* Matches the end (direction >= 0) or start (direction < 0) of self
3244 * against substr, using the start and end arguments. Returns
3245 * -1 on error, 0 if not found and 1 if found.
3248 _string_tailmatch(PyStringObject
*self
, PyObject
*substr
, Py_ssize_t start
,
3249 Py_ssize_t end
, int direction
)
3251 Py_ssize_t len
= PyString_GET_SIZE(self
);
3256 if (PyString_Check(substr
)) {
3257 sub
= PyString_AS_STRING(substr
);
3258 slen
= PyString_GET_SIZE(substr
);
3260 #ifdef Py_USING_UNICODE
3261 else if (PyUnicode_Check(substr
))
3262 return PyUnicode_Tailmatch((PyObject
*)self
,
3263 substr
, start
, end
, direction
);
3265 else if (PyObject_AsCharBuffer(substr
, &sub
, &slen
))
3267 str
= PyString_AS_STRING(self
);
3269 string_adjust_indices(&start
, &end
, len
);
3271 if (direction
< 0) {
3273 if (start
+slen
> len
)
3277 if (end
-start
< slen
|| start
> len
)
3280 if (end
-slen
> start
)
3283 if (end
-start
>= slen
)
3284 return ! memcmp(str
+start
, sub
, slen
);
3289 PyDoc_STRVAR(startswith__doc__
,
3290 "S.startswith(prefix[, start[, end]]) -> bool\n\
3292 Return True if S starts with the specified prefix, False otherwise.\n\
3293 With optional start, test S beginning at that position.\n\
3294 With optional end, stop comparing S at that position.\n\
3295 prefix can also be a tuple of strings to try.");
3298 string_startswith(PyStringObject
*self
, PyObject
*args
)
3300 Py_ssize_t start
= 0;
3301 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3305 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
3306 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
3308 if (PyTuple_Check(subobj
)) {
3310 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
3311 result
= _string_tailmatch(self
,
3312 PyTuple_GET_ITEM(subobj
, i
),
3322 result
= _string_tailmatch(self
, subobj
, start
, end
, -1);
3326 return PyBool_FromLong(result
);
3330 PyDoc_STRVAR(endswith__doc__
,
3331 "S.endswith(suffix[, start[, end]]) -> bool\n\
3333 Return True if S ends with the specified suffix, False otherwise.\n\
3334 With optional start, test S beginning at that position.\n\
3335 With optional end, stop comparing S at that position.\n\
3336 suffix can also be a tuple of strings to try.");
3339 string_endswith(PyStringObject
*self
, PyObject
*args
)
3341 Py_ssize_t start
= 0;
3342 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3346 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
3347 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
3349 if (PyTuple_Check(subobj
)) {
3351 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
3352 result
= _string_tailmatch(self
,
3353 PyTuple_GET_ITEM(subobj
, i
),
3363 result
= _string_tailmatch(self
, subobj
, start
, end
, +1);
3367 return PyBool_FromLong(result
);
3371 PyDoc_STRVAR(encode__doc__
,
3372 "S.encode([encoding[,errors]]) -> object\n\
3374 Encodes S using the codec registered for encoding. encoding defaults\n\
3375 to the default encoding. errors may be given to set a different error\n\
3376 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3377 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3378 'xmlcharrefreplace' as well as any other name registered with\n\
3379 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3382 string_encode(PyStringObject
*self
, PyObject
*args
, PyObject
*kwargs
)
3384 static char *kwlist
[] = {"encoding", "errors", 0};
3385 char *encoding
= NULL
;
3386 char *errors
= NULL
;
3389 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|ss:encode",
3390 kwlist
, &encoding
, &errors
))
3392 v
= PyString_AsEncodedObject((PyObject
*)self
, encoding
, errors
);
3395 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3396 PyErr_Format(PyExc_TypeError
,
3397 "encoder did not return a string/unicode object "
3399 Py_TYPE(v
)->tp_name
);
3410 PyDoc_STRVAR(decode__doc__
,
3411 "S.decode([encoding[,errors]]) -> object\n\
3413 Decodes S using the codec registered for encoding. encoding defaults\n\
3414 to the default encoding. errors may be given to set a different error\n\
3415 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3416 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3417 as well as any other name registered with codecs.register_error that is\n\
3418 able to handle UnicodeDecodeErrors.");
3421 string_decode(PyStringObject
*self
, PyObject
*args
, PyObject
*kwargs
)
3423 static char *kwlist
[] = {"encoding", "errors", 0};
3424 char *encoding
= NULL
;
3425 char *errors
= NULL
;
3428 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|ss:decode",
3429 kwlist
, &encoding
, &errors
))
3431 v
= PyString_AsDecodedObject((PyObject
*)self
, encoding
, errors
);
3434 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3435 PyErr_Format(PyExc_TypeError
,
3436 "decoder did not return a string/unicode object "
3438 Py_TYPE(v
)->tp_name
);
3449 PyDoc_STRVAR(expandtabs__doc__
,
3450 "S.expandtabs([tabsize]) -> string\n\
3452 Return a copy of S where all tab characters are expanded using spaces.\n\
3453 If tabsize is not given, a tab size of 8 characters is assumed.");
3456 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
3458 const char *e
, *p
, *qe
;
3460 Py_ssize_t i
, j
, incr
;
3464 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
3467 /* First pass: determine size of output string */
3468 i
= 0; /* chars up to and including most recent \n or \r */
3469 j
= 0; /* chars since most recent \n or \r (use in tab calculations) */
3470 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
); /* end of input */
3471 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3474 incr
= tabsize
- (j
% tabsize
);
3475 if (j
> PY_SSIZE_T_MAX
- incr
)
3481 if (j
> PY_SSIZE_T_MAX
- 1)
3484 if (*p
== '\n' || *p
== '\r') {
3485 if (i
> PY_SSIZE_T_MAX
- j
)
3492 if (i
> PY_SSIZE_T_MAX
- j
)
3495 /* Second pass: create output string and fill it */
3496 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
3500 j
= 0; /* same as in first pass */
3501 q
= PyString_AS_STRING(u
); /* next output char */
3502 qe
= PyString_AS_STRING(u
) + PyString_GET_SIZE(u
); /* end of output */
3504 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3507 i
= tabsize
- (j
% tabsize
);
3521 if (*p
== '\n' || *p
== '\r')
3530 PyErr_SetString(PyExc_OverflowError
, "new string is too long");
3534 Py_LOCAL_INLINE(PyObject
*)
3535 pad(PyStringObject
*self
, Py_ssize_t left
, Py_ssize_t right
, char fill
)
3544 if (left
== 0 && right
== 0 && PyString_CheckExact(self
)) {
3546 return (PyObject
*)self
;
3549 u
= PyString_FromStringAndSize(NULL
,
3550 left
+ PyString_GET_SIZE(self
) + right
);
3553 memset(PyString_AS_STRING(u
), fill
, left
);
3554 Py_MEMCPY(PyString_AS_STRING(u
) + left
,
3555 PyString_AS_STRING(self
),
3556 PyString_GET_SIZE(self
));
3558 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
3565 PyDoc_STRVAR(ljust__doc__
,
3566 "S.ljust(width[, fillchar]) -> string\n"
3568 "Return S left-justified in a string of length width. Padding is\n"
3569 "done using the specified fill character (default is a space).");
3572 string_ljust(PyStringObject
*self
, PyObject
*args
)
3575 char fillchar
= ' ';
3577 if (!PyArg_ParseTuple(args
, "n|c:ljust", &width
, &fillchar
))
3580 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3582 return (PyObject
*) self
;
3585 return pad(self
, 0, width
- PyString_GET_SIZE(self
), fillchar
);
3589 PyDoc_STRVAR(rjust__doc__
,
3590 "S.rjust(width[, fillchar]) -> string\n"
3592 "Return S right-justified in a string of length width. Padding is\n"
3593 "done using the specified fill character (default is a space)");
3596 string_rjust(PyStringObject
*self
, PyObject
*args
)
3599 char fillchar
= ' ';
3601 if (!PyArg_ParseTuple(args
, "n|c:rjust", &width
, &fillchar
))
3604 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3606 return (PyObject
*) self
;
3609 return pad(self
, width
- PyString_GET_SIZE(self
), 0, fillchar
);
3613 PyDoc_STRVAR(center__doc__
,
3614 "S.center(width[, fillchar]) -> string\n"
3616 "Return S centered in a string of length width. Padding is\n"
3617 "done using the specified fill character (default is a space)");
3620 string_center(PyStringObject
*self
, PyObject
*args
)
3622 Py_ssize_t marg
, left
;
3624 char fillchar
= ' ';
3626 if (!PyArg_ParseTuple(args
, "n|c:center", &width
, &fillchar
))
3629 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3631 return (PyObject
*) self
;
3634 marg
= width
- PyString_GET_SIZE(self
);
3635 left
= marg
/ 2 + (marg
& width
& 1);
3637 return pad(self
, left
, marg
- left
, fillchar
);
3640 PyDoc_STRVAR(zfill__doc__
,
3641 "S.zfill(width) -> string\n"
3643 "Pad a numeric string S with zeros on the left, to fill a field\n"
3644 "of the specified width. The string S is never truncated.");
3647 string_zfill(PyStringObject
*self
, PyObject
*args
)
3654 if (!PyArg_ParseTuple(args
, "n:zfill", &width
))
3657 if (PyString_GET_SIZE(self
) >= width
) {
3658 if (PyString_CheckExact(self
)) {
3660 return (PyObject
*) self
;
3663 return PyString_FromStringAndSize(
3664 PyString_AS_STRING(self
),
3665 PyString_GET_SIZE(self
)
3669 fill
= width
- PyString_GET_SIZE(self
);
3671 s
= pad(self
, fill
, 0, '0');
3676 p
= PyString_AS_STRING(s
);
3677 if (p
[fill
] == '+' || p
[fill
] == '-') {
3678 /* move sign to beginning of string */
3683 return (PyObject
*) s
;
3686 PyDoc_STRVAR(isspace__doc__
,
3687 "S.isspace() -> bool\n\
3689 Return True if all characters in S are whitespace\n\
3690 and there is at least one character in S, False otherwise.");
3693 string_isspace(PyStringObject
*self
)
3695 register const unsigned char *p
3696 = (unsigned char *) PyString_AS_STRING(self
);
3697 register const unsigned char *e
;
3699 /* Shortcut for single character strings */
3700 if (PyString_GET_SIZE(self
) == 1 &&
3702 return PyBool_FromLong(1);
3704 /* Special case for empty strings */
3705 if (PyString_GET_SIZE(self
) == 0)
3706 return PyBool_FromLong(0);
3708 e
= p
+ PyString_GET_SIZE(self
);
3709 for (; p
< e
; p
++) {
3711 return PyBool_FromLong(0);
3713 return PyBool_FromLong(1);
3717 PyDoc_STRVAR(isalpha__doc__
,
3718 "S.isalpha() -> bool\n\
3720 Return True if all characters in S are alphabetic\n\
3721 and there is at least one character in S, False otherwise.");
3724 string_isalpha(PyStringObject
*self
)
3726 register const unsigned char *p
3727 = (unsigned char *) PyString_AS_STRING(self
);
3728 register const unsigned char *e
;
3730 /* Shortcut for single character strings */
3731 if (PyString_GET_SIZE(self
) == 1 &&
3733 return PyBool_FromLong(1);
3735 /* Special case for empty strings */
3736 if (PyString_GET_SIZE(self
) == 0)
3737 return PyBool_FromLong(0);
3739 e
= p
+ PyString_GET_SIZE(self
);
3740 for (; p
< e
; p
++) {
3742 return PyBool_FromLong(0);
3744 return PyBool_FromLong(1);
3748 PyDoc_STRVAR(isalnum__doc__
,
3749 "S.isalnum() -> bool\n\
3751 Return True if all characters in S are alphanumeric\n\
3752 and there is at least one character in S, False otherwise.");
3755 string_isalnum(PyStringObject
*self
)
3757 register const unsigned char *p
3758 = (unsigned char *) PyString_AS_STRING(self
);
3759 register const unsigned char *e
;
3761 /* Shortcut for single character strings */
3762 if (PyString_GET_SIZE(self
) == 1 &&
3764 return PyBool_FromLong(1);
3766 /* Special case for empty strings */
3767 if (PyString_GET_SIZE(self
) == 0)
3768 return PyBool_FromLong(0);
3770 e
= p
+ PyString_GET_SIZE(self
);
3771 for (; p
< e
; p
++) {
3773 return PyBool_FromLong(0);
3775 return PyBool_FromLong(1);
3779 PyDoc_STRVAR(isdigit__doc__
,
3780 "S.isdigit() -> bool\n\
3782 Return True if all characters in S are digits\n\
3783 and there is at least one character in S, False otherwise.");
3786 string_isdigit(PyStringObject
*self
)
3788 register const unsigned char *p
3789 = (unsigned char *) PyString_AS_STRING(self
);
3790 register const unsigned char *e
;
3792 /* Shortcut for single character strings */
3793 if (PyString_GET_SIZE(self
) == 1 &&
3795 return PyBool_FromLong(1);
3797 /* Special case for empty strings */
3798 if (PyString_GET_SIZE(self
) == 0)
3799 return PyBool_FromLong(0);
3801 e
= p
+ PyString_GET_SIZE(self
);
3802 for (; p
< e
; p
++) {
3804 return PyBool_FromLong(0);
3806 return PyBool_FromLong(1);
3810 PyDoc_STRVAR(islower__doc__
,
3811 "S.islower() -> bool\n\
3813 Return True if all cased characters in S are lowercase and there is\n\
3814 at least one cased character in S, False otherwise.");
3817 string_islower(PyStringObject
*self
)
3819 register const unsigned char *p
3820 = (unsigned char *) PyString_AS_STRING(self
);
3821 register const unsigned char *e
;
3824 /* Shortcut for single character strings */
3825 if (PyString_GET_SIZE(self
) == 1)
3826 return PyBool_FromLong(islower(*p
) != 0);
3828 /* Special case for empty strings */
3829 if (PyString_GET_SIZE(self
) == 0)
3830 return PyBool_FromLong(0);
3832 e
= p
+ PyString_GET_SIZE(self
);
3834 for (; p
< e
; p
++) {
3836 return PyBool_FromLong(0);
3837 else if (!cased
&& islower(*p
))
3840 return PyBool_FromLong(cased
);
3844 PyDoc_STRVAR(isupper__doc__
,
3845 "S.isupper() -> bool\n\
3847 Return True if all cased characters in S are uppercase and there is\n\
3848 at least one cased character in S, False otherwise.");
3851 string_isupper(PyStringObject
*self
)
3853 register const unsigned char *p
3854 = (unsigned char *) PyString_AS_STRING(self
);
3855 register const unsigned char *e
;
3858 /* Shortcut for single character strings */
3859 if (PyString_GET_SIZE(self
) == 1)
3860 return PyBool_FromLong(isupper(*p
) != 0);
3862 /* Special case for empty strings */
3863 if (PyString_GET_SIZE(self
) == 0)
3864 return PyBool_FromLong(0);
3866 e
= p
+ PyString_GET_SIZE(self
);
3868 for (; p
< e
; p
++) {
3870 return PyBool_FromLong(0);
3871 else if (!cased
&& isupper(*p
))
3874 return PyBool_FromLong(cased
);
3878 PyDoc_STRVAR(istitle__doc__
,
3879 "S.istitle() -> bool\n\
3881 Return True if S is a titlecased string and there is at least one\n\
3882 character in S, i.e. uppercase characters may only follow uncased\n\
3883 characters and lowercase characters only cased ones. Return False\n\
3887 string_istitle(PyStringObject
*self
, PyObject
*uncased
)
3889 register const unsigned char *p
3890 = (unsigned char *) PyString_AS_STRING(self
);
3891 register const unsigned char *e
;
3892 int cased
, previous_is_cased
;
3894 /* Shortcut for single character strings */
3895 if (PyString_GET_SIZE(self
) == 1)
3896 return PyBool_FromLong(isupper(*p
) != 0);
3898 /* Special case for empty strings */
3899 if (PyString_GET_SIZE(self
) == 0)
3900 return PyBool_FromLong(0);
3902 e
= p
+ PyString_GET_SIZE(self
);
3904 previous_is_cased
= 0;
3905 for (; p
< e
; p
++) {
3906 register const unsigned char ch
= *p
;
3909 if (previous_is_cased
)
3910 return PyBool_FromLong(0);
3911 previous_is_cased
= 1;
3914 else if (islower(ch
)) {
3915 if (!previous_is_cased
)
3916 return PyBool_FromLong(0);
3917 previous_is_cased
= 1;
3921 previous_is_cased
= 0;
3923 return PyBool_FromLong(cased
);
3927 PyDoc_STRVAR(splitlines__doc__
,
3928 "S.splitlines([keepends]) -> list of strings\n\
3930 Return a list of the lines in S, breaking at line boundaries.\n\
3931 Line breaks are not included in the resulting list unless keepends\n\
3932 is given and true.");
3935 string_splitlines(PyStringObject
*self
, PyObject
*args
)
3937 register Py_ssize_t i
;
3938 register Py_ssize_t j
;
3945 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
3948 data
= PyString_AS_STRING(self
);
3949 len
= PyString_GET_SIZE(self
);
3951 /* This does not use the preallocated list because splitlines is
3952 usually run with hundreds of newlines. The overhead of
3953 switching between PyList_SET_ITEM and append causes about a
3954 2-3% slowdown for that common case. A smarter implementation
3955 could move the if check out, so the SET_ITEMs are done first
3956 and the appends only done when the prealloc buffer is full.
3957 That's too much work for little gain.*/
3959 list
= PyList_New(0);
3963 for (i
= j
= 0; i
< len
; ) {
3966 /* Find a line and append it */
3967 while (i
< len
&& data
[i
] != '\n' && data
[i
] != '\r')
3970 /* Skip the line break reading CRLF as one line break */
3973 if (data
[i
] == '\r' && i
+ 1 < len
&&
3981 SPLIT_APPEND(data
, j
, eol
);
3985 SPLIT_APPEND(data
, j
, len
);
3995 PyDoc_STRVAR(sizeof__doc__
,
3996 "S.__sizeof__() -> size of S in memory, in bytes");
3999 string_sizeof(PyStringObject
*v
)
4002 res
= PyStringObject_SIZE
+ PyString_GET_SIZE(v
) * Py_TYPE(v
)->tp_itemsize
;
4003 return PyInt_FromSsize_t(res
);
4009 #undef PREALLOC_SIZE
4012 string_getnewargs(PyStringObject
*v
)
4014 return Py_BuildValue("(s#)", v
->ob_sval
, Py_SIZE(v
));
4018 #include "stringlib/string_format.h"
4020 PyDoc_STRVAR(format__doc__
,
4021 "S.format(*args, **kwargs) -> unicode\n\
4026 string__format__(PyObject
* self
, PyObject
* args
)
4028 PyObject
*format_spec
;
4029 PyObject
*result
= NULL
;
4030 PyObject
*tmp
= NULL
;
4032 /* If 2.x, convert format_spec to the same type as value */
4033 /* This is to allow things like u''.format('') */
4034 if (!PyArg_ParseTuple(args
, "O:__format__", &format_spec
))
4036 if (!(PyString_Check(format_spec
) || PyUnicode_Check(format_spec
))) {
4037 PyErr_Format(PyExc_TypeError
, "__format__ arg must be str "
4038 "or unicode, not %s", Py_TYPE(format_spec
)->tp_name
);
4041 tmp
= PyObject_Str(format_spec
);
4046 result
= _PyBytes_FormatAdvanced(self
,
4047 PyString_AS_STRING(format_spec
),
4048 PyString_GET_SIZE(format_spec
));
4054 PyDoc_STRVAR(p_format__doc__
,
4055 "S.__format__(format_spec) -> unicode\n\
4061 string_methods
[] = {
4062 /* Counterparts of the obsolete stropmodule functions; except
4063 string.maketrans(). */
4064 {"join", (PyCFunction
)string_join
, METH_O
, join__doc__
},
4065 {"split", (PyCFunction
)string_split
, METH_VARARGS
, split__doc__
},
4066 {"rsplit", (PyCFunction
)string_rsplit
, METH_VARARGS
, rsplit__doc__
},
4067 {"lower", (PyCFunction
)string_lower
, METH_NOARGS
, lower__doc__
},
4068 {"upper", (PyCFunction
)string_upper
, METH_NOARGS
, upper__doc__
},
4069 {"islower", (PyCFunction
)string_islower
, METH_NOARGS
, islower__doc__
},
4070 {"isupper", (PyCFunction
)string_isupper
, METH_NOARGS
, isupper__doc__
},
4071 {"isspace", (PyCFunction
)string_isspace
, METH_NOARGS
, isspace__doc__
},
4072 {"isdigit", (PyCFunction
)string_isdigit
, METH_NOARGS
, isdigit__doc__
},
4073 {"istitle", (PyCFunction
)string_istitle
, METH_NOARGS
, istitle__doc__
},
4074 {"isalpha", (PyCFunction
)string_isalpha
, METH_NOARGS
, isalpha__doc__
},
4075 {"isalnum", (PyCFunction
)string_isalnum
, METH_NOARGS
, isalnum__doc__
},
4076 {"capitalize", (PyCFunction
)string_capitalize
, METH_NOARGS
,
4078 {"count", (PyCFunction
)string_count
, METH_VARARGS
, count__doc__
},
4079 {"endswith", (PyCFunction
)string_endswith
, METH_VARARGS
,
4081 {"partition", (PyCFunction
)string_partition
, METH_O
, partition__doc__
},
4082 {"find", (PyCFunction
)string_find
, METH_VARARGS
, find__doc__
},
4083 {"index", (PyCFunction
)string_index
, METH_VARARGS
, index__doc__
},
4084 {"lstrip", (PyCFunction
)string_lstrip
, METH_VARARGS
, lstrip__doc__
},
4085 {"replace", (PyCFunction
)string_replace
, METH_VARARGS
, replace__doc__
},
4086 {"rfind", (PyCFunction
)string_rfind
, METH_VARARGS
, rfind__doc__
},
4087 {"rindex", (PyCFunction
)string_rindex
, METH_VARARGS
, rindex__doc__
},
4088 {"rstrip", (PyCFunction
)string_rstrip
, METH_VARARGS
, rstrip__doc__
},
4089 {"rpartition", (PyCFunction
)string_rpartition
, METH_O
,
4091 {"startswith", (PyCFunction
)string_startswith
, METH_VARARGS
,
4093 {"strip", (PyCFunction
)string_strip
, METH_VARARGS
, strip__doc__
},
4094 {"swapcase", (PyCFunction
)string_swapcase
, METH_NOARGS
,
4096 {"translate", (PyCFunction
)string_translate
, METH_VARARGS
,
4098 {"title", (PyCFunction
)string_title
, METH_NOARGS
, title__doc__
},
4099 {"ljust", (PyCFunction
)string_ljust
, METH_VARARGS
, ljust__doc__
},
4100 {"rjust", (PyCFunction
)string_rjust
, METH_VARARGS
, rjust__doc__
},
4101 {"center", (PyCFunction
)string_center
, METH_VARARGS
, center__doc__
},
4102 {"zfill", (PyCFunction
)string_zfill
, METH_VARARGS
, zfill__doc__
},
4103 {"format", (PyCFunction
) do_string_format
, METH_VARARGS
| METH_KEYWORDS
, format__doc__
},
4104 {"__format__", (PyCFunction
) string__format__
, METH_VARARGS
, p_format__doc__
},
4105 {"_formatter_field_name_split", (PyCFunction
) formatter_field_name_split
, METH_NOARGS
},
4106 {"_formatter_parser", (PyCFunction
) formatter_parser
, METH_NOARGS
},
4107 {"encode", (PyCFunction
)string_encode
, METH_VARARGS
| METH_KEYWORDS
, encode__doc__
},
4108 {"decode", (PyCFunction
)string_decode
, METH_VARARGS
| METH_KEYWORDS
, decode__doc__
},
4109 {"expandtabs", (PyCFunction
)string_expandtabs
, METH_VARARGS
,
4111 {"splitlines", (PyCFunction
)string_splitlines
, METH_VARARGS
,
4113 {"__sizeof__", (PyCFunction
)string_sizeof
, METH_NOARGS
,
4115 {"__getnewargs__", (PyCFunction
)string_getnewargs
, METH_NOARGS
},
4116 {NULL
, NULL
} /* sentinel */
4120 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
);
4123 string_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4126 static char *kwlist
[] = {"object", 0};
4128 if (type
!= &PyString_Type
)
4129 return str_subtype_new(type
, args
, kwds
);
4130 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|O:str", kwlist
, &x
))
4133 return PyString_FromString("");
4134 return PyObject_Str(x
);
4138 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4140 PyObject
*tmp
, *pnew
;
4143 assert(PyType_IsSubtype(type
, &PyString_Type
));
4144 tmp
= string_new(&PyString_Type
, args
, kwds
);
4147 assert(PyString_CheckExact(tmp
));
4148 n
= PyString_GET_SIZE(tmp
);
4149 pnew
= type
->tp_alloc(type
, n
);
4151 Py_MEMCPY(PyString_AS_STRING(pnew
), PyString_AS_STRING(tmp
), n
+1);
4152 ((PyStringObject
*)pnew
)->ob_shash
=
4153 ((PyStringObject
*)tmp
)->ob_shash
;
4154 ((PyStringObject
*)pnew
)->ob_sstate
= SSTATE_NOT_INTERNED
;
4161 basestring_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4163 PyErr_SetString(PyExc_TypeError
,
4164 "The basestring type cannot be instantiated");
4169 string_mod(PyObject
*v
, PyObject
*w
)
4171 if (!PyString_Check(v
)) {
4172 Py_INCREF(Py_NotImplemented
);
4173 return Py_NotImplemented
;
4175 return PyString_Format(v
, w
);
4178 PyDoc_STRVAR(basestring_doc
,
4179 "Type basestring cannot be instantiated; it is the base for str and unicode.");
4181 static PyNumberMethods string_as_number
= {
4186 string_mod
, /*nb_remainder*/
4190 PyTypeObject PyBaseString_Type
= {
4191 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
4201 0, /* tp_as_number */
4202 0, /* tp_as_sequence */
4203 0, /* tp_as_mapping */
4207 0, /* tp_getattro */
4208 0, /* tp_setattro */
4209 0, /* tp_as_buffer */
4210 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
4211 basestring_doc
, /* tp_doc */
4212 0, /* tp_traverse */
4214 0, /* tp_richcompare */
4215 0, /* tp_weaklistoffset */
4217 0, /* tp_iternext */
4221 &PyBaseObject_Type
, /* tp_base */
4223 0, /* tp_descr_get */
4224 0, /* tp_descr_set */
4225 0, /* tp_dictoffset */
4228 basestring_new
, /* tp_new */
4232 PyDoc_STRVAR(string_doc
,
4233 "str(object) -> string\n\
4235 Return a nice string representation of the object.\n\
4236 If the argument is a string, the return value is the same object.");
4238 PyTypeObject PyString_Type
= {
4239 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
4241 PyStringObject_SIZE
,
4243 string_dealloc
, /* tp_dealloc */
4244 (printfunc
)string_print
, /* tp_print */
4248 string_repr
, /* tp_repr */
4249 &string_as_number
, /* tp_as_number */
4250 &string_as_sequence
, /* tp_as_sequence */
4251 &string_as_mapping
, /* tp_as_mapping */
4252 (hashfunc
)string_hash
, /* tp_hash */
4254 string_str
, /* tp_str */
4255 PyObject_GenericGetAttr
, /* tp_getattro */
4256 0, /* tp_setattro */
4257 &string_as_buffer
, /* tp_as_buffer */
4258 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_CHECKTYPES
|
4259 Py_TPFLAGS_BASETYPE
| Py_TPFLAGS_STRING_SUBCLASS
|
4260 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
4261 string_doc
, /* tp_doc */
4262 0, /* tp_traverse */
4264 (richcmpfunc
)string_richcompare
, /* tp_richcompare */
4265 0, /* tp_weaklistoffset */
4267 0, /* tp_iternext */
4268 string_methods
, /* tp_methods */
4271 &PyBaseString_Type
, /* tp_base */
4273 0, /* tp_descr_get */
4274 0, /* tp_descr_set */
4275 0, /* tp_dictoffset */
4278 string_new
, /* tp_new */
4279 PyObject_Del
, /* tp_free */
4283 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
4285 register PyObject
*v
;
4288 if (w
== NULL
|| !PyString_Check(*pv
)) {
4293 v
= string_concat((PyStringObject
*) *pv
, w
);
4299 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
4301 PyString_Concat(pv
, w
);
4306 /* The following function breaks the notion that strings are immutable:
4307 it changes the size of a string. We get away with this only if there
4308 is only one module referencing the object. You can also think of it
4309 as creating a new string object and destroying the old one, only
4310 more efficiently. In any case, don't use this if the string may
4311 already be known to some other part of the code...
4312 Note that if there's not enough memory to resize the string, the original
4313 string object at *pv is deallocated, *pv is set to NULL, an "out of
4314 memory" exception is set, and -1 is returned. Else (on success) 0 is
4315 returned, and the value in *pv may or may not be the same as on input.
4316 As always, an extra byte is allocated for a trailing \0 byte (newsize
4317 does *not* include that), and a trailing \0 byte is stored.
4321 _PyString_Resize(PyObject
**pv
, Py_ssize_t newsize
)
4323 register PyObject
*v
;
4324 register PyStringObject
*sv
;
4326 if (!PyString_Check(v
) || Py_REFCNT(v
) != 1 || newsize
< 0 ||
4327 PyString_CHECK_INTERNED(v
)) {
4330 PyErr_BadInternalCall();
4333 /* XXX UNREF/NEWREF interface should be more symmetrical */
4335 _Py_ForgetReference(v
);
4337 PyObject_REALLOC((char *)v
, PyStringObject_SIZE
+ newsize
);
4343 _Py_NewReference(*pv
);
4344 sv
= (PyStringObject
*) *pv
;
4345 Py_SIZE(sv
) = newsize
;
4346 sv
->ob_sval
[newsize
] = '\0';
4347 sv
->ob_shash
= -1; /* invalidate cached hash value */
4351 /* Helpers for formatstring */
4353 Py_LOCAL_INLINE(PyObject
*)
4354 getnextarg(PyObject
*args
, Py_ssize_t arglen
, Py_ssize_t
*p_argidx
)
4356 Py_ssize_t argidx
= *p_argidx
;
4357 if (argidx
< arglen
) {
4362 return PyTuple_GetItem(args
, argidx
);
4364 PyErr_SetString(PyExc_TypeError
,
4365 "not enough arguments for format string");
4376 #define F_LJUST (1<<0)
4377 #define F_SIGN (1<<1)
4378 #define F_BLANK (1<<2)
4379 #define F_ALT (1<<3)
4380 #define F_ZERO (1<<4)
4382 /* Returns a new reference to a PyString object, or NULL on failure. */
4385 formatfloat(PyObject
*v
, int flags
, int prec
, int type
)
4391 x
= PyFloat_AsDouble(v
);
4392 if (x
== -1.0 && PyErr_Occurred()) {
4393 PyErr_Format(PyExc_TypeError
, "float argument required, "
4394 "not %.200s", Py_TYPE(v
)->tp_name
);
4401 p
= PyOS_double_to_string(x
, type
, prec
,
4402 (flags
& F_ALT
) ? Py_DTSF_ALT
: 0, NULL
);
4406 result
= PyString_FromStringAndSize(p
, strlen(p
));
4411 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4412 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4413 * Python's regular ints.
4414 * Return value: a new PyString*, or NULL if error.
4415 * . *pbuf is set to point into it,
4416 * *plen set to the # of chars following that.
4417 * Caller must decref it when done using pbuf.
4418 * The string starting at *pbuf is of the form
4419 * "-"? ("0x" | "0X")? digit+
4420 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4421 * set in flags. The case of hex digits will be correct,
4422 * There will be at least prec digits, zero-filled on the left if
4423 * necessary to get that many.
4424 * val object to be converted
4425 * flags bitmask of format flags; only F_ALT is looked at
4426 * prec minimum number of digits; 0-fill on left if needed
4427 * type a character in [duoxX]; u acts the same as d
4429 * CAUTION: o, x and X conversions on regular ints can never
4430 * produce a '-' sign, but can for Python's unbounded ints.
4433 _PyString_FormatLong(PyObject
*val
, int flags
, int prec
, int type
,
4434 char **pbuf
, int *plen
)
4436 PyObject
*result
= NULL
;
4439 int sign
; /* 1 if '-', else 0 */
4440 int len
; /* number of characters */
4442 int numdigits
; /* len == numnondigits + numdigits */
4443 int numnondigits
= 0;
4448 result
= Py_TYPE(val
)->tp_str(val
);
4451 result
= Py_TYPE(val
)->tp_as_number
->nb_oct(val
);
4456 result
= Py_TYPE(val
)->tp_as_number
->nb_hex(val
);
4459 assert(!"'type' not in [duoxX]");
4464 buf
= PyString_AsString(result
);
4470 /* To modify the string in-place, there can only be one reference. */
4471 if (Py_REFCNT(result
) != 1) {
4472 PyErr_BadInternalCall();
4475 llen
= PyString_Size(result
);
4476 if (llen
> INT_MAX
) {
4477 PyErr_SetString(PyExc_ValueError
, "string too large in _PyString_FormatLong");
4481 if (buf
[len
-1] == 'L') {
4485 sign
= buf
[0] == '-';
4486 numnondigits
+= sign
;
4487 numdigits
= len
- numnondigits
;
4488 assert(numdigits
> 0);
4490 /* Get rid of base marker unless F_ALT */
4491 if ((flags
& F_ALT
) == 0) {
4492 /* Need to skip 0x, 0X or 0. */
4496 assert(buf
[sign
] == '0');
4497 /* If 0 is only digit, leave it alone. */
4498 if (numdigits
> 1) {
4505 assert(buf
[sign
] == '0');
4506 assert(buf
[sign
+ 1] == 'x');
4517 assert(len
== numnondigits
+ numdigits
);
4518 assert(numdigits
> 0);
4521 /* Fill with leading zeroes to meet minimum width. */
4522 if (prec
> numdigits
) {
4523 PyObject
*r1
= PyString_FromStringAndSize(NULL
,
4524 numnondigits
+ prec
);
4530 b1
= PyString_AS_STRING(r1
);
4531 for (i
= 0; i
< numnondigits
; ++i
)
4533 for (i
= 0; i
< prec
- numdigits
; i
++)
4535 for (i
= 0; i
< numdigits
; i
++)
4540 buf
= PyString_AS_STRING(result
);
4541 len
= numnondigits
+ prec
;
4544 /* Fix up case for hex conversions. */
4546 /* Need to convert all lower case letters to upper case.
4547 and need to convert 0x to 0X (and -0x to -0X). */
4548 for (i
= 0; i
< len
; i
++)
4549 if (buf
[i
] >= 'a' && buf
[i
] <= 'x')
4557 Py_LOCAL_INLINE(int)
4558 formatint(char *buf
, size_t buflen
, int flags
,
4559 int prec
, int type
, PyObject
*v
)
4561 /* fmt = '%#.' + `prec` + 'l' + `type`
4562 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4564 char fmt
[64]; /* plenty big enough! */
4568 x
= PyInt_AsLong(v
);
4569 if (x
== -1 && PyErr_Occurred()) {
4570 PyErr_Format(PyExc_TypeError
, "int argument required, not %.200s",
4571 Py_TYPE(v
)->tp_name
);
4574 if (x
< 0 && type
== 'u') {
4577 if (x
< 0 && (type
== 'x' || type
== 'X' || type
== 'o'))
4584 if ((flags
& F_ALT
) &&
4585 (type
== 'x' || type
== 'X')) {
4586 /* When converting under %#x or %#X, there are a number
4587 * of issues that cause pain:
4588 * - when 0 is being converted, the C standard leaves off
4589 * the '0x' or '0X', which is inconsistent with other
4590 * %#x/%#X conversions and inconsistent with Python's
4592 * - there are platforms that violate the standard and
4593 * convert 0 with the '0x' or '0X'
4594 * (Metrowerks, Compaq Tru64)
4595 * - there are platforms that give '0x' when converting
4596 * under %#X, but convert 0 in accordance with the
4597 * standard (OS/2 EMX)
4599 * We can achieve the desired consistency by inserting our
4600 * own '0x' or '0X' prefix, and substituting %x/%X in place
4603 * Note that this is the same approach as used in
4604 * formatint() in unicodeobject.c
4606 PyOS_snprintf(fmt
, sizeof(fmt
), "%s0%c%%.%dl%c",
4607 sign
, type
, prec
, type
);
4610 PyOS_snprintf(fmt
, sizeof(fmt
), "%s%%%s.%dl%c",
4611 sign
, (flags
&F_ALT
) ? "#" : "",
4615 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4616 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4618 if (buflen
<= 14 || buflen
<= (size_t)3 + (size_t)prec
) {
4619 PyErr_SetString(PyExc_OverflowError
,
4620 "formatted integer is too long (precision too large?)");
4624 PyOS_snprintf(buf
, buflen
, fmt
, -x
);
4626 PyOS_snprintf(buf
, buflen
, fmt
, x
);
4627 return (int)strlen(buf
);
4630 Py_LOCAL_INLINE(int)
4631 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
4633 /* presume that the buffer is at least 2 characters long */
4634 if (PyString_Check(v
)) {
4635 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
4639 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
4646 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4648 FORMATBUFLEN is the length of the buffer in which the ints &
4649 chars are formatted. XXX This is a magic number. Each formatting
4650 routine does bounds checking to ensure no overflow, but a better
4651 solution may be to malloc a buffer of appropriate size for each
4652 format. For now, the current solution is sufficient.
4654 #define FORMATBUFLEN (size_t)120
4657 PyString_Format(PyObject
*format
, PyObject
*args
)
4660 Py_ssize_t arglen
, argidx
;
4661 Py_ssize_t reslen
, rescnt
, fmtcnt
;
4663 PyObject
*result
, *orig_args
;
4664 #ifdef Py_USING_UNICODE
4667 PyObject
*dict
= NULL
;
4668 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
4669 PyErr_BadInternalCall();
4673 fmt
= PyString_AS_STRING(format
);
4674 fmtcnt
= PyString_GET_SIZE(format
);
4675 reslen
= rescnt
= fmtcnt
+ 100;
4676 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
4679 res
= PyString_AsString(result
);
4680 if (PyTuple_Check(args
)) {
4681 arglen
= PyTuple_GET_SIZE(args
);
4688 if (Py_TYPE(args
)->tp_as_mapping
&& !PyTuple_Check(args
) &&
4689 !PyObject_TypeCheck(args
, &PyBaseString_Type
))
4691 while (--fmtcnt
>= 0) {
4694 rescnt
= fmtcnt
+ 100;
4696 if (_PyString_Resize(&result
, reslen
) < 0)
4698 res
= PyString_AS_STRING(result
)
4705 /* Got a format specifier */
4707 Py_ssize_t width
= -1;
4713 PyObject
*temp
= NULL
;
4717 char formatbuf
[FORMATBUFLEN
];
4718 /* For format{int,char}() */
4719 #ifdef Py_USING_UNICODE
4720 char *fmt_start
= fmt
;
4721 Py_ssize_t argidx_start
= argidx
;
4732 PyErr_SetString(PyExc_TypeError
,
4733 "format requires a mapping");
4739 /* Skip over balanced parentheses */
4740 while (pcount
> 0 && --fmtcnt
>= 0) {
4743 else if (*fmt
== '(')
4747 keylen
= fmt
- keystart
- 1;
4748 if (fmtcnt
< 0 || pcount
> 0) {
4749 PyErr_SetString(PyExc_ValueError
,
4750 "incomplete format key");
4753 key
= PyString_FromStringAndSize(keystart
,
4761 args
= PyObject_GetItem(dict
, key
);
4770 while (--fmtcnt
>= 0) {
4771 switch (c
= *fmt
++) {
4772 case '-': flags
|= F_LJUST
; continue;
4773 case '+': flags
|= F_SIGN
; continue;
4774 case ' ': flags
|= F_BLANK
; continue;
4775 case '#': flags
|= F_ALT
; continue;
4776 case '0': flags
|= F_ZERO
; continue;
4781 v
= getnextarg(args
, arglen
, &argidx
);
4784 if (!PyInt_Check(v
)) {
4785 PyErr_SetString(PyExc_TypeError
,
4789 width
= PyInt_AsLong(v
);
4797 else if (c
>= 0 && isdigit(c
)) {
4799 while (--fmtcnt
>= 0) {
4800 c
= Py_CHARMASK(*fmt
++);
4803 if ((width
*10) / 10 != width
) {
4809 width
= width
*10 + (c
- '0');
4817 v
= getnextarg(args
, arglen
, &argidx
);
4820 if (!PyInt_Check(v
)) {
4826 prec
= PyInt_AsLong(v
);
4832 else if (c
>= 0 && isdigit(c
)) {
4834 while (--fmtcnt
>= 0) {
4835 c
= Py_CHARMASK(*fmt
++);
4838 if ((prec
*10) / 10 != prec
) {
4844 prec
= prec
*10 + (c
- '0');
4849 if (c
== 'h' || c
== 'l' || c
== 'L') {
4855 PyErr_SetString(PyExc_ValueError
,
4856 "incomplete format");
4860 v
= getnextarg(args
, arglen
, &argidx
);
4872 #ifdef Py_USING_UNICODE
4873 if (PyUnicode_Check(v
)) {
4875 argidx
= argidx_start
;
4879 temp
= _PyObject_Str(v
);
4880 #ifdef Py_USING_UNICODE
4881 if (temp
!= NULL
&& PyUnicode_Check(temp
)) {
4884 argidx
= argidx_start
;
4891 temp
= PyObject_Repr(v
);
4894 if (!PyString_Check(temp
)) {
4895 PyErr_SetString(PyExc_TypeError
,
4896 "%s argument has non-string str()");
4900 pbuf
= PyString_AS_STRING(temp
);
4901 len
= PyString_GET_SIZE(temp
);
4902 if (prec
>= 0 && len
> prec
)
4914 if (PyNumber_Check(v
)) {
4915 PyObject
*iobj
=NULL
;
4917 if (PyInt_Check(v
) || (PyLong_Check(v
))) {
4922 iobj
= PyNumber_Int(v
);
4923 if (iobj
==NULL
) iobj
= PyNumber_Long(v
);
4926 if (PyInt_Check(iobj
)) {
4929 len
= formatint(pbuf
,
4931 flags
, prec
, c
, iobj
);
4937 else if (PyLong_Check(iobj
)) {
4941 temp
= _PyString_FormatLong(iobj
, flags
,
4942 prec
, c
, &pbuf
, &ilen
);
4955 PyErr_Format(PyExc_TypeError
,
4956 "%%%c format: a number is required, "
4957 "not %.200s", c
, Py_TYPE(v
)->tp_name
);
4969 temp
= formatfloat(v
, flags
, prec
, c
);
4972 pbuf
= PyString_AS_STRING(temp
);
4973 len
= PyString_GET_SIZE(temp
);
4979 #ifdef Py_USING_UNICODE
4980 if (PyUnicode_Check(v
)) {
4982 argidx
= argidx_start
;
4987 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
4992 PyErr_Format(PyExc_ValueError
,
4993 "unsupported format character '%c' (0x%x) "
4996 (Py_ssize_t
)(fmt
- 1 -
4997 PyString_AsString(format
)));
5001 if (*pbuf
== '-' || *pbuf
== '+') {
5005 else if (flags
& F_SIGN
)
5007 else if (flags
& F_BLANK
)
5014 if (rescnt
- (sign
!= 0) < width
) {
5016 rescnt
= width
+ fmtcnt
+ 100;
5021 return PyErr_NoMemory();
5023 if (_PyString_Resize(&result
, reslen
) < 0) {
5027 res
= PyString_AS_STRING(result
)
5037 if ((flags
& F_ALT
) && (c
== 'x' || c
== 'X')) {
5038 assert(pbuf
[0] == '0');
5039 assert(pbuf
[1] == c
);
5050 if (width
> len
&& !(flags
& F_LJUST
)) {
5054 } while (--width
> len
);
5059 if ((flags
& F_ALT
) &&
5060 (c
== 'x' || c
== 'X')) {
5061 assert(pbuf
[0] == '0');
5062 assert(pbuf
[1] == c
);
5067 Py_MEMCPY(res
, pbuf
, len
);
5070 while (--width
>= len
) {
5074 if (dict
&& (argidx
< arglen
) && c
!= '%') {
5075 PyErr_SetString(PyExc_TypeError
,
5076 "not all arguments converted during string formatting");
5083 if (argidx
< arglen
&& !dict
) {
5084 PyErr_SetString(PyExc_TypeError
,
5085 "not all arguments converted during string formatting");
5091 _PyString_Resize(&result
, reslen
- rescnt
);
5094 #ifdef Py_USING_UNICODE
5100 /* Fiddle args right (remove the first argidx arguments) */
5101 if (PyTuple_Check(orig_args
) && argidx
> 0) {
5103 Py_ssize_t n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
5108 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
5110 PyTuple_SET_ITEM(v
, n
, w
);
5114 Py_INCREF(orig_args
);
5118 /* Take what we have of the result and let the Unicode formatting
5119 function format the rest of the input. */
5120 rescnt
= res
- PyString_AS_STRING(result
);
5121 if (_PyString_Resize(&result
, rescnt
))
5123 fmtcnt
= PyString_GET_SIZE(format
) - \
5124 (fmt
- PyString_AS_STRING(format
));
5125 format
= PyUnicode_Decode(fmt
, fmtcnt
, NULL
, NULL
);
5128 v
= PyUnicode_Format(format
, args
);
5132 /* Paste what we have (result) to what the Unicode formatting
5133 function returned (v) and return the result (or error) */
5134 w
= PyUnicode_Concat(result
, v
);
5139 #endif /* Py_USING_UNICODE */
5150 PyString_InternInPlace(PyObject
**p
)
5152 register PyStringObject
*s
= (PyStringObject
*)(*p
);
5154 if (s
== NULL
|| !PyString_Check(s
))
5155 Py_FatalError("PyString_InternInPlace: strings only please!");
5156 /* If it's a string subclass, we don't really know what putting
5157 it in the interned dict might do. */
5158 if (!PyString_CheckExact(s
))
5160 if (PyString_CHECK_INTERNED(s
))
5162 if (interned
== NULL
) {
5163 interned
= PyDict_New();
5164 if (interned
== NULL
) {
5165 PyErr_Clear(); /* Don't leave an exception */
5169 t
= PyDict_GetItem(interned
, (PyObject
*)s
);
5177 if (PyDict_SetItem(interned
, (PyObject
*)s
, (PyObject
*)s
) < 0) {
5181 /* The two references in interned are not counted by refcnt.
5182 The string deallocator will take care of this */
5184 PyString_CHECK_INTERNED(s
) = SSTATE_INTERNED_MORTAL
;
5188 PyString_InternImmortal(PyObject
**p
)
5190 PyString_InternInPlace(p
);
5191 if (PyString_CHECK_INTERNED(*p
) != SSTATE_INTERNED_IMMORTAL
) {
5192 PyString_CHECK_INTERNED(*p
) = SSTATE_INTERNED_IMMORTAL
;
5199 PyString_InternFromString(const char *cp
)
5201 PyObject
*s
= PyString_FromString(cp
);
5204 PyString_InternInPlace(&s
);
5212 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
5213 Py_XDECREF(characters
[i
]);
5214 characters
[i
] = NULL
;
5216 Py_XDECREF(nullstring
);
5220 void _Py_ReleaseInternedStrings(void)
5225 Py_ssize_t immortal_size
= 0, mortal_size
= 0;
5227 if (interned
== NULL
|| !PyDict_Check(interned
))
5229 keys
= PyDict_Keys(interned
);
5230 if (keys
== NULL
|| !PyList_Check(keys
)) {
5235 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5236 detector, interned strings are not forcibly deallocated; rather, we
5237 give them their stolen references back, and then clear and DECREF
5238 the interned dict. */
5240 n
= PyList_GET_SIZE(keys
);
5241 fprintf(stderr
, "releasing %" PY_FORMAT_SIZE_T
"d interned strings\n",
5243 for (i
= 0; i
< n
; i
++) {
5244 s
= (PyStringObject
*) PyList_GET_ITEM(keys
, i
);
5245 switch (s
->ob_sstate
) {
5246 case SSTATE_NOT_INTERNED
:
5247 /* XXX Shouldn't happen */
5249 case SSTATE_INTERNED_IMMORTAL
:
5251 immortal_size
+= Py_SIZE(s
);
5253 case SSTATE_INTERNED_MORTAL
:
5255 mortal_size
+= Py_SIZE(s
);
5258 Py_FatalError("Inconsistent interned string state.");
5260 s
->ob_sstate
= SSTATE_NOT_INTERNED
;
5262 fprintf(stderr
, "total size of all interned strings: "
5263 "%" PY_FORMAT_SIZE_T
"d/%" PY_FORMAT_SIZE_T
"d "
5264 "mortal/immortal\n", mortal_size
, immortal_size
);
5266 PyDict_Clear(interned
);
5267 Py_DECREF(interned
);