1 /* String (str/bytes) object implementation */
3 #define PY_SSIZE_T_CLEAN
10 Py_ssize_t null_strings
, one_strings
;
13 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
14 static PyStringObject
*nullstring
;
16 /* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
21 Another way to look at this is to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
24 static PyObject
*interned
;
26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
35 For both PyString_FromString() and PyString_FromStringAndSize(), the
36 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
39 For PyString_FromString(), the parameter `str' points to a null-terminated
40 string containing exactly `size' bytes.
42 For PyString_FromStringAndSize(), the parameter `str' is
43 either NULL or else points to a string containing at least `size' bytes.
44 For PyString_FromStringAndSize(), the string in the `str' parameter does
45 not have to be null-terminated. (Therefore it is safe to construct a
46 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
48 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
50 PyString object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character. It
56 is therefore equal to the `size' parameter (for
57 PyString_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyString_FromString()).
61 PyString_FromStringAndSize(const char *str
, Py_ssize_t size
)
63 register PyStringObject
*op
;
65 PyErr_SetString(PyExc_SystemError
,
66 "Negative size passed to PyString_FromStringAndSize");
69 if (size
== 0 && (op
= nullstring
) != NULL
) {
74 return (PyObject
*)op
;
76 if (size
== 1 && str
!= NULL
&&
77 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
83 return (PyObject
*)op
;
86 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
87 PyErr_SetString(PyExc_OverflowError
, "string is too large");
91 /* Inline PyObject_NewVar */
92 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
94 return PyErr_NoMemory();
95 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
97 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
99 Py_MEMCPY(op
->ob_sval
, str
, size
);
100 op
->ob_sval
[size
] = '\0';
101 /* share short strings */
103 PyObject
*t
= (PyObject
*)op
;
104 PyString_InternInPlace(&t
);
105 op
= (PyStringObject
*)t
;
108 } else if (size
== 1 && str
!= NULL
) {
109 PyObject
*t
= (PyObject
*)op
;
110 PyString_InternInPlace(&t
);
111 op
= (PyStringObject
*)t
;
112 characters
[*str
& UCHAR_MAX
] = op
;
115 return (PyObject
*) op
;
119 PyString_FromString(const char *str
)
121 register size_t size
;
122 register PyStringObject
*op
;
126 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
127 PyErr_SetString(PyExc_OverflowError
,
128 "string is too long for a Python string");
131 if (size
== 0 && (op
= nullstring
) != NULL
) {
136 return (PyObject
*)op
;
138 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
143 return (PyObject
*)op
;
146 /* Inline PyObject_NewVar */
147 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
149 return PyErr_NoMemory();
150 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
152 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
153 Py_MEMCPY(op
->ob_sval
, str
, size
+1);
154 /* share short strings */
156 PyObject
*t
= (PyObject
*)op
;
157 PyString_InternInPlace(&t
);
158 op
= (PyStringObject
*)t
;
161 } else if (size
== 1) {
162 PyObject
*t
= (PyObject
*)op
;
163 PyString_InternInPlace(&t
);
164 op
= (PyStringObject
*)t
;
165 characters
[*str
& UCHAR_MAX
] = op
;
168 return (PyObject
*) op
;
172 PyString_FromFormatV(const char *format
, va_list vargs
)
180 #ifdef VA_LIST_IS_ARRAY
181 Py_MEMCPY(count
, vargs
, sizeof(va_list));
184 __va_copy(count
, vargs
);
189 /* step 1: figure out how large a buffer we need */
190 for (f
= format
; *f
; f
++) {
192 #ifdef HAVE_LONG_LONG
193 int longlongflag
= 0;
196 while (*++f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
199 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
200 * they don't affect the amount of space we reserve.
203 if (f
[1] == 'd' || f
[1] == 'u') {
206 #ifdef HAVE_LONG_LONG
207 else if (f
[1] == 'l' &&
208 (f
[2] == 'd' || f
[2] == 'u')) {
214 else if (*f
== 'z' && (f
[1] == 'd' || f
[1] == 'u')) {
220 (void)va_arg(count
, int);
221 /* fall through... */
225 case 'd': case 'u': case 'i': case 'x':
226 (void) va_arg(count
, int);
227 #ifdef HAVE_LONG_LONG
229 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
230 plus 1 for the sign. 53/22 is an upper
231 bound for log10(256). */
233 n
+= 2 + (SIZEOF_LONG_LONG
*53-1) / 22;
236 /* 20 bytes is enough to hold a 64-bit
237 integer. Decimal takes the most
238 space. This isn't enough for
244 s
= va_arg(count
, char*);
248 (void) va_arg(count
, int);
249 /* maximum 64-bit pointer representation:
251 * so 19 characters is enough.
252 * XXX I count 18 -- what's the extra for?
257 /* if we stumble upon an unknown
258 formatting code, copy the rest of
259 the format string to the output
260 string. (we cannot just skip the
261 code, since there's no way to know
262 what's in the argument list) */
270 /* step 2: fill the buffer */
271 /* Since we've analyzed how much space we need for the worst case,
272 use sprintf directly instead of the slower PyOS_snprintf. */
273 string
= PyString_FromStringAndSize(NULL
, n
);
277 s
= PyString_AsString(string
);
279 for (f
= format
; *f
; f
++) {
284 #ifdef HAVE_LONG_LONG
285 int longlongflag
= 0;
288 /* parse the width.precision part (we're only
289 interested in the precision value, if any) */
291 while (isdigit(Py_CHARMASK(*f
)))
292 n
= (n
*10) + *f
++ - '0';
296 while (isdigit(Py_CHARMASK(*f
)))
297 n
= (n
*10) + *f
++ - '0';
299 while (*f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
301 /* Handle %ld, %lu, %lld and %llu. */
303 if (f
[1] == 'd' || f
[1] == 'u') {
307 #ifdef HAVE_LONG_LONG
308 else if (f
[1] == 'l' &&
309 (f
[2] == 'd' || f
[2] == 'u')) {
315 /* handle the size_t flag. */
316 else if (*f
== 'z' && (f
[1] == 'd' || f
[1] == 'u')) {
323 *s
++ = va_arg(vargs
, int);
327 sprintf(s
, "%ld", va_arg(vargs
, long));
328 #ifdef HAVE_LONG_LONG
329 else if (longlongflag
)
330 sprintf(s
, "%" PY_FORMAT_LONG_LONG
"d",
331 va_arg(vargs
, PY_LONG_LONG
));
334 sprintf(s
, "%" PY_FORMAT_SIZE_T
"d",
335 va_arg(vargs
, Py_ssize_t
));
337 sprintf(s
, "%d", va_arg(vargs
, int));
343 va_arg(vargs
, unsigned long));
344 #ifdef HAVE_LONG_LONG
345 else if (longlongflag
)
346 sprintf(s
, "%" PY_FORMAT_LONG_LONG
"u",
347 va_arg(vargs
, PY_LONG_LONG
));
350 sprintf(s
, "%" PY_FORMAT_SIZE_T
"u",
351 va_arg(vargs
, size_t));
354 va_arg(vargs
, unsigned int));
358 sprintf(s
, "%i", va_arg(vargs
, int));
362 sprintf(s
, "%x", va_arg(vargs
, int));
366 p
= va_arg(vargs
, char*);
374 sprintf(s
, "%p", va_arg(vargs
, void*));
375 /* %p is ill-defined: ensure leading 0x. */
378 else if (s
[1] != 'x') {
379 memmove(s
+2, s
, strlen(s
)+1);
398 _PyString_Resize(&string
, s
- PyString_AS_STRING(string
));
403 PyString_FromFormat(const char *format
, ...)
408 #ifdef HAVE_STDARG_PROTOTYPES
409 va_start(vargs
, format
);
413 ret
= PyString_FromFormatV(format
, vargs
);
419 PyObject
*PyString_Decode(const char *s
,
421 const char *encoding
,
426 str
= PyString_FromStringAndSize(s
, size
);
429 v
= PyString_AsDecodedString(str
, encoding
, errors
);
434 PyObject
*PyString_AsDecodedObject(PyObject
*str
,
435 const char *encoding
,
440 if (!PyString_Check(str
)) {
445 if (encoding
== NULL
) {
446 #ifdef Py_USING_UNICODE
447 encoding
= PyUnicode_GetDefaultEncoding();
449 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
454 /* Decode via the codec registry */
455 v
= PyCodec_Decode(str
, encoding
, errors
);
465 PyObject
*PyString_AsDecodedString(PyObject
*str
,
466 const char *encoding
,
471 v
= PyString_AsDecodedObject(str
, encoding
, errors
);
475 #ifdef Py_USING_UNICODE
476 /* Convert Unicode to a string using the default encoding */
477 if (PyUnicode_Check(v
)) {
479 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
485 if (!PyString_Check(v
)) {
486 PyErr_Format(PyExc_TypeError
,
487 "decoder did not return a string object (type=%.400s)",
488 Py_TYPE(v
)->tp_name
);
499 PyObject
*PyString_Encode(const char *s
,
501 const char *encoding
,
506 str
= PyString_FromStringAndSize(s
, size
);
509 v
= PyString_AsEncodedString(str
, encoding
, errors
);
514 PyObject
*PyString_AsEncodedObject(PyObject
*str
,
515 const char *encoding
,
520 if (!PyString_Check(str
)) {
525 if (encoding
== NULL
) {
526 #ifdef Py_USING_UNICODE
527 encoding
= PyUnicode_GetDefaultEncoding();
529 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
534 /* Encode via the codec registry */
535 v
= PyCodec_Encode(str
, encoding
, errors
);
545 PyObject
*PyString_AsEncodedString(PyObject
*str
,
546 const char *encoding
,
551 v
= PyString_AsEncodedObject(str
, encoding
, errors
);
555 #ifdef Py_USING_UNICODE
556 /* Convert Unicode to a string using the default encoding */
557 if (PyUnicode_Check(v
)) {
559 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
565 if (!PyString_Check(v
)) {
566 PyErr_Format(PyExc_TypeError
,
567 "encoder did not return a string object (type=%.400s)",
568 Py_TYPE(v
)->tp_name
);
580 string_dealloc(PyObject
*op
)
582 switch (PyString_CHECK_INTERNED(op
)) {
583 case SSTATE_NOT_INTERNED
:
586 case SSTATE_INTERNED_MORTAL
:
587 /* revive dead object temporarily for DelItem */
589 if (PyDict_DelItem(interned
, op
) != 0)
591 "deletion of interned string failed");
594 case SSTATE_INTERNED_IMMORTAL
:
595 Py_FatalError("Immortal interned string died.");
598 Py_FatalError("Inconsistent interned string state.");
600 Py_TYPE(op
)->tp_free(op
);
603 /* Unescape a backslash-escaped string. If unicode is non-zero,
604 the string is a u-literal. If recode_encoding is non-zero,
605 the string is UTF-8 encoded and should be re-encoded in the
606 specified encoding. */
608 PyObject
*PyString_DecodeEscape(const char *s
,
612 const char *recode_encoding
)
618 Py_ssize_t newlen
= recode_encoding
? 4*len
:len
;
619 v
= PyString_FromStringAndSize((char *)NULL
, newlen
);
622 p
= buf
= PyString_AsString(v
);
627 #ifdef Py_USING_UNICODE
628 if (recode_encoding
&& (*s
& 0x80)) {
634 /* Decode non-ASCII bytes as UTF-8. */
635 while (t
< end
&& (*t
& 0x80)) t
++;
636 u
= PyUnicode_DecodeUTF8(s
, t
- s
, errors
);
639 /* Recode them in target encoding. */
640 w
= PyUnicode_AsEncodedString(
641 u
, recode_encoding
, errors
);
645 /* Append bytes to output buffer. */
646 assert(PyString_Check(w
));
647 r
= PyString_AS_STRING(w
);
648 rn
= PyString_GET_SIZE(w
);
663 PyErr_SetString(PyExc_ValueError
,
664 "Trailing \\ in string");
668 /* XXX This assumes ASCII! */
670 case '\\': *p
++ = '\\'; break;
671 case '\'': *p
++ = '\''; break;
672 case '\"': *p
++ = '\"'; break;
673 case 'b': *p
++ = '\b'; break;
674 case 'f': *p
++ = '\014'; break; /* FF */
675 case 't': *p
++ = '\t'; break;
676 case 'n': *p
++ = '\n'; break;
677 case 'r': *p
++ = '\r'; break;
678 case 'v': *p
++ = '\013'; break; /* VT */
679 case 'a': *p
++ = '\007'; break; /* BEL, not classic C */
680 case '0': case '1': case '2': case '3':
681 case '4': case '5': case '6': case '7':
683 if (s
< end
&& '0' <= *s
&& *s
<= '7') {
684 c
= (c
<<3) + *s
++ - '0';
685 if (s
< end
&& '0' <= *s
&& *s
<= '7')
686 c
= (c
<<3) + *s
++ - '0';
692 isxdigit(Py_CHARMASK(s
[0])) &&
693 isxdigit(Py_CHARMASK(s
[1])))
716 if (!errors
|| strcmp(errors
, "strict") == 0) {
717 PyErr_SetString(PyExc_ValueError
,
718 "invalid \\x escape");
721 if (strcmp(errors
, "replace") == 0) {
723 } else if (strcmp(errors
, "ignore") == 0)
726 PyErr_Format(PyExc_ValueError
,
728 "unknown error handling code: %.400s",
732 #ifndef Py_USING_UNICODE
737 PyErr_SetString(PyExc_ValueError
,
738 "Unicode escapes not legal "
739 "when Unicode disabled");
746 goto non_esc
; /* an arbitrary number of unescaped
747 UTF-8 bytes may follow. */
751 _PyString_Resize(&v
, p
- buf
);
758 /* -------------------------------------------------------------------- */
762 string_getsize(register PyObject
*op
)
766 if (PyString_AsStringAndSize(op
, &s
, &len
))
771 static /*const*/ char *
772 string_getbuffer(register PyObject
*op
)
776 if (PyString_AsStringAndSize(op
, &s
, &len
))
782 PyString_Size(register PyObject
*op
)
784 if (!PyString_Check(op
))
785 return string_getsize(op
);
790 PyString_AsString(register PyObject
*op
)
792 if (!PyString_Check(op
))
793 return string_getbuffer(op
);
794 return ((PyStringObject
*)op
) -> ob_sval
;
798 PyString_AsStringAndSize(register PyObject
*obj
,
800 register Py_ssize_t
*len
)
803 PyErr_BadInternalCall();
807 if (!PyString_Check(obj
)) {
808 #ifdef Py_USING_UNICODE
809 if (PyUnicode_Check(obj
)) {
810 obj
= _PyUnicode_AsDefaultEncodedString(obj
, NULL
);
817 PyErr_Format(PyExc_TypeError
,
818 "expected string or Unicode object, "
819 "%.200s found", Py_TYPE(obj
)->tp_name
);
824 *s
= PyString_AS_STRING(obj
);
826 *len
= PyString_GET_SIZE(obj
);
827 else if (strlen(*s
) != (size_t)PyString_GET_SIZE(obj
)) {
828 PyErr_SetString(PyExc_TypeError
,
829 "expected string without null bytes");
835 /* -------------------------------------------------------------------- */
838 #include "stringlib/stringdefs.h"
839 #include "stringlib/fastsearch.h"
841 #include "stringlib/count.h"
842 #include "stringlib/find.h"
843 #include "stringlib/partition.h"
844 #include "stringlib/split.h"
846 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
847 #include "stringlib/localeutil.h"
852 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
854 Py_ssize_t i
, str_len
;
858 /* XXX Ought to check for interrupts when writing long strings */
859 if (! PyString_CheckExact(op
)) {
861 /* A str subclass may have its own __str__ method. */
862 op
= (PyStringObject
*) PyObject_Str((PyObject
*)op
);
865 ret
= string_print(op
, fp
, flags
);
869 if (flags
& Py_PRINT_RAW
) {
870 char *data
= op
->ob_sval
;
871 Py_ssize_t size
= Py_SIZE(op
);
872 Py_BEGIN_ALLOW_THREADS
873 while (size
> INT_MAX
) {
874 /* Very long strings cannot be written atomically.
875 * But don't write exactly INT_MAX bytes at a time
876 * to avoid memory alignment issues.
878 const int chunk_size
= INT_MAX
& ~0x3FFF;
879 fwrite(data
, 1, chunk_size
, fp
);
884 if (size
) fwrite(data
, (int)size
, 1, fp
);
886 fwrite(data
, 1, (int)size
, fp
);
892 /* figure out which quote to use; single is preferred */
894 if (memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
895 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
898 str_len
= Py_SIZE(op
);
899 Py_BEGIN_ALLOW_THREADS
901 for (i
= 0; i
< str_len
; i
++) {
902 /* Since strings are immutable and the caller should have a
903 reference, accessing the internal buffer should not be an issue
904 with the GIL released. */
906 if (c
== quote
|| c
== '\\')
907 fprintf(fp
, "\\%c", c
);
914 else if (c
< ' ' || c
>= 0x7f)
915 fprintf(fp
, "\\x%02x", c
& 0xff);
925 PyString_Repr(PyObject
*obj
, int smartquotes
)
927 register PyStringObject
* op
= (PyStringObject
*) obj
;
928 size_t newsize
= 2 + 4 * Py_SIZE(op
);
930 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 != Py_SIZE(op
)) {
931 PyErr_SetString(PyExc_OverflowError
,
932 "string is too large to make repr");
935 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
940 register Py_ssize_t i
;
945 /* figure out which quote to use; single is preferred */
948 memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
949 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
952 p
= PyString_AS_STRING(v
);
954 for (i
= 0; i
< Py_SIZE(op
); i
++) {
955 /* There's at least enough room for a hex escape
956 and a closing quote. */
957 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 5);
959 if (c
== quote
|| c
== '\\')
960 *p
++ = '\\', *p
++ = c
;
962 *p
++ = '\\', *p
++ = 't';
964 *p
++ = '\\', *p
++ = 'n';
966 *p
++ = '\\', *p
++ = 'r';
967 else if (c
< ' ' || c
>= 0x7f) {
968 /* For performance, we don't want to call
969 PyOS_snprintf here (extra layers of
971 sprintf(p
, "\\x%02x", c
& 0xff);
977 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 1);
981 &v
, (p
- PyString_AS_STRING(v
)));
987 string_repr(PyObject
*op
)
989 return PyString_Repr(op
, 1);
993 string_str(PyObject
*s
)
995 assert(PyString_Check(s
));
996 if (PyString_CheckExact(s
)) {
1001 /* Subtype -- return genuine string with the same value. */
1002 PyStringObject
*t
= (PyStringObject
*) s
;
1003 return PyString_FromStringAndSize(t
->ob_sval
, Py_SIZE(t
));
1008 string_length(PyStringObject
*a
)
1014 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
1016 register Py_ssize_t size
;
1017 register PyStringObject
*op
;
1018 if (!PyString_Check(bb
)) {
1019 #ifdef Py_USING_UNICODE
1020 if (PyUnicode_Check(bb
))
1021 return PyUnicode_Concat((PyObject
*)a
, bb
);
1023 if (PyByteArray_Check(bb
))
1024 return PyByteArray_Concat((PyObject
*)a
, bb
);
1025 PyErr_Format(PyExc_TypeError
,
1026 "cannot concatenate 'str' and '%.200s' objects",
1027 Py_TYPE(bb
)->tp_name
);
1030 #define b ((PyStringObject *)bb)
1031 /* Optimize cases with empty left or right operand */
1032 if ((Py_SIZE(a
) == 0 || Py_SIZE(b
) == 0) &&
1033 PyString_CheckExact(a
) && PyString_CheckExact(b
)) {
1034 if (Py_SIZE(a
) == 0) {
1039 return (PyObject
*)a
;
1041 size
= Py_SIZE(a
) + Py_SIZE(b
);
1042 /* Check that string sizes are not negative, to prevent an
1043 overflow in cases where we are passed incorrectly-created
1044 strings with negative lengths (due to a bug in other code).
1046 if (Py_SIZE(a
) < 0 || Py_SIZE(b
) < 0 ||
1047 Py_SIZE(a
) > PY_SSIZE_T_MAX
- Py_SIZE(b
)) {
1048 PyErr_SetString(PyExc_OverflowError
,
1049 "strings are too large to concat");
1053 /* Inline PyObject_NewVar */
1054 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
1055 PyErr_SetString(PyExc_OverflowError
,
1056 "strings are too large to concat");
1059 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
1061 return PyErr_NoMemory();
1062 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1064 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1065 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1066 Py_MEMCPY(op
->ob_sval
+ Py_SIZE(a
), b
->ob_sval
, Py_SIZE(b
));
1067 op
->ob_sval
[size
] = '\0';
1068 return (PyObject
*) op
;
1073 string_repeat(register PyStringObject
*a
, register Py_ssize_t n
)
1075 register Py_ssize_t i
;
1076 register Py_ssize_t j
;
1077 register Py_ssize_t size
;
1078 register PyStringObject
*op
;
1082 /* watch out for overflows: the size can overflow int,
1083 * and the # of bytes needed can overflow size_t
1085 size
= Py_SIZE(a
) * n
;
1086 if (n
&& size
/ n
!= Py_SIZE(a
)) {
1087 PyErr_SetString(PyExc_OverflowError
,
1088 "repeated string is too long");
1091 if (size
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1093 return (PyObject
*)a
;
1095 nbytes
= (size_t)size
;
1096 if (nbytes
+ PyStringObject_SIZE
<= nbytes
) {
1097 PyErr_SetString(PyExc_OverflowError
,
1098 "repeated string is too long");
1101 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ nbytes
);
1103 return PyErr_NoMemory();
1104 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1106 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1107 op
->ob_sval
[size
] = '\0';
1108 if (Py_SIZE(a
) == 1 && n
> 0) {
1109 memset(op
->ob_sval
, a
->ob_sval
[0] , n
);
1110 return (PyObject
*) op
;
1114 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1118 j
= (i
<= size
-i
) ? i
: size
-i
;
1119 Py_MEMCPY(op
->ob_sval
+i
, op
->ob_sval
, j
);
1122 return (PyObject
*) op
;
1125 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1128 string_slice(register PyStringObject
*a
, register Py_ssize_t i
,
1129 register Py_ssize_t j
)
1130 /* j -- may be negative! */
1135 j
= 0; /* Avoid signed/unsigned bug in next line */
1138 if (i
== 0 && j
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1139 /* It's the same as a */
1141 return (PyObject
*)a
;
1145 return PyString_FromStringAndSize(a
->ob_sval
+ i
, j
-i
);
1149 string_contains(PyObject
*str_obj
, PyObject
*sub_obj
)
1151 if (!PyString_CheckExact(sub_obj
)) {
1152 #ifdef Py_USING_UNICODE
1153 if (PyUnicode_Check(sub_obj
))
1154 return PyUnicode_Contains(str_obj
, sub_obj
);
1156 if (!PyString_Check(sub_obj
)) {
1157 PyErr_Format(PyExc_TypeError
,
1158 "'in <string>' requires string as left operand, "
1159 "not %.200s", Py_TYPE(sub_obj
)->tp_name
);
1164 return stringlib_contains_obj(str_obj
, sub_obj
);
1168 string_item(PyStringObject
*a
, register Py_ssize_t i
)
1172 if (i
< 0 || i
>= Py_SIZE(a
)) {
1173 PyErr_SetString(PyExc_IndexError
, "string index out of range");
1176 pchar
= a
->ob_sval
[i
];
1177 v
= (PyObject
*)characters
[pchar
& UCHAR_MAX
];
1179 v
= PyString_FromStringAndSize(&pchar
, 1);
1190 string_richcompare(PyStringObject
*a
, PyStringObject
*b
, int op
)
1193 Py_ssize_t len_a
, len_b
;
1197 /* Make sure both arguments are strings. */
1198 if (!(PyString_Check(a
) && PyString_Check(b
))) {
1199 result
= Py_NotImplemented
;
1204 case Py_EQ
:case Py_LE
:case Py_GE
:
1207 case Py_NE
:case Py_LT
:case Py_GT
:
1213 /* Supporting Py_NE here as well does not save
1214 much time, since Py_NE is rarely used. */
1215 if (Py_SIZE(a
) == Py_SIZE(b
)
1216 && (a
->ob_sval
[0] == b
->ob_sval
[0]
1217 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0)) {
1224 len_a
= Py_SIZE(a
); len_b
= Py_SIZE(b
);
1225 min_len
= (len_a
< len_b
) ? len_a
: len_b
;
1227 c
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
1229 c
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
1233 c
= (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
1235 case Py_LT
: c
= c
< 0; break;
1236 case Py_LE
: c
= c
<= 0; break;
1237 case Py_EQ
: assert(0); break; /* unreachable */
1238 case Py_NE
: c
= c
!= 0; break;
1239 case Py_GT
: c
= c
> 0; break;
1240 case Py_GE
: c
= c
>= 0; break;
1242 result
= Py_NotImplemented
;
1245 result
= c
? Py_True
: Py_False
;
1252 _PyString_Eq(PyObject
*o1
, PyObject
*o2
)
1254 PyStringObject
*a
= (PyStringObject
*) o1
;
1255 PyStringObject
*b
= (PyStringObject
*) o2
;
1256 return Py_SIZE(a
) == Py_SIZE(b
)
1257 && *a
->ob_sval
== *b
->ob_sval
1258 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0;
1262 string_hash(PyStringObject
*a
)
1264 register Py_ssize_t len
;
1265 register unsigned char *p
;
1268 if (a
->ob_shash
!= -1)
1271 p
= (unsigned char *) a
->ob_sval
;
1274 x
= (1000003*x
) ^ *p
++;
1283 string_subscript(PyStringObject
* self
, PyObject
* item
)
1285 if (PyIndex_Check(item
)) {
1286 Py_ssize_t i
= PyNumber_AsSsize_t(item
, PyExc_IndexError
);
1287 if (i
== -1 && PyErr_Occurred())
1290 i
+= PyString_GET_SIZE(self
);
1291 return string_item(self
, i
);
1293 else if (PySlice_Check(item
)) {
1294 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
1299 if (PySlice_GetIndicesEx((PySliceObject
*)item
,
1300 PyString_GET_SIZE(self
),
1301 &start
, &stop
, &step
, &slicelength
) < 0) {
1305 if (slicelength
<= 0) {
1306 return PyString_FromStringAndSize("", 0);
1308 else if (start
== 0 && step
== 1 &&
1309 slicelength
== PyString_GET_SIZE(self
) &&
1310 PyString_CheckExact(self
)) {
1312 return (PyObject
*)self
;
1314 else if (step
== 1) {
1315 return PyString_FromStringAndSize(
1316 PyString_AS_STRING(self
) + start
,
1320 source_buf
= PyString_AsString((PyObject
*)self
);
1321 result_buf
= (char *)PyMem_Malloc(slicelength
);
1322 if (result_buf
== NULL
)
1323 return PyErr_NoMemory();
1325 for (cur
= start
, i
= 0; i
< slicelength
;
1327 result_buf
[i
] = source_buf
[cur
];
1330 result
= PyString_FromStringAndSize(result_buf
,
1332 PyMem_Free(result_buf
);
1337 PyErr_Format(PyExc_TypeError
,
1338 "string indices must be integers, not %.200s",
1339 Py_TYPE(item
)->tp_name
);
1345 string_buffer_getreadbuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1348 PyErr_SetString(PyExc_SystemError
,
1349 "accessing non-existent string segment");
1352 *ptr
= (void *)self
->ob_sval
;
1353 return Py_SIZE(self
);
1357 string_buffer_getwritebuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1359 PyErr_SetString(PyExc_TypeError
,
1360 "Cannot use string as modifiable buffer");
1365 string_buffer_getsegcount(PyStringObject
*self
, Py_ssize_t
*lenp
)
1368 *lenp
= Py_SIZE(self
);
1373 string_buffer_getcharbuf(PyStringObject
*self
, Py_ssize_t index
, const char **ptr
)
1376 PyErr_SetString(PyExc_SystemError
,
1377 "accessing non-existent string segment");
1380 *ptr
= self
->ob_sval
;
1381 return Py_SIZE(self
);
1385 string_buffer_getbuffer(PyStringObject
*self
, Py_buffer
*view
, int flags
)
1387 return PyBuffer_FillInfo(view
, (PyObject
*)self
,
1388 (void *)self
->ob_sval
, Py_SIZE(self
),
1392 static PySequenceMethods string_as_sequence
= {
1393 (lenfunc
)string_length
, /*sq_length*/
1394 (binaryfunc
)string_concat
, /*sq_concat*/
1395 (ssizeargfunc
)string_repeat
, /*sq_repeat*/
1396 (ssizeargfunc
)string_item
, /*sq_item*/
1397 (ssizessizeargfunc
)string_slice
, /*sq_slice*/
1400 (objobjproc
)string_contains
/*sq_contains*/
1403 static PyMappingMethods string_as_mapping
= {
1404 (lenfunc
)string_length
,
1405 (binaryfunc
)string_subscript
,
1409 static PyBufferProcs string_as_buffer
= {
1410 (readbufferproc
)string_buffer_getreadbuf
,
1411 (writebufferproc
)string_buffer_getwritebuf
,
1412 (segcountproc
)string_buffer_getsegcount
,
1413 (charbufferproc
)string_buffer_getcharbuf
,
1414 (getbufferproc
)string_buffer_getbuffer
,
1421 #define RIGHTSTRIP 1
1424 /* Arrays indexed by above */
1425 static const char *stripformat
[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1427 #define STRIPNAME(i) (stripformat[i]+3)
1429 PyDoc_STRVAR(split__doc__
,
1430 "S.split([sep [,maxsplit]]) -> list of strings\n\
1432 Return a list of the words in the string S, using sep as the\n\
1433 delimiter string. If maxsplit is given, at most maxsplit\n\
1434 splits are done. If sep is not specified or is None, any\n\
1435 whitespace string is a separator and empty strings are removed\n\
1439 string_split(PyStringObject
*self
, PyObject
*args
)
1441 Py_ssize_t len
= PyString_GET_SIZE(self
), n
;
1442 Py_ssize_t maxsplit
= -1;
1443 const char *s
= PyString_AS_STRING(self
), *sub
;
1444 PyObject
*subobj
= Py_None
;
1446 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
1449 maxsplit
= PY_SSIZE_T_MAX
;
1450 if (subobj
== Py_None
)
1451 return stringlib_split_whitespace((PyObject
*) self
, s
, len
, maxsplit
);
1452 if (PyString_Check(subobj
)) {
1453 sub
= PyString_AS_STRING(subobj
);
1454 n
= PyString_GET_SIZE(subobj
);
1456 #ifdef Py_USING_UNICODE
1457 else if (PyUnicode_Check(subobj
))
1458 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
1460 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1463 return stringlib_split((PyObject
*) self
, s
, len
, sub
, n
, maxsplit
);
1466 PyDoc_STRVAR(partition__doc__
,
1467 "S.partition(sep) -> (head, sep, tail)\n\
1469 Search for the separator sep in S, and return the part before it,\n\
1470 the separator itself, and the part after it. If the separator is not\n\
1471 found, return S and two empty strings.");
1474 string_partition(PyStringObject
*self
, PyObject
*sep_obj
)
1479 if (PyString_Check(sep_obj
)) {
1480 sep
= PyString_AS_STRING(sep_obj
);
1481 sep_len
= PyString_GET_SIZE(sep_obj
);
1483 #ifdef Py_USING_UNICODE
1484 else if (PyUnicode_Check(sep_obj
))
1485 return PyUnicode_Partition((PyObject
*) self
, sep_obj
);
1487 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1490 return stringlib_partition(
1492 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1493 sep_obj
, sep
, sep_len
1497 PyDoc_STRVAR(rpartition__doc__
,
1498 "S.rpartition(sep) -> (tail, sep, head)\n\
1500 Search for the separator sep in S, starting at the end of S, and return\n\
1501 the part before it, the separator itself, and the part after it. If the\n\
1502 separator is not found, return two empty strings and S.");
1505 string_rpartition(PyStringObject
*self
, PyObject
*sep_obj
)
1510 if (PyString_Check(sep_obj
)) {
1511 sep
= PyString_AS_STRING(sep_obj
);
1512 sep_len
= PyString_GET_SIZE(sep_obj
);
1514 #ifdef Py_USING_UNICODE
1515 else if (PyUnicode_Check(sep_obj
))
1516 return PyUnicode_RPartition((PyObject
*) self
, sep_obj
);
1518 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1521 return stringlib_rpartition(
1523 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1524 sep_obj
, sep
, sep_len
1528 PyDoc_STRVAR(rsplit__doc__
,
1529 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1531 Return a list of the words in the string S, using sep as the\n\
1532 delimiter string, starting at the end of the string and working\n\
1533 to the front. If maxsplit is given, at most maxsplit splits are\n\
1534 done. If sep is not specified or is None, any whitespace string\n\
1538 string_rsplit(PyStringObject
*self
, PyObject
*args
)
1540 Py_ssize_t len
= PyString_GET_SIZE(self
), n
;
1541 Py_ssize_t maxsplit
= -1;
1542 const char *s
= PyString_AS_STRING(self
), *sub
;
1543 PyObject
*subobj
= Py_None
;
1545 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
1548 maxsplit
= PY_SSIZE_T_MAX
;
1549 if (subobj
== Py_None
)
1550 return stringlib_rsplit_whitespace((PyObject
*) self
, s
, len
, maxsplit
);
1551 if (PyString_Check(subobj
)) {
1552 sub
= PyString_AS_STRING(subobj
);
1553 n
= PyString_GET_SIZE(subobj
);
1555 #ifdef Py_USING_UNICODE
1556 else if (PyUnicode_Check(subobj
))
1557 return PyUnicode_RSplit((PyObject
*)self
, subobj
, maxsplit
);
1559 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1562 return stringlib_rsplit((PyObject
*) self
, s
, len
, sub
, n
, maxsplit
);
1566 PyDoc_STRVAR(join__doc__
,
1567 "S.join(iterable) -> string\n\
1569 Return a string which is the concatenation of the strings in the\n\
1570 iterable. The separator between elements is S.");
1573 string_join(PyStringObject
*self
, PyObject
*orig
)
1575 char *sep
= PyString_AS_STRING(self
);
1576 const Py_ssize_t seplen
= PyString_GET_SIZE(self
);
1577 PyObject
*res
= NULL
;
1579 Py_ssize_t seqlen
= 0;
1582 PyObject
*seq
, *item
;
1584 seq
= PySequence_Fast(orig
, "");
1589 seqlen
= PySequence_Size(seq
);
1592 return PyString_FromString("");
1595 item
= PySequence_Fast_GET_ITEM(seq
, 0);
1596 if (PyString_CheckExact(item
) || PyUnicode_CheckExact(item
)) {
1603 /* There are at least two things to join, or else we have a subclass
1604 * of the builtin types in the sequence.
1605 * Do a pre-pass to figure out the total amount of space we'll
1606 * need (sz), see whether any argument is absurd, and defer to
1607 * the Unicode join if appropriate.
1609 for (i
= 0; i
< seqlen
; i
++) {
1610 const size_t old_sz
= sz
;
1611 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1612 if (!PyString_Check(item
)){
1613 #ifdef Py_USING_UNICODE
1614 if (PyUnicode_Check(item
)) {
1615 /* Defer to Unicode join.
1616 * CAUTION: There's no guarantee that the
1617 * original sequence can be iterated over
1618 * again, so we must pass seq here.
1621 result
= PyUnicode_Join((PyObject
*)self
, seq
);
1626 PyErr_Format(PyExc_TypeError
,
1627 "sequence item %zd: expected string,"
1629 i
, Py_TYPE(item
)->tp_name
);
1633 sz
+= PyString_GET_SIZE(item
);
1636 if (sz
< old_sz
|| sz
> PY_SSIZE_T_MAX
) {
1637 PyErr_SetString(PyExc_OverflowError
,
1638 "join() result is too long for a Python string");
1644 /* Allocate result space. */
1645 res
= PyString_FromStringAndSize((char*)NULL
, sz
);
1651 /* Catenate everything. */
1652 p
= PyString_AS_STRING(res
);
1653 for (i
= 0; i
< seqlen
; ++i
) {
1655 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1656 n
= PyString_GET_SIZE(item
);
1657 Py_MEMCPY(p
, PyString_AS_STRING(item
), n
);
1659 if (i
< seqlen
- 1) {
1660 Py_MEMCPY(p
, sep
, seplen
);
1670 _PyString_Join(PyObject
*sep
, PyObject
*x
)
1672 assert(sep
!= NULL
&& PyString_Check(sep
));
1674 return string_join((PyStringObject
*)sep
, x
);
1677 /* helper macro to fixup start/end slice values */
1678 #define ADJUST_INDICES(start, end, len) \
1681 else if (end < 0) { \
1692 Py_LOCAL_INLINE(Py_ssize_t
)
1693 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
1698 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1699 PyObject
*obj_start
=Py_None
, *obj_end
=Py_None
;
1701 if (!PyArg_ParseTuple(args
, "O|OO:find/rfind/index/rindex", &subobj
,
1702 &obj_start
, &obj_end
))
1704 /* To support None in "start" and "end" arguments, meaning
1705 the same as if they were not passed.
1707 if (obj_start
!= Py_None
)
1708 if (!_PyEval_SliceIndex(obj_start
, &start
))
1710 if (obj_end
!= Py_None
)
1711 if (!_PyEval_SliceIndex(obj_end
, &end
))
1714 if (PyString_Check(subobj
)) {
1715 sub
= PyString_AS_STRING(subobj
);
1716 sub_len
= PyString_GET_SIZE(subobj
);
1718 #ifdef Py_USING_UNICODE
1719 else if (PyUnicode_Check(subobj
))
1720 return PyUnicode_Find(
1721 (PyObject
*)self
, subobj
, start
, end
, dir
);
1723 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
1724 /* XXX - the "expected a character buffer object" is pretty
1725 confusing for a non-expert. remap to something else ? */
1729 return stringlib_find_slice(
1730 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1731 sub
, sub_len
, start
, end
);
1733 return stringlib_rfind_slice(
1734 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1735 sub
, sub_len
, start
, end
);
1739 PyDoc_STRVAR(find__doc__
,
1740 "S.find(sub [,start [,end]]) -> int\n\
1742 Return the lowest index in S where substring sub is found,\n\
1743 such that sub is contained within s[start:end]. Optional\n\
1744 arguments start and end are interpreted as in slice notation.\n\
1746 Return -1 on failure.");
1749 string_find(PyStringObject
*self
, PyObject
*args
)
1751 Py_ssize_t result
= string_find_internal(self
, args
, +1);
1754 return PyInt_FromSsize_t(result
);
1758 PyDoc_STRVAR(index__doc__
,
1759 "S.index(sub [,start [,end]]) -> int\n\
1761 Like S.find() but raise ValueError when the substring is not found.");
1764 string_index(PyStringObject
*self
, PyObject
*args
)
1766 Py_ssize_t result
= string_find_internal(self
, args
, +1);
1770 PyErr_SetString(PyExc_ValueError
,
1771 "substring not found");
1774 return PyInt_FromSsize_t(result
);
1778 PyDoc_STRVAR(rfind__doc__
,
1779 "S.rfind(sub [,start [,end]]) -> int\n\
1781 Return the highest index in S where substring sub is found,\n\
1782 such that sub is contained within s[start:end]. Optional\n\
1783 arguments start and end are interpreted as in slice notation.\n\
1785 Return -1 on failure.");
1788 string_rfind(PyStringObject
*self
, PyObject
*args
)
1790 Py_ssize_t result
= string_find_internal(self
, args
, -1);
1793 return PyInt_FromSsize_t(result
);
1797 PyDoc_STRVAR(rindex__doc__
,
1798 "S.rindex(sub [,start [,end]]) -> int\n\
1800 Like S.rfind() but raise ValueError when the substring is not found.");
1803 string_rindex(PyStringObject
*self
, PyObject
*args
)
1805 Py_ssize_t result
= string_find_internal(self
, args
, -1);
1809 PyErr_SetString(PyExc_ValueError
,
1810 "substring not found");
1813 return PyInt_FromSsize_t(result
);
1817 Py_LOCAL_INLINE(PyObject
*)
1818 do_xstrip(PyStringObject
*self
, int striptype
, PyObject
*sepobj
)
1820 char *s
= PyString_AS_STRING(self
);
1821 Py_ssize_t len
= PyString_GET_SIZE(self
);
1822 char *sep
= PyString_AS_STRING(sepobj
);
1823 Py_ssize_t seplen
= PyString_GET_SIZE(sepobj
);
1827 if (striptype
!= RIGHTSTRIP
) {
1828 while (i
< len
&& memchr(sep
, Py_CHARMASK(s
[i
]), seplen
)) {
1834 if (striptype
!= LEFTSTRIP
) {
1837 } while (j
>= i
&& memchr(sep
, Py_CHARMASK(s
[j
]), seplen
));
1841 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
1843 return (PyObject
*)self
;
1846 return PyString_FromStringAndSize(s
+i
, j
-i
);
1850 Py_LOCAL_INLINE(PyObject
*)
1851 do_strip(PyStringObject
*self
, int striptype
)
1853 char *s
= PyString_AS_STRING(self
);
1854 Py_ssize_t len
= PyString_GET_SIZE(self
), i
, j
;
1857 if (striptype
!= RIGHTSTRIP
) {
1858 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
1864 if (striptype
!= LEFTSTRIP
) {
1867 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
1871 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
1873 return (PyObject
*)self
;
1876 return PyString_FromStringAndSize(s
+i
, j
-i
);
1880 Py_LOCAL_INLINE(PyObject
*)
1881 do_argstrip(PyStringObject
*self
, int striptype
, PyObject
*args
)
1883 PyObject
*sep
= NULL
;
1885 if (!PyArg_ParseTuple(args
, (char *)stripformat
[striptype
], &sep
))
1888 if (sep
!= NULL
&& sep
!= Py_None
) {
1889 if (PyString_Check(sep
))
1890 return do_xstrip(self
, striptype
, sep
);
1891 #ifdef Py_USING_UNICODE
1892 else if (PyUnicode_Check(sep
)) {
1893 PyObject
*uniself
= PyUnicode_FromObject((PyObject
*)self
);
1897 res
= _PyUnicode_XStrip((PyUnicodeObject
*)uniself
,
1903 PyErr_Format(PyExc_TypeError
,
1904 #ifdef Py_USING_UNICODE
1905 "%s arg must be None, str or unicode",
1907 "%s arg must be None or str",
1909 STRIPNAME(striptype
));
1913 return do_strip(self
, striptype
);
1917 PyDoc_STRVAR(strip__doc__
,
1918 "S.strip([chars]) -> string or unicode\n\
1920 Return a copy of the string S with leading and trailing\n\
1921 whitespace removed.\n\
1922 If chars is given and not None, remove characters in chars instead.\n\
1923 If chars is unicode, S will be converted to unicode before stripping");
1926 string_strip(PyStringObject
*self
, PyObject
*args
)
1928 if (PyTuple_GET_SIZE(args
) == 0)
1929 return do_strip(self
, BOTHSTRIP
); /* Common case */
1931 return do_argstrip(self
, BOTHSTRIP
, args
);
1935 PyDoc_STRVAR(lstrip__doc__
,
1936 "S.lstrip([chars]) -> string or unicode\n\
1938 Return a copy of the string S with leading whitespace removed.\n\
1939 If chars is given and not None, remove characters in chars instead.\n\
1940 If chars is unicode, S will be converted to unicode before stripping");
1943 string_lstrip(PyStringObject
*self
, PyObject
*args
)
1945 if (PyTuple_GET_SIZE(args
) == 0)
1946 return do_strip(self
, LEFTSTRIP
); /* Common case */
1948 return do_argstrip(self
, LEFTSTRIP
, args
);
1952 PyDoc_STRVAR(rstrip__doc__
,
1953 "S.rstrip([chars]) -> string or unicode\n\
1955 Return a copy of the string S with trailing whitespace removed.\n\
1956 If chars is given and not None, remove characters in chars instead.\n\
1957 If chars is unicode, S will be converted to unicode before stripping");
1960 string_rstrip(PyStringObject
*self
, PyObject
*args
)
1962 if (PyTuple_GET_SIZE(args
) == 0)
1963 return do_strip(self
, RIGHTSTRIP
); /* Common case */
1965 return do_argstrip(self
, RIGHTSTRIP
, args
);
1969 PyDoc_STRVAR(lower__doc__
,
1970 "S.lower() -> string\n\
1972 Return a copy of the string S converted to lowercase.");
1974 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1976 #define _tolower tolower
1980 string_lower(PyStringObject
*self
)
1983 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
1986 newobj
= PyString_FromStringAndSize(NULL
, n
);
1990 s
= PyString_AS_STRING(newobj
);
1992 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
1994 for (i
= 0; i
< n
; i
++) {
1995 int c
= Py_CHARMASK(s
[i
]);
2003 PyDoc_STRVAR(upper__doc__
,
2004 "S.upper() -> string\n\
2006 Return a copy of the string S converted to uppercase.");
2009 #define _toupper toupper
2013 string_upper(PyStringObject
*self
)
2016 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2019 newobj
= PyString_FromStringAndSize(NULL
, n
);
2023 s
= PyString_AS_STRING(newobj
);
2025 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2027 for (i
= 0; i
< n
; i
++) {
2028 int c
= Py_CHARMASK(s
[i
]);
2036 PyDoc_STRVAR(title__doc__
,
2037 "S.title() -> string\n\
2039 Return a titlecased version of S, i.e. words start with uppercase\n\
2040 characters, all remaining cased characters have lowercase.");
2043 string_title(PyStringObject
*self
)
2045 char *s
= PyString_AS_STRING(self
), *s_new
;
2046 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2047 int previous_is_cased
= 0;
2050 newobj
= PyString_FromStringAndSize(NULL
, n
);
2053 s_new
= PyString_AsString(newobj
);
2054 for (i
= 0; i
< n
; i
++) {
2055 int c
= Py_CHARMASK(*s
++);
2057 if (!previous_is_cased
)
2059 previous_is_cased
= 1;
2060 } else if (isupper(c
)) {
2061 if (previous_is_cased
)
2063 previous_is_cased
= 1;
2065 previous_is_cased
= 0;
2071 PyDoc_STRVAR(capitalize__doc__
,
2072 "S.capitalize() -> string\n\
2074 Return a copy of the string S with only its first character\n\
2078 string_capitalize(PyStringObject
*self
)
2080 char *s
= PyString_AS_STRING(self
), *s_new
;
2081 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2084 newobj
= PyString_FromStringAndSize(NULL
, n
);
2087 s_new
= PyString_AsString(newobj
);
2089 int c
= Py_CHARMASK(*s
++);
2091 *s_new
= toupper(c
);
2096 for (i
= 1; i
< n
; i
++) {
2097 int c
= Py_CHARMASK(*s
++);
2099 *s_new
= tolower(c
);
2108 PyDoc_STRVAR(count__doc__
,
2109 "S.count(sub[, start[, end]]) -> int\n\
2111 Return the number of non-overlapping occurrences of substring sub in\n\
2112 string S[start:end]. Optional arguments start and end are interpreted\n\
2113 as in slice notation.");
2116 string_count(PyStringObject
*self
, PyObject
*args
)
2119 const char *str
= PyString_AS_STRING(self
), *sub
;
2121 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
2123 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
2124 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2127 if (PyString_Check(sub_obj
)) {
2128 sub
= PyString_AS_STRING(sub_obj
);
2129 sub_len
= PyString_GET_SIZE(sub_obj
);
2131 #ifdef Py_USING_UNICODE
2132 else if (PyUnicode_Check(sub_obj
)) {
2134 count
= PyUnicode_Count((PyObject
*)self
, sub_obj
, start
, end
);
2138 return PyInt_FromSsize_t(count
);
2141 else if (PyObject_AsCharBuffer(sub_obj
, &sub
, &sub_len
))
2144 ADJUST_INDICES(start
, end
, PyString_GET_SIZE(self
));
2146 return PyInt_FromSsize_t(
2147 stringlib_count(str
+ start
, end
- start
, sub
, sub_len
, PY_SSIZE_T_MAX
)
2151 PyDoc_STRVAR(swapcase__doc__
,
2152 "S.swapcase() -> string\n\
2154 Return a copy of the string S with uppercase characters\n\
2155 converted to lowercase and vice versa.");
2158 string_swapcase(PyStringObject
*self
)
2160 char *s
= PyString_AS_STRING(self
), *s_new
;
2161 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2164 newobj
= PyString_FromStringAndSize(NULL
, n
);
2167 s_new
= PyString_AsString(newobj
);
2168 for (i
= 0; i
< n
; i
++) {
2169 int c
= Py_CHARMASK(*s
++);
2171 *s_new
= toupper(c
);
2173 else if (isupper(c
)) {
2174 *s_new
= tolower(c
);
2184 PyDoc_STRVAR(translate__doc__
,
2185 "S.translate(table [,deletechars]) -> string\n\
2187 Return a copy of the string S, where all characters occurring\n\
2188 in the optional argument deletechars are removed, and the\n\
2189 remaining characters have been mapped through the given\n\
2190 translation table, which must be a string of length 256.");
2193 string_translate(PyStringObject
*self
, PyObject
*args
)
2195 register char *input
, *output
;
2197 register Py_ssize_t i
, c
, changed
= 0;
2198 PyObject
*input_obj
= (PyObject
*)self
;
2199 const char *output_start
, *del_table
=NULL
;
2200 Py_ssize_t inlen
, tablen
, dellen
= 0;
2202 int trans_table
[256];
2203 PyObject
*tableobj
, *delobj
= NULL
;
2205 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
2206 &tableobj
, &delobj
))
2209 if (PyString_Check(tableobj
)) {
2210 table
= PyString_AS_STRING(tableobj
);
2211 tablen
= PyString_GET_SIZE(tableobj
);
2213 else if (tableobj
== Py_None
) {
2217 #ifdef Py_USING_UNICODE
2218 else if (PyUnicode_Check(tableobj
)) {
2219 /* Unicode .translate() does not support the deletechars
2220 parameter; instead a mapping to None will cause characters
2222 if (delobj
!= NULL
) {
2223 PyErr_SetString(PyExc_TypeError
,
2224 "deletions are implemented differently for unicode");
2227 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
2230 else if (PyObject_AsCharBuffer(tableobj
, &table
, &tablen
))
2233 if (tablen
!= 256) {
2234 PyErr_SetString(PyExc_ValueError
,
2235 "translation table must be 256 characters long");
2239 if (delobj
!= NULL
) {
2240 if (PyString_Check(delobj
)) {
2241 del_table
= PyString_AS_STRING(delobj
);
2242 dellen
= PyString_GET_SIZE(delobj
);
2244 #ifdef Py_USING_UNICODE
2245 else if (PyUnicode_Check(delobj
)) {
2246 PyErr_SetString(PyExc_TypeError
,
2247 "deletions are implemented differently for unicode");
2251 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
2259 inlen
= PyString_GET_SIZE(input_obj
);
2260 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
2263 output_start
= output
= PyString_AsString(result
);
2264 input
= PyString_AS_STRING(input_obj
);
2266 if (dellen
== 0 && table
!= NULL
) {
2267 /* If no deletions are required, use faster code */
2268 for (i
= inlen
; --i
>= 0; ) {
2269 c
= Py_CHARMASK(*input
++);
2270 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
2273 if (changed
|| !PyString_CheckExact(input_obj
))
2276 Py_INCREF(input_obj
);
2280 if (table
== NULL
) {
2281 for (i
= 0; i
< 256; i
++)
2282 trans_table
[i
] = Py_CHARMASK(i
);
2284 for (i
= 0; i
< 256; i
++)
2285 trans_table
[i
] = Py_CHARMASK(table
[i
]);
2288 for (i
= 0; i
< dellen
; i
++)
2289 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
2291 for (i
= inlen
; --i
>= 0; ) {
2292 c
= Py_CHARMASK(*input
++);
2293 if (trans_table
[c
] != -1)
2294 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
2298 if (!changed
&& PyString_CheckExact(input_obj
)) {
2300 Py_INCREF(input_obj
);
2303 /* Fix the size of the resulting string */
2305 _PyString_Resize(&result
, output
- output_start
);
2310 /* find and count characters and substrings */
2312 #define findchar(target, target_len, c) \
2313 ((char *)memchr((const void *)(target), c, target_len))
2315 /* String ops must return a string. */
2316 /* If the object is subclass of string, create a copy */
2317 Py_LOCAL(PyStringObject
*)
2318 return_self(PyStringObject
*self
)
2320 if (PyString_CheckExact(self
)) {
2324 return (PyStringObject
*)PyString_FromStringAndSize(
2325 PyString_AS_STRING(self
),
2326 PyString_GET_SIZE(self
));
2329 Py_LOCAL_INLINE(Py_ssize_t
)
2330 countchar(const char *target
, int target_len
, char c
, Py_ssize_t maxcount
)
2333 const char *start
=target
;
2334 const char *end
=target
+target_len
;
2336 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
2338 if (count
>= maxcount
)
2346 /* Algorithms for different cases of string replacement */
2348 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2349 Py_LOCAL(PyStringObject
*)
2350 replace_interleave(PyStringObject
*self
,
2351 const char *to_s
, Py_ssize_t to_len
,
2352 Py_ssize_t maxcount
)
2354 char *self_s
, *result_s
;
2355 Py_ssize_t self_len
, result_len
;
2356 Py_ssize_t count
, i
, product
;
2357 PyStringObject
*result
;
2359 self_len
= PyString_GET_SIZE(self
);
2361 /* 1 at the end plus 1 after every character */
2363 if (maxcount
< count
)
2366 /* Check for overflow */
2367 /* result_len = count * to_len + self_len; */
2368 product
= count
* to_len
;
2369 if (product
/ to_len
!= count
) {
2370 PyErr_SetString(PyExc_OverflowError
,
2371 "replace string is too long");
2374 result_len
= product
+ self_len
;
2375 if (result_len
< 0) {
2376 PyErr_SetString(PyExc_OverflowError
,
2377 "replace string is too long");
2381 if (! (result
= (PyStringObject
*)
2382 PyString_FromStringAndSize(NULL
, result_len
)) )
2385 self_s
= PyString_AS_STRING(self
);
2386 result_s
= PyString_AS_STRING(result
);
2388 /* TODO: special case single character, which doesn't need memcpy */
2390 /* Lay the first one down (guaranteed this will occur) */
2391 Py_MEMCPY(result_s
, to_s
, to_len
);
2395 for (i
=0; i
<count
; i
++) {
2396 *result_s
++ = *self_s
++;
2397 Py_MEMCPY(result_s
, to_s
, to_len
);
2401 /* Copy the rest of the original string */
2402 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
2407 /* Special case for deleting a single character */
2408 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2409 Py_LOCAL(PyStringObject
*)
2410 replace_delete_single_character(PyStringObject
*self
,
2411 char from_c
, Py_ssize_t maxcount
)
2413 char *self_s
, *result_s
;
2414 char *start
, *next
, *end
;
2415 Py_ssize_t self_len
, result_len
;
2417 PyStringObject
*result
;
2419 self_len
= PyString_GET_SIZE(self
);
2420 self_s
= PyString_AS_STRING(self
);
2422 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2424 return return_self(self
);
2427 result_len
= self_len
- count
; /* from_len == 1 */
2428 assert(result_len
>=0);
2430 if ( (result
= (PyStringObject
*)
2431 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2433 result_s
= PyString_AS_STRING(result
);
2436 end
= self_s
+ self_len
;
2437 while (count
-- > 0) {
2438 next
= findchar(start
, end
-start
, from_c
);
2441 Py_MEMCPY(result_s
, start
, next
-start
);
2442 result_s
+= (next
-start
);
2445 Py_MEMCPY(result_s
, start
, end
-start
);
2450 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2452 Py_LOCAL(PyStringObject
*)
2453 replace_delete_substring(PyStringObject
*self
,
2454 const char *from_s
, Py_ssize_t from_len
,
2455 Py_ssize_t maxcount
) {
2456 char *self_s
, *result_s
;
2457 char *start
, *next
, *end
;
2458 Py_ssize_t self_len
, result_len
;
2459 Py_ssize_t count
, offset
;
2460 PyStringObject
*result
;
2462 self_len
= PyString_GET_SIZE(self
);
2463 self_s
= PyString_AS_STRING(self
);
2465 count
= stringlib_count(self_s
, self_len
,
2471 return return_self(self
);
2474 result_len
= self_len
- (count
* from_len
);
2475 assert (result_len
>=0);
2477 if ( (result
= (PyStringObject
*)
2478 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2481 result_s
= PyString_AS_STRING(result
);
2484 end
= self_s
+ self_len
;
2485 while (count
-- > 0) {
2486 offset
= stringlib_find(start
, end
-start
,
2491 next
= start
+ offset
;
2493 Py_MEMCPY(result_s
, start
, next
-start
);
2495 result_s
+= (next
-start
);
2496 start
= next
+from_len
;
2498 Py_MEMCPY(result_s
, start
, end
-start
);
2502 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2503 Py_LOCAL(PyStringObject
*)
2504 replace_single_character_in_place(PyStringObject
*self
,
2505 char from_c
, char to_c
,
2506 Py_ssize_t maxcount
)
2508 char *self_s
, *result_s
, *start
, *end
, *next
;
2509 Py_ssize_t self_len
;
2510 PyStringObject
*result
;
2512 /* The result string will be the same size */
2513 self_s
= PyString_AS_STRING(self
);
2514 self_len
= PyString_GET_SIZE(self
);
2516 next
= findchar(self_s
, self_len
, from_c
);
2519 /* No matches; return the original string */
2520 return return_self(self
);
2523 /* Need to make a new string */
2524 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2527 result_s
= PyString_AS_STRING(result
);
2528 Py_MEMCPY(result_s
, self_s
, self_len
);
2530 /* change everything in-place, starting with this one */
2531 start
= result_s
+ (next
-self_s
);
2534 end
= result_s
+ self_len
;
2536 while (--maxcount
> 0) {
2537 next
= findchar(start
, end
-start
, from_c
);
2547 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2548 Py_LOCAL(PyStringObject
*)
2549 replace_substring_in_place(PyStringObject
*self
,
2550 const char *from_s
, Py_ssize_t from_len
,
2551 const char *to_s
, Py_ssize_t to_len
,
2552 Py_ssize_t maxcount
)
2554 char *result_s
, *start
, *end
;
2556 Py_ssize_t self_len
, offset
;
2557 PyStringObject
*result
;
2559 /* The result string will be the same size */
2561 self_s
= PyString_AS_STRING(self
);
2562 self_len
= PyString_GET_SIZE(self
);
2564 offset
= stringlib_find(self_s
, self_len
,
2568 /* No matches; return the original string */
2569 return return_self(self
);
2572 /* Need to make a new string */
2573 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2576 result_s
= PyString_AS_STRING(result
);
2577 Py_MEMCPY(result_s
, self_s
, self_len
);
2579 /* change everything in-place, starting with this one */
2580 start
= result_s
+ offset
;
2581 Py_MEMCPY(start
, to_s
, from_len
);
2583 end
= result_s
+ self_len
;
2585 while ( --maxcount
> 0) {
2586 offset
= stringlib_find(start
, end
-start
,
2591 Py_MEMCPY(start
+offset
, to_s
, from_len
);
2592 start
+= offset
+from_len
;
2598 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2599 Py_LOCAL(PyStringObject
*)
2600 replace_single_character(PyStringObject
*self
,
2602 const char *to_s
, Py_ssize_t to_len
,
2603 Py_ssize_t maxcount
)
2605 char *self_s
, *result_s
;
2606 char *start
, *next
, *end
;
2607 Py_ssize_t self_len
, result_len
;
2608 Py_ssize_t count
, product
;
2609 PyStringObject
*result
;
2611 self_s
= PyString_AS_STRING(self
);
2612 self_len
= PyString_GET_SIZE(self
);
2614 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2616 /* no matches, return unchanged */
2617 return return_self(self
);
2620 /* use the difference between current and new, hence the "-1" */
2621 /* result_len = self_len + count * (to_len-1) */
2622 product
= count
* (to_len
-1);
2623 if (product
/ (to_len
-1) != count
) {
2624 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2627 result_len
= self_len
+ product
;
2628 if (result_len
< 0) {
2629 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2633 if ( (result
= (PyStringObject
*)
2634 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2636 result_s
= PyString_AS_STRING(result
);
2639 end
= self_s
+ self_len
;
2640 while (count
-- > 0) {
2641 next
= findchar(start
, end
-start
, from_c
);
2645 if (next
== start
) {
2646 /* replace with the 'to' */
2647 Py_MEMCPY(result_s
, to_s
, to_len
);
2651 /* copy the unchanged old then the 'to' */
2652 Py_MEMCPY(result_s
, start
, next
-start
);
2653 result_s
+= (next
-start
);
2654 Py_MEMCPY(result_s
, to_s
, to_len
);
2659 /* Copy the remainder of the remaining string */
2660 Py_MEMCPY(result_s
, start
, end
-start
);
2665 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2666 Py_LOCAL(PyStringObject
*)
2667 replace_substring(PyStringObject
*self
,
2668 const char *from_s
, Py_ssize_t from_len
,
2669 const char *to_s
, Py_ssize_t to_len
,
2670 Py_ssize_t maxcount
) {
2671 char *self_s
, *result_s
;
2672 char *start
, *next
, *end
;
2673 Py_ssize_t self_len
, result_len
;
2674 Py_ssize_t count
, offset
, product
;
2675 PyStringObject
*result
;
2677 self_s
= PyString_AS_STRING(self
);
2678 self_len
= PyString_GET_SIZE(self
);
2680 count
= stringlib_count(self_s
, self_len
,
2685 /* no matches, return unchanged */
2686 return return_self(self
);
2689 /* Check for overflow */
2690 /* result_len = self_len + count * (to_len-from_len) */
2691 product
= count
* (to_len
-from_len
);
2692 if (product
/ (to_len
-from_len
) != count
) {
2693 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2696 result_len
= self_len
+ product
;
2697 if (result_len
< 0) {
2698 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2702 if ( (result
= (PyStringObject
*)
2703 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2705 result_s
= PyString_AS_STRING(result
);
2708 end
= self_s
+ self_len
;
2709 while (count
-- > 0) {
2710 offset
= stringlib_find(start
, end
-start
,
2715 next
= start
+offset
;
2716 if (next
== start
) {
2717 /* replace with the 'to' */
2718 Py_MEMCPY(result_s
, to_s
, to_len
);
2722 /* copy the unchanged old then the 'to' */
2723 Py_MEMCPY(result_s
, start
, next
-start
);
2724 result_s
+= (next
-start
);
2725 Py_MEMCPY(result_s
, to_s
, to_len
);
2727 start
= next
+from_len
;
2730 /* Copy the remainder of the remaining string */
2731 Py_MEMCPY(result_s
, start
, end
-start
);
2737 Py_LOCAL(PyStringObject
*)
2738 replace(PyStringObject
*self
,
2739 const char *from_s
, Py_ssize_t from_len
,
2740 const char *to_s
, Py_ssize_t to_len
,
2741 Py_ssize_t maxcount
)
2744 maxcount
= PY_SSIZE_T_MAX
;
2745 } else if (maxcount
== 0 || PyString_GET_SIZE(self
) == 0) {
2746 /* nothing to do; return the original string */
2747 return return_self(self
);
2750 if (maxcount
== 0 ||
2751 (from_len
== 0 && to_len
== 0)) {
2752 /* nothing to do; return the original string */
2753 return return_self(self
);
2756 /* Handle zero-length special cases */
2758 if (from_len
== 0) {
2759 /* insert the 'to' string everywhere. */
2760 /* >>> "Python".replace("", ".") */
2761 /* '.P.y.t.h.o.n.' */
2762 return replace_interleave(self
, to_s
, to_len
, maxcount
);
2765 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2766 /* point for an empty self string to generate a non-empty string */
2767 /* Special case so the remaining code always gets a non-empty string */
2768 if (PyString_GET_SIZE(self
) == 0) {
2769 return return_self(self
);
2773 /* delete all occurances of 'from' string */
2774 if (from_len
== 1) {
2775 return replace_delete_single_character(
2776 self
, from_s
[0], maxcount
);
2778 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
2782 /* Handle special case where both strings have the same length */
2784 if (from_len
== to_len
) {
2785 if (from_len
== 1) {
2786 return replace_single_character_in_place(
2792 return replace_substring_in_place(
2793 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2797 /* Otherwise use the more generic algorithms */
2798 if (from_len
== 1) {
2799 return replace_single_character(self
, from_s
[0],
2800 to_s
, to_len
, maxcount
);
2802 /* len('from')>=2, len('to')>=1 */
2803 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2807 PyDoc_STRVAR(replace__doc__
,
2808 "S.replace (old, new[, count]) -> string\n\
2810 Return a copy of string S with all occurrences of substring\n\
2811 old replaced by new. If the optional argument count is\n\
2812 given, only the first count occurrences are replaced.");
2815 string_replace(PyStringObject
*self
, PyObject
*args
)
2817 Py_ssize_t count
= -1;
2818 PyObject
*from
, *to
;
2819 const char *from_s
, *to_s
;
2820 Py_ssize_t from_len
, to_len
;
2822 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
2825 if (PyString_Check(from
)) {
2826 from_s
= PyString_AS_STRING(from
);
2827 from_len
= PyString_GET_SIZE(from
);
2829 #ifdef Py_USING_UNICODE
2830 if (PyUnicode_Check(from
))
2831 return PyUnicode_Replace((PyObject
*)self
,
2834 else if (PyObject_AsCharBuffer(from
, &from_s
, &from_len
))
2837 if (PyString_Check(to
)) {
2838 to_s
= PyString_AS_STRING(to
);
2839 to_len
= PyString_GET_SIZE(to
);
2841 #ifdef Py_USING_UNICODE
2842 else if (PyUnicode_Check(to
))
2843 return PyUnicode_Replace((PyObject
*)self
,
2846 else if (PyObject_AsCharBuffer(to
, &to_s
, &to_len
))
2849 return (PyObject
*)replace((PyStringObject
*) self
,
2851 to_s
, to_len
, count
);
2856 /* Matches the end (direction >= 0) or start (direction < 0) of self
2857 * against substr, using the start and end arguments. Returns
2858 * -1 on error, 0 if not found and 1 if found.
2861 _string_tailmatch(PyStringObject
*self
, PyObject
*substr
, Py_ssize_t start
,
2862 Py_ssize_t end
, int direction
)
2864 Py_ssize_t len
= PyString_GET_SIZE(self
);
2869 if (PyString_Check(substr
)) {
2870 sub
= PyString_AS_STRING(substr
);
2871 slen
= PyString_GET_SIZE(substr
);
2873 #ifdef Py_USING_UNICODE
2874 else if (PyUnicode_Check(substr
))
2875 return PyUnicode_Tailmatch((PyObject
*)self
,
2876 substr
, start
, end
, direction
);
2878 else if (PyObject_AsCharBuffer(substr
, &sub
, &slen
))
2880 str
= PyString_AS_STRING(self
);
2882 ADJUST_INDICES(start
, end
, len
);
2884 if (direction
< 0) {
2886 if (start
+slen
> len
)
2890 if (end
-start
< slen
|| start
> len
)
2893 if (end
-slen
> start
)
2896 if (end
-start
>= slen
)
2897 return ! memcmp(str
+start
, sub
, slen
);
2902 PyDoc_STRVAR(startswith__doc__
,
2903 "S.startswith(prefix[, start[, end]]) -> bool\n\
2905 Return True if S starts with the specified prefix, False otherwise.\n\
2906 With optional start, test S beginning at that position.\n\
2907 With optional end, stop comparing S at that position.\n\
2908 prefix can also be a tuple of strings to try.");
2911 string_startswith(PyStringObject
*self
, PyObject
*args
)
2913 Py_ssize_t start
= 0;
2914 Py_ssize_t end
= PY_SSIZE_T_MAX
;
2918 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
2919 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2921 if (PyTuple_Check(subobj
)) {
2923 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
2924 result
= _string_tailmatch(self
,
2925 PyTuple_GET_ITEM(subobj
, i
),
2935 result
= _string_tailmatch(self
, subobj
, start
, end
, -1);
2939 return PyBool_FromLong(result
);
2943 PyDoc_STRVAR(endswith__doc__
,
2944 "S.endswith(suffix[, start[, end]]) -> bool\n\
2946 Return True if S ends with the specified suffix, False otherwise.\n\
2947 With optional start, test S beginning at that position.\n\
2948 With optional end, stop comparing S at that position.\n\
2949 suffix can also be a tuple of strings to try.");
2952 string_endswith(PyStringObject
*self
, PyObject
*args
)
2954 Py_ssize_t start
= 0;
2955 Py_ssize_t end
= PY_SSIZE_T_MAX
;
2959 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
2960 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2962 if (PyTuple_Check(subobj
)) {
2964 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
2965 result
= _string_tailmatch(self
,
2966 PyTuple_GET_ITEM(subobj
, i
),
2976 result
= _string_tailmatch(self
, subobj
, start
, end
, +1);
2980 return PyBool_FromLong(result
);
2984 PyDoc_STRVAR(encode__doc__
,
2985 "S.encode([encoding[,errors]]) -> object\n\
2987 Encodes S using the codec registered for encoding. encoding defaults\n\
2988 to the default encoding. errors may be given to set a different error\n\
2989 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2990 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2991 'xmlcharrefreplace' as well as any other name registered with\n\
2992 codecs.register_error that is able to handle UnicodeEncodeErrors.");
2995 string_encode(PyStringObject
*self
, PyObject
*args
, PyObject
*kwargs
)
2997 static char *kwlist
[] = {"encoding", "errors", 0};
2998 char *encoding
= NULL
;
2999 char *errors
= NULL
;
3002 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|ss:encode",
3003 kwlist
, &encoding
, &errors
))
3005 v
= PyString_AsEncodedObject((PyObject
*)self
, encoding
, errors
);
3008 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3009 PyErr_Format(PyExc_TypeError
,
3010 "encoder did not return a string/unicode object "
3012 Py_TYPE(v
)->tp_name
);
/* str.decode(): parses the optional `encoding` and `errors` arguments
   (positional or keyword, both initialised to NULL), hands them to
   PyString_AsDecodedObject() and raises TypeError when the codec result is
   neither a str nor a unicode object.
   NOTE(review): this listing omits some original lines (see the gaps in the
   embedded line numbers); the early-exit and final return statements are
   not visible here. */
3023 PyDoc_STRVAR(decode__doc__
,
3024 "S.decode([encoding[,errors]]) -> object\n\
3026 Decodes S using the codec registered for encoding. encoding defaults\n\
3027 to the default encoding. errors may be given to set a different error\n\
3028 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3029 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3030 as well as any other name registered with codecs.register_error that is\n\
3031 able to handle UnicodeDecodeErrors.");
3034 string_decode(PyStringObject
*self
, PyObject
*args
, PyObject
*kwargs
)
3036 static char *kwlist
[] = {"encoding", "errors", 0};
3037 char *encoding
= NULL
;
3038 char *errors
= NULL
;
/* Both arguments are optional C strings ("|ss"). */
3041 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|ss:decode",
3042 kwlist
, &encoding
, &errors
))
3044 v
= PyString_AsDecodedObject((PyObject
*)self
, encoding
, errors
);
/* Only str or unicode results are accepted from the codec. */
3047 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3048 PyErr_Format(PyExc_TypeError
,
3049 "decoder did not return a string/unicode object "
3051 Py_TYPE(v
)->tp_name
);
/* str.expandtabs(): two-pass tab expansion.
   Pass 1 walks the input and accumulates the output length in i (chars up
   to the last line break) and j (column within the current line), comparing
   against PY_SSIZE_T_MAX before each addition.  Pass 2 allocates a string
   of i + j bytes and walks the input again to fill it.  The final visible
   statement sets OverflowError("new string is too long").
   NOTE(review): several original lines are missing from this listing (the
   declarations of tabsize, q and u, the tab/non-tab branches and the
   statements guarded by the overflow checks), so the exact control flow
   must be confirmed against the full source. */
3062 PyDoc_STRVAR(expandtabs__doc__
,
3063 "S.expandtabs([tabsize]) -> string\n\
3065 Return a copy of S where all tab characters are expanded using spaces.\n\
3066 If tabsize is not given, a tab size of 8 characters is assumed.");
3069 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
3071 const char *e
, *p
, *qe
;
3073 Py_ssize_t i
, j
, incr
;
3077 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
3080 /* First pass: determine size of output string */
3081 i
= 0; /* chars up to and including most recent \n or \r */
3082 j
= 0; /* chars since most recent \n or \r (use in tab calculations) */
3083 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
); /* end of input */
3084 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
/* incr = distance from column j to the next multiple of tabsize. */
3087 incr
= tabsize
- (j
% tabsize
);
/* Overflow guard: j + incr must not exceed PY_SSIZE_T_MAX. */
3088 if (j
> PY_SSIZE_T_MAX
- incr
)
3094 if (j
> PY_SSIZE_T_MAX
- 1)
3097 if (*p
== '\n' || *p
== '\r') {
/* Overflow guard: i + j must not exceed PY_SSIZE_T_MAX. */
3098 if (i
> PY_SSIZE_T_MAX
- j
)
3105 if (i
> PY_SSIZE_T_MAX
- j
)
3108 /* Second pass: create output string and fill it */
3109 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
3113 j
= 0; /* same as in first pass */
3114 q
= PyString_AS_STRING(u
); /* next output char */
3115 qe
= PyString_AS_STRING(u
) + PyString_GET_SIZE(u
); /* end of output */
3117 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
/* i is reused here as the distance to the next tab stop. */
3120 i
= tabsize
- (j
% tabsize
);
3134 if (*p
== '\n' || *p
== '\r')
3143 PyErr_SetString(PyExc_OverflowError
, "new string is too long");
/* pad(self, left, right, fill): shared helper for ljust/rjust/center/zfill.
   When no padding is requested and self is an exact str, self itself is
   returned.  Otherwise a new string of left + len(self) + right bytes is
   allocated, the first `left` bytes are set to `fill`, the contents of self
   are copied after them, and a final memset starts at the byte just past
   the copied data.
   NOTE(review): lines between the signature and the first `if` are missing
   from this listing, as are the NULL check on u and the arguments of the
   last memset -- confirm how negative left/right values are treated. */
3147 Py_LOCAL_INLINE(PyObject
*)
3148 pad(PyStringObject
*self
, Py_ssize_t left
, Py_ssize_t right
, char fill
)
3157 if (left
== 0 && right
== 0 && PyString_CheckExact(self
)) {
3159 return (PyObject
*)self
;
3162 u
= PyString_FromStringAndSize(NULL
,
3163 left
+ PyString_GET_SIZE(self
) + right
);
3166 memset(PyString_AS_STRING(u
), fill
, left
);
3167 Py_MEMCPY(PyString_AS_STRING(u
) + left
,
3168 PyString_AS_STRING(self
),
3169 PyString_GET_SIZE(self
));
3171 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
/* str.ljust(width[, fillchar]): parses width as Py_ssize_t ("n") and an
   optional single fill character ("c", default ' ').  If self is already
   at least `width` long and is an exact str, self is returned; otherwise
   the work is delegated to pad() with all padding on the right.
   NOTE(review): the declaration of width and the statement preceding
   `return (PyObject *) self` are not visible in this listing. */
3178 PyDoc_STRVAR(ljust__doc__
,
3179 "S.ljust(width[, fillchar]) -> string\n"
3181 "Return S left-justified in a string of length width. Padding is\n"
3182 "done using the specified fill character (default is a space).");
3185 string_ljust(PyStringObject
*self
, PyObject
*args
)
3188 char fillchar
= ' ';
3190 if (!PyArg_ParseTuple(args
, "n|c:ljust", &width
, &fillchar
))
3193 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3195 return (PyObject
*) self
;
/* left padding 0, right padding = width - len(self) */
3198 return pad(self
, 0, width
- PyString_GET_SIZE(self
), fillchar
);
/* str.rjust(width[, fillchar]): parses width as Py_ssize_t ("n") and an
   optional single fill character ("c", default ' ').  If self is already
   at least `width` long and is an exact str, self is returned; otherwise
   the work is delegated to pad() with all padding on the left.
   NOTE(review): the declaration of width and the statement preceding
   `return (PyObject *) self` are not visible in this listing. */
3202 PyDoc_STRVAR(rjust__doc__
,
3203 "S.rjust(width[, fillchar]) -> string\n"
3205 "Return S right-justified in a string of length width. Padding is\n"
3206 "done using the specified fill character (default is a space)");
3209 string_rjust(PyStringObject
*self
, PyObject
*args
)
3212 char fillchar
= ' ';
3214 if (!PyArg_ParseTuple(args
, "n|c:rjust", &width
, &fillchar
))
3217 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3219 return (PyObject
*) self
;
/* left padding = width - len(self), right padding 0 */
3222 return pad(self
, width
- PyString_GET_SIZE(self
), 0, fillchar
);
/* str.center(width[, fillchar]): parses width ("n") and an optional fill
   character ("c", default ' ').  If self is already at least `width` long
   and is an exact str, self is returned.  Otherwise the total margin
   marg = width - len(self) is split into a left part and a right part
   (marg - left) and pad() builds the result.
   NOTE(review): the declaration of width and the statement preceding
   `return (PyObject *) self` are not visible in this listing. */
3226 PyDoc_STRVAR(center__doc__
,
3227 "S.center(width[, fillchar]) -> string\n"
3229 "Return S centered in a string of length width. Padding is\n"
3230 "done using the specified fill character (default is a space)");
3233 string_center(PyStringObject
*self
, PyObject
*args
)
3235 Py_ssize_t marg
, left
;
3237 char fillchar
= ' ';
3239 if (!PyArg_ParseTuple(args
, "n|c:center", &width
, &fillchar
))
3242 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3244 return (PyObject
*) self
;
3247 marg
= width
- PyString_GET_SIZE(self
);
/* Half the margin goes left; one extra byte is added on the left only
   when both marg and width are odd (marg & width & 1). */
3248 left
= marg
/ 2 + (marg
& width
& 1);
3250 return pad(self
, left
, marg
- left
, fillchar
);
/* str.zfill(width): parses width ("n").  When self is already at least
   `width` long, an exact str is returned as-is and any other object is
   copied into a new string.  Otherwise pad() prepends `fill` = width -
   len(self) '0' characters; if the first original character (now at index
   `fill`) is '+' or '-', the sign is moved to the front of the result.
   NOTE(review): the local declarations, the NULL check on s and the two
   statements that perform the sign move are missing from this listing. */
3253 PyDoc_STRVAR(zfill__doc__
,
3254 "S.zfill(width) -> string\n"
3256 "Pad a numeric string S with zeros on the left, to fill a field\n"
3257 "of the specified width. The string S is never truncated.");
3260 string_zfill(PyStringObject
*self
, PyObject
*args
)
3267 if (!PyArg_ParseTuple(args
, "n:zfill", &width
))
3270 if (PyString_GET_SIZE(self
) >= width
) {
3271 if (PyString_CheckExact(self
)) {
3273 return (PyObject
*) self
;
3276 return PyString_FromStringAndSize(
3277 PyString_AS_STRING(self
),
3278 PyString_GET_SIZE(self
)
3282 fill
= width
- PyString_GET_SIZE(self
);
3284 s
= pad(self
, fill
, 0, '0');
3289 p
= PyString_AS_STRING(s
);
/* p[fill] is the first character of the original string. */
3290 if (p
[fill
] == '+' || p
[fill
] == '-') {
3291 /* move sign to beginning of string */
3296 return (PyObject
*) s
;
/* str.isspace(): returns a bool.  A one-character string is answered by a
   shortcut, the empty string yields False, and otherwise the bytes are
   scanned from p to e, returning False from inside the loop and True once
   the scan completes.
   NOTE(review): the per-character predicate lines (second half of the
   shortcut condition and the test inside the loop) are missing from this
   listing; per the docstring they test for whitespace. */
3299 PyDoc_STRVAR(isspace__doc__
,
3300 "S.isspace() -> bool\n\
3302 Return True if all characters in S are whitespace\n\
3303 and there is at least one character in S, False otherwise.");
3306 string_isspace(PyStringObject
*self
)
3308 register const unsigned char *p
3309 = (unsigned char *) PyString_AS_STRING(self
);
3310 register const unsigned char *e
;
3312 /* Shortcut for single character strings */
3313 if (PyString_GET_SIZE(self
) == 1 &&
3315 return PyBool_FromLong(1);
3317 /* Special case for empty strings */
3318 if (PyString_GET_SIZE(self
) == 0)
3319 return PyBool_FromLong(0);
3321 e
= p
+ PyString_GET_SIZE(self
);
3322 for (; p
< e
; p
++) {
3324 return PyBool_FromLong(0);
3326 return PyBool_FromLong(1);
/* str.isalpha(): returns a bool.  A one-character string is answered by a
   shortcut, the empty string yields False, and otherwise the bytes are
   scanned from p to e, returning False from inside the loop and True once
   the scan completes.
   NOTE(review): the per-character predicate lines (second half of the
   shortcut condition and the test inside the loop) are missing from this
   listing; per the docstring they test for alphabetic characters. */
3330 PyDoc_STRVAR(isalpha__doc__
,
3331 "S.isalpha() -> bool\n\
3333 Return True if all characters in S are alphabetic\n\
3334 and there is at least one character in S, False otherwise.");
3337 string_isalpha(PyStringObject
*self
)
3339 register const unsigned char *p
3340 = (unsigned char *) PyString_AS_STRING(self
);
3341 register const unsigned char *e
;
3343 /* Shortcut for single character strings */
3344 if (PyString_GET_SIZE(self
) == 1 &&
3346 return PyBool_FromLong(1);
3348 /* Special case for empty strings */
3349 if (PyString_GET_SIZE(self
) == 0)
3350 return PyBool_FromLong(0);
3352 e
= p
+ PyString_GET_SIZE(self
);
3353 for (; p
< e
; p
++) {
3355 return PyBool_FromLong(0);
3357 return PyBool_FromLong(1);
/* str.isalnum(): returns a bool.  A one-character string is answered by a
   shortcut, the empty string yields False, and otherwise the bytes are
   scanned from p to e, returning False from inside the loop and True once
   the scan completes.
   NOTE(review): the per-character predicate lines (second half of the
   shortcut condition and the test inside the loop) are missing from this
   listing; per the docstring they test for alphanumeric characters. */
3361 PyDoc_STRVAR(isalnum__doc__
,
3362 "S.isalnum() -> bool\n\
3364 Return True if all characters in S are alphanumeric\n\
3365 and there is at least one character in S, False otherwise.");
3368 string_isalnum(PyStringObject
*self
)
3370 register const unsigned char *p
3371 = (unsigned char *) PyString_AS_STRING(self
);
3372 register const unsigned char *e
;
3374 /* Shortcut for single character strings */
3375 if (PyString_GET_SIZE(self
) == 1 &&
3377 return PyBool_FromLong(1);
3379 /* Special case for empty strings */
3380 if (PyString_GET_SIZE(self
) == 0)
3381 return PyBool_FromLong(0);
3383 e
= p
+ PyString_GET_SIZE(self
);
3384 for (; p
< e
; p
++) {
3386 return PyBool_FromLong(0);
3388 return PyBool_FromLong(1);
/* str.isdigit(): returns a bool.  A one-character string is answered by a
   shortcut, the empty string yields False, and otherwise the bytes are
   scanned from p to e, returning False from inside the loop and True once
   the scan completes.
   NOTE(review): the per-character predicate lines (second half of the
   shortcut condition and the test inside the loop) are missing from this
   listing; per the docstring they test for digits. */
3392 PyDoc_STRVAR(isdigit__doc__
,
3393 "S.isdigit() -> bool\n\
3395 Return True if all characters in S are digits\n\
3396 and there is at least one character in S, False otherwise.");
3399 string_isdigit(PyStringObject
*self
)
3401 register const unsigned char *p
3402 = (unsigned char *) PyString_AS_STRING(self
);
3403 register const unsigned char *e
;
3405 /* Shortcut for single character strings */
3406 if (PyString_GET_SIZE(self
) == 1 &&
3408 return PyBool_FromLong(1);
3410 /* Special case for empty strings */
3411 if (PyString_GET_SIZE(self
) == 0)
3412 return PyBool_FromLong(0);
3414 e
= p
+ PyString_GET_SIZE(self
);
3415 for (; p
< e
; p
++) {
3417 return PyBool_FromLong(0);
3419 return PyBool_FromLong(1);
/* str.islower(): returns a bool.  A one-character string is answered
   directly with islower(); the empty string yields False.  Longer strings
   are scanned byte by byte: the loop can return False early, and a
   lowercase byte seen while `cased` is still 0 takes the else-if branch.
   The final answer is the value of `cased`.
   NOTE(review): the declaration/initialisation of `cased`, the condition
   that triggers the early False and the body of the else-if branch are
   missing from this listing. */
3423 PyDoc_STRVAR(islower__doc__
,
3424 "S.islower() -> bool\n\
3426 Return True if all cased characters in S are lowercase and there is\n\
3427 at least one cased character in S, False otherwise.");
3430 string_islower(PyStringObject
*self
)
3432 register const unsigned char *p
3433 = (unsigned char *) PyString_AS_STRING(self
);
3434 register const unsigned char *e
;
3437 /* Shortcut for single character strings */
3438 if (PyString_GET_SIZE(self
) == 1)
3439 return PyBool_FromLong(islower(*p
) != 0);
3441 /* Special case for empty strings */
3442 if (PyString_GET_SIZE(self
) == 0)
3443 return PyBool_FromLong(0);
3445 e
= p
+ PyString_GET_SIZE(self
);
3447 for (; p
< e
; p
++) {
3449 return PyBool_FromLong(0);
3450 else if (!cased
&& islower(*p
))
3453 return PyBool_FromLong(cased
);
/* str.isupper(): returns a bool.  A one-character string is answered
   directly with isupper(); the empty string yields False.  Longer strings
   are scanned byte by byte: the loop can return False early, and an
   uppercase byte seen while `cased` is still 0 takes the else-if branch.
   The final answer is the value of `cased`.
   NOTE(review): the declaration/initialisation of `cased`, the condition
   that triggers the early False and the body of the else-if branch are
   missing from this listing. */
3457 PyDoc_STRVAR(isupper__doc__
,
3458 "S.isupper() -> bool\n\
3460 Return True if all cased characters in S are uppercase and there is\n\
3461 at least one cased character in S, False otherwise.");
3464 string_isupper(PyStringObject
*self
)
3466 register const unsigned char *p
3467 = (unsigned char *) PyString_AS_STRING(self
);
3468 register const unsigned char *e
;
3471 /* Shortcut for single character strings */
3472 if (PyString_GET_SIZE(self
) == 1)
3473 return PyBool_FromLong(isupper(*p
) != 0);
3475 /* Special case for empty strings */
3476 if (PyString_GET_SIZE(self
) == 0)
3477 return PyBool_FromLong(0);
3479 e
= p
+ PyString_GET_SIZE(self
);
3481 for (; p
< e
; p
++) {
3483 return PyBool_FromLong(0);
3484 else if (!cased
&& isupper(*p
))
3487 return PyBool_FromLong(cased
);
/* str.istitle(): returns a bool.  A one-character string is answered
   directly with isupper(); the empty string yields False.  Longer strings
   are scanned while tracking `previous_is_cased`: the first visible branch
   returns False when the previous character was cased, the islower()
   branch returns False when the previous character was NOT cased, and the
   remaining branch resets previous_is_cased to 0.  The final answer is the
   value of `cased`.  The second parameter `uncased` is not referenced in
   any visible line.
   NOTE(review): the closing lines of the docstring, the initialisation and
   updates of `cased`, and the condition of the first branch are missing
   from this listing. */
3491 PyDoc_STRVAR(istitle__doc__
,
3492 "S.istitle() -> bool\n\
3494 Return True if S is a titlecased string and there is at least one\n\
3495 character in S, i.e. uppercase characters may only follow uncased\n\
3496 characters and lowercase characters only cased ones. Return False\n\
3500 string_istitle(PyStringObject
*self
, PyObject
*uncased
)
3502 register const unsigned char *p
3503 = (unsigned char *) PyString_AS_STRING(self
);
3504 register const unsigned char *e
;
3505 int cased
, previous_is_cased
;
3507 /* Shortcut for single character strings */
3508 if (PyString_GET_SIZE(self
) == 1)
3509 return PyBool_FromLong(isupper(*p
) != 0);
3511 /* Special case for empty strings */
3512 if (PyString_GET_SIZE(self
) == 0)
3513 return PyBool_FromLong(0);
3515 e
= p
+ PyString_GET_SIZE(self
);
3517 previous_is_cased
= 0;
3518 for (; p
< e
; p
++) {
3519 register const unsigned char ch
= *p
;
3522 if (previous_is_cased
)
3523 return PyBool_FromLong(0);
3524 previous_is_cased
= 1;
3527 else if (islower(ch
)) {
3528 if (!previous_is_cased
)
3529 return PyBool_FromLong(0);
3530 previous_is_cased
= 1;
3534 previous_is_cased
= 0;
3536 return PyBool_FromLong(cased
);
/* str.splitlines([keepends]): parses an optional int flag ("|i") into
   keepends and delegates to stringlib_splitlines(), passing the object
   itself plus its character buffer and length.
   NOTE(review): the declaration of keepends and the final argument line of
   the stringlib_splitlines() call are missing from this listing. */
3540 PyDoc_STRVAR(splitlines__doc__
,
3541 "S.splitlines([keepends]) -> list of strings\n\
3543 Return a list of the lines in S, breaking at line boundaries.\n\
3544 Line breaks are not included in the resulting list unless keepends\n\
3545 is given and true.");
3548 string_splitlines(PyStringObject
*self
, PyObject
*args
)
3552 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
3555 return stringlib_splitlines(
3556 (PyObject
*) self
, PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
/* str.__sizeof__(): returns, as a Python int, PyStringObject_SIZE plus the
   string length multiplied by the type's tp_itemsize.
   NOTE(review): the declaration of res is missing from this listing. */
3561 PyDoc_STRVAR(sizeof__doc__
,
3562 "S.__sizeof__() -> size of S in memory, in bytes");
3565 string_sizeof(PyStringObject
*v
)
/* fixed header (incl. terminator, see PyStringObject_SIZE) + payload */
3568 res
= PyStringObject_SIZE
+ PyString_GET_SIZE(v
) * Py_TYPE(v
)->tp_itemsize
;
3569 return PyInt_FromSsize_t(res
);
/* str.__getnewargs__(): builds a 1-tuple holding a string created from
   the object's character buffer and length (format "(s#)"). */
3573 string_getnewargs(PyStringObject
*v
)
3575 return Py_BuildValue("(s#)", v
->ob_sval
, Py_SIZE(v
));
3579 #include "stringlib/string_format.h"
/* Docstring for str.format().  This is the method table entry of the
   8-bit str type, so the signature line advertises a "string" result
   (matching the other str method docstrings in this file) rather than
   "unicode".
   NOTE(review): the remaining lines of this docstring are not visible in
   this listing. */
3581 PyDoc_STRVAR(format__doc__
,
3582 "S.format(*args, **kwargs) -> string\n\
/* str.__format__(format_spec): parses a single object argument, raises
   TypeError unless it is a str or unicode object, converts it with
   PyObject_Str() and formats self through _PyBytes_FormatAdvanced() using
   the character buffer and length of format_spec.
   NOTE(review): the early-exit statements, the handling of tmp after
   PyObject_Str() and the cleanup/return lines are missing from this
   listing. */
3587 string__format__(PyObject
* self
, PyObject
* args
)
3589 PyObject
*format_spec
;
3590 PyObject
*result
= NULL
;
3591 PyObject
*tmp
= NULL
;
3593 /* If 2.x, convert format_spec to the same type as value */
3594 /* This is to allow things like u''.format('') */
3595 if (!PyArg_ParseTuple(args
, "O:__format__", &format_spec
))
3597 if (!(PyString_Check(format_spec
) || PyUnicode_Check(format_spec
))) {
3598 PyErr_Format(PyExc_TypeError
, "__format__ arg must be str "
3599 "or unicode, not %s", Py_TYPE(format_spec
)->tp_name
);
3602 tmp
= PyObject_Str(format_spec
);
3607 result
= _PyBytes_FormatAdvanced(self
,
3608 PyString_AS_STRING(format_spec
),
3609 PyString_GET_SIZE(format_spec
));
/* Docstring for str.__format__().  This is the method table entry of the
   8-bit str type, so the signature line advertises a "string" result
   (matching the other str method docstrings in this file) rather than
   "unicode".
   NOTE(review): the remaining lines of this docstring are not visible in
   this listing. */
3615 PyDoc_STRVAR(p_format__doc__
,
3616 "S.__format__(format_spec) -> string\n\
/* Method table of the str type: one entry per Python-visible method,
   giving its name, the C implementation (cast to PyCFunction), the
   calling-convention flags (METH_O, METH_NOARGS, METH_VARARGS, optionally
   combined with METH_KEYWORDS) and the docstring.  The table ends with a
   {NULL, NULL} sentinel.
   NOTE(review): for entries whose initializer wraps onto a second line
   (capitalize, endswith, rpartition, startswith, swapcase, translate,
   expandtabs, splitlines, __sizeof__) the continuation line holding the
   docstring is missing from this listing, as is the declaration line that
   precedes the array name. */
3622 string_methods
[] = {
3623 /* Counterparts of the obsolete stropmodule functions; except
3624 string.maketrans(). */
3625 {"join", (PyCFunction
)string_join
, METH_O
, join__doc__
},
3626 {"split", (PyCFunction
)string_split
, METH_VARARGS
, split__doc__
},
3627 {"rsplit", (PyCFunction
)string_rsplit
, METH_VARARGS
, rsplit__doc__
},
3628 {"lower", (PyCFunction
)string_lower
, METH_NOARGS
, lower__doc__
},
3629 {"upper", (PyCFunction
)string_upper
, METH_NOARGS
, upper__doc__
},
3630 {"islower", (PyCFunction
)string_islower
, METH_NOARGS
, islower__doc__
},
3631 {"isupper", (PyCFunction
)string_isupper
, METH_NOARGS
, isupper__doc__
},
3632 {"isspace", (PyCFunction
)string_isspace
, METH_NOARGS
, isspace__doc__
},
3633 {"isdigit", (PyCFunction
)string_isdigit
, METH_NOARGS
, isdigit__doc__
},
3634 {"istitle", (PyCFunction
)string_istitle
, METH_NOARGS
, istitle__doc__
},
3635 {"isalpha", (PyCFunction
)string_isalpha
, METH_NOARGS
, isalpha__doc__
},
3636 {"isalnum", (PyCFunction
)string_isalnum
, METH_NOARGS
, isalnum__doc__
},
3637 {"capitalize", (PyCFunction
)string_capitalize
, METH_NOARGS
,
3639 {"count", (PyCFunction
)string_count
, METH_VARARGS
, count__doc__
},
3640 {"endswith", (PyCFunction
)string_endswith
, METH_VARARGS
,
3642 {"partition", (PyCFunction
)string_partition
, METH_O
, partition__doc__
},
3643 {"find", (PyCFunction
)string_find
, METH_VARARGS
, find__doc__
},
3644 {"index", (PyCFunction
)string_index
, METH_VARARGS
, index__doc__
},
3645 {"lstrip", (PyCFunction
)string_lstrip
, METH_VARARGS
, lstrip__doc__
},
3646 {"replace", (PyCFunction
)string_replace
, METH_VARARGS
, replace__doc__
},
3647 {"rfind", (PyCFunction
)string_rfind
, METH_VARARGS
, rfind__doc__
},
3648 {"rindex", (PyCFunction
)string_rindex
, METH_VARARGS
, rindex__doc__
},
3649 {"rstrip", (PyCFunction
)string_rstrip
, METH_VARARGS
, rstrip__doc__
},
3650 {"rpartition", (PyCFunction
)string_rpartition
, METH_O
,
3652 {"startswith", (PyCFunction
)string_startswith
, METH_VARARGS
,
3654 {"strip", (PyCFunction
)string_strip
, METH_VARARGS
, strip__doc__
},
3655 {"swapcase", (PyCFunction
)string_swapcase
, METH_NOARGS
,
3657 {"translate", (PyCFunction
)string_translate
, METH_VARARGS
,
3659 {"title", (PyCFunction
)string_title
, METH_NOARGS
, title__doc__
},
3660 {"ljust", (PyCFunction
)string_ljust
, METH_VARARGS
, ljust__doc__
},
3661 {"rjust", (PyCFunction
)string_rjust
, METH_VARARGS
, rjust__doc__
},
3662 {"center", (PyCFunction
)string_center
, METH_VARARGS
, center__doc__
},
3663 {"zfill", (PyCFunction
)string_zfill
, METH_VARARGS
, zfill__doc__
},
3664 {"format", (PyCFunction
) do_string_format
, METH_VARARGS
| METH_KEYWORDS
, format__doc__
},
3665 {"__format__", (PyCFunction
) string__format__
, METH_VARARGS
, p_format__doc__
},
3666 {"_formatter_field_name_split", (PyCFunction
) formatter_field_name_split
, METH_NOARGS
},
3667 {"_formatter_parser", (PyCFunction
) formatter_parser
, METH_NOARGS
},
3668 {"encode", (PyCFunction
)string_encode
, METH_VARARGS
| METH_KEYWORDS
, encode__doc__
},
3669 {"decode", (PyCFunction
)string_decode
, METH_VARARGS
| METH_KEYWORDS
, decode__doc__
},
3670 {"expandtabs", (PyCFunction
)string_expandtabs
, METH_VARARGS
,
3672 {"splitlines", (PyCFunction
)string_splitlines
, METH_VARARGS
,
3674 {"__sizeof__", (PyCFunction
)string_sizeof
, METH_NOARGS
,
3676 {"__getnewargs__", (PyCFunction
)string_getnewargs
, METH_NOARGS
},
3677 {NULL
, NULL
} /* sentinel */
/* Forward declaration of str_subtype_new() followed by string_new(), the
   tp_new slot of PyString_Type.  Requests for any type other than
   &PyString_Type are forwarded to str_subtype_new().  Otherwise a single
   optional argument named "object" is parsed ("|O:str"); one visible path
   returns the empty string and the other returns PyObject_Str(x).
   NOTE(review): the declaration of x and the condition selecting the
   empty-string path are missing from this listing (presumably "x was not
   supplied" -- confirm against the full source). */
3681 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
);
3684 string_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3687 static char *kwlist
[] = {"object", 0};
3689 if (type
!= &PyString_Type
)
3690 return str_subtype_new(type
, args
, kwds
);
3691 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|O:str", kwlist
, &x
))
3694 return PyString_FromString("");
3695 return PyObject_Str(x
);
/* Constructor path for subclasses of str.  Builds an exact str first by
   calling string_new() with &PyString_Type, allocates an instance of the
   requested subtype with room for n characters via type->tp_alloc, then
   copies the character data, the cached hash and marks the new object as
   not interned.
   NOTE(review): the declaration of n, the NULL checks on tmp/pnew and the
   cleanup/return lines are missing from this listing. */
3699 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3701 PyObject
*tmp
, *pnew
;
3704 assert(PyType_IsSubtype(type
, &PyString_Type
));
3705 tmp
= string_new(&PyString_Type
, args
, kwds
);
3708 assert(PyString_CheckExact(tmp
));
3709 n
= PyString_GET_SIZE(tmp
);
3710 pnew
= type
->tp_alloc(type
, n
);
/* n+1 bytes: the copy includes the byte that follows the n characters. */
3712 Py_MEMCPY(PyString_AS_STRING(pnew
), PyString_AS_STRING(tmp
), n
+1);
3713 ((PyStringObject
*)pnew
)->ob_shash
=
3714 ((PyStringObject
*)tmp
)->ob_shash
;
3715 ((PyStringObject
*)pnew
)->ob_sstate
= SSTATE_NOT_INTERNED
;
/* tp_new slot of PyBaseString_Type: sets a TypeError stating that
   basestring cannot be instantiated.  The arguments are not used in any
   visible line.
   NOTE(review): the return statement is missing from this listing. */
3722 basestring_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3724 PyErr_SetString(PyExc_TypeError
,
3725 "The basestring type cannot be instantiated");
/* nb_remainder slot for str (the % operator).  When the left operand is
   not a string, a new reference to Py_NotImplemented is returned;
   otherwise the operation is PyString_Format(v, w). */
3730 string_mod(PyObject
*v
, PyObject
*w
)
3732 if (!PyString_Check(v
)) {
3733 Py_INCREF(Py_NotImplemented
);
3734 return Py_NotImplemented
;
3736 return PyString_Format(v
, w
);
/* basestring_doc is the docstring of the basestring type.
   string_as_number is the number-protocol table of str; the only slot
   visible in this listing is nb_remainder, filled with string_mod.
   NOTE(review): the other slot lines of string_as_number are missing from
   this listing. */
3739 PyDoc_STRVAR(basestring_doc
,
3740 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3742 static PyNumberMethods string_as_number
= {
3747 string_mod
, /*nb_remainder*/
/* Type object for basestring.  Visible slots: flags are
   Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, the docstring is
   basestring_doc, the base type is PyBaseObject_Type and tp_new is
   basestring_new (which always raises); the protocol tables and most other
   visible slots are 0.
   NOTE(review): many slot lines (name, sizes, dealloc, ...) are missing
   from this listing. */
3751 PyTypeObject PyBaseString_Type
= {
3752 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3762 0, /* tp_as_number */
3763 0, /* tp_as_sequence */
3764 0, /* tp_as_mapping */
3768 0, /* tp_getattro */
3769 0, /* tp_setattro */
3770 0, /* tp_as_buffer */
3771 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
3772 basestring_doc
, /* tp_doc */
3773 0, /* tp_traverse */
3775 0, /* tp_richcompare */
3776 0, /* tp_weaklistoffset */
3778 0, /* tp_iternext */
3782 &PyBaseObject_Type
, /* tp_base */
3784 0, /* tp_descr_get */
3785 0, /* tp_descr_set */
3786 0, /* tp_dictoffset */
3789 basestring_new
, /* tp_new */
/* string_doc is the docstring of str; PyString_Type is the type object
   for str.  Visible slots wire up deallocation, printing, repr/str, hash,
   rich comparison, the number/sequence/mapping/buffer protocol tables,
   generic attribute lookup, the method table string_methods, the base type
   PyBaseString_Type, string_new as tp_new and PyObject_Del as tp_free.
   The basic size is PyStringObject_SIZE.
   NOTE(review): several slot lines (name, item size, ...) are missing from
   this listing. */
3793 PyDoc_STRVAR(string_doc
,
3794 "str(object) -> string\n\
3796 Return a nice string representation of the object.\n\
3797 If the argument is a string, the return value is the same object.");
3799 PyTypeObject PyString_Type
= {
3800 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3802 PyStringObject_SIZE
,
3804 string_dealloc
, /* tp_dealloc */
3805 (printfunc
)string_print
, /* tp_print */
3809 string_repr
, /* tp_repr */
3810 &string_as_number
, /* tp_as_number */
3811 &string_as_sequence
, /* tp_as_sequence */
3812 &string_as_mapping
, /* tp_as_mapping */
3813 (hashfunc
)string_hash
, /* tp_hash */
3815 string_str
, /* tp_str */
3816 PyObject_GenericGetAttr
, /* tp_getattro */
3817 0, /* tp_setattro */
3818 &string_as_buffer
, /* tp_as_buffer */
3819 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_CHECKTYPES
|
3820 Py_TPFLAGS_BASETYPE
| Py_TPFLAGS_STRING_SUBCLASS
|
3821 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
3822 string_doc
, /* tp_doc */
3823 0, /* tp_traverse */
3825 (richcmpfunc
)string_richcompare
, /* tp_richcompare */
3826 0, /* tp_weaklistoffset */
3828 0, /* tp_iternext */
3829 string_methods
, /* tp_methods */
3832 &PyBaseString_Type
, /* tp_base */
3834 0, /* tp_descr_get */
3835 0, /* tp_descr_set */
3836 0, /* tp_dictoffset */
3839 string_new
, /* tp_new */
3840 PyObject_Del
, /* tp_free */
/* PyString_Concat(pv, w): concatenates w onto the string referenced by
   *pv.  A NULL w or a non-string *pv takes a special-case branch;
   otherwise the concatenation is computed by string_concat().
   NOTE(review): the body of the special-case branch and the lines that
   store the result back through pv are missing from this listing. */
3844 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
3846 register PyObject
*v
;
3849 if (w
== NULL
|| !PyString_Check(*pv
)) {
3854 v
= string_concat((PyStringObject
*) *pv
, w
);
/* PyString_ConcatAndDel(pv, w): calls PyString_Concat(pv, w).
   NOTE(review): the statement following the call is missing from this
   listing; judging by the name it releases w -- confirm against the full
   source. */
3860 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
3862 PyString_Concat(pv
, w
);
/* Summary of the visible code of _PyString_Resize(pv, newsize):
   - preconditions: the object must be a string, have a reference count of
     exactly 1, must not be interned, and newsize must be >= 0; otherwise
     PyErr_BadInternalCall() is raised;
   - the object is forgotten by the reference tracker, reallocated to
     PyStringObject_SIZE + newsize bytes and registered again;
   - the size field is updated, a '\0' is stored at index newsize and the
     cached hash is reset to -1.
   NOTE(review): the terminator of the original comment below, the
   assignment of v, and the failure/return statements are missing from this
   listing. */
3867 /* The following function breaks the notion that strings are immutable:
3868 it changes the size of a string. We get away with this only if there
3869 is only one module referencing the object. You can also think of it
3870 as creating a new string object and destroying the old one, only
3871 more efficiently. In any case, don't use this if the string may
3872 already be known to some other part of the code...
3873 Note that if there's not enough memory to resize the string, the original
3874 string object at *pv is deallocated, *pv is set to NULL, an "out of
3875 memory" exception is set, and -1 is returned. Else (on success) 0 is
3876 returned, and the value in *pv may or may not be the same as on input.
3877 As always, an extra byte is allocated for a trailing \0 byte (newsize
3878 does *not* include that), and a trailing \0 byte is stored.
3882 _PyString_Resize(PyObject
**pv
, Py_ssize_t newsize
)
3884 register PyObject
*v
;
3885 register PyStringObject
*sv
;
3887 if (!PyString_Check(v
) || Py_REFCNT(v
) != 1 || newsize
< 0 ||
3888 PyString_CHECK_INTERNED(v
)) {
3891 PyErr_BadInternalCall();
3894 /* XXX UNREF/NEWREF interface should be more symmetrical */
3896 _Py_ForgetReference(v
);
3898 PyObject_REALLOC((char *)v
, PyStringObject_SIZE
+ newsize
);
3904 _Py_NewReference(*pv
);
3905 sv
= (PyStringObject
*) *pv
;
3906 Py_SIZE(sv
) = newsize
;
3907 sv
->ob_sval
[newsize
] = '\0';
3908 sv
->ob_shash
= -1; /* invalidate cached hash value */
/* getnextarg(args, arglen, p_argidx): fetches the next argument for the
   %-formatting code.  While the current index is below arglen, one visible
   path returns the tuple item at that index; when the arguments are
   exhausted a TypeError ("not enough arguments for format string") is set.
   NOTE(review): the lines that advance *p_argidx, the handling of a
   non-tuple argument and the failure return are missing from this
   listing. */
3912 /* Helpers for formatstring */
3914 Py_LOCAL_INLINE(PyObject
*)
3915 getnextarg(PyObject
*args
, Py_ssize_t arglen
, Py_ssize_t
*p_argidx
)
3917 Py_ssize_t argidx
= *p_argidx
;
3918 if (argidx
< arglen
) {
3923 return PyTuple_GetItem(args
, argidx
);
3925 PyErr_SetString(PyExc_TypeError
,
3926 "not enough arguments for format string");
/* Bit flags, one bit each, describing the flag characters of a
   %-format specifier.  PyString_Format() sets them while parsing:
   '-' -> F_LJUST, '+' -> F_SIGN, ' ' -> F_BLANK, '#' -> F_ALT,
   '0' -> F_ZERO. */
3937 #define F_LJUST (1<<0)
3938 #define F_SIGN (1<<1)
3939 #define F_BLANK (1<<2)
3940 #define F_ALT (1<<3)
3941 #define F_ZERO (1<<4)
/* formatfloat(v, flags, prec, type): converts v to a C double with
   PyFloat_AsDouble(); a failure there is reported as TypeError ("float
   argument required, not <type name>").  The value is rendered by
   PyOS_double_to_string() using `type` as the format code and `prec` as
   the precision, with Py_DTSF_ALT requested when F_ALT is set, and the
   resulting C string is wrapped in a new PyString.
   NOTE(review): the local declarations, the default-precision handling,
   the NULL check on p and the release of p are missing from this
   listing. */
3943 /* Returns a new reference to a PyString object, or NULL on failure. */
3946 formatfloat(PyObject
*v
, int flags
, int prec
, int type
)
3952 x
= PyFloat_AsDouble(v
);
3953 if (x
== -1.0 && PyErr_Occurred()) {
3954 PyErr_Format(PyExc_TypeError
, "float argument required, "
3955 "not %.200s", Py_TYPE(v
)->tp_name
);
3962 p
= PyOS_double_to_string(x
, type
, prec
,
3963 (flags
& F_ALT
) ? Py_DTSF_ALT
: 0, NULL
);
3967 result
= PyString_FromStringAndSize(p
, strlen(p
));
/* Summary of the visible code of _PyString_FormatLong():
   - the textual form comes from tp_str, nb_oct or nb_hex of the value's
     type (any other `type` hits an assert);
   - the result must have a reference count of 1 because it is edited in
     place, and its length must fit in an int (ValueError otherwise);
   - a trailing 'L' and, unless F_ALT is set, the base marker are handled;
   - when prec exceeds the digit count a new string of numnondigits + prec
     bytes is built in three copy loops (non-digits, zero fill, digits);
   - a final loop upper-cases characters in the range 'a'..'x' for the
     hex case.
   NOTE(review): the terminator of the original comment below, the switch
   on `type`, several declarations and most loop bodies are missing from
   this listing. */
3972 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3973 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3974 * Python's regular ints.
3975 * Return value: a new PyString*, or NULL if error.
3976 * . *pbuf is set to point into it,
3977 * *plen set to the # of chars following that.
3978 * Caller must decref it when done using pbuf.
3979 * The string starting at *pbuf is of the form
3980 * "-"? ("0x" | "0X")? digit+
3981 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3982 * set in flags. The case of hex digits will be correct,
3983 * There will be at least prec digits, zero-filled on the left if
3984 * necessary to get that many.
3985 * val object to be converted
3986 * flags bitmask of format flags; only F_ALT is looked at
3987 * prec minimum number of digits; 0-fill on left if needed
3988 * type a character in [duoxX]; u acts the same as d
3990 * CAUTION: o, x and X conversions on regular ints can never
3991 * produce a '-' sign, but can for Python's unbounded ints.
3994 _PyString_FormatLong(PyObject
*val
, int flags
, int prec
, int type
,
3995 char **pbuf
, int *plen
)
3997 PyObject
*result
= NULL
;
4000 int sign
; /* 1 if '-', else 0 */
4001 int len
; /* number of characters */
4003 int numdigits
; /* len == numnondigits + numdigits */
4004 int numnondigits
= 0;
4009 result
= Py_TYPE(val
)->tp_str(val
);
4012 result
= Py_TYPE(val
)->tp_as_number
->nb_oct(val
);
4017 result
= Py_TYPE(val
)->tp_as_number
->nb_hex(val
);
4020 assert(!"'type' not in [duoxX]");
4025 buf
= PyString_AsString(result
);
4031 /* To modify the string in-place, there can only be one reference. */
4032 if (Py_REFCNT(result
) != 1) {
4033 PyErr_BadInternalCall();
4036 llen
= PyString_Size(result
);
4037 if (llen
> INT_MAX
) {
4038 PyErr_SetString(PyExc_ValueError
, "string too large in _PyString_FormatLong");
4042 if (buf
[len
-1] == 'L') {
4046 sign
= buf
[0] == '-';
4047 numnondigits
+= sign
;
4048 numdigits
= len
- numnondigits
;
4049 assert(numdigits
> 0);
4051 /* Get rid of base marker unless F_ALT */
4052 if ((flags
& F_ALT
) == 0) {
4053 /* Need to skip 0x, 0X or 0. */
4057 assert(buf
[sign
] == '0');
4058 /* If 0 is only digit, leave it alone. */
4059 if (numdigits
> 1) {
4066 assert(buf
[sign
] == '0');
4067 assert(buf
[sign
+ 1] == 'x');
4078 assert(len
== numnondigits
+ numdigits
);
4079 assert(numdigits
> 0);
4082 /* Fill with leading zeroes to meet minimum width. */
4083 if (prec
> numdigits
) {
4084 PyObject
*r1
= PyString_FromStringAndSize(NULL
,
4085 numnondigits
+ prec
);
4091 b1
= PyString_AS_STRING(r1
);
4092 for (i
= 0; i
< numnondigits
; ++i
)
4094 for (i
= 0; i
< prec
- numdigits
; i
++)
4096 for (i
= 0; i
< numdigits
; i
++)
4101 buf
= PyString_AS_STRING(result
);
4102 len
= numnondigits
+ prec
;
4105 /* Fix up case for hex conversions. */
4107 /* Need to convert all lower case letters to upper case.
4108 and need to convert 0x to 0X (and -0x to -0X). */
4109 for (i
= 0; i
< len
; i
++)
4110 if (buf
[i
] >= 'a' && buf
[i
] <= 'x')
/* Summary of the visible code of formatint(buf, buflen, flags, prec,
   type, v): converts v with PyInt_AsLong() (failure is reported as
   TypeError "int argument required"), builds a printf-style format string
   in the local `fmt` buffer -- with a hand-inserted "0x"/"0X" prefix for
   %#x / %#X -- rejects output buffers that are too small (buflen <= 14 or
   buflen <= 3 + prec) with OverflowError, formats either -x or x into buf
   with PyOS_snprintf() and returns strlen(buf).
   NOTE(review): the declarations of sign and x, the statements guarded by
   the two `x < 0` tests, the condition choosing between the -x and x
   calls, and the terminators of several original comments are missing from
   this listing. */
4118 Py_LOCAL_INLINE(int)
4119 formatint(char *buf
, size_t buflen
, int flags
,
4120 int prec
, int type
, PyObject
*v
)
4122 /* fmt = '%#.' + `prec` + 'l' + `type`
4123 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4125 char fmt
[64]; /* plenty big enough! */
4129 x
= PyInt_AsLong(v
);
4130 if (x
== -1 && PyErr_Occurred()) {
4131 PyErr_Format(PyExc_TypeError
, "int argument required, not %.200s",
4132 Py_TYPE(v
)->tp_name
);
4135 if (x
< 0 && type
== 'u') {
4138 if (x
< 0 && (type
== 'x' || type
== 'X' || type
== 'o'))
4145 if ((flags
& F_ALT
) &&
4146 (type
== 'x' || type
== 'X')) {
4147 /* When converting under %#x or %#X, there are a number
4148 * of issues that cause pain:
4149 * - when 0 is being converted, the C standard leaves off
4150 * the '0x' or '0X', which is inconsistent with other
4151 * %#x/%#X conversions and inconsistent with Python's
4153 * - there are platforms that violate the standard and
4154 * convert 0 with the '0x' or '0X'
4155 * (Metrowerks, Compaq Tru64)
4156 * - there are platforms that give '0x' when converting
4157 * under %#X, but convert 0 in accordance with the
4158 * standard (OS/2 EMX)
4160 * We can achieve the desired consistency by inserting our
4161 * own '0x' or '0X' prefix, and substituting %x/%X in place
4164 * Note that this is the same approach as used in
4165 * formatint() in unicodeobject.c
4167 PyOS_snprintf(fmt
, sizeof(fmt
), "%s0%c%%.%dl%c",
4168 sign
, type
, prec
, type
);
4171 PyOS_snprintf(fmt
, sizeof(fmt
), "%s%%%s.%dl%c",
4172 sign
, (flags
&F_ALT
) ? "#" : "",
4176 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4177 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4179 if (buflen
<= 14 || buflen
<= (size_t)3 + (size_t)prec
) {
4180 PyErr_SetString(PyExc_OverflowError
,
4181 "formatted integer is too long (precision too large?)");
4185 PyOS_snprintf(buf
, buflen
, fmt
, -x
);
4187 PyOS_snprintf(buf
, buflen
, fmt
, x
);
4188 return (int)strlen(buf
);
/* formatchar(buf, buflen, v): stores the single character for a %c
   conversion in buf[0].  A string argument is parsed with the "c" format
   (one-character string), anything else with the "b" format (small
   integer); both use the error text "%c requires int or char".
   NOTE(review): the failure returns, the `else` between the two parses and
   the final statements are missing from this listing; buflen is not
   referenced in any visible line. */
4191 Py_LOCAL_INLINE(int)
4192 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
4194 /* presume that the buffer is at least 2 characters long */
4195 if (PyString_Check(v
)) {
4196 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
4200 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
/* Summary of the visible code of PyString_Format(format, args), the
   implementation of `format % args` for str:
   - rejects a NULL/non-string format or NULL args with
     PyErr_BadInternalCall();
   - allocates the result with fmtcnt + 100 bytes and grows it through
     _PyString_Resize() when it runs out of room;
   - treats args as a mapping (`dict`) when its type has tp_as_mapping and
     it is neither a tuple nor a basestring instance;
   - for each format specifier parses, in order: an optional parenthesised
     mapping key (balanced parentheses), the flag characters - + space # 0,
     a width and a precision (either '*' taken from the arguments or
     decimal digits with an overflow check), an optional h/l/L length
     modifier, and the conversion character;
   - conversions are delegated to _PyObject_Str()/PyObject_Repr(),
     formatint(), _PyString_FormatLong(), formatfloat() and formatchar();
   - sign, zero-fill and left/right justification are applied while copying
     the converted text into the result;
   - leftover arguments raise TypeError ("not all arguments converted
     during string formatting");
   - under Py_USING_UNICODE, when a unicode argument is met the already
     produced prefix is kept, the remaining format text is decoded and
     handed to PyUnicode_Format(), and the two parts are joined with
     PyUnicode_Concat().
   FORMATBUFLEN (120) is the size of the local buffer used for int and char
   conversions.
   NOTE(review): a large number of original lines (declarations, switch
   labels, error exits, most loop bodies, the terminator of the original
   comment below) are missing from this listing, so this summary only
   covers what is visible. */
4207 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4209 FORMATBUFLEN is the length of the buffer in which the ints &
4210 chars are formatted. XXX This is a magic number. Each formatting
4211 routine does bounds checking to ensure no overflow, but a better
4212 solution may be to malloc a buffer of appropriate size for each
4213 format. For now, the current solution is sufficient.
4215 #define FORMATBUFLEN (size_t)120
4218 PyString_Format(PyObject
*format
, PyObject
*args
)
4221 Py_ssize_t arglen
, argidx
;
4222 Py_ssize_t reslen
, rescnt
, fmtcnt
;
4224 PyObject
*result
, *orig_args
;
4225 #ifdef Py_USING_UNICODE
4228 PyObject
*dict
= NULL
;
4229 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
4230 PyErr_BadInternalCall();
4234 fmt
= PyString_AS_STRING(format
);
4235 fmtcnt
= PyString_GET_SIZE(format
);
4236 reslen
= rescnt
= fmtcnt
+ 100;
4237 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
4240 res
= PyString_AsString(result
);
4241 if (PyTuple_Check(args
)) {
4242 arglen
= PyTuple_GET_SIZE(args
);
4249 if (Py_TYPE(args
)->tp_as_mapping
&& !PyTuple_Check(args
) &&
4250 !PyObject_TypeCheck(args
, &PyBaseString_Type
))
4252 while (--fmtcnt
>= 0) {
4255 rescnt
= fmtcnt
+ 100;
4257 if (_PyString_Resize(&result
, reslen
) < 0)
4259 res
= PyString_AS_STRING(result
)
4266 /* Got a format specifier */
4268 Py_ssize_t width
= -1;
4274 PyObject
*temp
= NULL
;
4278 char formatbuf
[FORMATBUFLEN
];
4279 /* For format{int,char}() */
4280 #ifdef Py_USING_UNICODE
4281 char *fmt_start
= fmt
;
4282 Py_ssize_t argidx_start
= argidx
;
4293 PyErr_SetString(PyExc_TypeError
,
4294 "format requires a mapping");
4300 /* Skip over balanced parentheses */
4301 while (pcount
> 0 && --fmtcnt
>= 0) {
4304 else if (*fmt
== '(')
4308 keylen
= fmt
- keystart
- 1;
4309 if (fmtcnt
< 0 || pcount
> 0) {
4310 PyErr_SetString(PyExc_ValueError
,
4311 "incomplete format key");
4314 key
= PyString_FromStringAndSize(keystart
,
4322 args
= PyObject_GetItem(dict
, key
);
4331 while (--fmtcnt
>= 0) {
4332 switch (c
= *fmt
++) {
4333 case '-': flags
|= F_LJUST
; continue;
4334 case '+': flags
|= F_SIGN
; continue;
4335 case ' ': flags
|= F_BLANK
; continue;
4336 case '#': flags
|= F_ALT
; continue;
4337 case '0': flags
|= F_ZERO
; continue;
4342 v
= getnextarg(args
, arglen
, &argidx
);
4345 if (!PyInt_Check(v
)) {
4346 PyErr_SetString(PyExc_TypeError
,
4350 width
= PyInt_AsLong(v
);
4358 else if (c
>= 0 && isdigit(c
)) {
4360 while (--fmtcnt
>= 0) {
4361 c
= Py_CHARMASK(*fmt
++);
4364 if ((width
*10) / 10 != width
) {
4370 width
= width
*10 + (c
- '0');
4378 v
= getnextarg(args
, arglen
, &argidx
);
4381 if (!PyInt_Check(v
)) {
4387 prec
= PyInt_AsLong(v
);
4393 else if (c
>= 0 && isdigit(c
)) {
4395 while (--fmtcnt
>= 0) {
4396 c
= Py_CHARMASK(*fmt
++);
4399 if ((prec
*10) / 10 != prec
) {
4405 prec
= prec
*10 + (c
- '0');
4410 if (c
== 'h' || c
== 'l' || c
== 'L') {
4416 PyErr_SetString(PyExc_ValueError
,
4417 "incomplete format");
4421 v
= getnextarg(args
, arglen
, &argidx
);
4433 #ifdef Py_USING_UNICODE
4434 if (PyUnicode_Check(v
)) {
4436 argidx
= argidx_start
;
4440 temp
= _PyObject_Str(v
);
4441 #ifdef Py_USING_UNICODE
4442 if (temp
!= NULL
&& PyUnicode_Check(temp
)) {
4445 argidx
= argidx_start
;
4452 temp
= PyObject_Repr(v
);
4455 if (!PyString_Check(temp
)) {
4456 PyErr_SetString(PyExc_TypeError
,
4457 "%s argument has non-string str()");
4461 pbuf
= PyString_AS_STRING(temp
);
4462 len
= PyString_GET_SIZE(temp
);
4463 if (prec
>= 0 && len
> prec
)
4475 if (PyNumber_Check(v
)) {
4476 PyObject
*iobj
=NULL
;
4478 if (PyInt_Check(v
) || (PyLong_Check(v
))) {
4483 iobj
= PyNumber_Int(v
);
4484 if (iobj
==NULL
) iobj
= PyNumber_Long(v
);
4487 if (PyInt_Check(iobj
)) {
4490 len
= formatint(pbuf
,
4492 flags
, prec
, c
, iobj
);
4498 else if (PyLong_Check(iobj
)) {
4502 temp
= _PyString_FormatLong(iobj
, flags
,
4503 prec
, c
, &pbuf
, &ilen
);
4516 PyErr_Format(PyExc_TypeError
,
4517 "%%%c format: a number is required, "
4518 "not %.200s", c
, Py_TYPE(v
)->tp_name
);
4530 temp
= formatfloat(v
, flags
, prec
, c
);
4533 pbuf
= PyString_AS_STRING(temp
);
4534 len
= PyString_GET_SIZE(temp
);
4540 #ifdef Py_USING_UNICODE
4541 if (PyUnicode_Check(v
)) {
4543 argidx
= argidx_start
;
4548 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
4553 PyErr_Format(PyExc_ValueError
,
4554 "unsupported format character '%c' (0x%x) "
4557 (Py_ssize_t
)(fmt
- 1 -
4558 PyString_AsString(format
)));
4562 if (*pbuf
== '-' || *pbuf
== '+') {
4566 else if (flags
& F_SIGN
)
4568 else if (flags
& F_BLANK
)
4575 if (rescnt
- (sign
!= 0) < width
) {
4577 rescnt
= width
+ fmtcnt
+ 100;
4582 return PyErr_NoMemory();
4584 if (_PyString_Resize(&result
, reslen
) < 0) {
4588 res
= PyString_AS_STRING(result
)
4598 if ((flags
& F_ALT
) && (c
== 'x' || c
== 'X')) {
4599 assert(pbuf
[0] == '0');
4600 assert(pbuf
[1] == c
);
4611 if (width
> len
&& !(flags
& F_LJUST
)) {
4615 } while (--width
> len
);
4620 if ((flags
& F_ALT
) &&
4621 (c
== 'x' || c
== 'X')) {
4622 assert(pbuf
[0] == '0');
4623 assert(pbuf
[1] == c
);
4628 Py_MEMCPY(res
, pbuf
, len
);
4631 while (--width
>= len
) {
4635 if (dict
&& (argidx
< arglen
) && c
!= '%') {
4636 PyErr_SetString(PyExc_TypeError
,
4637 "not all arguments converted during string formatting");
4644 if (argidx
< arglen
&& !dict
) {
4645 PyErr_SetString(PyExc_TypeError
,
4646 "not all arguments converted during string formatting");
4652 _PyString_Resize(&result
, reslen
- rescnt
);
4655 #ifdef Py_USING_UNICODE
4661 /* Fiddle args right (remove the first argidx arguments) */
4662 if (PyTuple_Check(orig_args
) && argidx
> 0) {
4664 Py_ssize_t n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
4669 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
4671 PyTuple_SET_ITEM(v
, n
, w
);
4675 Py_INCREF(orig_args
);
4679 /* Take what we have of the result and let the Unicode formatting
4680 function format the rest of the input. */
4681 rescnt
= res
- PyString_AS_STRING(result
);
4682 if (_PyString_Resize(&result
, rescnt
))
4684 fmtcnt
= PyString_GET_SIZE(format
) - \
4685 (fmt
- PyString_AS_STRING(format
));
4686 format
= PyUnicode_Decode(fmt
, fmtcnt
, NULL
, NULL
);
4689 v
= PyUnicode_Format(format
, args
);
4693 /* Paste what we have (result) to what the Unicode formatting
4694 function returned (v) and return the result (or error) */
4695 w
= PyUnicode_Concat(result
, v
);
4700 #endif /* Py_USING_UNICODE */
/* PyString_InternInPlace(p): intern the string object referenced by *p.

   p -- address of an object pointer.  *p must be a non-NULL string object;
        anything else ends in Py_FatalError().

   Steps visible here: reject non-strings, test for the exact string type
   and for an already-interned state, create the file-level `interned' dict
   on first use (clearing the error if PyDict_New() fails), look the string
   up in that dict, store it as both key and value, and finally mark it
   SSTATE_INTERNED_MORTAL.

   NOTE(review): this listing omits several original lines -- the bodies of
   the guards, the handling of the lookup result `t', the body of the
   PyDict_SetItem() failure branch, and whatever refcount adjustment the
   comment near the end refers to.  Confirm those against the complete
   source before relying on this description. */
4711 PyString_InternInPlace(PyObject
**p
)
4713 register PyStringObject
*s
= (PyStringObject
*)(*p
);
/* Hard failure: callers must pass a real string object. */
4715 if (s
== NULL
|| !PyString_Check(s
))
4716 Py_FatalError("PyString_InternInPlace: strings only please!");
4717 /* If it's a string subclass, we don't really know what putting
4718 it in the interned dict might do. */
4719 if (!PyString_CheckExact(s
))
/* Test whether the string already carries an interned state. */
4721 if (PyString_CHECK_INTERNED(s
))
/* The interned dict is created lazily, on the first call that needs it. */
4723 if (interned
== NULL
) {
4724 interned
= PyDict_New();
4725 if (interned
== NULL
) {
4726 PyErr_Clear(); /* Don't leave an exception */
/* Look for an entry with an equal key already present in the dict. */
4730 t
= PyDict_GetItem(interned
, (PyObject
*)s
);
/* Register the string in the dict as both key and value. */
4738 if (PyDict_SetItem(interned
, (PyObject
*)s
, (PyObject
*)s
) < 0) {
4742 /* The two references in interned are not counted by refcnt.
4743 The string deallocator will take care of this */
4745 PyString_CHECK_INTERNED(s
) = SSTATE_INTERNED_MORTAL
;
/* PyString_InternImmortal(p): intern *p through PyString_InternInPlace(),
   then switch its interned state to SSTATE_INTERNED_IMMORTAL unless it
   already has that state.

   p -- address of an object pointer, passed straight on to
        PyString_InternInPlace().

   NOTE(review): any further statement inside the `if' (for example a
   reference-count change accompanying the state switch) is not visible in
   this listing -- confirm against the complete source. */
4749 PyString_InternImmortal(PyObject
**p
)
4751 PyString_InternInPlace(p
);
/* Only touch the state when it is not already immortal. */
4752 if (PyString_CHECK_INTERNED(*p
) != SSTATE_INTERNED_IMMORTAL
) {
4753 PyString_CHECK_INTERNED(*p
) = SSTATE_INTERNED_IMMORTAL
;
/* PyString_InternFromString(cp): build a string object from the C string
   `cp' with PyString_FromString() and intern it with
   PyString_InternInPlace().

   cp -- C string handed unchanged to PyString_FromString().

   NOTE(review): the failure check on `s' and the return statement are not
   visible in this listing; presumably the (possibly replaced) object `s'
   is what gets returned -- confirm against the complete source. */
4760 PyString_InternFromString(const char *cp
)
4762 PyObject
*s
= PyString_FromString(cp
);
/* Pass the address of `s' so the callee can work on the local pointer. */
4765 PyString_InternInPlace(&s
);
/* Release the file-level string caches: for each of the UCHAR_MAX + 1 slots
   of `characters', drop the reference held there and reset the slot to
   NULL; then drop the reference held in `nullstring'.  Py_XDECREF is used,
   so slots that are still NULL are skipped safely.

   NOTE(review): the header of the enclosing function is not visible in this
   listing (presumably the string type's finalizer), and neither is any
   statement that resets `nullstring' afterwards -- confirm against the
   complete source. */
4773 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
4774 Py_XDECREF(characters
[i
]);
4775 characters
[i
] = NULL
;
4777 Py_XDECREF(nullstring
);
/* _Py_ReleaseInternedStrings(): report on and release the dictionary of
   interned strings.  Takes no arguments and returns nothing.

   Visible behaviour: fetch the keys of `interned' as a list, print a
   "releasing ... interned strings" line to stderr, walk the keys while
   summing Py_SIZE() separately for mortal and immortal strings, reset each
   string's ob_sstate to SSTATE_NOT_INTERNED, print the two totals to
   stderr, then clear and DECREF `interned'.

   NOTE(review): several original lines are absent from this listing -- the
   remaining local declarations, the bodies of the two early guards, the
   per-case statements other than the size accumulation, the label in front
   of the Py_FatalError() call, and anything following the final
   Py_DECREF().  Confirm them against the complete source. */
4781 void _Py_ReleaseInternedStrings(void)
4786 Py_ssize_t immortal_size
= 0, mortal_size
= 0;
/* Guard: the interned dict must exist and really be a dict. */
4788 if (interned
== NULL
|| !PyDict_Check(interned
))
/* Take the keys as a list so the strings can be visited by index. */
4790 keys
= PyDict_Keys(interned
);
4791 if (keys
== NULL
|| !PyList_Check(keys
)) {
4796 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4797 detector, interned strings are not forcibly deallocated; rather, we
4798 give them their stolen references back, and then clear and DECREF
4799 the interned dict. */
4801 n
= PyList_GET_SIZE(keys
);
4802 fprintf(stderr
, "releasing %" PY_FORMAT_SIZE_T
"d interned strings\n",
/* Visit every key and tally its size according to its interned state. */
4804 for (i
= 0; i
< n
; i
++) {
4805 s
= (PyStringObject
*) PyList_GET_ITEM(keys
, i
);
4806 switch (s
->ob_sstate
) {
4807 case SSTATE_NOT_INTERNED
:
4808 /* XXX Shouldn't happen */
4810 case SSTATE_INTERNED_IMMORTAL
:
4812 immortal_size
+= Py_SIZE(s
);
4814 case SSTATE_INTERNED_MORTAL
:
4816 mortal_size
+= Py_SIZE(s
);
/* Presumably the default branch: any other state value is treated as a
   fatal inconsistency. */
4819 Py_FatalError("Inconsistent interned string state.");
/* Whatever the previous state, the string is no longer marked interned. */
4821 s
->ob_sstate
= SSTATE_NOT_INTERNED
;
/* Summary line: mortal total first, immortal total second. */
4823 fprintf(stderr
, "total size of all interned strings: "
4824 "%" PY_FORMAT_SIZE_T
"d/%" PY_FORMAT_SIZE_T
"d "
4825 "mortal/immortal\n", mortal_size
, immortal_size
);
/* Empty the dict, then drop this file's reference to it. */
4827 PyDict_Clear(interned
);
4828 Py_DECREF(interned
);