1 /* String (str/bytes) object implementation */
3 #define PY_SSIZE_T_CLEAN
9 int null_strings
, one_strings
;
12 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
13 static PyStringObject
*nullstring
;
15 /* This dictionary holds all interned strings. Note that references to
16 strings in this dictionary are *not* counted in the string's ob_refcnt.
17 When the interned string reaches a refcnt of 0 the string deallocation
18 function will delete the reference from this dictionary.
20 Another way to look at this is to say that the actual reference
21 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23 static PyObject
*interned
;
26 For both PyString_FromString() and PyString_FromStringAndSize(), the
27 parameter `size' denotes number of characters to allocate, not counting any
28 null terminating character.
30 For PyString_FromString(), the parameter `str' points to a null-terminated
31 string containing exactly `size' bytes.
33 For PyString_FromStringAndSize(), the parameter `str' is
34 either NULL or else points to a string containing at least `size' bytes.
35 For PyString_FromStringAndSize(), the string in the `str' parameter does
36 not have to be null-terminated. (Therefore it is safe to construct a
37 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
38 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
39 bytes (setting the last byte to the null terminating character) and you can
40 fill in the data yourself. If `str' is non-NULL then the resulting
41 PyString object must be treated as immutable and you must not fill in nor
42 alter the data yourself, since the strings may be shared.
44 The PyObject member `op->ob_size', which denotes the number of "extra
45 items" in a variable-size object, will contain the number of bytes
46 allocated for string data, not counting the null terminating character. It
47 is therefore equal to the `size' parameter (for
48 PyString_FromStringAndSize()) or the length of the string in the `str'
49 parameter (for PyString_FromString()).
52 PyString_FromStringAndSize(const char *str
, Py_ssize_t size
)
54 register PyStringObject
*op
;
56 PyErr_SetString(PyExc_SystemError
,
57 "Negative size passed to PyString_FromStringAndSize");
60 if (size
== 0 && (op
= nullstring
) != NULL
) {
65 return (PyObject
*)op
;
67 if (size
== 1 && str
!= NULL
&&
68 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
74 return (PyObject
*)op
;
77 /* Inline PyObject_NewVar */
78 op
= (PyStringObject
*)PyObject_MALLOC(sizeof(PyStringObject
) + size
);
80 return PyErr_NoMemory();
81 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
83 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
85 Py_MEMCPY(op
->ob_sval
, str
, size
);
86 op
->ob_sval
[size
] = '\0';
87 /* share short strings */
89 PyObject
*t
= (PyObject
*)op
;
90 PyString_InternInPlace(&t
);
91 op
= (PyStringObject
*)t
;
94 } else if (size
== 1 && str
!= NULL
) {
95 PyObject
*t
= (PyObject
*)op
;
96 PyString_InternInPlace(&t
);
97 op
= (PyStringObject
*)t
;
98 characters
[*str
& UCHAR_MAX
] = op
;
101 return (PyObject
*) op
;
105 PyString_FromString(const char *str
)
107 register size_t size
;
108 register PyStringObject
*op
;
112 if (size
> PY_SSIZE_T_MAX
) {
113 PyErr_SetString(PyExc_OverflowError
,
114 "string is too long for a Python string");
117 if (size
== 0 && (op
= nullstring
) != NULL
) {
122 return (PyObject
*)op
;
124 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
129 return (PyObject
*)op
;
132 /* Inline PyObject_NewVar */
133 op
= (PyStringObject
*)PyObject_MALLOC(sizeof(PyStringObject
) + size
);
135 return PyErr_NoMemory();
136 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
138 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
139 Py_MEMCPY(op
->ob_sval
, str
, size
+1);
140 /* share short strings */
142 PyObject
*t
= (PyObject
*)op
;
143 PyString_InternInPlace(&t
);
144 op
= (PyStringObject
*)t
;
147 } else if (size
== 1) {
148 PyObject
*t
= (PyObject
*)op
;
149 PyString_InternInPlace(&t
);
150 op
= (PyStringObject
*)t
;
151 characters
[*str
& UCHAR_MAX
] = op
;
154 return (PyObject
*) op
;
158 PyString_FromFormatV(const char *format
, va_list vargs
)
166 #ifdef VA_LIST_IS_ARRAY
167 Py_MEMCPY(count
, vargs
, sizeof(va_list));
170 __va_copy(count
, vargs
);
175 /* step 1: figure out how large a buffer we need */
176 for (f
= format
; *f
; f
++) {
179 while (*++f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
182 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
183 * they don't affect the amount of space we reserve.
185 if ((*f
== 'l' || *f
== 'z') &&
186 (f
[1] == 'd' || f
[1] == 'u'))
191 (void)va_arg(count
, int);
192 /* fall through... */
196 case 'd': case 'u': case 'i': case 'x':
197 (void) va_arg(count
, int);
198 /* 20 bytes is enough to hold a 64-bit
199 integer. Decimal takes the most space.
200 This isn't enough for octal. */
204 s
= va_arg(count
, char*);
208 (void) va_arg(count
, int);
209 /* maximum 64-bit pointer representation:
211 * so 19 characters is enough.
212 * XXX I count 18 -- what's the extra for?
217 /* if we stumble upon an unknown
218 formatting code, copy the rest of
219 the format string to the output
220 string. (we cannot just skip the
221 code, since there's no way to know
222 what's in the argument list) */
230 /* step 2: fill the buffer */
231 /* Since we've analyzed how much space we need for the worst case,
232 use sprintf directly instead of the slower PyOS_snprintf. */
233 string
= PyString_FromStringAndSize(NULL
, n
);
237 s
= PyString_AsString(string
);
239 for (f
= format
; *f
; f
++) {
245 /* parse the width.precision part (we're only
246 interested in the precision value, if any) */
248 while (isdigit(Py_CHARMASK(*f
)))
249 n
= (n
*10) + *f
++ - '0';
253 while (isdigit(Py_CHARMASK(*f
)))
254 n
= (n
*10) + *f
++ - '0';
256 while (*f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
258 /* handle the long flag, but only for %ld and %lu.
259 others can be added when necessary. */
260 if (*f
== 'l' && (f
[1] == 'd' || f
[1] == 'u')) {
264 /* handle the size_t flag. */
265 if (*f
== 'z' && (f
[1] == 'd' || f
[1] == 'u')) {
272 *s
++ = va_arg(vargs
, int);
276 sprintf(s
, "%ld", va_arg(vargs
, long));
278 sprintf(s
, "%" PY_FORMAT_SIZE_T
"d",
279 va_arg(vargs
, Py_ssize_t
));
281 sprintf(s
, "%d", va_arg(vargs
, int));
287 va_arg(vargs
, unsigned long));
289 sprintf(s
, "%" PY_FORMAT_SIZE_T
"u",
290 va_arg(vargs
, size_t));
293 va_arg(vargs
, unsigned int));
297 sprintf(s
, "%i", va_arg(vargs
, int));
301 sprintf(s
, "%x", va_arg(vargs
, int));
305 p
= va_arg(vargs
, char*);
313 sprintf(s
, "%p", va_arg(vargs
, void*));
314 /* %p is ill-defined: ensure leading 0x. */
317 else if (s
[1] != 'x') {
318 memmove(s
+2, s
, strlen(s
)+1);
337 _PyString_Resize(&string
, s
- PyString_AS_STRING(string
));
342 PyString_FromFormat(const char *format
, ...)
347 #ifdef HAVE_STDARG_PROTOTYPES
348 va_start(vargs
, format
);
352 ret
= PyString_FromFormatV(format
, vargs
);
358 PyObject
*PyString_Decode(const char *s
,
360 const char *encoding
,
365 str
= PyString_FromStringAndSize(s
, size
);
368 v
= PyString_AsDecodedString(str
, encoding
, errors
);
373 PyObject
*PyString_AsDecodedObject(PyObject
*str
,
374 const char *encoding
,
379 if (!PyString_Check(str
)) {
384 if (encoding
== NULL
) {
385 #ifdef Py_USING_UNICODE
386 encoding
= PyUnicode_GetDefaultEncoding();
388 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
393 /* Decode via the codec registry */
394 v
= PyCodec_Decode(str
, encoding
, errors
);
404 PyObject
*PyString_AsDecodedString(PyObject
*str
,
405 const char *encoding
,
410 v
= PyString_AsDecodedObject(str
, encoding
, errors
);
414 #ifdef Py_USING_UNICODE
415 /* Convert Unicode to a string using the default encoding */
416 if (PyUnicode_Check(v
)) {
418 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
424 if (!PyString_Check(v
)) {
425 PyErr_Format(PyExc_TypeError
,
426 "decoder did not return a string object (type=%.400s)",
427 Py_TYPE(v
)->tp_name
);
438 PyObject
*PyString_Encode(const char *s
,
440 const char *encoding
,
445 str
= PyString_FromStringAndSize(s
, size
);
448 v
= PyString_AsEncodedString(str
, encoding
, errors
);
453 PyObject
*PyString_AsEncodedObject(PyObject
*str
,
454 const char *encoding
,
459 if (!PyString_Check(str
)) {
464 if (encoding
== NULL
) {
465 #ifdef Py_USING_UNICODE
466 encoding
= PyUnicode_GetDefaultEncoding();
468 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
473 /* Encode via the codec registry */
474 v
= PyCodec_Encode(str
, encoding
, errors
);
484 PyObject
*PyString_AsEncodedString(PyObject
*str
,
485 const char *encoding
,
490 v
= PyString_AsEncodedObject(str
, encoding
, errors
);
494 #ifdef Py_USING_UNICODE
495 /* Convert Unicode to a string using the default encoding */
496 if (PyUnicode_Check(v
)) {
498 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
504 if (!PyString_Check(v
)) {
505 PyErr_Format(PyExc_TypeError
,
506 "encoder did not return a string object (type=%.400s)",
507 Py_TYPE(v
)->tp_name
);
519 string_dealloc(PyObject
*op
)
521 switch (PyString_CHECK_INTERNED(op
)) {
522 case SSTATE_NOT_INTERNED
:
525 case SSTATE_INTERNED_MORTAL
:
526 /* revive dead object temporarily for DelItem */
528 if (PyDict_DelItem(interned
, op
) != 0)
530 "deletion of interned string failed");
533 case SSTATE_INTERNED_IMMORTAL
:
534 Py_FatalError("Immortal interned string died.");
537 Py_FatalError("Inconsistent interned string state.");
539 Py_TYPE(op
)->tp_free(op
);
542 /* Unescape a backslash-escaped string. If unicode is non-zero,
543 the string is a u-literal. If recode_encoding is non-zero,
544 the string is UTF-8 encoded and should be re-encoded in the
545 specified encoding. */
547 PyObject
*PyString_DecodeEscape(const char *s
,
551 const char *recode_encoding
)
557 Py_ssize_t newlen
= recode_encoding
? 4*len
:len
;
558 v
= PyString_FromStringAndSize((char *)NULL
, newlen
);
561 p
= buf
= PyString_AsString(v
);
566 #ifdef Py_USING_UNICODE
567 if (recode_encoding
&& (*s
& 0x80)) {
573 /* Decode non-ASCII bytes as UTF-8. */
574 while (t
< end
&& (*t
& 0x80)) t
++;
575 u
= PyUnicode_DecodeUTF8(s
, t
- s
, errors
);
578 /* Recode them in target encoding. */
579 w
= PyUnicode_AsEncodedString(
580 u
, recode_encoding
, errors
);
584 /* Append bytes to output buffer. */
585 assert(PyString_Check(w
));
586 r
= PyString_AS_STRING(w
);
587 rn
= PyString_GET_SIZE(w
);
602 PyErr_SetString(PyExc_ValueError
,
603 "Trailing \\ in string");
607 /* XXX This assumes ASCII! */
609 case '\\': *p
++ = '\\'; break;
610 case '\'': *p
++ = '\''; break;
611 case '\"': *p
++ = '\"'; break;
612 case 'b': *p
++ = '\b'; break;
613 case 'f': *p
++ = '\014'; break; /* FF */
614 case 't': *p
++ = '\t'; break;
615 case 'n': *p
++ = '\n'; break;
616 case 'r': *p
++ = '\r'; break;
617 case 'v': *p
++ = '\013'; break; /* VT */
618 case 'a': *p
++ = '\007'; break; /* BEL, not classic C */
619 case '0': case '1': case '2': case '3':
620 case '4': case '5': case '6': case '7':
622 if (s
< end
&& '0' <= *s
&& *s
<= '7') {
623 c
= (c
<<3) + *s
++ - '0';
624 if (s
< end
&& '0' <= *s
&& *s
<= '7')
625 c
= (c
<<3) + *s
++ - '0';
631 isxdigit(Py_CHARMASK(s
[0])) &&
632 isxdigit(Py_CHARMASK(s
[1])))
655 if (!errors
|| strcmp(errors
, "strict") == 0) {
656 PyErr_SetString(PyExc_ValueError
,
657 "invalid \\x escape");
660 if (strcmp(errors
, "replace") == 0) {
662 } else if (strcmp(errors
, "ignore") == 0)
665 PyErr_Format(PyExc_ValueError
,
667 "unknown error handling code: %.400s",
671 #ifndef Py_USING_UNICODE
676 PyErr_SetString(PyExc_ValueError
,
677 "Unicode escapes not legal "
678 "when Unicode disabled");
685 goto non_esc
; /* an arbitry number of unescaped
686 UTF-8 bytes may follow. */
690 _PyString_Resize(&v
, p
- buf
);
697 /* -------------------------------------------------------------------- */
701 string_getsize(register PyObject
*op
)
705 if (PyString_AsStringAndSize(op
, &s
, &len
))
710 static /*const*/ char *
711 string_getbuffer(register PyObject
*op
)
715 if (PyString_AsStringAndSize(op
, &s
, &len
))
721 PyString_Size(register PyObject
*op
)
723 if (!PyString_Check(op
))
724 return string_getsize(op
);
729 PyString_AsString(register PyObject
*op
)
731 if (!PyString_Check(op
))
732 return string_getbuffer(op
);
733 return ((PyStringObject
*)op
) -> ob_sval
;
737 PyString_AsStringAndSize(register PyObject
*obj
,
739 register Py_ssize_t
*len
)
742 PyErr_BadInternalCall();
746 if (!PyString_Check(obj
)) {
747 #ifdef Py_USING_UNICODE
748 if (PyUnicode_Check(obj
)) {
749 obj
= _PyUnicode_AsDefaultEncodedString(obj
, NULL
);
756 PyErr_Format(PyExc_TypeError
,
757 "expected string or Unicode object, "
758 "%.200s found", Py_TYPE(obj
)->tp_name
);
763 *s
= PyString_AS_STRING(obj
);
765 *len
= PyString_GET_SIZE(obj
);
766 else if (strlen(*s
) != (size_t)PyString_GET_SIZE(obj
)) {
767 PyErr_SetString(PyExc_TypeError
,
768 "expected string without null bytes");
774 /* -------------------------------------------------------------------- */
777 #include "stringlib/stringdefs.h"
778 #include "stringlib/fastsearch.h"
780 #include "stringlib/count.h"
781 #include "stringlib/find.h"
782 #include "stringlib/partition.h"
784 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
785 #include "stringlib/localeutil.h"
790 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
792 Py_ssize_t i
, str_len
;
796 /* XXX Ought to check for interrupts when writing long strings */
797 if (! PyString_CheckExact(op
)) {
799 /* A str subclass may have its own __str__ method. */
800 op
= (PyStringObject
*) PyObject_Str((PyObject
*)op
);
803 ret
= string_print(op
, fp
, flags
);
807 if (flags
& Py_PRINT_RAW
) {
808 char *data
= op
->ob_sval
;
809 Py_ssize_t size
= Py_SIZE(op
);
810 Py_BEGIN_ALLOW_THREADS
811 while (size
> INT_MAX
) {
812 /* Very long strings cannot be written atomically.
813 * But don't write exactly INT_MAX bytes at a time
814 * to avoid memory alignment issues.
816 const int chunk_size
= INT_MAX
& ~0x3FFF;
817 fwrite(data
, 1, chunk_size
, fp
);
822 if (size
) fwrite(data
, (int)size
, 1, fp
);
824 fwrite(data
, 1, (int)size
, fp
);
830 /* figure out which quote to use; single is preferred */
832 if (memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
833 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
836 str_len
= Py_SIZE(op
);
837 Py_BEGIN_ALLOW_THREADS
839 for (i
= 0; i
< str_len
; i
++) {
840 /* Since strings are immutable and the caller should have a
841 reference, accessing the internal buffer should not be an issue
842 with the GIL released. */
844 if (c
== quote
|| c
== '\\')
845 fprintf(fp
, "\\%c", c
);
852 else if (c
< ' ' || c
>= 0x7f)
853 fprintf(fp
, "\\x%02x", c
& 0xff);
863 PyString_Repr(PyObject
*obj
, int smartquotes
)
865 register PyStringObject
* op
= (PyStringObject
*) obj
;
866 size_t newsize
= 2 + 4 * Py_SIZE(op
);
868 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 != Py_SIZE(op
)) {
869 PyErr_SetString(PyExc_OverflowError
,
870 "string is too large to make repr");
873 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
878 register Py_ssize_t i
;
883 /* figure out which quote to use; single is preferred */
886 memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
887 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
890 p
= PyString_AS_STRING(v
);
892 for (i
= 0; i
< Py_SIZE(op
); i
++) {
893 /* There's at least enough room for a hex escape
894 and a closing quote. */
895 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 5);
897 if (c
== quote
|| c
== '\\')
898 *p
++ = '\\', *p
++ = c
;
900 *p
++ = '\\', *p
++ = 't';
902 *p
++ = '\\', *p
++ = 'n';
904 *p
++ = '\\', *p
++ = 'r';
905 else if (c
< ' ' || c
>= 0x7f) {
906 /* For performance, we don't want to call
907 PyOS_snprintf here (extra layers of
909 sprintf(p
, "\\x%02x", c
& 0xff);
915 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 1);
919 &v
, (p
- PyString_AS_STRING(v
)));
925 string_repr(PyObject
*op
)
927 return PyString_Repr(op
, 1);
931 string_str(PyObject
*s
)
933 assert(PyString_Check(s
));
934 if (PyString_CheckExact(s
)) {
939 /* Subtype -- return genuine string with the same value. */
940 PyStringObject
*t
= (PyStringObject
*) s
;
941 return PyString_FromStringAndSize(t
->ob_sval
, Py_SIZE(t
));
946 string_length(PyStringObject
*a
)
952 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
954 register Py_ssize_t size
;
955 register PyStringObject
*op
;
956 if (!PyString_Check(bb
)) {
957 #ifdef Py_USING_UNICODE
958 if (PyUnicode_Check(bb
))
959 return PyUnicode_Concat((PyObject
*)a
, bb
);
961 if (PyByteArray_Check(bb
))
962 return PyByteArray_Concat((PyObject
*)a
, bb
);
963 PyErr_Format(PyExc_TypeError
,
964 "cannot concatenate 'str' and '%.200s' objects",
965 Py_TYPE(bb
)->tp_name
);
968 #define b ((PyStringObject *)bb)
969 /* Optimize cases with empty left or right operand */
970 if ((Py_SIZE(a
) == 0 || Py_SIZE(b
) == 0) &&
971 PyString_CheckExact(a
) && PyString_CheckExact(b
)) {
972 if (Py_SIZE(a
) == 0) {
977 return (PyObject
*)a
;
979 size
= Py_SIZE(a
) + Py_SIZE(b
);
981 PyErr_SetString(PyExc_OverflowError
,
982 "strings are too large to concat");
986 /* Inline PyObject_NewVar */
987 op
= (PyStringObject
*)PyObject_MALLOC(sizeof(PyStringObject
) + size
);
989 return PyErr_NoMemory();
990 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
992 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
993 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
994 Py_MEMCPY(op
->ob_sval
+ Py_SIZE(a
), b
->ob_sval
, Py_SIZE(b
));
995 op
->ob_sval
[size
] = '\0';
996 return (PyObject
*) op
;
1001 string_repeat(register PyStringObject
*a
, register Py_ssize_t n
)
1003 register Py_ssize_t i
;
1004 register Py_ssize_t j
;
1005 register Py_ssize_t size
;
1006 register PyStringObject
*op
;
1010 /* watch out for overflows: the size can overflow int,
1011 * and the # of bytes needed can overflow size_t
1013 size
= Py_SIZE(a
) * n
;
1014 if (n
&& size
/ n
!= Py_SIZE(a
)) {
1015 PyErr_SetString(PyExc_OverflowError
,
1016 "repeated string is too long");
1019 if (size
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1021 return (PyObject
*)a
;
1023 nbytes
= (size_t)size
;
1024 if (nbytes
+ sizeof(PyStringObject
) <= nbytes
) {
1025 PyErr_SetString(PyExc_OverflowError
,
1026 "repeated string is too long");
1029 op
= (PyStringObject
*)
1030 PyObject_MALLOC(sizeof(PyStringObject
) + nbytes
);
1032 return PyErr_NoMemory();
1033 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1035 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1036 op
->ob_sval
[size
] = '\0';
1037 if (Py_SIZE(a
) == 1 && n
> 0) {
1038 memset(op
->ob_sval
, a
->ob_sval
[0] , n
);
1039 return (PyObject
*) op
;
1043 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1047 j
= (i
<= size
-i
) ? i
: size
-i
;
1048 Py_MEMCPY(op
->ob_sval
+i
, op
->ob_sval
, j
);
1051 return (PyObject
*) op
;
1054 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1057 string_slice(register PyStringObject
*a
, register Py_ssize_t i
,
1058 register Py_ssize_t j
)
1059 /* j -- may be negative! */
1064 j
= 0; /* Avoid signed/unsigned bug in next line */
1067 if (i
== 0 && j
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1068 /* It's the same as a */
1070 return (PyObject
*)a
;
1074 return PyString_FromStringAndSize(a
->ob_sval
+ i
, j
-i
);
1078 string_contains(PyObject
*str_obj
, PyObject
*sub_obj
)
1080 if (!PyString_CheckExact(sub_obj
)) {
1081 #ifdef Py_USING_UNICODE
1082 if (PyUnicode_Check(sub_obj
))
1083 return PyUnicode_Contains(str_obj
, sub_obj
);
1085 if (!PyString_Check(sub_obj
)) {
1086 PyErr_Format(PyExc_TypeError
,
1087 "'in <string>' requires string as left operand, "
1088 "not %.200s", Py_TYPE(sub_obj
)->tp_name
);
1093 return stringlib_contains_obj(str_obj
, sub_obj
);
1097 string_item(PyStringObject
*a
, register Py_ssize_t i
)
1101 if (i
< 0 || i
>= Py_SIZE(a
)) {
1102 PyErr_SetString(PyExc_IndexError
, "string index out of range");
1105 pchar
= a
->ob_sval
[i
];
1106 v
= (PyObject
*)characters
[pchar
& UCHAR_MAX
];
1108 v
= PyString_FromStringAndSize(&pchar
, 1);
1119 string_richcompare(PyStringObject
*a
, PyStringObject
*b
, int op
)
1122 Py_ssize_t len_a
, len_b
;
1126 /* Make sure both arguments are strings. */
1127 if (!(PyString_Check(a
) && PyString_Check(b
))) {
1128 result
= Py_NotImplemented
;
1133 case Py_EQ
:case Py_LE
:case Py_GE
:
1136 case Py_NE
:case Py_LT
:case Py_GT
:
1142 /* Supporting Py_NE here as well does not save
1143 much time, since Py_NE is rarely used. */
1144 if (Py_SIZE(a
) == Py_SIZE(b
)
1145 && (a
->ob_sval
[0] == b
->ob_sval
[0]
1146 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0)) {
1153 len_a
= Py_SIZE(a
); len_b
= Py_SIZE(b
);
1154 min_len
= (len_a
< len_b
) ? len_a
: len_b
;
1156 c
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
1158 c
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
1162 c
= (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
1164 case Py_LT
: c
= c
< 0; break;
1165 case Py_LE
: c
= c
<= 0; break;
1166 case Py_EQ
: assert(0); break; /* unreachable */
1167 case Py_NE
: c
= c
!= 0; break;
1168 case Py_GT
: c
= c
> 0; break;
1169 case Py_GE
: c
= c
>= 0; break;
1171 result
= Py_NotImplemented
;
1174 result
= c
? Py_True
: Py_False
;
1181 _PyString_Eq(PyObject
*o1
, PyObject
*o2
)
1183 PyStringObject
*a
= (PyStringObject
*) o1
;
1184 PyStringObject
*b
= (PyStringObject
*) o2
;
1185 return Py_SIZE(a
) == Py_SIZE(b
)
1186 && *a
->ob_sval
== *b
->ob_sval
1187 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0;
1191 string_hash(PyStringObject
*a
)
1193 register Py_ssize_t len
;
1194 register unsigned char *p
;
1197 if (a
->ob_shash
!= -1)
1200 p
= (unsigned char *) a
->ob_sval
;
1203 x
= (1000003*x
) ^ *p
++;
1212 string_subscript(PyStringObject
* self
, PyObject
* item
)
1214 if (PyIndex_Check(item
)) {
1215 Py_ssize_t i
= PyNumber_AsSsize_t(item
, PyExc_IndexError
);
1216 if (i
== -1 && PyErr_Occurred())
1219 i
+= PyString_GET_SIZE(self
);
1220 return string_item(self
, i
);
1222 else if (PySlice_Check(item
)) {
1223 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
1228 if (PySlice_GetIndicesEx((PySliceObject
*)item
,
1229 PyString_GET_SIZE(self
),
1230 &start
, &stop
, &step
, &slicelength
) < 0) {
1234 if (slicelength
<= 0) {
1235 return PyString_FromStringAndSize("", 0);
1237 else if (start
== 0 && step
== 1 &&
1238 slicelength
== PyString_GET_SIZE(self
) &&
1239 PyString_CheckExact(self
)) {
1241 return (PyObject
*)self
;
1243 else if (step
== 1) {
1244 return PyString_FromStringAndSize(
1245 PyString_AS_STRING(self
) + start
,
1249 source_buf
= PyString_AsString((PyObject
*)self
);
1250 result_buf
= (char *)PyMem_Malloc(slicelength
);
1251 if (result_buf
== NULL
)
1252 return PyErr_NoMemory();
1254 for (cur
= start
, i
= 0; i
< slicelength
;
1256 result_buf
[i
] = source_buf
[cur
];
1259 result
= PyString_FromStringAndSize(result_buf
,
1261 PyMem_Free(result_buf
);
1266 PyErr_Format(PyExc_TypeError
,
1267 "string indices must be integers, not %.200s",
1268 Py_TYPE(item
)->tp_name
);
1274 string_buffer_getreadbuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1277 PyErr_SetString(PyExc_SystemError
,
1278 "accessing non-existent string segment");
1281 *ptr
= (void *)self
->ob_sval
;
1282 return Py_SIZE(self
);
1286 string_buffer_getwritebuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1288 PyErr_SetString(PyExc_TypeError
,
1289 "Cannot use string as modifiable buffer");
1294 string_buffer_getsegcount(PyStringObject
*self
, Py_ssize_t
*lenp
)
1297 *lenp
= Py_SIZE(self
);
1302 string_buffer_getcharbuf(PyStringObject
*self
, Py_ssize_t index
, const char **ptr
)
1305 PyErr_SetString(PyExc_SystemError
,
1306 "accessing non-existent string segment");
1309 *ptr
= self
->ob_sval
;
1310 return Py_SIZE(self
);
1314 string_buffer_getbuffer(PyStringObject
*self
, Py_buffer
*view
, int flags
)
1316 return PyBuffer_FillInfo(view
, (void *)self
->ob_sval
, Py_SIZE(self
),
1320 static PySequenceMethods string_as_sequence
= {
1321 (lenfunc
)string_length
, /*sq_length*/
1322 (binaryfunc
)string_concat
, /*sq_concat*/
1323 (ssizeargfunc
)string_repeat
, /*sq_repeat*/
1324 (ssizeargfunc
)string_item
, /*sq_item*/
1325 (ssizessizeargfunc
)string_slice
, /*sq_slice*/
1328 (objobjproc
)string_contains
/*sq_contains*/
1331 static PyMappingMethods string_as_mapping
= {
1332 (lenfunc
)string_length
,
1333 (binaryfunc
)string_subscript
,
1337 static PyBufferProcs string_as_buffer
= {
1338 (readbufferproc
)string_buffer_getreadbuf
,
1339 (writebufferproc
)string_buffer_getwritebuf
,
1340 (segcountproc
)string_buffer_getsegcount
,
1341 (charbufferproc
)string_buffer_getcharbuf
,
1342 (getbufferproc
)string_buffer_getbuffer
,
1349 #define RIGHTSTRIP 1
1352 /* Arrays indexed by above */
1353 static const char *stripformat
[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1355 #define STRIPNAME(i) (stripformat[i]+3)
1358 /* Don't call if length < 2 */
1359 #define Py_STRING_MATCH(target, offset, pattern, length) \
1360 (target[offset] == pattern[0] && \
1361 target[offset+length-1] == pattern[length-1] && \
1362 !memcmp(target+offset+1, pattern+1, length-2) )
1365 /* Overallocate the initial list to reduce the number of reallocs for small
1366 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1367 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1368 text (roughly 11 words per line) and field delimited data (usually 1-10
1369 fields). For large strings the split algorithms are bandwidth limited
1370 so increasing the preallocation likely will not improve things.*/
1372 #define MAX_PREALLOC 12
1374 /* 5 splits gives 6 elements */
1375 #define PREALLOC_SIZE(maxsplit) \
1376 (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
1378 #define SPLIT_APPEND(data, left, right) \
1379 str = PyString_FromStringAndSize((data) + (left), \
1380 (right) - (left)); \
1383 if (PyList_Append(list, str)) { \
1390 #define SPLIT_ADD(data, left, right) { \
1391 str = PyString_FromStringAndSize((data) + (left), \
1392 (right) - (left)); \
1395 if (count < MAX_PREALLOC) { \
1396 PyList_SET_ITEM(list, count, str); \
1398 if (PyList_Append(list, str)) { \
1407 /* Always force the list to the expected size. */
1408 #define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
1410 #define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
1411 #define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
1412 #define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
1413 #define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1415 Py_LOCAL_INLINE(PyObject
*)
1416 split_whitespace(PyStringObject
*self
, Py_ssize_t len
, Py_ssize_t maxsplit
)
1418 const char *s
= PyString_AS_STRING(self
);
1419 Py_ssize_t i
, j
, count
=0;
1421 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1428 while (maxsplit
-- > 0) {
1429 SKIP_SPACE(s
, i
, len
);
1432 SKIP_NONSPACE(s
, i
, len
);
1433 if (j
== 0 && i
== len
&& PyString_CheckExact(self
)) {
1434 /* No whitespace in self, so just use it as list[0] */
1436 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1444 /* Only occurs when maxsplit was reached */
1445 /* Skip any remaining whitespace and copy to end of string */
1446 SKIP_SPACE(s
, i
, len
);
1448 SPLIT_ADD(s
, i
, len
);
1450 FIX_PREALLOC_SIZE(list
);
1457 Py_LOCAL_INLINE(PyObject
*)
1458 split_char(PyStringObject
*self
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
1460 const char *s
= PyString_AS_STRING(self
);
1461 register Py_ssize_t i
, j
, count
=0;
1463 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
1469 while ((j
< len
) && (maxcount
-- > 0)) {
1471 /* I found that using memchr makes no difference */
1479 if (i
== 0 && count
== 0 && PyString_CheckExact(self
)) {
1480 /* ch not in self, so just use self as list[0] */
1482 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1485 else if (i
<= len
) {
1486 SPLIT_ADD(s
, i
, len
);
1488 FIX_PREALLOC_SIZE(list
);
1496 PyDoc_STRVAR(split__doc__
,
1497 "S.split([sep [,maxsplit]]) -> list of strings\n\
1499 Return a list of the words in the string S, using sep as the\n\
1500 delimiter string. If maxsplit is given, at most maxsplit\n\
1501 splits are done. If sep is not specified or is None, any\n\
1502 whitespace string is a separator and empty strings are removed\n\
1506 string_split(PyStringObject
*self
, PyObject
*args
)
1508 Py_ssize_t len
= PyString_GET_SIZE(self
), n
, i
, j
;
1509 Py_ssize_t maxsplit
= -1, count
=0;
1510 const char *s
= PyString_AS_STRING(self
), *sub
;
1511 PyObject
*list
, *str
, *subobj
= Py_None
;
1516 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
1519 maxsplit
= PY_SSIZE_T_MAX
;
1520 if (subobj
== Py_None
)
1521 return split_whitespace(self
, len
, maxsplit
);
1522 if (PyString_Check(subobj
)) {
1523 sub
= PyString_AS_STRING(subobj
);
1524 n
= PyString_GET_SIZE(subobj
);
1526 #ifdef Py_USING_UNICODE
1527 else if (PyUnicode_Check(subobj
))
1528 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
1530 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1534 PyErr_SetString(PyExc_ValueError
, "empty separator");
1538 return split_char(self
, len
, sub
[0], maxsplit
);
1540 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1546 while (maxsplit
-- > 0) {
1547 pos
= fastsearch(s
+i
, len
-i
, sub
, n
, FAST_SEARCH
);
1556 while ((j
+n
<= len
) && (maxsplit
-- > 0)) {
1557 for (; j
+n
<= len
; j
++) {
1558 if (Py_STRING_MATCH(s
, j
, sub
, n
)) {
1566 SPLIT_ADD(s
, i
, len
);
1567 FIX_PREALLOC_SIZE(list
);
1575 PyDoc_STRVAR(partition__doc__
,
1576 "S.partition(sep) -> (head, sep, tail)\n\
1578 Searches for the separator sep in S, and returns the part before it,\n\
1579 the separator itself, and the part after it. If the separator is not\n\
1580 found, returns S and two empty strings.");
1583 string_partition(PyStringObject
*self
, PyObject
*sep_obj
)
1588 if (PyString_Check(sep_obj
)) {
1589 sep
= PyString_AS_STRING(sep_obj
);
1590 sep_len
= PyString_GET_SIZE(sep_obj
);
1592 #ifdef Py_USING_UNICODE
1593 else if (PyUnicode_Check(sep_obj
))
1594 return PyUnicode_Partition((PyObject
*) self
, sep_obj
);
1596 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1599 return stringlib_partition(
1601 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1602 sep_obj
, sep
, sep_len
1606 PyDoc_STRVAR(rpartition__doc__
,
1607 "S.rpartition(sep) -> (tail, sep, head)\n\
1609 Searches for the separator sep in S, starting at the end of S, and returns\n\
1610 the part before it, the separator itself, and the part after it. If the\n\
1611 separator is not found, returns two empty strings and S.");
1614 string_rpartition(PyStringObject
*self
, PyObject
*sep_obj
)
1619 if (PyString_Check(sep_obj
)) {
1620 sep
= PyString_AS_STRING(sep_obj
);
1621 sep_len
= PyString_GET_SIZE(sep_obj
);
1623 #ifdef Py_USING_UNICODE
1624 else if (PyUnicode_Check(sep_obj
))
1625 return PyUnicode_Partition((PyObject
*) self
, sep_obj
);
1627 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1630 return stringlib_rpartition(
1632 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1633 sep_obj
, sep
, sep_len
1637 Py_LOCAL_INLINE(PyObject
*)
1638 rsplit_whitespace(PyStringObject
*self
, Py_ssize_t len
, Py_ssize_t maxsplit
)
1640 const char *s
= PyString_AS_STRING(self
);
1641 Py_ssize_t i
, j
, count
=0;
1643 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1650 while (maxsplit
-- > 0) {
1654 RSKIP_NONSPACE(s
, i
);
1655 if (j
== len
-1 && i
< 0 && PyString_CheckExact(self
)) {
1656 /* No whitespace in self, so just use it as list[0] */
1658 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1662 SPLIT_ADD(s
, i
+ 1, j
+ 1);
1665 /* Only occurs when maxsplit was reached */
1666 /* Skip any remaining whitespace and copy to beginning of string */
1669 SPLIT_ADD(s
, 0, i
+ 1);
1672 FIX_PREALLOC_SIZE(list
);
1673 if (PyList_Reverse(list
) < 0)
1681 Py_LOCAL_INLINE(PyObject
*)
1682 rsplit_char(PyStringObject
*self
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
1684 const char *s
= PyString_AS_STRING(self
);
1685 register Py_ssize_t i
, j
, count
=0;
1687 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
1693 while ((i
>= 0) && (maxcount
-- > 0)) {
1694 for (; i
>= 0; i
--) {
1696 SPLIT_ADD(s
, i
+ 1, j
+ 1);
1702 if (i
< 0 && count
== 0 && PyString_CheckExact(self
)) {
1703 /* ch not in self, so just use self as list[0] */
1705 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1709 SPLIT_ADD(s
, 0, j
+ 1);
1711 FIX_PREALLOC_SIZE(list
);
1712 if (PyList_Reverse(list
) < 0)
1721 PyDoc_STRVAR(rsplit__doc__
,
1722 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1724 Return a list of the words in the string S, using sep as the\n\
1725 delimiter string, starting at the end of the string and working\n\
1726 to the front. If maxsplit is given, at most maxsplit splits are\n\
1727 done. If sep is not specified or is None, any whitespace string\n\
1731 string_rsplit(PyStringObject
*self
, PyObject
*args
)
1733 Py_ssize_t len
= PyString_GET_SIZE(self
), n
, i
, j
;
1734 Py_ssize_t maxsplit
= -1, count
=0;
1735 const char *s
, *sub
;
1736 PyObject
*list
, *str
, *subobj
= Py_None
;
1738 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
1741 maxsplit
= PY_SSIZE_T_MAX
;
1742 if (subobj
== Py_None
)
1743 return rsplit_whitespace(self
, len
, maxsplit
);
1744 if (PyString_Check(subobj
)) {
1745 sub
= PyString_AS_STRING(subobj
);
1746 n
= PyString_GET_SIZE(subobj
);
1748 #ifdef Py_USING_UNICODE
1749 else if (PyUnicode_Check(subobj
))
1750 return PyUnicode_RSplit((PyObject
*)self
, subobj
, maxsplit
);
1752 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1756 PyErr_SetString(PyExc_ValueError
, "empty separator");
1760 return rsplit_char(self
, len
, sub
[0], maxsplit
);
1762 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1769 s
= PyString_AS_STRING(self
);
1770 while ( (i
>= 0) && (maxsplit
-- > 0) ) {
1772 if (Py_STRING_MATCH(s
, i
, sub
, n
)) {
1773 SPLIT_ADD(s
, i
+ n
, j
);
1781 FIX_PREALLOC_SIZE(list
);
1782 if (PyList_Reverse(list
) < 0)
1792 PyDoc_STRVAR(join__doc__
,
1793 "S.join(sequence) -> string\n\
1795 Return a string which is the concatenation of the strings in the\n\
1796 sequence. The separator between elements is S.");
1799 string_join(PyStringObject
*self
, PyObject
*orig
)
1801 char *sep
= PyString_AS_STRING(self
);
1802 const Py_ssize_t seplen
= PyString_GET_SIZE(self
);
1803 PyObject
*res
= NULL
;
1805 Py_ssize_t seqlen
= 0;
1808 PyObject
*seq
, *item
;
1810 seq
= PySequence_Fast(orig
, "");
1815 seqlen
= PySequence_Size(seq
);
1818 return PyString_FromString("");
1821 item
= PySequence_Fast_GET_ITEM(seq
, 0);
1822 if (PyString_CheckExact(item
) || PyUnicode_CheckExact(item
)) {
1829 /* There are at least two things to join, or else we have a subclass
1830 * of the builtin types in the sequence.
1831 * Do a pre-pass to figure out the total amount of space we'll
1832 * need (sz), see whether any argument is absurd, and defer to
1833 * the Unicode join if appropriate.
1835 for (i
= 0; i
< seqlen
; i
++) {
1836 const size_t old_sz
= sz
;
1837 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1838 if (!PyString_Check(item
)){
1839 #ifdef Py_USING_UNICODE
1840 if (PyUnicode_Check(item
)) {
1841 /* Defer to Unicode join.
1842 * CAUTION: There's no guarantee that the
1843 * original sequence can be iterated over
1844 * again, so we must pass seq here.
1847 result
= PyUnicode_Join((PyObject
*)self
, seq
);
1852 PyErr_Format(PyExc_TypeError
,
1853 "sequence item %zd: expected string,"
1855 i
, Py_TYPE(item
)->tp_name
);
1859 sz
+= PyString_GET_SIZE(item
);
1862 if (sz
< old_sz
|| sz
> PY_SSIZE_T_MAX
) {
1863 PyErr_SetString(PyExc_OverflowError
,
1864 "join() result is too long for a Python string");
1870 /* Allocate result space. */
1871 res
= PyString_FromStringAndSize((char*)NULL
, sz
);
1877 /* Catenate everything. */
1878 p
= PyString_AS_STRING(res
);
1879 for (i
= 0; i
< seqlen
; ++i
) {
1881 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1882 n
= PyString_GET_SIZE(item
);
1883 Py_MEMCPY(p
, PyString_AS_STRING(item
), n
);
1885 if (i
< seqlen
- 1) {
1886 Py_MEMCPY(p
, sep
, seplen
);
1896 _PyString_Join(PyObject
*sep
, PyObject
*x
)
1898 assert(sep
!= NULL
&& PyString_Check(sep
));
1900 return string_join((PyStringObject
*)sep
, x
);
1903 Py_LOCAL_INLINE(void)
1904 string_adjust_indices(Py_ssize_t
*start
, Py_ssize_t
*end
, Py_ssize_t len
)
1918 Py_LOCAL_INLINE(Py_ssize_t
)
1919 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
1924 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1925 PyObject
*obj_start
=Py_None
, *obj_end
=Py_None
;
1927 if (!PyArg_ParseTuple(args
, "O|OO:find/rfind/index/rindex", &subobj
,
1928 &obj_start
, &obj_end
))
1930 /* To support None in "start" and "end" arguments, meaning
1931 the same as if they were not passed.
1933 if (obj_start
!= Py_None
)
1934 if (!_PyEval_SliceIndex(obj_start
, &start
))
1936 if (obj_end
!= Py_None
)
1937 if (!_PyEval_SliceIndex(obj_end
, &end
))
1940 if (PyString_Check(subobj
)) {
1941 sub
= PyString_AS_STRING(subobj
);
1942 sub_len
= PyString_GET_SIZE(subobj
);
1944 #ifdef Py_USING_UNICODE
1945 else if (PyUnicode_Check(subobj
))
1946 return PyUnicode_Find(
1947 (PyObject
*)self
, subobj
, start
, end
, dir
);
1949 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
1950 /* XXX - the "expected a character buffer object" message is pretty
1951 confusing for a non-expert. Remap to something else? */
1955 return stringlib_find_slice(
1956 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1957 sub
, sub_len
, start
, end
);
1959 return stringlib_rfind_slice(
1960 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1961 sub
, sub_len
, start
, end
);
1965 PyDoc_STRVAR(find__doc__
,
1966 "S.find(sub [,start [,end]]) -> int\n\
1968 Return the lowest index in S where substring sub is found,\n\
1969 such that sub is contained within s[start:end]. Optional\n\
1970 arguments start and end are interpreted as in slice notation.\n\
1972 Return -1 on failure.");
1975 string_find(PyStringObject
*self
, PyObject
*args
)
1977 Py_ssize_t result
= string_find_internal(self
, args
, +1);
1980 return PyInt_FromSsize_t(result
);
1984 PyDoc_STRVAR(index__doc__
,
1985 "S.index(sub [,start [,end]]) -> int\n\
1987 Like S.find() but raise ValueError when the substring is not found.");
1990 string_index(PyStringObject
*self
, PyObject
*args
)
1992 Py_ssize_t result
= string_find_internal(self
, args
, +1);
1996 PyErr_SetString(PyExc_ValueError
,
1997 "substring not found");
2000 return PyInt_FromSsize_t(result
);
2004 PyDoc_STRVAR(rfind__doc__
,
2005 "S.rfind(sub [,start [,end]]) -> int\n\
2007 Return the highest index in S where substring sub is found,\n\
2008 such that sub is contained within s[start:end]. Optional\n\
2009 arguments start and end are interpreted as in slice notation.\n\
2011 Return -1 on failure.");
2014 string_rfind(PyStringObject
*self
, PyObject
*args
)
2016 Py_ssize_t result
= string_find_internal(self
, args
, -1);
2019 return PyInt_FromSsize_t(result
);
2023 PyDoc_STRVAR(rindex__doc__
,
2024 "S.rindex(sub [,start [,end]]) -> int\n\
2026 Like S.rfind() but raise ValueError when the substring is not found.");
2029 string_rindex(PyStringObject
*self
, PyObject
*args
)
2031 Py_ssize_t result
= string_find_internal(self
, args
, -1);
2035 PyErr_SetString(PyExc_ValueError
,
2036 "substring not found");
2039 return PyInt_FromSsize_t(result
);
2043 Py_LOCAL_INLINE(PyObject
*)
2044 do_xstrip(PyStringObject
*self
, int striptype
, PyObject
*sepobj
)
2046 char *s
= PyString_AS_STRING(self
);
2047 Py_ssize_t len
= PyString_GET_SIZE(self
);
2048 char *sep
= PyString_AS_STRING(sepobj
);
2049 Py_ssize_t seplen
= PyString_GET_SIZE(sepobj
);
2053 if (striptype
!= RIGHTSTRIP
) {
2054 while (i
< len
&& memchr(sep
, Py_CHARMASK(s
[i
]), seplen
)) {
2060 if (striptype
!= LEFTSTRIP
) {
2063 } while (j
>= i
&& memchr(sep
, Py_CHARMASK(s
[j
]), seplen
));
2067 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
2069 return (PyObject
*)self
;
2072 return PyString_FromStringAndSize(s
+i
, j
-i
);
2076 Py_LOCAL_INLINE(PyObject
*)
2077 do_strip(PyStringObject
*self
, int striptype
)
2079 char *s
= PyString_AS_STRING(self
);
2080 Py_ssize_t len
= PyString_GET_SIZE(self
), i
, j
;
2083 if (striptype
!= RIGHTSTRIP
) {
2084 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
2090 if (striptype
!= LEFTSTRIP
) {
2093 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
2097 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
2099 return (PyObject
*)self
;
2102 return PyString_FromStringAndSize(s
+i
, j
-i
);
2106 Py_LOCAL_INLINE(PyObject
*)
2107 do_argstrip(PyStringObject
*self
, int striptype
, PyObject
*args
)
2109 PyObject
*sep
= NULL
;
2111 if (!PyArg_ParseTuple(args
, (char *)stripformat
[striptype
], &sep
))
2114 if (sep
!= NULL
&& sep
!= Py_None
) {
2115 if (PyString_Check(sep
))
2116 return do_xstrip(self
, striptype
, sep
);
2117 #ifdef Py_USING_UNICODE
2118 else if (PyUnicode_Check(sep
)) {
2119 PyObject
*uniself
= PyUnicode_FromObject((PyObject
*)self
);
2123 res
= _PyUnicode_XStrip((PyUnicodeObject
*)uniself
,
2129 PyErr_Format(PyExc_TypeError
,
2130 #ifdef Py_USING_UNICODE
2131 "%s arg must be None, str or unicode",
2133 "%s arg must be None or str",
2135 STRIPNAME(striptype
));
2139 return do_strip(self
, striptype
);
2143 PyDoc_STRVAR(strip__doc__
,
2144 "S.strip([chars]) -> string or unicode\n\
2146 Return a copy of the string S with leading and trailing\n\
2147 whitespace removed.\n\
2148 If chars is given and not None, remove characters in chars instead.\n\
2149 If chars is unicode, S will be converted to unicode before stripping");
2152 string_strip(PyStringObject
*self
, PyObject
*args
)
2154 if (PyTuple_GET_SIZE(args
) == 0)
2155 return do_strip(self
, BOTHSTRIP
); /* Common case */
2157 return do_argstrip(self
, BOTHSTRIP
, args
);
2161 PyDoc_STRVAR(lstrip__doc__
,
2162 "S.lstrip([chars]) -> string or unicode\n\
2164 Return a copy of the string S with leading whitespace removed.\n\
2165 If chars is given and not None, remove characters in chars instead.\n\
2166 If chars is unicode, S will be converted to unicode before stripping");
2169 string_lstrip(PyStringObject
*self
, PyObject
*args
)
2171 if (PyTuple_GET_SIZE(args
) == 0)
2172 return do_strip(self
, LEFTSTRIP
); /* Common case */
2174 return do_argstrip(self
, LEFTSTRIP
, args
);
2178 PyDoc_STRVAR(rstrip__doc__
,
2179 "S.rstrip([chars]) -> string or unicode\n\
2181 Return a copy of the string S with trailing whitespace removed.\n\
2182 If chars is given and not None, remove characters in chars instead.\n\
2183 If chars is unicode, S will be converted to unicode before stripping");
2186 string_rstrip(PyStringObject
*self
, PyObject
*args
)
2188 if (PyTuple_GET_SIZE(args
) == 0)
2189 return do_strip(self
, RIGHTSTRIP
); /* Common case */
2191 return do_argstrip(self
, RIGHTSTRIP
, args
);
2195 PyDoc_STRVAR(lower__doc__
,
2196 "S.lower() -> string\n\
2198 Return a copy of the string S converted to lowercase.");
2200 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2202 #define _tolower tolower
2206 string_lower(PyStringObject
*self
)
2209 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2212 newobj
= PyString_FromStringAndSize(NULL
, n
);
2216 s
= PyString_AS_STRING(newobj
);
2218 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2220 for (i
= 0; i
< n
; i
++) {
2221 int c
= Py_CHARMASK(s
[i
]);
2229 PyDoc_STRVAR(upper__doc__
,
2230 "S.upper() -> string\n\
2232 Return a copy of the string S converted to uppercase.");
2235 #define _toupper toupper
2239 string_upper(PyStringObject
*self
)
2242 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2245 newobj
= PyString_FromStringAndSize(NULL
, n
);
2249 s
= PyString_AS_STRING(newobj
);
2251 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2253 for (i
= 0; i
< n
; i
++) {
2254 int c
= Py_CHARMASK(s
[i
]);
2262 PyDoc_STRVAR(title__doc__
,
2263 "S.title() -> string\n\
2265 Return a titlecased version of S, i.e. words start with uppercase\n\
2266 characters, all remaining cased characters have lowercase.");
2269 string_title(PyStringObject
*self
)
2271 char *s
= PyString_AS_STRING(self
), *s_new
;
2272 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2273 int previous_is_cased
= 0;
2276 newobj
= PyString_FromStringAndSize(NULL
, n
);
2279 s_new
= PyString_AsString(newobj
);
2280 for (i
= 0; i
< n
; i
++) {
2281 int c
= Py_CHARMASK(*s
++);
2283 if (!previous_is_cased
)
2285 previous_is_cased
= 1;
2286 } else if (isupper(c
)) {
2287 if (previous_is_cased
)
2289 previous_is_cased
= 1;
2291 previous_is_cased
= 0;
2297 PyDoc_STRVAR(capitalize__doc__
,
2298 "S.capitalize() -> string\n\
2300 Return a copy of the string S with only its first character\n\
2304 string_capitalize(PyStringObject
*self
)
2306 char *s
= PyString_AS_STRING(self
), *s_new
;
2307 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2310 newobj
= PyString_FromStringAndSize(NULL
, n
);
2313 s_new
= PyString_AsString(newobj
);
2315 int c
= Py_CHARMASK(*s
++);
2317 *s_new
= toupper(c
);
2322 for (i
= 1; i
< n
; i
++) {
2323 int c
= Py_CHARMASK(*s
++);
2325 *s_new
= tolower(c
);
2334 PyDoc_STRVAR(count__doc__
,
2335 "S.count(sub[, start[, end]]) -> int\n\
2337 Return the number of non-overlapping occurrences of substring sub in\n\
2338 string S[start:end]. Optional arguments start and end are interpreted\n\
2339 as in slice notation.");
2342 string_count(PyStringObject
*self
, PyObject
*args
)
2345 const char *str
= PyString_AS_STRING(self
), *sub
;
2347 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
2349 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
2350 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2353 if (PyString_Check(sub_obj
)) {
2354 sub
= PyString_AS_STRING(sub_obj
);
2355 sub_len
= PyString_GET_SIZE(sub_obj
);
2357 #ifdef Py_USING_UNICODE
2358 else if (PyUnicode_Check(sub_obj
)) {
2360 count
= PyUnicode_Count((PyObject
*)self
, sub_obj
, start
, end
);
2364 return PyInt_FromSsize_t(count
);
2367 else if (PyObject_AsCharBuffer(sub_obj
, &sub
, &sub_len
))
2370 string_adjust_indices(&start
, &end
, PyString_GET_SIZE(self
));
2372 return PyInt_FromSsize_t(
2373 stringlib_count(str
+ start
, end
- start
, sub
, sub_len
)
2377 PyDoc_STRVAR(swapcase__doc__
,
2378 "S.swapcase() -> string\n\
2380 Return a copy of the string S with uppercase characters\n\
2381 converted to lowercase and vice versa.");
2384 string_swapcase(PyStringObject
*self
)
2386 char *s
= PyString_AS_STRING(self
), *s_new
;
2387 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2390 newobj
= PyString_FromStringAndSize(NULL
, n
);
2393 s_new
= PyString_AsString(newobj
);
2394 for (i
= 0; i
< n
; i
++) {
2395 int c
= Py_CHARMASK(*s
++);
2397 *s_new
= toupper(c
);
2399 else if (isupper(c
)) {
2400 *s_new
= tolower(c
);
2410 PyDoc_STRVAR(translate__doc__
,
2411 "S.translate(table [,deletechars]) -> string\n\
2413 Return a copy of the string S, where all characters occurring\n\
2414 in the optional argument deletechars are removed, and the\n\
2415 remaining characters have been mapped through the given\n\
2416 translation table, which must be a string of length 256.");
2419 string_translate(PyStringObject
*self
, PyObject
*args
)
2421 register char *input
, *output
;
2423 register Py_ssize_t i
, c
, changed
= 0;
2424 PyObject
*input_obj
= (PyObject
*)self
;
2425 const char *output_start
, *del_table
=NULL
;
2426 Py_ssize_t inlen
, tablen
, dellen
= 0;
2428 int trans_table
[256];
2429 PyObject
*tableobj
, *delobj
= NULL
;
2431 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
2432 &tableobj
, &delobj
))
2435 if (PyString_Check(tableobj
)) {
2436 table
= PyString_AS_STRING(tableobj
);
2437 tablen
= PyString_GET_SIZE(tableobj
);
2439 else if (tableobj
== Py_None
) {
2443 #ifdef Py_USING_UNICODE
2444 else if (PyUnicode_Check(tableobj
)) {
2445 /* Unicode .translate() does not support the deletechars
2446 parameter; instead a mapping to None will cause characters
2448 if (delobj
!= NULL
) {
2449 PyErr_SetString(PyExc_TypeError
,
2450 "deletions are implemented differently for unicode");
2453 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
2456 else if (PyObject_AsCharBuffer(tableobj
, &table
, &tablen
))
2459 if (tablen
!= 256) {
2460 PyErr_SetString(PyExc_ValueError
,
2461 "translation table must be 256 characters long");
2465 if (delobj
!= NULL
) {
2466 if (PyString_Check(delobj
)) {
2467 del_table
= PyString_AS_STRING(delobj
);
2468 dellen
= PyString_GET_SIZE(delobj
);
2470 #ifdef Py_USING_UNICODE
2471 else if (PyUnicode_Check(delobj
)) {
2472 PyErr_SetString(PyExc_TypeError
,
2473 "deletions are implemented differently for unicode");
2477 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
2485 inlen
= PyString_GET_SIZE(input_obj
);
2486 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
2489 output_start
= output
= PyString_AsString(result
);
2490 input
= PyString_AS_STRING(input_obj
);
2492 if (dellen
== 0 && table
!= NULL
) {
2493 /* If no deletions are required, use faster code */
2494 for (i
= inlen
; --i
>= 0; ) {
2495 c
= Py_CHARMASK(*input
++);
2496 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
2499 if (changed
|| !PyString_CheckExact(input_obj
))
2502 Py_INCREF(input_obj
);
2506 if (table
== NULL
) {
2507 for (i
= 0; i
< 256; i
++)
2508 trans_table
[i
] = Py_CHARMASK(i
);
2510 for (i
= 0; i
< 256; i
++)
2511 trans_table
[i
] = Py_CHARMASK(table
[i
]);
2514 for (i
= 0; i
< dellen
; i
++)
2515 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
2517 for (i
= inlen
; --i
>= 0; ) {
2518 c
= Py_CHARMASK(*input
++);
2519 if (trans_table
[c
] != -1)
2520 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
2524 if (!changed
&& PyString_CheckExact(input_obj
)) {
2526 Py_INCREF(input_obj
);
2529 /* Fix the size of the resulting string */
2531 _PyString_Resize(&result
, output
- output_start
);
2539 /* find and count characters and substrings */
2541 #define findchar(target, target_len, c) \
2542 ((char *)memchr((const void *)(target), c, target_len))
2544 /* String ops must return a string. */
2545 /* If the object is a subclass of string, create a copy */
2546 Py_LOCAL(PyStringObject
*)
2547 return_self(PyStringObject
*self
)
2549 if (PyString_CheckExact(self
)) {
2553 return (PyStringObject
*)PyString_FromStringAndSize(
2554 PyString_AS_STRING(self
),
2555 PyString_GET_SIZE(self
));
2558 Py_LOCAL_INLINE(Py_ssize_t
)
2559 countchar(const char *target
, int target_len
, char c
, Py_ssize_t maxcount
)
2562 const char *start
=target
;
2563 const char *end
=target
+target_len
;
2565 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
2567 if (count
>= maxcount
)
2574 Py_LOCAL(Py_ssize_t
)
2575 findstring(const char *target
, Py_ssize_t target_len
,
2576 const char *pattern
, Py_ssize_t pattern_len
,
2582 start
+= target_len
;
2586 if (end
> target_len
) {
2588 } else if (end
< 0) {
2594 /* zero-length substrings always match at the first attempt */
2595 if (pattern_len
== 0)
2596 return (direction
> 0) ? start
: end
;
2600 if (direction
< 0) {
2601 for (; end
>= start
; end
--)
2602 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
))
2605 for (; start
<= end
; start
++)
2606 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
))
2612 Py_LOCAL_INLINE(Py_ssize_t
)
2613 countstring(const char *target
, Py_ssize_t target_len
,
2614 const char *pattern
, Py_ssize_t pattern_len
,
2617 int direction
, Py_ssize_t maxcount
)
2622 start
+= target_len
;
2626 if (end
> target_len
) {
2628 } else if (end
< 0) {
2634 /* zero-length substrings match everywhere */
2635 if (pattern_len
== 0 || maxcount
== 0) {
2636 if (target_len
+1 < maxcount
)
2637 return target_len
+1;
2642 if (direction
< 0) {
2643 for (; (end
>= start
); end
--)
2644 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
)) {
2646 if (--maxcount
<= 0) break;
2647 end
-= pattern_len
-1;
2650 for (; (start
<= end
); start
++)
2651 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
)) {
2653 if (--maxcount
<= 0)
2655 start
+= pattern_len
-1;
2662 /* Algorithms for different cases of string replacement */
2664 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2665 Py_LOCAL(PyStringObject
*)
2666 replace_interleave(PyStringObject
*self
,
2667 const char *to_s
, Py_ssize_t to_len
,
2668 Py_ssize_t maxcount
)
2670 char *self_s
, *result_s
;
2671 Py_ssize_t self_len
, result_len
;
2672 Py_ssize_t count
, i
, product
;
2673 PyStringObject
*result
;
2675 self_len
= PyString_GET_SIZE(self
);
2677 /* 1 at the end plus 1 after every character */
2679 if (maxcount
< count
)
2682 /* Check for overflow */
2683 /* result_len = count * to_len + self_len; */
2684 product
= count
* to_len
;
2685 if (product
/ to_len
!= count
) {
2686 PyErr_SetString(PyExc_OverflowError
,
2687 "replace string is too long");
2690 result_len
= product
+ self_len
;
2691 if (result_len
< 0) {
2692 PyErr_SetString(PyExc_OverflowError
,
2693 "replace string is too long");
2697 if (! (result
= (PyStringObject
*)
2698 PyString_FromStringAndSize(NULL
, result_len
)) )
2701 self_s
= PyString_AS_STRING(self
);
2702 result_s
= PyString_AS_STRING(result
);
2704 /* TODO: special case single character, which doesn't need memcpy */
2706 /* Lay the first one down (guaranteed this will occur) */
2707 Py_MEMCPY(result_s
, to_s
, to_len
);
2711 for (i
=0; i
<count
; i
++) {
2712 *result_s
++ = *self_s
++;
2713 Py_MEMCPY(result_s
, to_s
, to_len
);
2717 /* Copy the rest of the original string */
2718 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
2723 /* Special case for deleting a single character */
2724 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2725 Py_LOCAL(PyStringObject
*)
2726 replace_delete_single_character(PyStringObject
*self
,
2727 char from_c
, Py_ssize_t maxcount
)
2729 char *self_s
, *result_s
;
2730 char *start
, *next
, *end
;
2731 Py_ssize_t self_len
, result_len
;
2733 PyStringObject
*result
;
2735 self_len
= PyString_GET_SIZE(self
);
2736 self_s
= PyString_AS_STRING(self
);
2738 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2740 return return_self(self
);
2743 result_len
= self_len
- count
; /* from_len == 1 */
2744 assert(result_len
>=0);
2746 if ( (result
= (PyStringObject
*)
2747 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2749 result_s
= PyString_AS_STRING(result
);
2752 end
= self_s
+ self_len
;
2753 while (count
-- > 0) {
2754 next
= findchar(start
, end
-start
, from_c
);
2757 Py_MEMCPY(result_s
, start
, next
-start
);
2758 result_s
+= (next
-start
);
2761 Py_MEMCPY(result_s
, start
, end
-start
);
2766 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2768 Py_LOCAL(PyStringObject
*)
2769 replace_delete_substring(PyStringObject
*self
,
2770 const char *from_s
, Py_ssize_t from_len
,
2771 Py_ssize_t maxcount
) {
2772 char *self_s
, *result_s
;
2773 char *start
, *next
, *end
;
2774 Py_ssize_t self_len
, result_len
;
2775 Py_ssize_t count
, offset
;
2776 PyStringObject
*result
;
2778 self_len
= PyString_GET_SIZE(self
);
2779 self_s
= PyString_AS_STRING(self
);
2781 count
= countstring(self_s
, self_len
,
2788 return return_self(self
);
2791 result_len
= self_len
- (count
* from_len
);
2792 assert (result_len
>=0);
2794 if ( (result
= (PyStringObject
*)
2795 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2798 result_s
= PyString_AS_STRING(result
);
2801 end
= self_s
+ self_len
;
2802 while (count
-- > 0) {
2803 offset
= findstring(start
, end
-start
,
2805 0, end
-start
, FORWARD
);
2808 next
= start
+ offset
;
2810 Py_MEMCPY(result_s
, start
, next
-start
);
2812 result_s
+= (next
-start
);
2813 start
= next
+from_len
;
2815 Py_MEMCPY(result_s
, start
, end
-start
);
2819 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2820 Py_LOCAL(PyStringObject
*)
2821 replace_single_character_in_place(PyStringObject
*self
,
2822 char from_c
, char to_c
,
2823 Py_ssize_t maxcount
)
2825 char *self_s
, *result_s
, *start
, *end
, *next
;
2826 Py_ssize_t self_len
;
2827 PyStringObject
*result
;
2829 /* The result string will be the same size */
2830 self_s
= PyString_AS_STRING(self
);
2831 self_len
= PyString_GET_SIZE(self
);
2833 next
= findchar(self_s
, self_len
, from_c
);
2836 /* No matches; return the original string */
2837 return return_self(self
);
2840 /* Need to make a new string */
2841 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2844 result_s
= PyString_AS_STRING(result
);
2845 Py_MEMCPY(result_s
, self_s
, self_len
);
2847 /* change everything in-place, starting with this one */
2848 start
= result_s
+ (next
-self_s
);
2851 end
= result_s
+ self_len
;
2853 while (--maxcount
> 0) {
2854 next
= findchar(start
, end
-start
, from_c
);
2864 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2865 Py_LOCAL(PyStringObject
*)
2866 replace_substring_in_place(PyStringObject
*self
,
2867 const char *from_s
, Py_ssize_t from_len
,
2868 const char *to_s
, Py_ssize_t to_len
,
2869 Py_ssize_t maxcount
)
2871 char *result_s
, *start
, *end
;
2873 Py_ssize_t self_len
, offset
;
2874 PyStringObject
*result
;
2876 /* The result string will be the same size */
2878 self_s
= PyString_AS_STRING(self
);
2879 self_len
= PyString_GET_SIZE(self
);
2881 offset
= findstring(self_s
, self_len
,
2883 0, self_len
, FORWARD
);
2885 /* No matches; return the original string */
2886 return return_self(self
);
2889 /* Need to make a new string */
2890 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2893 result_s
= PyString_AS_STRING(result
);
2894 Py_MEMCPY(result_s
, self_s
, self_len
);
2896 /* change everything in-place, starting with this one */
2897 start
= result_s
+ offset
;
2898 Py_MEMCPY(start
, to_s
, from_len
);
2900 end
= result_s
+ self_len
;
2902 while ( --maxcount
> 0) {
2903 offset
= findstring(start
, end
-start
,
2905 0, end
-start
, FORWARD
);
2908 Py_MEMCPY(start
+offset
, to_s
, from_len
);
2909 start
+= offset
+from_len
;
2915 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2916 Py_LOCAL(PyStringObject
*)
2917 replace_single_character(PyStringObject
*self
,
2919 const char *to_s
, Py_ssize_t to_len
,
2920 Py_ssize_t maxcount
)
2922 char *self_s
, *result_s
;
2923 char *start
, *next
, *end
;
2924 Py_ssize_t self_len
, result_len
;
2925 Py_ssize_t count
, product
;
2926 PyStringObject
*result
;
2928 self_s
= PyString_AS_STRING(self
);
2929 self_len
= PyString_GET_SIZE(self
);
2931 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2933 /* no matches, return unchanged */
2934 return return_self(self
);
2937 /* use the difference between current and new, hence the "-1" */
2938 /* result_len = self_len + count * (to_len-1) */
2939 product
= count
* (to_len
-1);
2940 if (product
/ (to_len
-1) != count
) {
2941 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2944 result_len
= self_len
+ product
;
2945 if (result_len
< 0) {
2946 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2950 if ( (result
= (PyStringObject
*)
2951 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2953 result_s
= PyString_AS_STRING(result
);
2956 end
= self_s
+ self_len
;
2957 while (count
-- > 0) {
2958 next
= findchar(start
, end
-start
, from_c
);
2962 if (next
== start
) {
2963 /* replace with the 'to' */
2964 Py_MEMCPY(result_s
, to_s
, to_len
);
2968 /* copy the unchanged old then the 'to' */
2969 Py_MEMCPY(result_s
, start
, next
-start
);
2970 result_s
+= (next
-start
);
2971 Py_MEMCPY(result_s
, to_s
, to_len
);
2976 /* Copy the remainder of the remaining string */
2977 Py_MEMCPY(result_s
, start
, end
-start
);
2982 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2983 Py_LOCAL(PyStringObject
*)
2984 replace_substring(PyStringObject
*self
,
2985 const char *from_s
, Py_ssize_t from_len
,
2986 const char *to_s
, Py_ssize_t to_len
,
2987 Py_ssize_t maxcount
) {
2988 char *self_s
, *result_s
;
2989 char *start
, *next
, *end
;
2990 Py_ssize_t self_len
, result_len
;
2991 Py_ssize_t count
, offset
, product
;
2992 PyStringObject
*result
;
2994 self_s
= PyString_AS_STRING(self
);
2995 self_len
= PyString_GET_SIZE(self
);
2997 count
= countstring(self_s
, self_len
,
2999 0, self_len
, FORWARD
, maxcount
);
3001 /* no matches, return unchanged */
3002 return return_self(self
);
3005 /* Check for overflow */
3006 /* result_len = self_len + count * (to_len-from_len) */
3007 product
= count
* (to_len
-from_len
);
3008 if (product
/ (to_len
-from_len
) != count
) {
3009 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
3012 result_len
= self_len
+ product
;
3013 if (result_len
< 0) {
3014 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
3018 if ( (result
= (PyStringObject
*)
3019 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
3021 result_s
= PyString_AS_STRING(result
);
3024 end
= self_s
+ self_len
;
3025 while (count
-- > 0) {
3026 offset
= findstring(start
, end
-start
,
3028 0, end
-start
, FORWARD
);
3031 next
= start
+offset
;
3032 if (next
== start
) {
3033 /* replace with the 'to' */
3034 Py_MEMCPY(result_s
, to_s
, to_len
);
3038 /* copy the unchanged old then the 'to' */
3039 Py_MEMCPY(result_s
, start
, next
-start
);
3040 result_s
+= (next
-start
);
3041 Py_MEMCPY(result_s
, to_s
, to_len
);
3043 start
= next
+from_len
;
3046 /* Copy the remainder of the remaining string */
3047 Py_MEMCPY(result_s
, start
, end
-start
);
3053 Py_LOCAL(PyStringObject
*)
3054 replace(PyStringObject
*self
,
3055 const char *from_s
, Py_ssize_t from_len
,
3056 const char *to_s
, Py_ssize_t to_len
,
3057 Py_ssize_t maxcount
)
3060 maxcount
= PY_SSIZE_T_MAX
;
3061 } else if (maxcount
== 0 || PyString_GET_SIZE(self
) == 0) {
3062 /* nothing to do; return the original string */
3063 return return_self(self
);
3066 if (maxcount
== 0 ||
3067 (from_len
== 0 && to_len
== 0)) {
3068 /* nothing to do; return the original string */
3069 return return_self(self
);
3072 /* Handle zero-length special cases */
3074 if (from_len
== 0) {
3075 /* insert the 'to' string everywhere. */
3076 /* >>> "Python".replace("", ".") */
3077 /* '.P.y.t.h.o.n.' */
3078 return replace_interleave(self
, to_s
, to_len
, maxcount
);
3081 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3082 /* point for an empty self string to generate a non-empty string */
3083 /* Special case so the remaining code always gets a non-empty string */
3084 if (PyString_GET_SIZE(self
) == 0) {
3085 return return_self(self
);
3089 /* delete all occurrences of 'from' string */
3090 if (from_len
== 1) {
3091 return replace_delete_single_character(
3092 self
, from_s
[0], maxcount
);
3094 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
3098 /* Handle special case where both strings have the same length */
3100 if (from_len
== to_len
) {
3101 if (from_len
== 1) {
3102 return replace_single_character_in_place(
3108 return replace_substring_in_place(
3109 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
3113 /* Otherwise use the more generic algorithms */
3114 if (from_len
== 1) {
3115 return replace_single_character(self
, from_s
[0],
3116 to_s
, to_len
, maxcount
);
3118 /* len('from')>=2, len('to')>=1 */
3119 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
3123 PyDoc_STRVAR(replace__doc__
,
3124 "S.replace (old, new[, count]) -> string\n\
3126 Return a copy of string S with all occurrences of substring\n\
3127 old replaced by new. If the optional argument count is\n\
3128 given, only the first count occurrences are replaced.");
3131 string_replace(PyStringObject
*self
, PyObject
*args
)
3133 Py_ssize_t count
= -1;
3134 PyObject
*from
, *to
;
3135 const char *from_s
, *to_s
;
3136 Py_ssize_t from_len
, to_len
;
3138 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
3141 if (PyString_Check(from
)) {
3142 from_s
= PyString_AS_STRING(from
);
3143 from_len
= PyString_GET_SIZE(from
);
3145 #ifdef Py_USING_UNICODE
3146 if (PyUnicode_Check(from
))
3147 return PyUnicode_Replace((PyObject
*)self
,
3150 else if (PyObject_AsCharBuffer(from
, &from_s
, &from_len
))
3153 if (PyString_Check(to
)) {
3154 to_s
= PyString_AS_STRING(to
);
3155 to_len
= PyString_GET_SIZE(to
);
3157 #ifdef Py_USING_UNICODE
3158 else if (PyUnicode_Check(to
))
3159 return PyUnicode_Replace((PyObject
*)self
,
3162 else if (PyObject_AsCharBuffer(to
, &to_s
, &to_len
))
3165 return (PyObject
*)replace((PyStringObject
*) self
,
3167 to_s
, to_len
, count
);
3172 /* Matches the end (direction >= 0) or start (direction < 0) of self
3173 * against substr, using the start and end arguments. Returns
3174 * -1 on error, 0 if not found and 1 if found.
3177 _string_tailmatch(PyStringObject
*self
, PyObject
*substr
, Py_ssize_t start
,
3178 Py_ssize_t end
, int direction
)
3180 Py_ssize_t len
= PyString_GET_SIZE(self
);
3185 if (PyString_Check(substr
)) {
3186 sub
= PyString_AS_STRING(substr
);
3187 slen
= PyString_GET_SIZE(substr
);
3189 #ifdef Py_USING_UNICODE
3190 else if (PyUnicode_Check(substr
))
3191 return PyUnicode_Tailmatch((PyObject
*)self
,
3192 substr
, start
, end
, direction
);
3194 else if (PyObject_AsCharBuffer(substr
, &sub
, &slen
))
3196 str
= PyString_AS_STRING(self
);
3198 string_adjust_indices(&start
, &end
, len
);
3200 if (direction
< 0) {
3202 if (start
+slen
> len
)
3206 if (end
-start
< slen
|| start
> len
)
3209 if (end
-slen
> start
)
3212 if (end
-start
>= slen
)
3213 return ! memcmp(str
+start
, sub
, slen
);
3218 PyDoc_STRVAR(startswith__doc__
,
3219 "S.startswith(prefix[, start[, end]]) -> bool\n\
3221 Return True if S starts with the specified prefix, False otherwise.\n\
3222 With optional start, test S beginning at that position.\n\
3223 With optional end, stop comparing S at that position.\n\
3224 prefix can also be a tuple of strings to try.");
3227 string_startswith(PyStringObject
*self
, PyObject
*args
)
3229 Py_ssize_t start
= 0;
3230 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3234 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
3235 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
3237 if (PyTuple_Check(subobj
)) {
3239 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
3240 result
= _string_tailmatch(self
,
3241 PyTuple_GET_ITEM(subobj
, i
),
3251 result
= _string_tailmatch(self
, subobj
, start
, end
, -1);
3255 return PyBool_FromLong(result
);
3259 PyDoc_STRVAR(endswith__doc__
,
3260 "S.endswith(suffix[, start[, end]]) -> bool\n\
3262 Return True if S ends with the specified suffix, False otherwise.\n\
3263 With optional start, test S beginning at that position.\n\
3264 With optional end, stop comparing S at that position.\n\
3265 suffix can also be a tuple of strings to try.");
3268 string_endswith(PyStringObject
*self
, PyObject
*args
)
3270 Py_ssize_t start
= 0;
3271 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3275 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
3276 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
3278 if (PyTuple_Check(subobj
)) {
3280 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
3281 result
= _string_tailmatch(self
,
3282 PyTuple_GET_ITEM(subobj
, i
),
3292 result
= _string_tailmatch(self
, subobj
, start
, end
, +1);
3296 return PyBool_FromLong(result
);
3300 PyDoc_STRVAR(encode__doc__
,
3301 "S.encode([encoding[,errors]]) -> object\n\
3303 Encodes S using the codec registered for encoding. encoding defaults\n\
3304 to the default encoding. errors may be given to set a different error\n\
3305 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3306 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3307 'xmlcharrefreplace' as well as any other name registered with\n\
3308 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3311 string_encode(PyStringObject
*self
, PyObject
*args
)
3313 char *encoding
= NULL
;
3314 char *errors
= NULL
;
3317 if (!PyArg_ParseTuple(args
, "|ss:encode", &encoding
, &errors
))
3319 v
= PyString_AsEncodedObject((PyObject
*)self
, encoding
, errors
);
3322 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3323 PyErr_Format(PyExc_TypeError
,
3324 "encoder did not return a string/unicode object "
3326 Py_TYPE(v
)->tp_name
);
3337 PyDoc_STRVAR(decode__doc__
,
3338 "S.decode([encoding[,errors]]) -> object\n\
3340 Decodes S using the codec registered for encoding. encoding defaults\n\
3341 to the default encoding. errors may be given to set a different error\n\
3342 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3343 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3344 as well as any other name registerd with codecs.register_error that is\n\
3345 able to handle UnicodeDecodeErrors.");
3348 string_decode(PyStringObject
*self
, PyObject
*args
)
3350 char *encoding
= NULL
;
3351 char *errors
= NULL
;
3354 if (!PyArg_ParseTuple(args
, "|ss:decode", &encoding
, &errors
))
3356 v
= PyString_AsDecodedObject((PyObject
*)self
, encoding
, errors
);
3359 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3360 PyErr_Format(PyExc_TypeError
,
3361 "decoder did not return a string/unicode object "
3363 Py_TYPE(v
)->tp_name
);
3374 PyDoc_STRVAR(expandtabs__doc__
,
3375 "S.expandtabs([tabsize]) -> string\n\
3377 Return a copy of S where all tab characters are expanded using spaces.\n\
3378 If tabsize is not given, a tab size of 8 characters is assumed.");
3381 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
3383 const char *e
, *p
, *qe
;
3385 Py_ssize_t i
, j
, incr
;
3389 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
3392 /* First pass: determine size of output string */
3393 i
= 0; /* chars up to and including most recent \n or \r */
3394 j
= 0; /* chars since most recent \n or \r (use in tab calculations) */
3395 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
); /* end of input */
3396 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3399 incr
= tabsize
- (j
% tabsize
);
3400 if (j
> PY_SSIZE_T_MAX
- incr
)
3406 if (j
> PY_SSIZE_T_MAX
- 1)
3409 if (*p
== '\n' || *p
== '\r') {
3410 if (i
> PY_SSIZE_T_MAX
- j
)
3417 if (i
> PY_SSIZE_T_MAX
- j
)
3420 /* Second pass: create output string and fill it */
3421 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
3425 j
= 0; /* same as in first pass */
3426 q
= PyString_AS_STRING(u
); /* next output char */
3427 qe
= PyString_AS_STRING(u
) + PyString_GET_SIZE(u
); /* end of output */
3429 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3432 i
= tabsize
- (j
% tabsize
);
3446 if (*p
== '\n' || *p
== '\r')
3455 PyErr_SetString(PyExc_OverflowError
, "new string is too long");
3459 Py_LOCAL_INLINE(PyObject
*)
3460 pad(PyStringObject
*self
, Py_ssize_t left
, Py_ssize_t right
, char fill
)
3469 if (left
== 0 && right
== 0 && PyString_CheckExact(self
)) {
3471 return (PyObject
*)self
;
3474 u
= PyString_FromStringAndSize(NULL
,
3475 left
+ PyString_GET_SIZE(self
) + right
);
3478 memset(PyString_AS_STRING(u
), fill
, left
);
3479 Py_MEMCPY(PyString_AS_STRING(u
) + left
,
3480 PyString_AS_STRING(self
),
3481 PyString_GET_SIZE(self
));
3483 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
3490 PyDoc_STRVAR(ljust__doc__
,
3491 "S.ljust(width[, fillchar]) -> string\n"
3493 "Return S left justified in a string of length width. Padding is\n"
3494 "done using the specified fill character (default is a space).");
3497 string_ljust(PyStringObject
*self
, PyObject
*args
)
3500 char fillchar
= ' ';
3502 if (!PyArg_ParseTuple(args
, "n|c:ljust", &width
, &fillchar
))
3505 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3507 return (PyObject
*) self
;
3510 return pad(self
, 0, width
- PyString_GET_SIZE(self
), fillchar
);
3514 PyDoc_STRVAR(rjust__doc__
,
3515 "S.rjust(width[, fillchar]) -> string\n"
3517 "Return S right justified in a string of length width. Padding is\n"
3518 "done using the specified fill character (default is a space)");
3521 string_rjust(PyStringObject
*self
, PyObject
*args
)
3524 char fillchar
= ' ';
3526 if (!PyArg_ParseTuple(args
, "n|c:rjust", &width
, &fillchar
))
3529 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3531 return (PyObject
*) self
;
3534 return pad(self
, width
- PyString_GET_SIZE(self
), 0, fillchar
);
3538 PyDoc_STRVAR(center__doc__
,
3539 "S.center(width[, fillchar]) -> string\n"
3541 "Return S centered in a string of length width. Padding is\n"
3542 "done using the specified fill character (default is a space)");
3545 string_center(PyStringObject
*self
, PyObject
*args
)
3547 Py_ssize_t marg
, left
;
3549 char fillchar
= ' ';
3551 if (!PyArg_ParseTuple(args
, "n|c:center", &width
, &fillchar
))
3554 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3556 return (PyObject
*) self
;
3559 marg
= width
- PyString_GET_SIZE(self
);
3560 left
= marg
/ 2 + (marg
& width
& 1);
3562 return pad(self
, left
, marg
- left
, fillchar
);
3565 PyDoc_STRVAR(zfill__doc__
,
3566 "S.zfill(width) -> string\n"
3568 "Pad a numeric string S with zeros on the left, to fill a field\n"
3569 "of the specified width. The string S is never truncated.");
3572 string_zfill(PyStringObject
*self
, PyObject
*args
)
3579 if (!PyArg_ParseTuple(args
, "n:zfill", &width
))
3582 if (PyString_GET_SIZE(self
) >= width
) {
3583 if (PyString_CheckExact(self
)) {
3585 return (PyObject
*) self
;
3588 return PyString_FromStringAndSize(
3589 PyString_AS_STRING(self
),
3590 PyString_GET_SIZE(self
)
3594 fill
= width
- PyString_GET_SIZE(self
);
3596 s
= pad(self
, fill
, 0, '0');
3601 p
= PyString_AS_STRING(s
);
3602 if (p
[fill
] == '+' || p
[fill
] == '-') {
3603 /* move sign to beginning of string */
3608 return (PyObject
*) s
;
3611 PyDoc_STRVAR(isspace__doc__
,
3612 "S.isspace() -> bool\n\
3614 Return True if all characters in S are whitespace\n\
3615 and there is at least one character in S, False otherwise.");
3618 string_isspace(PyStringObject
*self
)
3620 register const unsigned char *p
3621 = (unsigned char *) PyString_AS_STRING(self
);
3622 register const unsigned char *e
;
3624 /* Shortcut for single character strings */
3625 if (PyString_GET_SIZE(self
) == 1 &&
3627 return PyBool_FromLong(1);
3629 /* Special case for empty strings */
3630 if (PyString_GET_SIZE(self
) == 0)
3631 return PyBool_FromLong(0);
3633 e
= p
+ PyString_GET_SIZE(self
);
3634 for (; p
< e
; p
++) {
3636 return PyBool_FromLong(0);
3638 return PyBool_FromLong(1);
3642 PyDoc_STRVAR(isalpha__doc__
,
3643 "S.isalpha() -> bool\n\
3645 Return True if all characters in S are alphabetic\n\
3646 and there is at least one character in S, False otherwise.");
3649 string_isalpha(PyStringObject
*self
)
3651 register const unsigned char *p
3652 = (unsigned char *) PyString_AS_STRING(self
);
3653 register const unsigned char *e
;
3655 /* Shortcut for single character strings */
3656 if (PyString_GET_SIZE(self
) == 1 &&
3658 return PyBool_FromLong(1);
3660 /* Special case for empty strings */
3661 if (PyString_GET_SIZE(self
) == 0)
3662 return PyBool_FromLong(0);
3664 e
= p
+ PyString_GET_SIZE(self
);
3665 for (; p
< e
; p
++) {
3667 return PyBool_FromLong(0);
3669 return PyBool_FromLong(1);
3673 PyDoc_STRVAR(isalnum__doc__
,
3674 "S.isalnum() -> bool\n\
3676 Return True if all characters in S are alphanumeric\n\
3677 and there is at least one character in S, False otherwise.");
3680 string_isalnum(PyStringObject
*self
)
3682 register const unsigned char *p
3683 = (unsigned char *) PyString_AS_STRING(self
);
3684 register const unsigned char *e
;
3686 /* Shortcut for single character strings */
3687 if (PyString_GET_SIZE(self
) == 1 &&
3689 return PyBool_FromLong(1);
3691 /* Special case for empty strings */
3692 if (PyString_GET_SIZE(self
) == 0)
3693 return PyBool_FromLong(0);
3695 e
= p
+ PyString_GET_SIZE(self
);
3696 for (; p
< e
; p
++) {
3698 return PyBool_FromLong(0);
3700 return PyBool_FromLong(1);
3704 PyDoc_STRVAR(isdigit__doc__
,
3705 "S.isdigit() -> bool\n\
3707 Return True if all characters in S are digits\n\
3708 and there is at least one character in S, False otherwise.");
3711 string_isdigit(PyStringObject
*self
)
3713 register const unsigned char *p
3714 = (unsigned char *) PyString_AS_STRING(self
);
3715 register const unsigned char *e
;
3717 /* Shortcut for single character strings */
3718 if (PyString_GET_SIZE(self
) == 1 &&
3720 return PyBool_FromLong(1);
3722 /* Special case for empty strings */
3723 if (PyString_GET_SIZE(self
) == 0)
3724 return PyBool_FromLong(0);
3726 e
= p
+ PyString_GET_SIZE(self
);
3727 for (; p
< e
; p
++) {
3729 return PyBool_FromLong(0);
3731 return PyBool_FromLong(1);
3735 PyDoc_STRVAR(islower__doc__
,
3736 "S.islower() -> bool\n\
3738 Return True if all cased characters in S are lowercase and there is\n\
3739 at least one cased character in S, False otherwise.");
3742 string_islower(PyStringObject
*self
)
3744 register const unsigned char *p
3745 = (unsigned char *) PyString_AS_STRING(self
);
3746 register const unsigned char *e
;
3749 /* Shortcut for single character strings */
3750 if (PyString_GET_SIZE(self
) == 1)
3751 return PyBool_FromLong(islower(*p
) != 0);
3753 /* Special case for empty strings */
3754 if (PyString_GET_SIZE(self
) == 0)
3755 return PyBool_FromLong(0);
3757 e
= p
+ PyString_GET_SIZE(self
);
3759 for (; p
< e
; p
++) {
3761 return PyBool_FromLong(0);
3762 else if (!cased
&& islower(*p
))
3765 return PyBool_FromLong(cased
);
3769 PyDoc_STRVAR(isupper__doc__
,
3770 "S.isupper() -> bool\n\
3772 Return True if all cased characters in S are uppercase and there is\n\
3773 at least one cased character in S, False otherwise.");
3776 string_isupper(PyStringObject
*self
)
3778 register const unsigned char *p
3779 = (unsigned char *) PyString_AS_STRING(self
);
3780 register const unsigned char *e
;
3783 /* Shortcut for single character strings */
3784 if (PyString_GET_SIZE(self
) == 1)
3785 return PyBool_FromLong(isupper(*p
) != 0);
3787 /* Special case for empty strings */
3788 if (PyString_GET_SIZE(self
) == 0)
3789 return PyBool_FromLong(0);
3791 e
= p
+ PyString_GET_SIZE(self
);
3793 for (; p
< e
; p
++) {
3795 return PyBool_FromLong(0);
3796 else if (!cased
&& isupper(*p
))
3799 return PyBool_FromLong(cased
);
3803 PyDoc_STRVAR(istitle__doc__
,
3804 "S.istitle() -> bool\n\
3806 Return True if S is a titlecased string and there is at least one\n\
3807 character in S, i.e. uppercase characters may only follow uncased\n\
3808 characters and lowercase characters only cased ones. Return False\n\
3812 string_istitle(PyStringObject
*self
, PyObject
*uncased
)
3814 register const unsigned char *p
3815 = (unsigned char *) PyString_AS_STRING(self
);
3816 register const unsigned char *e
;
3817 int cased
, previous_is_cased
;
3819 /* Shortcut for single character strings */
3820 if (PyString_GET_SIZE(self
) == 1)
3821 return PyBool_FromLong(isupper(*p
) != 0);
3823 /* Special case for empty strings */
3824 if (PyString_GET_SIZE(self
) == 0)
3825 return PyBool_FromLong(0);
3827 e
= p
+ PyString_GET_SIZE(self
);
3829 previous_is_cased
= 0;
3830 for (; p
< e
; p
++) {
3831 register const unsigned char ch
= *p
;
3834 if (previous_is_cased
)
3835 return PyBool_FromLong(0);
3836 previous_is_cased
= 1;
3839 else if (islower(ch
)) {
3840 if (!previous_is_cased
)
3841 return PyBool_FromLong(0);
3842 previous_is_cased
= 1;
3846 previous_is_cased
= 0;
3848 return PyBool_FromLong(cased
);
3852 PyDoc_STRVAR(splitlines__doc__
,
3853 "S.splitlines([keepends]) -> list of strings\n\
3855 Return a list of the lines in S, breaking at line boundaries.\n\
3856 Line breaks are not included in the resulting list unless keepends\n\
3857 is given and true.");
3860 string_splitlines(PyStringObject
*self
, PyObject
*args
)
3862 register Py_ssize_t i
;
3863 register Py_ssize_t j
;
3870 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
3873 data
= PyString_AS_STRING(self
);
3874 len
= PyString_GET_SIZE(self
);
3876 /* This does not use the preallocated list because splitlines is
3877 usually run with hundreds of newlines. The overhead of
3878 switching between PyList_SET_ITEM and append causes about a
3879 2-3% slowdown for that common case. A smarter implementation
3880 could move the if check out, so the SET_ITEMs are done first
3881 and the appends only done when the prealloc buffer is full.
3882 That's too much work for little gain.*/
3884 list
= PyList_New(0);
3888 for (i
= j
= 0; i
< len
; ) {
3891 /* Find a line and append it */
3892 while (i
< len
&& data
[i
] != '\n' && data
[i
] != '\r')
3895 /* Skip the line break reading CRLF as one line break */
3898 if (data
[i
] == '\r' && i
+ 1 < len
&&
3906 SPLIT_APPEND(data
, j
, eol
);
3910 SPLIT_APPEND(data
, j
, len
);
3920 PyDoc_STRVAR(sizeof__doc__
,
3921 "S.__sizeof__() -> size of S in memory, in bytes");
3924 string_sizeof(PyStringObject
*v
)
3927 res
= sizeof(PyStringObject
) + v
->ob_size
* v
->ob_type
->tp_itemsize
;
3928 return PyInt_FromSsize_t(res
);
3934 #undef PREALLOC_SIZE
3937 string_getnewargs(PyStringObject
*v
)
3939 return Py_BuildValue("(s#)", v
->ob_sval
, Py_SIZE(v
));
3943 #include "stringlib/string_format.h"
3945 PyDoc_STRVAR(format__doc__
,
3946 "S.format(*args, **kwargs) -> unicode\n\
3951 string__format__(PyObject
* self
, PyObject
* args
)
3953 PyObject
*format_spec
;
3954 PyObject
*result
= NULL
;
3955 PyObject
*tmp
= NULL
;
3957 /* If 2.x, convert format_spec to the same type as value */
3958 /* This is to allow things like u''.format('') */
3959 if (!PyArg_ParseTuple(args
, "O:__format__", &format_spec
))
3961 if (!(PyString_Check(format_spec
) || PyUnicode_Check(format_spec
))) {
3962 PyErr_Format(PyExc_TypeError
, "__format__ arg must be str "
3963 "or unicode, not %s", Py_TYPE(format_spec
)->tp_name
);
3966 tmp
= PyObject_Str(format_spec
);
3971 result
= _PyBytes_FormatAdvanced(self
,
3972 PyString_AS_STRING(format_spec
),
3973 PyString_GET_SIZE(format_spec
));
3979 PyDoc_STRVAR(p_format__doc__
,
3980 "S.__format__(format_spec) -> unicode\n\
3986 string_methods
[] = {
3987 /* Counterparts of the obsolete stropmodule functions; except
3988 string.maketrans(). */
3989 {"join", (PyCFunction
)string_join
, METH_O
, join__doc__
},
3990 {"split", (PyCFunction
)string_split
, METH_VARARGS
, split__doc__
},
3991 {"rsplit", (PyCFunction
)string_rsplit
, METH_VARARGS
, rsplit__doc__
},
3992 {"lower", (PyCFunction
)string_lower
, METH_NOARGS
, lower__doc__
},
3993 {"upper", (PyCFunction
)string_upper
, METH_NOARGS
, upper__doc__
},
3994 {"islower", (PyCFunction
)string_islower
, METH_NOARGS
, islower__doc__
},
3995 {"isupper", (PyCFunction
)string_isupper
, METH_NOARGS
, isupper__doc__
},
3996 {"isspace", (PyCFunction
)string_isspace
, METH_NOARGS
, isspace__doc__
},
3997 {"isdigit", (PyCFunction
)string_isdigit
, METH_NOARGS
, isdigit__doc__
},
3998 {"istitle", (PyCFunction
)string_istitle
, METH_NOARGS
, istitle__doc__
},
3999 {"isalpha", (PyCFunction
)string_isalpha
, METH_NOARGS
, isalpha__doc__
},
4000 {"isalnum", (PyCFunction
)string_isalnum
, METH_NOARGS
, isalnum__doc__
},
4001 {"capitalize", (PyCFunction
)string_capitalize
, METH_NOARGS
,
4003 {"count", (PyCFunction
)string_count
, METH_VARARGS
, count__doc__
},
4004 {"endswith", (PyCFunction
)string_endswith
, METH_VARARGS
,
4006 {"partition", (PyCFunction
)string_partition
, METH_O
, partition__doc__
},
4007 {"find", (PyCFunction
)string_find
, METH_VARARGS
, find__doc__
},
4008 {"index", (PyCFunction
)string_index
, METH_VARARGS
, index__doc__
},
4009 {"lstrip", (PyCFunction
)string_lstrip
, METH_VARARGS
, lstrip__doc__
},
4010 {"replace", (PyCFunction
)string_replace
, METH_VARARGS
, replace__doc__
},
4011 {"rfind", (PyCFunction
)string_rfind
, METH_VARARGS
, rfind__doc__
},
4012 {"rindex", (PyCFunction
)string_rindex
, METH_VARARGS
, rindex__doc__
},
4013 {"rstrip", (PyCFunction
)string_rstrip
, METH_VARARGS
, rstrip__doc__
},
4014 {"rpartition", (PyCFunction
)string_rpartition
, METH_O
,
4016 {"startswith", (PyCFunction
)string_startswith
, METH_VARARGS
,
4018 {"strip", (PyCFunction
)string_strip
, METH_VARARGS
, strip__doc__
},
4019 {"swapcase", (PyCFunction
)string_swapcase
, METH_NOARGS
,
4021 {"translate", (PyCFunction
)string_translate
, METH_VARARGS
,
4023 {"title", (PyCFunction
)string_title
, METH_NOARGS
, title__doc__
},
4024 {"ljust", (PyCFunction
)string_ljust
, METH_VARARGS
, ljust__doc__
},
4025 {"rjust", (PyCFunction
)string_rjust
, METH_VARARGS
, rjust__doc__
},
4026 {"center", (PyCFunction
)string_center
, METH_VARARGS
, center__doc__
},
4027 {"zfill", (PyCFunction
)string_zfill
, METH_VARARGS
, zfill__doc__
},
4028 {"format", (PyCFunction
) do_string_format
, METH_VARARGS
| METH_KEYWORDS
, format__doc__
},
4029 {"__format__", (PyCFunction
) string__format__
, METH_VARARGS
, p_format__doc__
},
4030 {"_formatter_field_name_split", (PyCFunction
) formatter_field_name_split
, METH_NOARGS
},
4031 {"_formatter_parser", (PyCFunction
) formatter_parser
, METH_NOARGS
},
4032 {"encode", (PyCFunction
)string_encode
, METH_VARARGS
, encode__doc__
},
4033 {"decode", (PyCFunction
)string_decode
, METH_VARARGS
, decode__doc__
},
4034 {"expandtabs", (PyCFunction
)string_expandtabs
, METH_VARARGS
,
4036 {"splitlines", (PyCFunction
)string_splitlines
, METH_VARARGS
,
4038 {"__sizeof__", (PyCFunction
)string_sizeof
, METH_NOARGS
,
4040 {"__getnewargs__", (PyCFunction
)string_getnewargs
, METH_NOARGS
},
4041 {NULL
, NULL
} /* sentinel */
4045 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
);
4048 string_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4051 static char *kwlist
[] = {"object", 0};
4053 if (type
!= &PyString_Type
)
4054 return str_subtype_new(type
, args
, kwds
);
4055 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|O:str", kwlist
, &x
))
4058 return PyString_FromString("");
4059 return PyObject_Str(x
);
4063 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4065 PyObject
*tmp
, *pnew
;
4068 assert(PyType_IsSubtype(type
, &PyString_Type
));
4069 tmp
= string_new(&PyString_Type
, args
, kwds
);
4072 assert(PyString_CheckExact(tmp
));
4073 n
= PyString_GET_SIZE(tmp
);
4074 pnew
= type
->tp_alloc(type
, n
);
4076 Py_MEMCPY(PyString_AS_STRING(pnew
), PyString_AS_STRING(tmp
), n
+1);
4077 ((PyStringObject
*)pnew
)->ob_shash
=
4078 ((PyStringObject
*)tmp
)->ob_shash
;
4079 ((PyStringObject
*)pnew
)->ob_sstate
= SSTATE_NOT_INTERNED
;
4086 basestring_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4088 PyErr_SetString(PyExc_TypeError
,
4089 "The basestring type cannot be instantiated");
4094 string_mod(PyObject
*v
, PyObject
*w
)
4096 if (!PyString_Check(v
)) {
4097 Py_INCREF(Py_NotImplemented
);
4098 return Py_NotImplemented
;
4100 return PyString_Format(v
, w
);
4103 PyDoc_STRVAR(basestring_doc
,
4104 "Type basestring cannot be instantiated; it is the base for str and unicode.");
4106 static PyNumberMethods string_as_number
= {
4111 string_mod
, /*nb_remainder*/
4115 PyTypeObject PyBaseString_Type
= {
4116 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
4126 0, /* tp_as_number */
4127 0, /* tp_as_sequence */
4128 0, /* tp_as_mapping */
4132 0, /* tp_getattro */
4133 0, /* tp_setattro */
4134 0, /* tp_as_buffer */
4135 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
4136 basestring_doc
, /* tp_doc */
4137 0, /* tp_traverse */
4139 0, /* tp_richcompare */
4140 0, /* tp_weaklistoffset */
4142 0, /* tp_iternext */
4146 &PyBaseObject_Type
, /* tp_base */
4148 0, /* tp_descr_get */
4149 0, /* tp_descr_set */
4150 0, /* tp_dictoffset */
4153 basestring_new
, /* tp_new */
4157 PyDoc_STRVAR(string_doc
,
4158 "str(object) -> string\n\
4160 Return a nice string representation of the object.\n\
4161 If the argument is a string, the return value is the same object.");
4163 PyTypeObject PyString_Type
= {
4164 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
4166 sizeof(PyStringObject
),
4168 string_dealloc
, /* tp_dealloc */
4169 (printfunc
)string_print
, /* tp_print */
4173 string_repr
, /* tp_repr */
4174 &string_as_number
, /* tp_as_number */
4175 &string_as_sequence
, /* tp_as_sequence */
4176 &string_as_mapping
, /* tp_as_mapping */
4177 (hashfunc
)string_hash
, /* tp_hash */
4179 string_str
, /* tp_str */
4180 PyObject_GenericGetAttr
, /* tp_getattro */
4181 0, /* tp_setattro */
4182 &string_as_buffer
, /* tp_as_buffer */
4183 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_CHECKTYPES
|
4184 Py_TPFLAGS_BASETYPE
| Py_TPFLAGS_STRING_SUBCLASS
|
4185 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
4186 string_doc
, /* tp_doc */
4187 0, /* tp_traverse */
4189 (richcmpfunc
)string_richcompare
, /* tp_richcompare */
4190 0, /* tp_weaklistoffset */
4192 0, /* tp_iternext */
4193 string_methods
, /* tp_methods */
4196 &PyBaseString_Type
, /* tp_base */
4198 0, /* tp_descr_get */
4199 0, /* tp_descr_set */
4200 0, /* tp_dictoffset */
4203 string_new
, /* tp_new */
4204 PyObject_Del
, /* tp_free */
4208 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
4210 register PyObject
*v
;
4213 if (w
== NULL
|| !PyString_Check(*pv
)) {
4218 v
= string_concat((PyStringObject
*) *pv
, w
);
4224 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
4226 PyString_Concat(pv
, w
);
4231 /* The following function breaks the notion that strings are immutable:
4232 it changes the size of a string. We get away with this only if there
4233 is only one module referencing the object. You can also think of it
4234 as creating a new string object and destroying the old one, only
4235 more efficiently. In any case, don't use this if the string may
4236 already be known to some other part of the code...
4237 Note that if there's not enough memory to resize the string, the original
4238 string object at *pv is deallocated, *pv is set to NULL, an "out of
4239 memory" exception is set, and -1 is returned. Else (on success) 0 is
4240 returned, and the value in *pv may or may not be the same as on input.
4241 As always, an extra byte is allocated for a trailing \0 byte (newsize
4242 does *not* include that), and a trailing \0 byte is stored.
4246 _PyString_Resize(PyObject
**pv
, Py_ssize_t newsize
)
4248 register PyObject
*v
;
4249 register PyStringObject
*sv
;
4251 if (!PyString_Check(v
) || Py_REFCNT(v
) != 1 || newsize
< 0 ||
4252 PyString_CHECK_INTERNED(v
)) {
4255 PyErr_BadInternalCall();
4258 /* XXX UNREF/NEWREF interface should be more symmetrical */
4260 _Py_ForgetReference(v
);
4262 PyObject_REALLOC((char *)v
, sizeof(PyStringObject
) + newsize
);
4268 _Py_NewReference(*pv
);
4269 sv
= (PyStringObject
*) *pv
;
4270 Py_SIZE(sv
) = newsize
;
4271 sv
->ob_sval
[newsize
] = '\0';
4272 sv
->ob_shash
= -1; /* invalidate cached hash value */
4276 /* Helpers for formatstring */
4278 Py_LOCAL_INLINE(PyObject
*)
4279 getnextarg(PyObject
*args
, Py_ssize_t arglen
, Py_ssize_t
*p_argidx
)
4281 Py_ssize_t argidx
= *p_argidx
;
4282 if (argidx
< arglen
) {
4287 return PyTuple_GetItem(args
, argidx
);
4289 PyErr_SetString(PyExc_TypeError
,
4290 "not enough arguments for format string");
4301 #define F_LJUST (1<<0)
4302 #define F_SIGN (1<<1)
4303 #define F_BLANK (1<<2)
4304 #define F_ALT (1<<3)
4305 #define F_ZERO (1<<4)
4307 Py_LOCAL_INLINE(int)
4308 formatfloat(char *buf
, size_t buflen
, int flags
,
4309 int prec
, int type
, PyObject
*v
)
4311 /* fmt = '%#.' + `prec` + `type`
4312 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4315 x
= PyFloat_AsDouble(v
);
4316 if (x
== -1.0 && PyErr_Occurred()) {
4317 PyErr_Format(PyExc_TypeError
, "float argument required, "
4318 "not %.200s", Py_TYPE(v
)->tp_name
);
4323 if (type
== 'f' && fabs(x
)/1e25
>= 1e25
)
4325 /* Worst case length calc to ensure no buffer overrun:
4329 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4330 for any double rep.)
4331 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4334 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4335 len = 1 + 50 + 1 + prec = 52 + prec
4337 If prec=0 the effective precision is 1 (the leading digit is
4338 always given), therefore increase the length by one.
4341 if (((type
== 'g' || type
== 'G') &&
4342 buflen
<= (size_t)10 + (size_t)prec
) ||
4343 (type
== 'f' && buflen
<= (size_t)53 + (size_t)prec
)) {
4344 PyErr_SetString(PyExc_OverflowError
,
4345 "formatted float is too long (precision too large?)");
4348 PyOS_snprintf(fmt
, sizeof(fmt
), "%%%s.%d%c",
4349 (flags
&F_ALT
) ? "#" : "",
4351 PyOS_ascii_formatd(buf
, buflen
, fmt
, x
);
4352 return (int)strlen(buf
);
4355 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4356 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4357 * Python's regular ints.
4358 * Return value: a new PyString*, or NULL if error.
4359 * . *pbuf is set to point into it,
4360 * *plen set to the # of chars following that.
4361 * Caller must decref it when done using pbuf.
4362 * The string starting at *pbuf is of the form
4363 * "-"? ("0x" | "0X")? digit+
4364 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4365 * set in flags. The case of hex digits will be correct.
4366 * There will be at least prec digits, zero-filled on the left if
4367 * necessary to get that many.
4368 * val object to be converted
4369 * flags bitmask of format flags; only F_ALT is looked at
4370 * prec minimum number of digits; 0-fill on left if needed
4371 * type a character in [duoxX]; u acts the same as d
4373 * CAUTION: o, x and X conversions on regular ints can never
4374 * produce a '-' sign, but can for Python's unbounded ints.
4377 _PyString_FormatLong(PyObject
*val
, int flags
, int prec
, int type
,
4378 char **pbuf
, int *plen
)
4380 PyObject
*result
= NULL
;
4383 int sign
; /* 1 if '-', else 0 */
4384 int len
; /* number of characters */
4386 int numdigits
; /* len == numnondigits + numdigits */
4387 int numnondigits
= 0;
4392 result
= Py_TYPE(val
)->tp_str(val
);
4395 result
= Py_TYPE(val
)->tp_as_number
->nb_oct(val
);
4400 result
= Py_TYPE(val
)->tp_as_number
->nb_hex(val
);
4403 assert(!"'type' not in [duoxX]");
4408 buf
= PyString_AsString(result
);
4414 /* To modify the string in-place, there can only be one reference. */
4415 if (Py_REFCNT(result
) != 1) {
4416 PyErr_BadInternalCall();
4419 llen
= PyString_Size(result
);
4420 if (llen
> INT_MAX
) {
4421 PyErr_SetString(PyExc_ValueError
, "string too large in _PyString_FormatLong");
4425 if (buf
[len
-1] == 'L') {
4429 sign
= buf
[0] == '-';
4430 numnondigits
+= sign
;
4431 numdigits
= len
- numnondigits
;
4432 assert(numdigits
> 0);
4434 /* Get rid of base marker unless F_ALT */
4435 if ((flags
& F_ALT
) == 0) {
4436 /* Need to skip 0x, 0X or 0. */
4440 assert(buf
[sign
] == '0');
4441 /* If 0 is only digit, leave it alone. */
4442 if (numdigits
> 1) {
4449 assert(buf
[sign
] == '0');
4450 assert(buf
[sign
+ 1] == 'x');
4461 assert(len
== numnondigits
+ numdigits
);
4462 assert(numdigits
> 0);
4465 /* Fill with leading zeroes to meet minimum width. */
4466 if (prec
> numdigits
) {
4467 PyObject
*r1
= PyString_FromStringAndSize(NULL
,
4468 numnondigits
+ prec
);
4474 b1
= PyString_AS_STRING(r1
);
4475 for (i
= 0; i
< numnondigits
; ++i
)
4477 for (i
= 0; i
< prec
- numdigits
; i
++)
4479 for (i
= 0; i
< numdigits
; i
++)
4484 buf
= PyString_AS_STRING(result
);
4485 len
= numnondigits
+ prec
;
4488 /* Fix up case for hex conversions. */
4490 /* Need to convert all lower case letters to upper case,
4491 and need to convert 0x to 0X (and -0x to -0X). */
4492 for (i
= 0; i
< len
; i
++)
4493 if (buf
[i
] >= 'a' && buf
[i
] <= 'x')
4501 Py_LOCAL_INLINE(int)
4502 formatint(char *buf
, size_t buflen
, int flags
,
4503 int prec
, int type
, PyObject
*v
)
4505 /* fmt = '%#.' + `prec` + 'l' + `type`
4506 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4508 char fmt
[64]; /* plenty big enough! */
4512 x
= PyInt_AsLong(v
);
4513 if (x
== -1 && PyErr_Occurred()) {
4514 PyErr_Format(PyExc_TypeError
, "int argument required, not %.200s",
4515 Py_TYPE(v
)->tp_name
);
4518 if (x
< 0 && type
== 'u') {
4521 if (x
< 0 && (type
== 'x' || type
== 'X' || type
== 'o'))
4528 if ((flags
& F_ALT
) &&
4529 (type
== 'x' || type
== 'X')) {
4530 /* When converting under %#x or %#X, there are a number
4531 * of issues that cause pain:
4532 * - when 0 is being converted, the C standard leaves off
4533 * the '0x' or '0X', which is inconsistent with other
4534 * %#x/%#X conversions and inconsistent with Python's
4536 * - there are platforms that violate the standard and
4537 * convert 0 with the '0x' or '0X'
4538 * (Metrowerks, Compaq Tru64)
4539 * - there are platforms that give '0x' when converting
4540 * under %#X, but convert 0 in accordance with the
4541 * standard (OS/2 EMX)
4543 * We can achieve the desired consistency by inserting our
4544 * own '0x' or '0X' prefix, and substituting %x/%X in place
4547 * Note that this is the same approach as used in
4548 * formatint() in unicodeobject.c
4550 PyOS_snprintf(fmt
, sizeof(fmt
), "%s0%c%%.%dl%c",
4551 sign
, type
, prec
, type
);
4554 PyOS_snprintf(fmt
, sizeof(fmt
), "%s%%%s.%dl%c",
4555 sign
, (flags
&F_ALT
) ? "#" : "",
4559 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4560 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4562 if (buflen
<= 14 || buflen
<= (size_t)3 + (size_t)prec
) {
4563 PyErr_SetString(PyExc_OverflowError
,
4564 "formatted integer is too long (precision too large?)");
4568 PyOS_snprintf(buf
, buflen
, fmt
, -x
);
4570 PyOS_snprintf(buf
, buflen
, fmt
, x
);
4571 return (int)strlen(buf
);
4574 Py_LOCAL_INLINE(int)
4575 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
4577 /* presume that the buffer is at least 2 characters long */
4578 if (PyString_Check(v
)) {
4579 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
4583 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
4590 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4592 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4593 chars are formatted. XXX This is a magic number. Each formatting
4594 routine does bounds checking to ensure no overflow, but a better
4595 solution may be to malloc a buffer of appropriate size for each
4596 format. For now, the current solution is sufficient.
4598 #define FORMATBUFLEN (size_t)120
4601 PyString_Format(PyObject
*format
, PyObject
*args
)
4604 Py_ssize_t arglen
, argidx
;
4605 Py_ssize_t reslen
, rescnt
, fmtcnt
;
4607 PyObject
*result
, *orig_args
;
4608 #ifdef Py_USING_UNICODE
4611 PyObject
*dict
= NULL
;
4612 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
4613 PyErr_BadInternalCall();
4617 fmt
= PyString_AS_STRING(format
);
4618 fmtcnt
= PyString_GET_SIZE(format
);
4619 reslen
= rescnt
= fmtcnt
+ 100;
4620 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
4623 res
= PyString_AsString(result
);
4624 if (PyTuple_Check(args
)) {
4625 arglen
= PyTuple_GET_SIZE(args
);
4632 if (Py_TYPE(args
)->tp_as_mapping
&& !PyTuple_Check(args
) &&
4633 !PyObject_TypeCheck(args
, &PyBaseString_Type
))
4635 while (--fmtcnt
>= 0) {
4638 rescnt
= fmtcnt
+ 100;
4640 if (_PyString_Resize(&result
, reslen
) < 0)
4642 res
= PyString_AS_STRING(result
)
4649 /* Got a format specifier */
4651 Py_ssize_t width
= -1;
4657 PyObject
*temp
= NULL
;
4661 char formatbuf
[FORMATBUFLEN
];
4662 /* For format{float,int,char}() */
4663 #ifdef Py_USING_UNICODE
4664 char *fmt_start
= fmt
;
4665 Py_ssize_t argidx_start
= argidx
;
4676 PyErr_SetString(PyExc_TypeError
,
4677 "format requires a mapping");
4683 /* Skip over balanced parentheses */
4684 while (pcount
> 0 && --fmtcnt
>= 0) {
4687 else if (*fmt
== '(')
4691 keylen
= fmt
- keystart
- 1;
4692 if (fmtcnt
< 0 || pcount
> 0) {
4693 PyErr_SetString(PyExc_ValueError
,
4694 "incomplete format key");
4697 key
= PyString_FromStringAndSize(keystart
,
4705 args
= PyObject_GetItem(dict
, key
);
4714 while (--fmtcnt
>= 0) {
4715 switch (c
= *fmt
++) {
4716 case '-': flags
|= F_LJUST
; continue;
4717 case '+': flags
|= F_SIGN
; continue;
4718 case ' ': flags
|= F_BLANK
; continue;
4719 case '#': flags
|= F_ALT
; continue;
4720 case '0': flags
|= F_ZERO
; continue;
4725 v
= getnextarg(args
, arglen
, &argidx
);
4728 if (!PyInt_Check(v
)) {
4729 PyErr_SetString(PyExc_TypeError
,
4733 width
= PyInt_AsLong(v
);
4741 else if (c
>= 0 && isdigit(c
)) {
4743 while (--fmtcnt
>= 0) {
4744 c
= Py_CHARMASK(*fmt
++);
4747 if ((width
*10) / 10 != width
) {
4753 width
= width
*10 + (c
- '0');
4761 v
= getnextarg(args
, arglen
, &argidx
);
4764 if (!PyInt_Check(v
)) {
4770 prec
= PyInt_AsLong(v
);
4776 else if (c
>= 0 && isdigit(c
)) {
4778 while (--fmtcnt
>= 0) {
4779 c
= Py_CHARMASK(*fmt
++);
4782 if ((prec
*10) / 10 != prec
) {
4788 prec
= prec
*10 + (c
- '0');
4793 if (c
== 'h' || c
== 'l' || c
== 'L') {
4799 PyErr_SetString(PyExc_ValueError
,
4800 "incomplete format");
4804 v
= getnextarg(args
, arglen
, &argidx
);
4816 #ifdef Py_USING_UNICODE
4817 if (PyUnicode_Check(v
)) {
4819 argidx
= argidx_start
;
4823 temp
= _PyObject_Str(v
);
4824 #ifdef Py_USING_UNICODE
4825 if (temp
!= NULL
&& PyUnicode_Check(temp
)) {
4828 argidx
= argidx_start
;
4835 temp
= PyObject_Repr(v
);
4838 if (!PyString_Check(temp
)) {
4839 PyErr_SetString(PyExc_TypeError
,
4840 "%s argument has non-string str()");
4844 pbuf
= PyString_AS_STRING(temp
);
4845 len
= PyString_GET_SIZE(temp
);
4846 if (prec
>= 0 && len
> prec
)
4858 if (PyNumber_Check(v
)) {
4859 PyObject
*iobj
=NULL
;
4861 if (PyInt_Check(v
) || (PyLong_Check(v
))) {
4866 iobj
= PyNumber_Int(v
);
4867 if (iobj
==NULL
) iobj
= PyNumber_Long(v
);
4870 if (PyInt_Check(iobj
)) {
4873 len
= formatint(pbuf
,
4875 flags
, prec
, c
, iobj
);
4881 else if (PyLong_Check(iobj
)) {
4885 temp
= _PyString_FormatLong(iobj
, flags
,
4886 prec
, c
, &pbuf
, &ilen
);
4899 PyErr_Format(PyExc_TypeError
,
4900 "%%%c format: a number is required, "
4901 "not %.200s", c
, Py_TYPE(v
)->tp_name
);
4916 len
= formatfloat(pbuf
, sizeof(formatbuf
),
4925 #ifdef Py_USING_UNICODE
4926 if (PyUnicode_Check(v
)) {
4928 argidx
= argidx_start
;
4933 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
4938 PyErr_Format(PyExc_ValueError
,
4939 "unsupported format character '%c' (0x%x) "
4942 (Py_ssize_t
)(fmt
- 1 -
4943 PyString_AsString(format
)));
4947 if (*pbuf
== '-' || *pbuf
== '+') {
4951 else if (flags
& F_SIGN
)
4953 else if (flags
& F_BLANK
)
4960 if (rescnt
- (sign
!= 0) < width
) {
4962 rescnt
= width
+ fmtcnt
+ 100;
4967 return PyErr_NoMemory();
4969 if (_PyString_Resize(&result
, reslen
) < 0) {
4973 res
= PyString_AS_STRING(result
)
4983 if ((flags
& F_ALT
) && (c
== 'x' || c
== 'X')) {
4984 assert(pbuf
[0] == '0');
4985 assert(pbuf
[1] == c
);
4996 if (width
> len
&& !(flags
& F_LJUST
)) {
5000 } while (--width
> len
);
5005 if ((flags
& F_ALT
) &&
5006 (c
== 'x' || c
== 'X')) {
5007 assert(pbuf
[0] == '0');
5008 assert(pbuf
[1] == c
);
5013 Py_MEMCPY(res
, pbuf
, len
);
5016 while (--width
>= len
) {
5020 if (dict
&& (argidx
< arglen
) && c
!= '%') {
5021 PyErr_SetString(PyExc_TypeError
,
5022 "not all arguments converted during string formatting");
5029 if (argidx
< arglen
&& !dict
) {
5030 PyErr_SetString(PyExc_TypeError
,
5031 "not all arguments converted during string formatting");
5037 _PyString_Resize(&result
, reslen
- rescnt
);
5040 #ifdef Py_USING_UNICODE
5046 /* Fiddle args right (remove the first argidx arguments) */
5047 if (PyTuple_Check(orig_args
) && argidx
> 0) {
5049 Py_ssize_t n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
5054 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
5056 PyTuple_SET_ITEM(v
, n
, w
);
5060 Py_INCREF(orig_args
);
5064 /* Take what we have of the result and let the Unicode formatting
5065 function format the rest of the input. */
5066 rescnt
= res
- PyString_AS_STRING(result
);
5067 if (_PyString_Resize(&result
, rescnt
))
5069 fmtcnt
= PyString_GET_SIZE(format
) - \
5070 (fmt
- PyString_AS_STRING(format
));
5071 format
= PyUnicode_Decode(fmt
, fmtcnt
, NULL
, NULL
);
5074 v
= PyUnicode_Format(format
, args
);
5078 /* Paste what we have (result) to what the Unicode formatting
5079 function returned (v) and return the result (or error) */
5080 w
= PyUnicode_Concat(result
, v
);
5085 #endif /* Py_USING_UNICODE */
5096 PyString_InternInPlace(PyObject
**p
)
5098 register PyStringObject
*s
= (PyStringObject
*)(*p
);
5100 if (s
== NULL
|| !PyString_Check(s
))
5101 Py_FatalError("PyString_InternInPlace: strings only please!");
5102 /* If it's a string subclass, we don't really know what putting
5103 it in the interned dict might do. */
5104 if (!PyString_CheckExact(s
))
5106 if (PyString_CHECK_INTERNED(s
))
5108 if (interned
== NULL
) {
5109 interned
= PyDict_New();
5110 if (interned
== NULL
) {
5111 PyErr_Clear(); /* Don't leave an exception */
5115 t
= PyDict_GetItem(interned
, (PyObject
*)s
);
5123 if (PyDict_SetItem(interned
, (PyObject
*)s
, (PyObject
*)s
) < 0) {
5127 /* The two references in interned are not counted by refcnt.
5128 The string deallocator will take care of this */
5130 PyString_CHECK_INTERNED(s
) = SSTATE_INTERNED_MORTAL
;
5134 PyString_InternImmortal(PyObject
**p
)
5136 PyString_InternInPlace(p
);
5137 if (PyString_CHECK_INTERNED(*p
) != SSTATE_INTERNED_IMMORTAL
) {
5138 PyString_CHECK_INTERNED(*p
) = SSTATE_INTERNED_IMMORTAL
;
5145 PyString_InternFromString(const char *cp
)
5147 PyObject
*s
= PyString_FromString(cp
);
5150 PyString_InternInPlace(&s
);
5158 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
5159 Py_XDECREF(characters
[i
]);
5160 characters
[i
] = NULL
;
5162 Py_XDECREF(nullstring
);
5166 void _Py_ReleaseInternedStrings(void)
5171 Py_ssize_t immortal_size
= 0, mortal_size
= 0;
5173 if (interned
== NULL
|| !PyDict_Check(interned
))
5175 keys
= PyDict_Keys(interned
);
5176 if (keys
== NULL
|| !PyList_Check(keys
)) {
5181 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5182 detector, interned strings are not forcibly deallocated; rather, we
5183 give them their stolen references back, and then clear and DECREF
5184 the interned dict. */
5186 n
= PyList_GET_SIZE(keys
);
5187 fprintf(stderr
, "releasing %" PY_FORMAT_SIZE_T
"d interned strings\n",
5189 for (i
= 0; i
< n
; i
++) {
5190 s
= (PyStringObject
*) PyList_GET_ITEM(keys
, i
);
5191 switch (s
->ob_sstate
) {
5192 case SSTATE_NOT_INTERNED
:
5193 /* XXX Shouldn't happen */
5195 case SSTATE_INTERNED_IMMORTAL
:
5197 immortal_size
+= Py_SIZE(s
);
5199 case SSTATE_INTERNED_MORTAL
:
5201 mortal_size
+= Py_SIZE(s
);
5204 Py_FatalError("Inconsistent interned string state.");
5206 s
->ob_sstate
= SSTATE_NOT_INTERNED
;
5208 fprintf(stderr
, "total size of all interned strings: "
5209 "%" PY_FORMAT_SIZE_T
"d/%" PY_FORMAT_SIZE_T
"d "
5210 "mortal/immortal\n", mortal_size
, immortal_size
);
5212 PyDict_Clear(interned
);
5213 Py_DECREF(interned
);