1 /* String object implementation */
3 #define PY_SSIZE_T_CLEAN
10 int null_strings
, one_strings
;
13 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
14 static PyStringObject
*nullstring
;
16 /* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
21 Another way to look at this is to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
24 static PyObject
*interned
;
27 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
29 null terminating character.
31 For PyString_FromString(), the parameter `str' points to a null-terminated
32 string containing exactly `size' bytes.
34 For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
45 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
53 PyString_FromStringAndSize(const char *str
, Py_ssize_t size
)
55 register PyStringObject
*op
;
57 if (size
== 0 && (op
= nullstring
) != NULL
) {
62 return (PyObject
*)op
;
64 if (size
== 1 && str
!= NULL
&&
65 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
71 return (PyObject
*)op
;
74 /* Inline PyObject_NewVar */
75 op
= (PyStringObject
*)PyObject_MALLOC(sizeof(PyStringObject
) + size
);
77 return PyErr_NoMemory();
78 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
80 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
82 Py_MEMCPY(op
->ob_sval
, str
, size
);
83 op
->ob_sval
[size
] = '\0';
84 /* share short strings */
86 PyObject
*t
= (PyObject
*)op
;
87 PyString_InternInPlace(&t
);
88 op
= (PyStringObject
*)t
;
91 } else if (size
== 1 && str
!= NULL
) {
92 PyObject
*t
= (PyObject
*)op
;
93 PyString_InternInPlace(&t
);
94 op
= (PyStringObject
*)t
;
95 characters
[*str
& UCHAR_MAX
] = op
;
98 return (PyObject
*) op
;
102 PyString_FromString(const char *str
)
104 register size_t size
;
105 register PyStringObject
*op
;
109 if (size
> PY_SSIZE_T_MAX
) {
110 PyErr_SetString(PyExc_OverflowError
,
111 "string is too long for a Python string");
114 if (size
== 0 && (op
= nullstring
) != NULL
) {
119 return (PyObject
*)op
;
121 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
126 return (PyObject
*)op
;
129 /* Inline PyObject_NewVar */
130 op
= (PyStringObject
*)PyObject_MALLOC(sizeof(PyStringObject
) + size
);
132 return PyErr_NoMemory();
133 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
135 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
136 Py_MEMCPY(op
->ob_sval
, str
, size
+1);
137 /* share short strings */
139 PyObject
*t
= (PyObject
*)op
;
140 PyString_InternInPlace(&t
);
141 op
= (PyStringObject
*)t
;
144 } else if (size
== 1) {
145 PyObject
*t
= (PyObject
*)op
;
146 PyString_InternInPlace(&t
);
147 op
= (PyStringObject
*)t
;
148 characters
[*str
& UCHAR_MAX
] = op
;
151 return (PyObject
*) op
;
155 PyString_FromFormatV(const char *format
, va_list vargs
)
163 #ifdef VA_LIST_IS_ARRAY
164 Py_MEMCPY(count
, vargs
, sizeof(va_list));
167 __va_copy(count
, vargs
);
172 /* step 1: figure out how large a buffer we need */
173 for (f
= format
; *f
; f
++) {
176 while (*++f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
182 if ((*f
== 'l' || *f
== 'z') &&
183 (f
[1] == 'd' || f
[1] == 'u'))
188 (void)va_arg(count
, int);
189 /* fall through... */
193 case 'd': case 'u': case 'i': case 'x':
194 (void) va_arg(count
, int);
195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
201 s
= va_arg(count
, char*);
205 (void) va_arg(count
, int);
206 /* maximum 64-bit pointer representation:
208 * so 19 characters is enough.
209 * XXX I count 18 -- what's the extra for?
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
219 what's in the argument list) */
227 /* step 2: fill the buffer */
228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
230 string
= PyString_FromStringAndSize(NULL
, n
);
234 s
= PyString_AsString(string
);
236 for (f
= format
; *f
; f
++) {
242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
245 while (isdigit(Py_CHARMASK(*f
)))
246 n
= (n
*10) + *f
++ - '0';
250 while (isdigit(Py_CHARMASK(*f
)))
251 n
= (n
*10) + *f
++ - '0';
253 while (*f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f
== 'l' && (f
[1] == 'd' || f
[1] == 'u')) {
261 /* handle the size_t flag. */
262 if (*f
== 'z' && (f
[1] == 'd' || f
[1] == 'u')) {
269 *s
++ = va_arg(vargs
, int);
273 sprintf(s
, "%ld", va_arg(vargs
, long));
275 sprintf(s
, "%" PY_FORMAT_SIZE_T
"d",
276 va_arg(vargs
, Py_ssize_t
));
278 sprintf(s
, "%d", va_arg(vargs
, int));
284 va_arg(vargs
, unsigned long));
286 sprintf(s
, "%" PY_FORMAT_SIZE_T
"u",
287 va_arg(vargs
, size_t));
290 va_arg(vargs
, unsigned int));
294 sprintf(s
, "%i", va_arg(vargs
, int));
298 sprintf(s
, "%x", va_arg(vargs
, int));
302 p
= va_arg(vargs
, char*);
310 sprintf(s
, "%p", va_arg(vargs
, void*));
311 /* %p is ill-defined: ensure leading 0x. */
314 else if (s
[1] != 'x') {
315 memmove(s
+2, s
, strlen(s
)+1);
334 _PyString_Resize(&string
, s
- PyString_AS_STRING(string
));
339 PyString_FromFormat(const char *format
, ...)
344 #ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs
, format
);
349 ret
= PyString_FromFormatV(format
, vargs
);
355 PyObject
*PyString_Decode(const char *s
,
357 const char *encoding
,
362 str
= PyString_FromStringAndSize(s
, size
);
365 v
= PyString_AsDecodedString(str
, encoding
, errors
);
370 PyObject
*PyString_AsDecodedObject(PyObject
*str
,
371 const char *encoding
,
376 if (!PyString_Check(str
)) {
381 if (encoding
== NULL
) {
382 #ifdef Py_USING_UNICODE
383 encoding
= PyUnicode_GetDefaultEncoding();
385 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
390 /* Decode via the codec registry */
391 v
= PyCodec_Decode(str
, encoding
, errors
);
401 PyObject
*PyString_AsDecodedString(PyObject
*str
,
402 const char *encoding
,
407 v
= PyString_AsDecodedObject(str
, encoding
, errors
);
411 #ifdef Py_USING_UNICODE
412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v
)) {
415 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
421 if (!PyString_Check(v
)) {
422 PyErr_Format(PyExc_TypeError
,
423 "decoder did not return a string object (type=%.400s)",
424 v
->ob_type
->tp_name
);
435 PyObject
*PyString_Encode(const char *s
,
437 const char *encoding
,
442 str
= PyString_FromStringAndSize(s
, size
);
445 v
= PyString_AsEncodedString(str
, encoding
, errors
);
450 PyObject
*PyString_AsEncodedObject(PyObject
*str
,
451 const char *encoding
,
456 if (!PyString_Check(str
)) {
461 if (encoding
== NULL
) {
462 #ifdef Py_USING_UNICODE
463 encoding
= PyUnicode_GetDefaultEncoding();
465 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
470 /* Encode via the codec registry */
471 v
= PyCodec_Encode(str
, encoding
, errors
);
481 PyObject
*PyString_AsEncodedString(PyObject
*str
,
482 const char *encoding
,
487 v
= PyString_AsEncodedObject(str
, encoding
, errors
);
491 #ifdef Py_USING_UNICODE
492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v
)) {
495 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
501 if (!PyString_Check(v
)) {
502 PyErr_Format(PyExc_TypeError
,
503 "encoder did not return a string object (type=%.400s)",
504 v
->ob_type
->tp_name
);
516 string_dealloc(PyObject
*op
)
518 switch (PyString_CHECK_INTERNED(op
)) {
519 case SSTATE_NOT_INTERNED
:
522 case SSTATE_INTERNED_MORTAL
:
523 /* revive dead object temporarily for DelItem */
525 if (PyDict_DelItem(interned
, op
) != 0)
527 "deletion of interned string failed");
530 case SSTATE_INTERNED_IMMORTAL
:
531 Py_FatalError("Immortal interned string died.");
534 Py_FatalError("Inconsistent interned string state.");
536 op
->ob_type
->tp_free(op
);
539 /* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
544 PyObject
*PyString_DecodeEscape(const char *s
,
548 const char *recode_encoding
)
554 Py_ssize_t newlen
= recode_encoding
? 4*len
:len
;
555 v
= PyString_FromStringAndSize((char *)NULL
, newlen
);
558 p
= buf
= PyString_AsString(v
);
563 #ifdef Py_USING_UNICODE
564 if (recode_encoding
&& (*s
& 0x80)) {
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t
< end
&& (*t
& 0x80)) t
++;
572 u
= PyUnicode_DecodeUTF8(s
, t
- s
, errors
);
575 /* Recode them in target encoding. */
576 w
= PyUnicode_AsEncodedString(
577 u
, recode_encoding
, errors
);
581 /* Append bytes to output buffer. */
582 assert(PyString_Check(w
));
583 r
= PyString_AS_STRING(w
);
584 rn
= PyString_GET_SIZE(w
);
599 PyErr_SetString(PyExc_ValueError
,
600 "Trailing \\ in string");
604 /* XXX This assumes ASCII! */
606 case '\\': *p
++ = '\\'; break;
607 case '\'': *p
++ = '\''; break;
608 case '\"': *p
++ = '\"'; break;
609 case 'b': *p
++ = '\b'; break;
610 case 'f': *p
++ = '\014'; break; /* FF */
611 case 't': *p
++ = '\t'; break;
612 case 'n': *p
++ = '\n'; break;
613 case 'r': *p
++ = '\r'; break;
614 case 'v': *p
++ = '\013'; break; /* VT */
615 case 'a': *p
++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
619 if (s
< end
&& '0' <= *s
&& *s
<= '7') {
620 c
= (c
<<3) + *s
++ - '0';
621 if (s
< end
&& '0' <= *s
&& *s
<= '7')
622 c
= (c
<<3) + *s
++ - '0';
628 isxdigit(Py_CHARMASK(s
[0])) &&
629 isxdigit(Py_CHARMASK(s
[1])))
652 if (!errors
|| strcmp(errors
, "strict") == 0) {
653 PyErr_SetString(PyExc_ValueError
,
654 "invalid \\x escape");
657 if (strcmp(errors
, "replace") == 0) {
659 } else if (strcmp(errors
, "ignore") == 0)
662 PyErr_Format(PyExc_ValueError
,
664 "unknown error handling code: %.400s",
668 #ifndef Py_USING_UNICODE
673 PyErr_SetString(PyExc_ValueError
,
674 "Unicode escapes not legal "
675 "when Unicode disabled");
682 goto non_esc
; /* an arbitrary number of unescaped
683 UTF-8 bytes may follow. */
687 _PyString_Resize(&v
, p
- buf
);
694 /* -------------------------------------------------------------------- */
698 string_getsize(register PyObject
*op
)
702 if (PyString_AsStringAndSize(op
, &s
, &len
))
707 static /*const*/ char *
708 string_getbuffer(register PyObject
*op
)
712 if (PyString_AsStringAndSize(op
, &s
, &len
))
718 PyString_Size(register PyObject
*op
)
720 if (!PyString_Check(op
))
721 return string_getsize(op
);
722 return ((PyStringObject
*)op
) -> ob_size
;
726 PyString_AsString(register PyObject
*op
)
728 if (!PyString_Check(op
))
729 return string_getbuffer(op
);
730 return ((PyStringObject
*)op
) -> ob_sval
;
734 PyString_AsStringAndSize(register PyObject
*obj
,
736 register Py_ssize_t
*len
)
739 PyErr_BadInternalCall();
743 if (!PyString_Check(obj
)) {
744 #ifdef Py_USING_UNICODE
745 if (PyUnicode_Check(obj
)) {
746 obj
= _PyUnicode_AsDefaultEncodedString(obj
, NULL
);
753 PyErr_Format(PyExc_TypeError
,
754 "expected string or Unicode object, "
755 "%.200s found", obj
->ob_type
->tp_name
);
760 *s
= PyString_AS_STRING(obj
);
762 *len
= PyString_GET_SIZE(obj
);
763 else if (strlen(*s
) != (size_t)PyString_GET_SIZE(obj
)) {
764 PyErr_SetString(PyExc_TypeError
,
765 "expected string without null bytes");
771 /* -------------------------------------------------------------------- */
774 #define STRINGLIB_CHAR char
776 #define STRINGLIB_CMP memcmp
777 #define STRINGLIB_LEN PyString_GET_SIZE
778 #define STRINGLIB_NEW PyString_FromStringAndSize
779 #define STRINGLIB_STR PyString_AS_STRING
781 #define STRINGLIB_EMPTY nullstring
783 #include "stringlib/fastsearch.h"
785 #include "stringlib/count.h"
786 #include "stringlib/find.h"
787 #include "stringlib/partition.h"
791 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
797 /* XXX Ought to check for interrupts when writing long strings */
798 if (! PyString_CheckExact(op
)) {
800 /* A str subclass may have its own __str__ method. */
801 op
= (PyStringObject
*) PyObject_Str((PyObject
*)op
);
804 ret
= string_print(op
, fp
, flags
);
808 if (flags
& Py_PRINT_RAW
) {
809 char *data
= op
->ob_sval
;
810 Py_ssize_t size
= op
->ob_size
;
811 while (size
> INT_MAX
) {
812 /* Very long strings cannot be written atomically.
813 * But don't write exactly INT_MAX bytes at a time
814 * to avoid memory alignment issues.
816 const int chunk_size
= INT_MAX
& ~0x3FFF;
817 fwrite(data
, 1, chunk_size
, fp
);
822 if (size
) fwrite(data
, (int)size
, 1, fp
);
824 fwrite(data
, 1, (int)size
, fp
);
829 /* figure out which quote to use; single is preferred */
831 if (memchr(op
->ob_sval
, '\'', op
->ob_size
) &&
832 !memchr(op
->ob_sval
, '"', op
->ob_size
))
836 for (i
= 0; i
< op
->ob_size
; i
++) {
838 if (c
== quote
|| c
== '\\')
839 fprintf(fp
, "\\%c", c
);
846 else if (c
< ' ' || c
>= 0x7f)
847 fprintf(fp
, "\\x%02x", c
& 0xff);
856 PyString_Repr(PyObject
*obj
, int smartquotes
)
858 register PyStringObject
* op
= (PyStringObject
*) obj
;
859 size_t newsize
= 2 + 4 * op
->ob_size
;
861 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 != op
->ob_size
) {
862 PyErr_SetString(PyExc_OverflowError
,
863 "string is too large to make repr");
866 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
871 register Py_ssize_t i
;
876 /* figure out which quote to use; single is preferred */
879 memchr(op
->ob_sval
, '\'', op
->ob_size
) &&
880 !memchr(op
->ob_sval
, '"', op
->ob_size
))
883 p
= PyString_AS_STRING(v
);
885 for (i
= 0; i
< op
->ob_size
; i
++) {
886 /* There's at least enough room for a hex escape
887 and a closing quote. */
888 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 5);
890 if (c
== quote
|| c
== '\\')
891 *p
++ = '\\', *p
++ = c
;
893 *p
++ = '\\', *p
++ = 't';
895 *p
++ = '\\', *p
++ = 'n';
897 *p
++ = '\\', *p
++ = 'r';
898 else if (c
< ' ' || c
>= 0x7f) {
899 /* For performance, we don't want to call
900 PyOS_snprintf here (extra layers of
902 sprintf(p
, "\\x%02x", c
& 0xff);
908 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 1);
912 &v
, (p
- PyString_AS_STRING(v
)));
918 string_repr(PyObject
*op
)
920 return PyString_Repr(op
, 1);
924 string_str(PyObject
*s
)
926 assert(PyString_Check(s
));
927 if (PyString_CheckExact(s
)) {
932 /* Subtype -- return genuine string with the same value. */
933 PyStringObject
*t
= (PyStringObject
*) s
;
934 return PyString_FromStringAndSize(t
->ob_sval
, t
->ob_size
);
939 string_length(PyStringObject
*a
)
945 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
947 register Py_ssize_t size
;
948 register PyStringObject
*op
;
949 if (!PyString_Check(bb
)) {
950 #ifdef Py_USING_UNICODE
951 if (PyUnicode_Check(bb
))
952 return PyUnicode_Concat((PyObject
*)a
, bb
);
954 PyErr_Format(PyExc_TypeError
,
955 "cannot concatenate 'str' and '%.200s' objects",
956 bb
->ob_type
->tp_name
);
959 #define b ((PyStringObject *)bb)
960 /* Optimize cases with empty left or right operand */
961 if ((a
->ob_size
== 0 || b
->ob_size
== 0) &&
962 PyString_CheckExact(a
) && PyString_CheckExact(b
)) {
963 if (a
->ob_size
== 0) {
968 return (PyObject
*)a
;
970 size
= a
->ob_size
+ b
->ob_size
;
972 PyErr_SetString(PyExc_OverflowError
,
973 "strings are too large to concat");
977 /* Inline PyObject_NewVar */
978 op
= (PyStringObject
*)PyObject_MALLOC(sizeof(PyStringObject
) + size
);
980 return PyErr_NoMemory();
981 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
983 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
984 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, a
->ob_size
);
985 Py_MEMCPY(op
->ob_sval
+ a
->ob_size
, b
->ob_sval
, b
->ob_size
);
986 op
->ob_sval
[size
] = '\0';
987 return (PyObject
*) op
;
992 string_repeat(register PyStringObject
*a
, register Py_ssize_t n
)
994 register Py_ssize_t i
;
995 register Py_ssize_t j
;
996 register Py_ssize_t size
;
997 register PyStringObject
*op
;
1001 /* watch out for overflows: the size can overflow int,
1002 * and the # of bytes needed can overflow size_t
1004 size
= a
->ob_size
* n
;
1005 if (n
&& size
/ n
!= a
->ob_size
) {
1006 PyErr_SetString(PyExc_OverflowError
,
1007 "repeated string is too long");
1010 if (size
== a
->ob_size
&& PyString_CheckExact(a
)) {
1012 return (PyObject
*)a
;
1014 nbytes
= (size_t)size
;
1015 if (nbytes
+ sizeof(PyStringObject
) <= nbytes
) {
1016 PyErr_SetString(PyExc_OverflowError
,
1017 "repeated string is too long");
1020 op
= (PyStringObject
*)
1021 PyObject_MALLOC(sizeof(PyStringObject
) + nbytes
);
1023 return PyErr_NoMemory();
1024 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1026 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1027 op
->ob_sval
[size
] = '\0';
1028 if (a
->ob_size
== 1 && n
> 0) {
1029 memset(op
->ob_sval
, a
->ob_sval
[0] , n
);
1030 return (PyObject
*) op
;
1034 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, a
->ob_size
);
1038 j
= (i
<= size
-i
) ? i
: size
-i
;
1039 Py_MEMCPY(op
->ob_sval
+i
, op
->ob_sval
, j
);
1042 return (PyObject
*) op
;
1045 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1048 string_slice(register PyStringObject
*a
, register Py_ssize_t i
,
1049 register Py_ssize_t j
)
1050 /* j -- may be negative! */
1055 j
= 0; /* Avoid signed/unsigned bug in next line */
1058 if (i
== 0 && j
== a
->ob_size
&& PyString_CheckExact(a
)) {
1059 /* It's the same as a */
1061 return (PyObject
*)a
;
1065 return PyString_FromStringAndSize(a
->ob_sval
+ i
, j
-i
);
1069 string_contains(PyObject
*str_obj
, PyObject
*sub_obj
)
1071 if (!PyString_CheckExact(sub_obj
)) {
1072 #ifdef Py_USING_UNICODE
1073 if (PyUnicode_Check(sub_obj
))
1074 return PyUnicode_Contains(str_obj
, sub_obj
);
1076 if (!PyString_Check(sub_obj
)) {
1077 PyErr_SetString(PyExc_TypeError
,
1078 "'in <string>' requires string as left operand");
1083 return stringlib_contains_obj(str_obj
, sub_obj
);
1087 string_item(PyStringObject
*a
, register Py_ssize_t i
)
1091 if (i
< 0 || i
>= a
->ob_size
) {
1092 PyErr_SetString(PyExc_IndexError
, "string index out of range");
1095 pchar
= a
->ob_sval
[i
];
1096 v
= (PyObject
*)characters
[pchar
& UCHAR_MAX
];
1098 v
= PyString_FromStringAndSize(&pchar
, 1);
1109 string_richcompare(PyStringObject
*a
, PyStringObject
*b
, int op
)
1112 Py_ssize_t len_a
, len_b
;
1116 /* Make sure both arguments are strings. */
1117 if (!(PyString_Check(a
) && PyString_Check(b
))) {
1118 result
= Py_NotImplemented
;
1123 case Py_EQ
:case Py_LE
:case Py_GE
:
1126 case Py_NE
:case Py_LT
:case Py_GT
:
1132 /* Supporting Py_NE here as well does not save
1133 much time, since Py_NE is rarely used. */
1134 if (a
->ob_size
== b
->ob_size
1135 && (a
->ob_sval
[0] == b
->ob_sval
[0]
1136 && memcmp(a
->ob_sval
, b
->ob_sval
,
1137 a
->ob_size
) == 0)) {
1144 len_a
= a
->ob_size
; len_b
= b
->ob_size
;
1145 min_len
= (len_a
< len_b
) ? len_a
: len_b
;
1147 c
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
1149 c
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
1153 c
= (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
1155 case Py_LT
: c
= c
< 0; break;
1156 case Py_LE
: c
= c
<= 0; break;
1157 case Py_EQ
: assert(0); break; /* unreachable */
1158 case Py_NE
: c
= c
!= 0; break;
1159 case Py_GT
: c
= c
> 0; break;
1160 case Py_GE
: c
= c
>= 0; break;
1162 result
= Py_NotImplemented
;
1165 result
= c
? Py_True
: Py_False
;
1172 _PyString_Eq(PyObject
*o1
, PyObject
*o2
)
1174 PyStringObject
*a
= (PyStringObject
*) o1
;
1175 PyStringObject
*b
= (PyStringObject
*) o2
;
1176 return a
->ob_size
== b
->ob_size
1177 && *a
->ob_sval
== *b
->ob_sval
1178 && memcmp(a
->ob_sval
, b
->ob_sval
, a
->ob_size
) == 0;
1182 string_hash(PyStringObject
*a
)
1184 register Py_ssize_t len
;
1185 register unsigned char *p
;
1188 if (a
->ob_shash
!= -1)
1191 p
= (unsigned char *) a
->ob_sval
;
1194 x
= (1000003*x
) ^ *p
++;
1203 string_subscript(PyStringObject
* self
, PyObject
* item
)
1205 if (PyIndex_Check(item
)) {
1206 Py_ssize_t i
= PyNumber_AsSsize_t(item
, PyExc_IndexError
);
1207 if (i
== -1 && PyErr_Occurred())
1210 i
+= PyString_GET_SIZE(self
);
1211 return string_item(self
, i
);
1213 else if (PySlice_Check(item
)) {
1214 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
1219 if (PySlice_GetIndicesEx((PySliceObject
*)item
,
1220 PyString_GET_SIZE(self
),
1221 &start
, &stop
, &step
, &slicelength
) < 0) {
1225 if (slicelength
<= 0) {
1226 return PyString_FromStringAndSize("", 0);
1229 source_buf
= PyString_AsString((PyObject
*)self
);
1230 result_buf
= (char *)PyMem_Malloc(slicelength
);
1231 if (result_buf
== NULL
)
1232 return PyErr_NoMemory();
1234 for (cur
= start
, i
= 0; i
< slicelength
;
1236 result_buf
[i
] = source_buf
[cur
];
1239 result
= PyString_FromStringAndSize(result_buf
,
1241 PyMem_Free(result_buf
);
1246 PyErr_SetString(PyExc_TypeError
,
1247 "string indices must be integers");
1253 string_buffer_getreadbuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1256 PyErr_SetString(PyExc_SystemError
,
1257 "accessing non-existent string segment");
1260 *ptr
= (void *)self
->ob_sval
;
1261 return self
->ob_size
;
1265 string_buffer_getwritebuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1267 PyErr_SetString(PyExc_TypeError
,
1268 "Cannot use string as modifiable buffer");
1273 string_buffer_getsegcount(PyStringObject
*self
, Py_ssize_t
*lenp
)
1276 *lenp
= self
->ob_size
;
1281 string_buffer_getcharbuf(PyStringObject
*self
, Py_ssize_t index
, const char **ptr
)
1284 PyErr_SetString(PyExc_SystemError
,
1285 "accessing non-existent string segment");
1288 *ptr
= self
->ob_sval
;
1289 return self
->ob_size
;
1292 static PySequenceMethods string_as_sequence
= {
1293 (lenfunc
)string_length
, /*sq_length*/
1294 (binaryfunc
)string_concat
, /*sq_concat*/
1295 (ssizeargfunc
)string_repeat
, /*sq_repeat*/
1296 (ssizeargfunc
)string_item
, /*sq_item*/
1297 (ssizessizeargfunc
)string_slice
, /*sq_slice*/
1300 (objobjproc
)string_contains
/*sq_contains*/
1303 static PyMappingMethods string_as_mapping
= {
1304 (lenfunc
)string_length
,
1305 (binaryfunc
)string_subscript
,
1309 static PyBufferProcs string_as_buffer
= {
1310 (readbufferproc
)string_buffer_getreadbuf
,
1311 (writebufferproc
)string_buffer_getwritebuf
,
1312 (segcountproc
)string_buffer_getsegcount
,
1313 (charbufferproc
)string_buffer_getcharbuf
,
1319 #define RIGHTSTRIP 1
/* Argument-format strings for the strip family, indexed by the strip-type
   constants defined above (RIGHTSTRIP is 1).  Each entry has the shape
   "|O:<method>"; both the strings and the table itself are read-only. */
static const char *const stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Bare method name for strip type `i': the format string with its
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[(i)]+3)
/* Non-zero when the `length' bytes of `target' beginning at `offset' equal
   the first `length' bytes of `pattern'.  The first and last bytes are
   compared directly; memcmp() then checks the bytes in between.

   Don't call if length < 2: the memcmp() count is length-2.

   Every argument is parenthesized so compound expressions are treated as a
   unit, but each one is still evaluated more than once -- pass only
   side-effect-free expressions. */
#define Py_STRING_MATCH(target, offset, pattern, length)		\
	((target)[(offset)] == (pattern)[0] &&				\
	 (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&	\
	 !memcmp((target)+(offset)+1, (pattern)+1, (length)-2))
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Number of list slots to allocate up front for at most `maxsplit' splits:
   maxsplit+1 (5 splits gives 6 elements), capped at MAX_PREALLOC.  The
   argument is parenthesized so that a compound expression is compared and
   incremented as a whole; it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
	((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1348 #define SPLIT_APPEND(data, left, right) \
1349 str = PyString_FromStringAndSize((data) + (left), \
1350 (right) - (left)); \
1353 if (PyList_Append(list, str)) { \
1360 #define SPLIT_ADD(data, left, right) { \
1361 str = PyString_FromStringAndSize((data) + (left), \
1362 (right) - (left)); \
1365 if (count < MAX_PREALLOC) { \
1366 PyList_SET_ITEM(list, count, str); \
1368 if (PyList_Append(list, str)) { \
/* Always force the list to the expected size: store the `count' variable of
   the enclosing scope into the list's ob_size field.  The argument is
   parenthesized so the cast applies to the whole expression, and the
   assignment is wrapped so the macro expands to a single expression. */
#define FIX_PREALLOC_SIZE(list) (((PyListObject *)(list))->ob_size = count)
/* Index-cursor helpers for whitespace scanning.  Each moves the index `i'
   through the bytes of `s' while the byte under it is (SPACE) or is not
   (NONSPACE) whitespace according to isspace().  The forward forms advance
   and stop at `len'; the R* forms step backwards and stop once `i' drops
   below 0.  `i' must be a modifiable lvalue.

   Arguments are parenthesized so compound expressions (for example a
   conditional length) are treated as a unit.  The brace-block shape is kept
   so that call sites expand exactly as before. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1385 Py_LOCAL_INLINE(PyObject
*)
1386 split_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxsplit
)
1388 Py_ssize_t i
, j
, count
=0;
1390 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1397 while (maxsplit
-- > 0) {
1398 SKIP_SPACE(s
, i
, len
);
1401 SKIP_NONSPACE(s
, i
, len
);
1406 /* Only occurs when maxsplit was reached */
1407 /* Skip any remaining whitespace and copy to end of string */
1408 SKIP_SPACE(s
, i
, len
);
1410 SPLIT_ADD(s
, i
, len
);
1412 FIX_PREALLOC_SIZE(list
);
1419 Py_LOCAL_INLINE(PyObject
*)
1420 split_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
1422 register Py_ssize_t i
, j
, count
=0;
1424 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
1430 while ((j
< len
) && (maxcount
-- > 0)) {
1432 /* I found that using memchr makes no difference */
1441 SPLIT_ADD(s
, i
, len
);
1443 FIX_PREALLOC_SIZE(list
);
1451 PyDoc_STRVAR(split__doc__
,
1452 "S.split([sep [,maxsplit]]) -> list of strings\n\
1454 Return a list of the words in the string S, using sep as the\n\
1455 delimiter string. If maxsplit is given, at most maxsplit\n\
1456 splits are done. If sep is not specified or is None, any\n\
1457 whitespace string is a separator.");
1460 string_split(PyStringObject
*self
, PyObject
*args
)
1462 Py_ssize_t len
= PyString_GET_SIZE(self
), n
, i
, j
;
1463 Py_ssize_t maxsplit
= -1, count
=0;
1464 const char *s
= PyString_AS_STRING(self
), *sub
;
1465 PyObject
*list
, *str
, *subobj
= Py_None
;
1470 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
1473 maxsplit
= PY_SSIZE_T_MAX
;
1474 if (subobj
== Py_None
)
1475 return split_whitespace(s
, len
, maxsplit
);
1476 if (PyString_Check(subobj
)) {
1477 sub
= PyString_AS_STRING(subobj
);
1478 n
= PyString_GET_SIZE(subobj
);
1480 #ifdef Py_USING_UNICODE
1481 else if (PyUnicode_Check(subobj
))
1482 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
1484 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1488 PyErr_SetString(PyExc_ValueError
, "empty separator");
1492 return split_char(s
, len
, sub
[0], maxsplit
);
1494 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1500 while (maxsplit
-- > 0) {
1501 pos
= fastsearch(s
+i
, len
-i
, sub
, n
, FAST_SEARCH
);
1510 while ((j
+n
<= len
) && (maxsplit
-- > 0)) {
1511 for (; j
+n
<= len
; j
++) {
1512 if (Py_STRING_MATCH(s
, j
, sub
, n
)) {
1520 SPLIT_ADD(s
, i
, len
);
1521 FIX_PREALLOC_SIZE(list
);
1529 PyDoc_STRVAR(partition__doc__
,
1530 "S.partition(sep) -> (head, sep, tail)\n\
1532 Searches for the separator sep in S, and returns the part before it,\n\
1533 the separator itself, and the part after it. If the separator is not\n\
1534 found, returns S and two empty strings.");
1537 string_partition(PyStringObject
*self
, PyObject
*sep_obj
)
1542 if (PyString_Check(sep_obj
)) {
1543 sep
= PyString_AS_STRING(sep_obj
);
1544 sep_len
= PyString_GET_SIZE(sep_obj
);
1546 #ifdef Py_USING_UNICODE
1547 else if (PyUnicode_Check(sep_obj
))
1548 return PyUnicode_Partition((PyObject
*) self
, sep_obj
);
1550 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1553 return stringlib_partition(
1555 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1556 sep_obj
, sep
, sep_len
1560 PyDoc_STRVAR(rpartition__doc__
,
1561 "S.rpartition(sep) -> (tail, sep, head)\n\
1563 Searches for the separator sep in S, starting at the end of S, and returns\n\
1564 the part before it, the separator itself, and the part after it. If the\n\
1565 separator is not found, returns two empty strings and S.");
1568 string_rpartition(PyStringObject
*self
, PyObject
*sep_obj
)
1573 if (PyString_Check(sep_obj
)) {
1574 sep
= PyString_AS_STRING(sep_obj
);
1575 sep_len
= PyString_GET_SIZE(sep_obj
);
1577 #ifdef Py_USING_UNICODE
1578 else if (PyUnicode_Check(sep_obj
))
1579 return PyUnicode_Partition((PyObject
*) self
, sep_obj
);
1581 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1584 return stringlib_rpartition(
1586 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1587 sep_obj
, sep
, sep_len
1591 Py_LOCAL_INLINE(PyObject
*)
1592 rsplit_whitespace(const char *s
, Py_ssize_t len
, Py_ssize_t maxsplit
)
1594 Py_ssize_t i
, j
, count
=0;
1596 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1603 while (maxsplit
-- > 0) {
1607 RSKIP_NONSPACE(s
, i
);
1608 SPLIT_ADD(s
, i
+ 1, j
+ 1);
1611 /* Only occurs when maxsplit was reached */
1612 /* Skip any remaining whitespace and copy to beginning of string */
1615 SPLIT_ADD(s
, 0, i
+ 1);
1618 FIX_PREALLOC_SIZE(list
);
1619 if (PyList_Reverse(list
) < 0)
1627 Py_LOCAL_INLINE(PyObject
*)
1628 rsplit_char(const char *s
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
1630 register Py_ssize_t i
, j
, count
=0;
1632 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
1638 while ((i
>= 0) && (maxcount
-- > 0)) {
1639 for (; i
>= 0; i
--) {
1641 SPLIT_ADD(s
, i
+ 1, j
+ 1);
1648 SPLIT_ADD(s
, 0, j
+ 1);
1650 FIX_PREALLOC_SIZE(list
);
1651 if (PyList_Reverse(list
) < 0)
/* Docstring and implementation of S.rsplit([sep [,maxsplit]]).
 * Visible dispatch: sep == None -> rsplit_whitespace(); str -> its buffer
 * and size; unicode -> PyUnicode_RSplit(); anything else goes through
 * PyObject_AsCharBuffer().  A call to rsplit_char() with sub[0] and a
 * general Py_STRING_MATCH loop follow.
 * NOTE(review): the end of the docstring, the return type, the braces and
 * several guards are missing from this listing.
 */
1660 PyDoc_STRVAR(rsplit__doc__
,
1661 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1663 Return a list of the words in the string S, using sep as the\n\
1664 delimiter string, starting at the end of the string and working\n\
1665 to the front. If maxsplit is given, at most maxsplit splits are\n\
1666 done. If sep is not specified or is None, any whitespace string\n\
1670 string_rsplit(PyStringObject
*self
, PyObject
*args
)
1672 Py_ssize_t len
= PyString_GET_SIZE(self
), n
, i
, j
;
1673 Py_ssize_t maxsplit
= -1, count
=0;
1674 const char *s
= PyString_AS_STRING(self
), *sub
;
1675 PyObject
*list
, *str
, *subobj
= Py_None
;
/* Both arguments are optional: "O" is the separator object, "n" is maxsplit. */
1677 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
/* NOTE(review): the guard for this assignment is not visible; presumably it tests for a negative maxsplit. */
1680 maxsplit
= PY_SSIZE_T_MAX
;
/* No separator given (or None): whitespace splitting. */
1681 if (subobj
== Py_None
)
1682 return rsplit_whitespace(s
, len
, maxsplit
);
1683 if (PyString_Check(subobj
)) {
1684 sub
= PyString_AS_STRING(subobj
);
1685 n
= PyString_GET_SIZE(subobj
);
1687 #ifdef Py_USING_UNICODE
1688 else if (PyUnicode_Check(subobj
))
1689 return PyUnicode_RSplit((PyObject
*)self
, subobj
, maxsplit
);
1691 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
/* NOTE(review): the condition guarding this error is not visible; the message suggests a zero-length separator. */
1695 PyErr_SetString(PyExc_ValueError
, "empty separator");
/* Single-byte separator path (its guard is not visible here). */
1699 return rsplit_char(s
, len
, sub
[0], maxsplit
);
1701 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
/* General case: look for sub at offset i while splits remain. */
1708 while ( (i
>= 0) && (maxsplit
-- > 0) ) {
1710 if (Py_STRING_MATCH(s
, i
, sub
, n
)) {
1711 SPLIT_ADD(s
, i
+ n
, j
);
1719 FIX_PREALLOC_SIZE(list
);
1720 if (PyList_Reverse(list
) < 0)
/* Docstring and implementation of S.join(sequence).
 * Visible flow: orig is converted with PySequence_Fast(); the first item is
 * tested for an exact str/unicode type; a pre-pass over all items adds up
 * the needed size sz, calls PyUnicode_Join() when a unicode item is seen,
 * sets TypeError for any other non-str item and OverflowError when sz wraps
 * (sz < old_sz) or exceeds PY_SSIZE_T_MAX; then the result is allocated and
 * filled with Py_MEMCPY, copying sep after every item except the last.
 * NOTE(review): the return type, braces, several declarations (i, n, p, sz,
 * result), cleanup/return paths and the closing lines of two comments are
 * missing from this listing.
 */
1730 PyDoc_STRVAR(join__doc__
,
1731 "S.join(sequence) -> string\n\
1733 Return a string which is the concatenation of the strings in the\n\
1734 sequence. The separator between elements is S.");
1737 string_join(PyStringObject
*self
, PyObject
*orig
)
1739 char *sep
= PyString_AS_STRING(self
);
1740 const Py_ssize_t seplen
= PyString_GET_SIZE(self
);
1741 PyObject
*res
= NULL
;
1743 Py_ssize_t seqlen
= 0;
1746 PyObject
*seq
, *item
;
/* seq is what the later loops index with PySequence_Fast_GET_ITEM(). */
1748 seq
= PySequence_Fast(orig
, "");
1753 seqlen
= PySequence_Size(seq
);
/* Empty-string result (its guard is not visible in this listing). */
1756 return PyString_FromString("");
/* First item: exact str/unicode test; the branch body is not visible here. */
1759 item
= PySequence_Fast_GET_ITEM(seq
, 0);
1760 if (PyString_CheckExact(item
) || PyUnicode_CheckExact(item
)) {
1767 /* There are at least two things to join, or else we have a subclass
1768 * of the builtin types in the sequence.
1769 * Do a pre-pass to figure out the total amount of space we'll
1770 * need (sz), see whether any argument is absurd, and defer to
1771 * the Unicode join if appropriate.
1773 for (i
= 0; i
< seqlen
; i
++) {
1774 const size_t old_sz
= sz
;
1775 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1776 if (!PyString_Check(item
)){
1777 #ifdef Py_USING_UNICODE
1778 if (PyUnicode_Check(item
)) {
1779 /* Defer to Unicode join.
1780 * CAUTION: There's no gurantee that the
1781 * original sequence can be iterated over
1782 * again, so we must pass seq here.
1785 result
= PyUnicode_Join((PyObject
*)self
, seq
);
1790 PyErr_Format(PyExc_TypeError
,
1791 "sequence item %zd: expected string,"
1793 i
, item
->ob_type
->tp_name
);
1797 sz
+= PyString_GET_SIZE(item
);
1800 if (sz
< old_sz
|| sz
> PY_SSIZE_T_MAX
) {
1801 PyErr_SetString(PyExc_OverflowError
,
1802 "join() result is too long for a Python string");
1808 /* Allocate result space. */
1809 res
= PyString_FromStringAndSize((char*)NULL
, sz
);
1815 /* Catenate everything. */
1816 p
= PyString_AS_STRING(res
);
1817 for (i
= 0; i
< seqlen
; ++i
) {
1819 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1820 n
= PyString_GET_SIZE(item
);
1821 Py_MEMCPY(p
, PyString_AS_STRING(item
), n
);
/* The separator goes after every item except the last one. */
1823 if (i
< seqlen
- 1) {
1824 Py_MEMCPY(p
, sep
, seplen
);
/* _PyString_Join: join entry point taking plain PyObject pointers.  Asserts
 * that sep is a non-NULL string object, then forwards to
 * string_join(sep, x) and returns its result.
 * NOTE(review): the return type line and braces are missing from this listing.
 */
1834 _PyString_Join(PyObject
*sep
, PyObject
*x
)
1836 assert(sep
!= NULL
&& PyString_Check(sep
));
1838 return string_join((PyStringObject
*)sep
, x
);
/* string_adjust_indices: helper that receives pointers to a start and an
 * end index plus a length.  Callers in this file (string_count,
 * _string_tailmatch) pass the string's size and afterwards use start/end
 * as offsets into the buffer.
 * NOTE(review): the whole body is missing from this listing, so the exact
 * adjustment rules cannot be stated here - presumably slice-style
 * clamping; confirm against the full source.
 */
1841 Py_LOCAL_INLINE(void)
1842 string_adjust_indices(Py_ssize_t
*start
, Py_ssize_t
*end
, Py_ssize_t len
)
/* string_find_internal: shared worker behind find/rfind/index/rindex.
 * Parses (sub [, start [, end]]) with _PyEval_SliceIndex converters
 * (start defaults to 0, end to PY_SSIZE_T_MAX).  A str argument supplies
 * its buffer directly, a unicode argument is delegated to PyUnicode_Find()
 * together with dir, anything else goes through PyObject_AsCharBuffer().
 * The search itself is stringlib_find_slice() or stringlib_rfind_slice().
 * NOTE(review): the test choosing between the two searches is not visible
 * (presumably the sign of dir), nor are the declarations of subobj, sub,
 * sub_len and the error returns.
 */
1856 Py_LOCAL_INLINE(Py_ssize_t
)
1857 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
1862 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1864 if (!PyArg_ParseTuple(args
, "O|O&O&:find/rfind/index/rindex", &subobj
,
1865 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1867 if (PyString_Check(subobj
)) {
1868 sub
= PyString_AS_STRING(subobj
);
1869 sub_len
= PyString_GET_SIZE(subobj
);
1871 #ifdef Py_USING_UNICODE
1872 else if (PyUnicode_Check(subobj
))
1873 return PyUnicode_Find(
1874 (PyObject
*)self
, subobj
, start
, end
, dir
);
1876 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
1877 /* XXX - the "expected a character buffer object" is pretty
1878 confusing for a non-expert. remap to something else ? */
/* Forward search over self's buffer. */
1882 return stringlib_find_slice(
1883 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1884 sub
, sub_len
, start
, end
);
/* Reverse search over self's buffer. */
1886 return stringlib_rfind_slice(
1887 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1888 sub
, sub_len
, start
, end
);
/* Docstring and implementation of S.find(): calls
 * string_find_internal(self, args, +1) and converts the resulting index
 * with PyInt_FromSsize_t().
 * NOTE(review): the return type, braces and any error check on result are
 * missing from this listing.
 */
1892 PyDoc_STRVAR(find__doc__
,
1893 "S.find(sub [,start [,end]]) -> int\n\
1895 Return the lowest index in S where substring sub is found,\n\
1896 such that sub is contained within s[start:end]. Optional\n\
1897 arguments start and end are interpreted as in slice notation.\n\
1899 Return -1 on failure.");
1902 string_find(PyStringObject
*self
, PyObject
*args
)
1904 Py_ssize_t result
= string_find_internal(self
, args
, +1);
1907 return PyInt_FromSsize_t(result
);
/* Docstring and implementation of S.index(): same forward search as
 * S.find() (string_find_internal with +1), but a ValueError
 * "substring not found" is set on the not-found path.
 * NOTE(review): the tests on result that select the error path are missing
 * from this listing, as are the return type and braces.
 */
1911 PyDoc_STRVAR(index__doc__
,
1912 "S.index(sub [,start [,end]]) -> int\n\
1914 Like S.find() but raise ValueError when the substring is not found.");
1917 string_index(PyStringObject
*self
, PyObject
*args
)
1919 Py_ssize_t result
= string_find_internal(self
, args
, +1);
1923 PyErr_SetString(PyExc_ValueError
,
1924 "substring not found");
1927 return PyInt_FromSsize_t(result
);
/* Docstring and implementation of S.rfind(): calls
 * string_find_internal(self, args, -1) and converts the resulting index
 * with PyInt_FromSsize_t().
 * NOTE(review): the return type, braces and any error check on result are
 * missing from this listing.
 */
1931 PyDoc_STRVAR(rfind__doc__
,
1932 "S.rfind(sub [,start [,end]]) -> int\n\
1934 Return the highest index in S where substring sub is found,\n\
1935 such that sub is contained within s[start:end]. Optional\n\
1936 arguments start and end are interpreted as in slice notation.\n\
1938 Return -1 on failure.");
1941 string_rfind(PyStringObject
*self
, PyObject
*args
)
1943 Py_ssize_t result
= string_find_internal(self
, args
, -1);
1946 return PyInt_FromSsize_t(result
);
/* Docstring and implementation of S.rindex(): same reverse search as
 * S.rfind() (string_find_internal with -1), but a ValueError
 * "substring not found" is set on the not-found path.
 * NOTE(review): the tests on result that select the error path are missing
 * from this listing, as are the return type and braces.
 */
1950 PyDoc_STRVAR(rindex__doc__
,
1951 "S.rindex(sub [,start [,end]]) -> int\n\
1953 Like S.rfind() but raise ValueError when the substring is not found.");
1956 string_rindex(PyStringObject
*self
, PyObject
*args
)
1958 Py_ssize_t result
= string_find_internal(self
, args
, -1);
1962 PyErr_SetString(PyExc_ValueError
,
1963 "substring not found");
1966 return PyInt_FromSsize_t(result
);
/* do_xstrip: strip from self every leading and/or trailing byte that
 * occurs in the string object sepobj.  striptype selects the sides: the
 * left scan runs unless it is RIGHTSTRIP, the right scan unless it is
 * LEFTSTRIP.  Membership is tested with memchr() over sep's buffer.
 * When nothing was removed (i == 0 and j == len) and self is an exact
 * str, self itself is returned; otherwise a new string for s[i:j].
 * NOTE(review): the declarations/initialisation of i and j, the loop
 * bodies and the braces are missing from this listing.
 */
1970 Py_LOCAL_INLINE(PyObject
*)
1971 do_xstrip(PyStringObject
*self
, int striptype
, PyObject
*sepobj
)
1973 char *s
= PyString_AS_STRING(self
);
1974 Py_ssize_t len
= PyString_GET_SIZE(self
);
1975 char *sep
= PyString_AS_STRING(sepobj
);
1976 Py_ssize_t seplen
= PyString_GET_SIZE(sepobj
);
/* Left side: advance i past bytes that belong to sep. */
1980 if (striptype
!= RIGHTSTRIP
) {
1981 while (i
< len
&& memchr(sep
, Py_CHARMASK(s
[i
]), seplen
)) {
/* Right side: move j down past bytes that belong to sep. */
1987 if (striptype
!= LEFTSTRIP
) {
1990 } while (j
>= i
&& memchr(sep
, Py_CHARMASK(s
[j
]), seplen
));
/* Unchanged exact str: hand back the same object instead of copying. */
1994 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
1996 return (PyObject
*)self
;
1999 return PyString_FromStringAndSize(s
+i
, j
-i
);
/* do_strip: whitespace variant of do_xstrip.  Leading and/or trailing
 * bytes for which isspace() is true are skipped according to striptype
 * (left scan unless RIGHTSTRIP, right scan unless LEFTSTRIP).  When
 * nothing was removed and self is an exact str, self is returned;
 * otherwise a new string for s[i:j].
 * NOTE(review): the initialisation of i and j, the loop bodies and the
 * braces are missing from this listing.
 */
2003 Py_LOCAL_INLINE(PyObject
*)
2004 do_strip(PyStringObject
*self
, int striptype
)
2006 char *s
= PyString_AS_STRING(self
);
2007 Py_ssize_t len
= PyString_GET_SIZE(self
), i
, j
;
/* Left side. */
2010 if (striptype
!= RIGHTSTRIP
) {
2011 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
/* Right side. */
2017 if (striptype
!= LEFTSTRIP
) {
2020 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
/* Unchanged exact str: return the same object. */
2024 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
2026 return (PyObject
*)self
;
2029 return PyString_FromStringAndSize(s
+i
, j
-i
);
/* do_argstrip: strip helper used when the method was called with
 * arguments.  The argument tuple is parsed with the format string
 * stripformat[striptype].  If a separator was given and is not None:
 * a str goes to do_xstrip(); a unicode object causes self to be converted
 * with PyUnicode_FromObject() and stripped via _PyUnicode_XStrip(); any
 * other type sets TypeError.  With no separator (or None) the call falls
 * back to do_strip().
 * NOTE(review): braces, the error returns, the declaration of res and the
 * remaining arguments of _PyUnicode_XStrip() are missing from this listing.
 */
2033 Py_LOCAL_INLINE(PyObject
*)
2034 do_argstrip(PyStringObject
*self
, int striptype
, PyObject
*args
)
2036 PyObject
*sep
= NULL
;
2038 if (!PyArg_ParseTuple(args
, (char *)stripformat
[striptype
], &sep
))
2041 if (sep
!= NULL
&& sep
!= Py_None
) {
2042 if (PyString_Check(sep
))
2043 return do_xstrip(self
, striptype
, sep
);
2044 #ifdef Py_USING_UNICODE
2045 else if (PyUnicode_Check(sep
)) {
2046 PyObject
*uniself
= PyUnicode_FromObject((PyObject
*)self
);
2050 res
= _PyUnicode_XStrip((PyUnicodeObject
*)uniself
,
/* Unsupported separator type; the message depends on unicode support. */
2056 PyErr_Format(PyExc_TypeError
,
2057 #ifdef Py_USING_UNICODE
2058 "%s arg must be None, str or unicode",
2060 "%s arg must be None or str",
2062 STRIPNAME(striptype
));
2066 return do_strip(self
, striptype
);
/* Docstring and implementation of S.strip([chars]).  With an empty
 * argument tuple the whitespace path do_strip(self, BOTHSTRIP) is taken
 * directly; otherwise do_argstrip() parses the argument.
 * NOTE(review): the return type, braces and the else keyword are missing
 * from this listing.
 */
2070 PyDoc_STRVAR(strip__doc__
,
2071 "S.strip([chars]) -> string or unicode\n\
2073 Return a copy of the string S with leading and trailing\n\
2074 whitespace removed.\n\
2075 If chars is given and not None, remove characters in chars instead.\n\
2076 If chars is unicode, S will be converted to unicode before stripping");
2079 string_strip(PyStringObject
*self
, PyObject
*args
)
2081 if (PyTuple_GET_SIZE(args
) == 0)
2082 return do_strip(self
, BOTHSTRIP
); /* Common case */
2084 return do_argstrip(self
, BOTHSTRIP
, args
);
/* Docstring and implementation of S.lstrip([chars]).  With an empty
 * argument tuple do_strip(self, LEFTSTRIP) is called directly; otherwise
 * do_argstrip() parses the argument.
 * NOTE(review): the return type, braces and the else keyword are missing
 * from this listing.
 */
2088 PyDoc_STRVAR(lstrip__doc__
,
2089 "S.lstrip([chars]) -> string or unicode\n\
2091 Return a copy of the string S with leading whitespace removed.\n\
2092 If chars is given and not None, remove characters in chars instead.\n\
2093 If chars is unicode, S will be converted to unicode before stripping");
2096 string_lstrip(PyStringObject
*self
, PyObject
*args
)
2098 if (PyTuple_GET_SIZE(args
) == 0)
2099 return do_strip(self
, LEFTSTRIP
); /* Common case */
2101 return do_argstrip(self
, LEFTSTRIP
, args
);
/* Docstring and implementation of S.rstrip([chars]).  With an empty
 * argument tuple do_strip(self, RIGHTSTRIP) is called directly; otherwise
 * do_argstrip() parses the argument.
 * NOTE(review): the return type, braces and the else keyword are missing
 * from this listing.
 */
2105 PyDoc_STRVAR(rstrip__doc__
,
2106 "S.rstrip([chars]) -> string or unicode\n\
2108 Return a copy of the string S with trailing whitespace removed.\n\
2109 If chars is given and not None, remove characters in chars instead.\n\
2110 If chars is unicode, S will be converted to unicode before stripping");
2113 string_rstrip(PyStringObject
*self
, PyObject
*args
)
2115 if (PyTuple_GET_SIZE(args
) == 0)
2116 return do_strip(self
, RIGHTSTRIP
); /* Common case */
2118 return do_argstrip(self
, RIGHTSTRIP
, args
);
/* Docstring and implementation of S.lower().  A new string of the same
 * size n is allocated, self's bytes are copied into it with Py_MEMCPY and
 * the copy is then visited byte by byte (each byte masked through
 * Py_CHARMASK).
 * NOTE(review): the statements inside the loop that perform the case
 * conversion, the preprocessor guard around the _tolower define, the
 * declarations of s and newobj and the return are missing from this listing.
 */
2122 PyDoc_STRVAR(lower__doc__
,
2123 "S.lower() -> string\n\
2125 Return a copy of the string S converted to lowercase.");
2127 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2129 #define _tolower tolower
2133 string_lower(PyStringObject
*self
)
2136 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2139 newobj
= PyString_FromStringAndSize(NULL
, n
);
2143 s
= PyString_AS_STRING(newobj
);
2145 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2147 for (i
= 0; i
< n
; i
++) {
2148 int c
= Py_CHARMASK(s
[i
]);
/* Docstring and implementation of S.upper().  Mirrors string_lower: a new
 * string of size n is allocated, self's bytes are copied in with
 * Py_MEMCPY and the copy is visited byte by byte through Py_CHARMASK.
 * NOTE(review): the statements inside the loop that perform the case
 * conversion, the preprocessor guard around the _toupper define, the
 * declarations of s and newobj and the return are missing from this listing.
 */
2156 PyDoc_STRVAR(upper__doc__
,
2157 "S.upper() -> string\n\
2159 Return a copy of the string S converted to uppercase.");
2162 #define _toupper toupper
2166 string_upper(PyStringObject
*self
)
2169 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2172 newobj
= PyString_FromStringAndSize(NULL
, n
);
2176 s
= PyString_AS_STRING(newobj
);
2178 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2180 for (i
= 0; i
< n
; i
++) {
2181 int c
= Py_CHARMASK(s
[i
]);
/* Docstring and implementation of S.title().  The input is read byte by
 * byte into a freshly allocated string of the same size while the flag
 * previous_is_cased records whether the preceding byte was a cased
 * letter: it is set to 1 in the two letter branches and reset to 0
 * otherwise.
 * NOTE(review): the first branch condition (presumably islower(c)), the
 * statements that change case and store into s_new, the NULL check on
 * newobj and the return are missing from this listing.
 */
2189 PyDoc_STRVAR(title__doc__
,
2190 "S.title() -> string\n\
2192 Return a titlecased version of S, i.e. words start with uppercase\n\
2193 characters, all remaining cased characters have lowercase.");
2196 string_title(PyStringObject
*self
)
2198 char *s
= PyString_AS_STRING(self
), *s_new
;
2199 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2200 int previous_is_cased
= 0;
2203 newobj
= PyString_FromStringAndSize(NULL
, n
);
2206 s_new
= PyString_AsString(newobj
);
2207 for (i
= 0; i
< n
; i
++) {
2208 int c
= Py_CHARMASK(*s
++);
2210 if (!previous_is_cased
)
2212 previous_is_cased
= 1;
2213 } else if (isupper(c
)) {
2214 if (previous_is_cased
)
2216 previous_is_cased
= 1;
/* Neither letter branch matched: the next letter starts a new word. */
2218 previous_is_cased
= 0;
/* Docstring and implementation of S.capitalize().  The first byte is
 * handled separately and may be stored as toupper(c); the remaining bytes
 * (loop from i = 1) may be stored as tolower(c).  Output goes to a newly
 * allocated string of the same size.
 * NOTE(review): the end of the docstring, the conditions that decide when
 * toupper/tolower are applied, the pointer advance on s_new, the NULL
 * check on newobj and the return are missing from this listing.
 */
2224 PyDoc_STRVAR(capitalize__doc__
,
2225 "S.capitalize() -> string\n\
2227 Return a copy of the string S with only its first character\n\
2231 string_capitalize(PyStringObject
*self
)
2233 char *s
= PyString_AS_STRING(self
), *s_new
;
2234 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2237 newobj
= PyString_FromStringAndSize(NULL
, n
);
2240 s_new
= PyString_AsString(newobj
);
/* First byte. */
2242 int c
= Py_CHARMASK(*s
++);
2244 *s_new
= toupper(c
);
/* Remaining bytes. */
2249 for (i
= 1; i
< n
; i
++) {
2250 int c
= Py_CHARMASK(*s
++);
2252 *s_new
= tolower(c
);
/* Docstring and implementation of S.count(sub[, start[, end]]).
 * Parses the arguments with _PyEval_SliceIndex converters (start defaults
 * to 0, end to PY_SSIZE_T_MAX).  A str argument supplies its buffer
 * directly; a unicode argument is counted by PyUnicode_Count() and that
 * count is returned; anything else goes through PyObject_AsCharBuffer().
 * For the byte case start/end are passed through string_adjust_indices()
 * and stringlib_count() runs on the window str + start, end - start.
 * NOTE(review): the return type, braces, the declarations of sub_obj,
 * sub_len and count, and the error returns are missing from this listing.
 */
2261 PyDoc_STRVAR(count__doc__
,
2262 "S.count(sub[, start[, end]]) -> int\n\
2264 Return the number of non-overlapping occurrences of substring sub in\n\
2265 string S[start:end]. Optional arguments start and end are interpreted\n\
2266 as in slice notation.");
2269 string_count(PyStringObject
*self
, PyObject
*args
)
2272 const char *str
= PyString_AS_STRING(self
), *sub
;
2274 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
2276 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
2277 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2280 if (PyString_Check(sub_obj
)) {
2281 sub
= PyString_AS_STRING(sub_obj
);
2282 sub_len
= PyString_GET_SIZE(sub_obj
);
2284 #ifdef Py_USING_UNICODE
2285 else if (PyUnicode_Check(sub_obj
)) {
2287 count
= PyUnicode_Count((PyObject
*)self
, sub_obj
, start
, end
);
2291 return PyInt_FromSsize_t(count
);
2294 else if (PyObject_AsCharBuffer(sub_obj
, &sub
, &sub_len
))
/* Adjust start/end against the string length before using them as offsets. */
2297 string_adjust_indices(&start
, &end
, PyString_GET_SIZE(self
));
2299 return PyInt_FromSsize_t(
2300 stringlib_count(str
+ start
, end
- start
, sub
, sub_len
)
/* Docstring and implementation of S.swapcase().  Each input byte is read
 * through Py_CHARMASK and written to a newly allocated string of the same
 * size: one branch stores toupper(c), the isupper(c) branch stores
 * tolower(c).
 * NOTE(review): the first branch condition (presumably islower(c)), the
 * fall-through copy for other bytes, the advance of s_new, the NULL check
 * on newobj and the return are missing from this listing.
 */
2304 PyDoc_STRVAR(swapcase__doc__
,
2305 "S.swapcase() -> string\n\
2307 Return a copy of the string S with uppercase characters\n\
2308 converted to lowercase and vice versa.");
2311 string_swapcase(PyStringObject
*self
)
2313 char *s
= PyString_AS_STRING(self
), *s_new
;
2314 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2317 newobj
= PyString_FromStringAndSize(NULL
, n
);
2320 s_new
= PyString_AsString(newobj
);
2321 for (i
= 0; i
< n
; i
++) {
2322 int c
= Py_CHARMASK(*s
++);
2324 *s_new
= toupper(c
);
2326 else if (isupper(c
)) {
2327 *s_new
= tolower(c
);
/* Docstring and implementation of S.translate(table [,deletechars]).
 * Visible flow:
 *  - table may be a str, a unicode object (handled by PyUnicode_Translate()
 *    after a TypeError is set if deletechars was also supplied), or any
 *    object accepted by PyObject_AsCharBuffer(); a length other than 256
 *    sets ValueError.
 *  - deletechars may be a str or a char-buffer object; unicode sets TypeError.
 *  - A result string of the input length is allocated.  One loop maps every
 *    byte through table; the other first builds trans_table[256], marks
 *    deleted bytes with -1, copies only unmarked bytes and finally shrinks
 *    the result with _PyString_Resize().
 *  - In both paths an unchanged exact-str input leads to Py_INCREF(input_obj).
 * NOTE(review): the return type, braces, the declaration of result, the
 * assignment of table, the test selecting the no-deletion path, the
 * statements updating changed, all error returns and the closing line of
 * one comment are missing from this listing.
 */
2337 PyDoc_STRVAR(translate__doc__
,
2338 "S.translate(table [,deletechars]) -> string\n\
2340 Return a copy of the string S, where all characters occurring\n\
2341 in the optional argument deletechars are removed, and the\n\
2342 remaining characters have been mapped through the given\n\
2343 translation table, which must be a string of length 256.");
2346 string_translate(PyStringObject
*self
, PyObject
*args
)
2348 register char *input
, *output
;
2349 register const char *table
;
2350 register Py_ssize_t i
, c
, changed
= 0;
2351 PyObject
*input_obj
= (PyObject
*)self
;
2352 const char *table1
, *output_start
, *del_table
=NULL
;
2353 Py_ssize_t inlen
, tablen
, dellen
= 0;
2355 int trans_table
[256];
2356 PyObject
*tableobj
, *delobj
= NULL
;
/* One required argument (table) and one optional (deletechars). */
2358 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
2359 &tableobj
, &delobj
))
2362 if (PyString_Check(tableobj
)) {
2363 table1
= PyString_AS_STRING(tableobj
);
2364 tablen
= PyString_GET_SIZE(tableobj
);
2366 #ifdef Py_USING_UNICODE
2367 else if (PyUnicode_Check(tableobj
)) {
2368 /* Unicode .translate() does not support the deletechars
2369 parameter; instead a mapping to None will cause characters
2371 if (delobj
!= NULL
) {
2372 PyErr_SetString(PyExc_TypeError
,
2373 "deletions are implemented differently for unicode");
2376 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
2379 else if (PyObject_AsCharBuffer(tableobj
, &table1
, &tablen
))
2382 if (tablen
!= 256) {
2383 PyErr_SetString(PyExc_ValueError
,
2384 "translation table must be 256 characters long");
2388 if (delobj
!= NULL
) {
2389 if (PyString_Check(delobj
)) {
2390 del_table
= PyString_AS_STRING(delobj
);
2391 dellen
= PyString_GET_SIZE(delobj
);
2393 #ifdef Py_USING_UNICODE
2394 else if (PyUnicode_Check(delobj
)) {
2395 PyErr_SetString(PyExc_TypeError
,
2396 "deletions are implemented differently for unicode");
2400 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
2409 inlen
= PyString_GET_SIZE(input_obj
);
2410 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
2413 output_start
= output
= PyString_AsString(result
);
2414 input
= PyString_AS_STRING(input_obj
);
2417 /* If no deletions are required, use faster code */
2418 for (i
= inlen
; --i
>= 0; ) {
2419 c
= Py_CHARMASK(*input
++);
2420 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
2423 if (changed
|| !PyString_CheckExact(input_obj
))
2426 Py_INCREF(input_obj
);
/* Deletion path: build an int map of the table; -1 marks a byte to drop. */
2430 for (i
= 0; i
< 256; i
++)
2431 trans_table
[i
] = Py_CHARMASK(table
[i
]);
2433 for (i
= 0; i
< dellen
; i
++)
2434 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
/* Copy loop: bytes whose map entry is -1 are skipped. */
2436 for (i
= inlen
; --i
>= 0; ) {
2437 c
= Py_CHARMASK(*input
++);
2438 if (trans_table
[c
] != -1)
2439 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
2443 if (!changed
&& PyString_CheckExact(input_obj
)) {
2445 Py_INCREF(input_obj
);
2448 /* Fix the size of the resulting string */
2450 _PyString_Resize(&result
, output
- output_start
);
2458 /* find and count characters and substrings */
/* findchar(target, target_len, c): memchr() wrapper whose result is cast to
 * char *, i.e. a pointer to the first byte equal to c within the first
 * target_len bytes of target, or NULL when there is none.
 */
2460 #define findchar(target, target_len, c) \
2461 ((char *)memchr((const void *)(target), c, target_len))
2463 /* String ops must return a string. */
2464 /* If the object is subclass of string, create a copy */
/* return_self: used by the replace helpers when nothing has to change.
 * For an object that is not an exact str a new string is built from
 * self's buffer and size.
 * NOTE(review): the body of the PyString_CheckExact branch is missing from
 * this listing - presumably it returns self with a new reference; confirm.
 */
2465 Py_LOCAL(PyStringObject
*)
2466 return_self(PyStringObject
*self
)
2468 if (PyString_CheckExact(self
)) {
2472 return (PyStringObject
*)PyString_FromStringAndSize(
2473 PyString_AS_STRING(self
),
2474 PyString_GET_SIZE(self
));
/* countchar: count occurrences of the byte c in target[0:target_len] by
 * calling findchar() repeatedly; the visible test count >= maxcount caps
 * the count at maxcount.
 * NOTE(review): target_len is declared int, while the callers visible in
 * this file (replace_delete_single_character, replace_single_character)
 * pass a Py_ssize_t length; where Py_ssize_t is wider than int a very long
 * string would be truncated here.  Consider Py_ssize_t for this parameter.
 * NOTE(review): the declaration and increment of count, the advance of
 * start, the break and the return are missing from this listing.
 */
2477 Py_LOCAL_INLINE(Py_ssize_t
)
2478 countchar(const char *target
, int target_len
, char c
, Py_ssize_t maxcount
)
2481 const char *start
=target
;
2482 const char *end
=target
+target_len
;
2484 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
2486 if (count
>= maxcount
)
/* findstring: search target[0:target_len] for pattern within the index
 * window start..end.  An empty pattern matches immediately (start when
 * direction > 0, otherwise end).  For direction < 0 candidate offsets are
 * tried from end downwards, otherwise from start upwards, each tested with
 * Py_STRING_MATCH.
 * NOTE(review): the remaining parameters (start, end, direction), most of
 * the index normalisation, the adjustment of end for the pattern length
 * and all return statements except the empty-pattern one are missing from
 * this listing.
 */
2493 Py_LOCAL(Py_ssize_t
)
2494 findstring(const char *target
, Py_ssize_t target_len
,
2495 const char *pattern
, Py_ssize_t pattern_len
,
/* Index normalisation (only partly visible). */
2501 start
+= target_len
;
2505 if (end
> target_len
) {
2507 } else if (end
< 0) {
2513 /* zero-length substrings always match at the first attempt */
2514 if (pattern_len
== 0)
2515 return (direction
> 0) ? start
: end
;
/* Backward scan. */
2519 if (direction
< 0) {
2520 for (; end
>= start
; end
--)
2521 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
))
/* Forward scan. */
2524 for (; start
<= end
; start
++)
2525 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
))
/* countstring: count non-overlapping occurrences of pattern in target
 * within start..end, stopping once maxcount matches have been seen
 * (--maxcount <= 0).  After a match the scan position is moved by
 * pattern_len-1 in the scan direction so matches do not overlap.  For an
 * empty pattern (or maxcount == 0) the visible branch returns
 * target_len+1 when that is below maxcount.
 * NOTE(review): the start/end parameters, most of the index normalisation,
 * the match counter and the remaining returns are missing from this listing.
 */
2531 Py_LOCAL_INLINE(Py_ssize_t
)
2532 countstring(const char *target
, Py_ssize_t target_len
,
2533 const char *pattern
, Py_ssize_t pattern_len
,
2536 int direction
, Py_ssize_t maxcount
)
2541 start
+= target_len
;
2545 if (end
> target_len
) {
2547 } else if (end
< 0) {
2553 /* zero-length substrings match everywhere */
2554 if (pattern_len
== 0 || maxcount
== 0) {
2555 if (target_len
+1 < maxcount
)
2556 return target_len
+1;
/* Backward scan. */
2561 if (direction
< 0) {
2562 for (; (end
>= start
); end
--)
2563 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
)) {
2565 if (--maxcount
<= 0) break;
2566 end
-= pattern_len
-1;
/* Forward scan. */
2569 for (; (start
<= end
); start
++)
2570 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
)) {
2572 if (--maxcount
<= 0)
2574 start
+= pattern_len
-1;
2581 /* Algorithms for different cases of string replacement */
2583 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
/* replace_interleave: handles replacing the empty string, i.e. inserting
 * to_s before the first byte and after each of the following bytes of
 * self, up to count insertions (count is capped by maxcount).  The result
 * length is count * to_len + self_len; OverflowError is set when that
 * computation is detected to overflow.
 * NOTE(review): the overflow test divides product after count * to_len has
 * already been evaluated in signed Py_ssize_t arithmetic; signed overflow
 * is undefined behaviour in C, so a pre-check such as
 * to_len > (PY_SSIZE_T_MAX - self_len) / count would be safer.
 * NOTE(review): braces, the initial value of count, the pointer advances
 * after each Py_MEMCPY and the returns are missing from this listing.
 */
2584 Py_LOCAL(PyStringObject
*)
2585 replace_interleave(PyStringObject
*self
,
2586 const char *to_s
, Py_ssize_t to_len
,
2587 Py_ssize_t maxcount
)
2589 char *self_s
, *result_s
;
2590 Py_ssize_t self_len
, result_len
;
2591 Py_ssize_t count
, i
, product
;
2592 PyStringObject
*result
;
2594 self_len
= PyString_GET_SIZE(self
);
2596 /* 1 at the end plus 1 after every character */
2598 if (maxcount
< count
)
2601 /* Check for overflow */
2602 /* result_len = count * to_len + self_len; */
2603 product
= count
* to_len
;
2604 if (product
/ to_len
!= count
) {
2605 PyErr_SetString(PyExc_OverflowError
,
2606 "replace string is too long");
2609 result_len
= product
+ self_len
;
2610 if (result_len
< 0) {
2611 PyErr_SetString(PyExc_OverflowError
,
2612 "replace string is too long");
2616 if (! (result
= (PyStringObject
*)
2617 PyString_FromStringAndSize(NULL
, result_len
)) )
2620 self_s
= PyString_AS_STRING(self
);
2621 result_s
= PyString_AS_STRING(result
);
2623 /* TODO: special case single character, which doesn't need memcpy */
2625 /* Lay the first one down (guaranteed this will occur) */
2626 Py_MEMCPY(result_s
, to_s
, to_len
);
/* One original byte followed by one copy of to_s per iteration. */
2630 for (i
=0; i
<count
; i
++) {
2631 *result_s
++ = *self_s
++;
2632 Py_MEMCPY(result_s
, to_s
, to_len
);
2636 /* Copy the rest of the original string */
2637 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
2642 /* Special case for deleting a single character */
2643 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
/* replace_delete_single_character: build a copy of self without the first
 * count occurrences of from_c, where count comes from countchar() and is
 * therefore limited by maxcount.  The result has self_len - count bytes.
 * Each loop iteration locates the next from_c with findchar() and copies
 * the bytes in front of it; the tail after the last deletion is copied at
 * the end.
 * NOTE(review): braces, the declaration of count, the zero-count test in
 * front of return_self(), the initialisation and advance of start and the
 * final return are missing from this listing.
 */
2644 Py_LOCAL(PyStringObject
*)
2645 replace_delete_single_character(PyStringObject
*self
,
2646 char from_c
, Py_ssize_t maxcount
)
2648 char *self_s
, *result_s
;
2649 char *start
, *next
, *end
;
2650 Py_ssize_t self_len
, result_len
;
2652 PyStringObject
*result
;
2654 self_len
= PyString_GET_SIZE(self
);
2655 self_s
= PyString_AS_STRING(self
);
2657 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2659 return return_self(self
);
2662 result_len
= self_len
- count
; /* from_len == 1 */
2663 assert(result_len
>=0);
2665 if ( (result
= (PyStringObject
*)
2666 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2668 result_s
= PyString_AS_STRING(result
);
2671 end
= self_s
+ self_len
;
2672 while (count
-- > 0) {
2673 next
= findchar(start
, end
-start
, from_c
);
2676 Py_MEMCPY(result_s
, start
, next
-start
);
2677 result_s
+= (next
-start
);
/* Tail after the last deleted byte. */
2680 Py_MEMCPY(result_s
, start
, end
-start
);
2685 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
/* replace_delete_substring: build a copy of self with count occurrences of
 * from_s removed; count comes from countstring().  The result has
 * self_len - count * from_len bytes.  Each iteration finds the next match
 * with findstring(), copies the bytes in front of it and continues right
 * after the match (start = next + from_len); the remaining tail is copied
 * at the end.
 * NOTE(review): braces, the remaining arguments of countstring() and
 * findstring(), the zero-count test in front of return_self(), the
 * initialisation of start and the final return are missing from this
 * listing.
 */
2687 Py_LOCAL(PyStringObject
*)
2688 replace_delete_substring(PyStringObject
*self
,
2689 const char *from_s
, Py_ssize_t from_len
,
2690 Py_ssize_t maxcount
) {
2691 char *self_s
, *result_s
;
2692 char *start
, *next
, *end
;
2693 Py_ssize_t self_len
, result_len
;
2694 Py_ssize_t count
, offset
;
2695 PyStringObject
*result
;
2697 self_len
= PyString_GET_SIZE(self
);
2698 self_s
= PyString_AS_STRING(self
);
2700 count
= countstring(self_s
, self_len
,
2707 return return_self(self
);
2710 result_len
= self_len
- (count
* from_len
);
2711 assert (result_len
>=0);
2713 if ( (result
= (PyStringObject
*)
2714 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2717 result_s
= PyString_AS_STRING(result
);
2720 end
= self_s
+ self_len
;
2721 while (count
-- > 0) {
2722 offset
= findstring(start
, end
-start
,
2724 0, end
-start
, FORWARD
);
2727 next
= start
+ offset
;
2729 Py_MEMCPY(result_s
, start
, next
-start
);
2731 result_s
+= (next
-start
);
2732 start
= next
+from_len
;
/* Tail after the last deleted match. */
2734 Py_MEMCPY(result_s
, start
, end
-start
);
2738 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
/* replace_single_character_in_place: replace up to maxcount occurrences of
 * the byte from_c.  The first occurrence is located in self with
 * findchar(); if a copy is needed, self is duplicated with Py_MEMCPY and
 * the remaining occurrences are searched for inside the copy, starting at
 * the position of the first match.
 * NOTE(review): braces, the NULL tests on next and result, the stores of
 * to_c, the advance of start and the final return are missing from this
 * listing.
 */
2739 Py_LOCAL(PyStringObject
*)
2740 replace_single_character_in_place(PyStringObject
*self
,
2741 char from_c
, char to_c
,
2742 Py_ssize_t maxcount
)
2744 char *self_s
, *result_s
, *start
, *end
, *next
;
2745 Py_ssize_t self_len
;
2746 PyStringObject
*result
;
2748 /* The result string will be the same size */
2749 self_s
= PyString_AS_STRING(self
);
2750 self_len
= PyString_GET_SIZE(self
);
2752 next
= findchar(self_s
, self_len
, from_c
);
2755 /* No matches; return the original string */
2756 return return_self(self
);
2759 /* Need to make a new string */
2760 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2763 result_s
= PyString_AS_STRING(result
);
2764 Py_MEMCPY(result_s
, self_s
, self_len
);
2766 /* change everything in-place, starting with this one */
2767 start
= result_s
+ (next
-self_s
);
2770 end
= result_s
+ self_len
;
/* One replacement is already accounted for, hence the pre-decrement. */
2772 while (--maxcount
> 0) {
2773 next
= findchar(start
, end
-start
, from_c
);
2783 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
/* replace_substring_in_place: same-length substring replacement.  The
 * first match is located in self with findstring(); if a copy is needed,
 * self is duplicated and to_s is copied over each match inside the copy
 * (from_len bytes, which equals to_len in this case), continuing the
 * search right after every replaced match.
 * NOTE(review): braces, the declaration of self_s, the pattern arguments
 * of findstring(), the tests on offset and result and the final return are
 * missing from this listing.
 */
2784 Py_LOCAL(PyStringObject
*)
2785 replace_substring_in_place(PyStringObject
*self
,
2786 const char *from_s
, Py_ssize_t from_len
,
2787 const char *to_s
, Py_ssize_t to_len
,
2788 Py_ssize_t maxcount
)
2790 char *result_s
, *start
, *end
;
2792 Py_ssize_t self_len
, offset
;
2793 PyStringObject
*result
;
2795 /* The result string will be the same size */
2797 self_s
= PyString_AS_STRING(self
);
2798 self_len
= PyString_GET_SIZE(self
);
2800 offset
= findstring(self_s
, self_len
,
2802 0, self_len
, FORWARD
);
2804 /* No matches; return the original string */
2805 return return_self(self
);
2808 /* Need to make a new string */
2809 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2812 result_s
= PyString_AS_STRING(result
);
2813 Py_MEMCPY(result_s
, self_s
, self_len
);
2815 /* change everything in-place, starting with this one */
2816 start
= result_s
+ offset
;
2817 Py_MEMCPY(start
, to_s
, from_len
);
2819 end
= result_s
+ self_len
;
/* One replacement is already done, hence the pre-decrement. */
2821 while ( --maxcount
> 0) {
2822 offset
= findstring(start
, end
-start
,
2824 0, end
-start
, FORWARD
);
2827 Py_MEMCPY(start
+offset
, to_s
, from_len
);
2828 start
+= offset
+from_len
;
2834 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
/* replace_single_character: replace up to maxcount occurrences of one byte
 * with the longer string to_s.  count comes from countchar(); the result
 * length is self_len + count * (to_len-1) and OverflowError is set when
 * that is detected to overflow.  Each iteration finds the next occurrence
 * with findchar(), copies the unchanged bytes in front of it (if any) and
 * then to_s; the remaining tail is copied at the end.
 * NOTE(review): the overflow test divides product after the signed
 * multiplication has already been evaluated; signed overflow is undefined
 * behaviour in C, so checking before multiplying would be safer.
 * NOTE(review): braces, the from_c parameter line, the zero-count test,
 * the initialisation and advance of start and the returns are missing from
 * this listing.
 */
2835 Py_LOCAL(PyStringObject
*)
2836 replace_single_character(PyStringObject
*self
,
2838 const char *to_s
, Py_ssize_t to_len
,
2839 Py_ssize_t maxcount
)
2841 char *self_s
, *result_s
;
2842 char *start
, *next
, *end
;
2843 Py_ssize_t self_len
, result_len
;
2844 Py_ssize_t count
, product
;
2845 PyStringObject
*result
;
2847 self_s
= PyString_AS_STRING(self
);
2848 self_len
= PyString_GET_SIZE(self
);
2850 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2852 /* no matches, return unchanged */
2853 return return_self(self
);
2856 /* use the difference between current and new, hence the "-1" */
2857 /* result_len = self_len + count * (to_len-1) */
2858 product
= count
* (to_len
-1);
2859 if (product
/ (to_len
-1) != count
) {
2860 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2863 result_len
= self_len
+ product
;
2864 if (result_len
< 0) {
2865 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2869 if ( (result
= (PyStringObject
*)
2870 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2872 result_s
= PyString_AS_STRING(result
);
2875 end
= self_s
+ self_len
;
2876 while (count
-- > 0) {
2877 next
= findchar(start
, end
-start
, from_c
);
2881 if (next
== start
) {
2882 /* replace with the 'to' */
2883 Py_MEMCPY(result_s
, to_s
, to_len
);
2887 /* copy the unchanged old then the 'to' */
2888 Py_MEMCPY(result_s
, start
, next
-start
);
2889 result_s
+= (next
-start
);
2890 Py_MEMCPY(result_s
, to_s
, to_len
);
2895 /* Copy the remainder of the remaining string */
2896 Py_MEMCPY(result_s
, start
, end
-start
);
2901 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
/* replace_substring: general substring replacement with different source
 * and target lengths.  count comes from countstring(); the result length
 * is self_len + count * (to_len-from_len) and OverflowError is set when
 * that is detected to overflow.  Each iteration finds the next match with
 * findstring(), copies the unchanged bytes in front of it (if any), then
 * to_s, and continues after the match (start = next + from_len); the
 * remaining tail is copied at the end.
 * NOTE(review): the overflow test divides product after the signed
 * multiplication has already been evaluated; signed overflow is undefined
 * behaviour in C, so checking before multiplying would be safer.
 * NOTE(review): braces, the pattern arguments of countstring() and
 * findstring(), the zero-count test, the initialisation of start and the
 * returns are missing from this listing.
 */
2902 Py_LOCAL(PyStringObject
*)
2903 replace_substring(PyStringObject
*self
,
2904 const char *from_s
, Py_ssize_t from_len
,
2905 const char *to_s
, Py_ssize_t to_len
,
2906 Py_ssize_t maxcount
) {
2907 char *self_s
, *result_s
;
2908 char *start
, *next
, *end
;
2909 Py_ssize_t self_len
, result_len
;
2910 Py_ssize_t count
, offset
, product
;
2911 PyStringObject
*result
;
2913 self_s
= PyString_AS_STRING(self
);
2914 self_len
= PyString_GET_SIZE(self
);
2916 count
= countstring(self_s
, self_len
,
2918 0, self_len
, FORWARD
, maxcount
);
2920 /* no matches, return unchanged */
2921 return return_self(self
);
2924 /* Check for overflow */
2925 /* result_len = self_len + count * (to_len-from_len) */
2926 product
= count
* (to_len
-from_len
);
2927 if (product
/ (to_len
-from_len
) != count
) {
2928 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2931 result_len
= self_len
+ product
;
2932 if (result_len
< 0) {
2933 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2937 if ( (result
= (PyStringObject
*)
2938 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2940 result_s
= PyString_AS_STRING(result
);
2943 end
= self_s
+ self_len
;
2944 while (count
-- > 0) {
2945 offset
= findstring(start
, end
-start
,
2947 0, end
-start
, FORWARD
);
2950 next
= start
+offset
;
2951 if (next
== start
) {
2952 /* replace with the 'to' */
2953 Py_MEMCPY(result_s
, to_s
, to_len
);
2957 /* copy the unchanged old then the 'to' */
2958 Py_MEMCPY(result_s
, start
, next
-start
);
2959 result_s
+= (next
-start
);
2960 Py_MEMCPY(result_s
, to_s
, to_len
);
2962 start
= next
+from_len
;
2965 /* Copy the remainder of the remaining string */
2966 Py_MEMCPY(result_s
, start
, end
-start
);
/* replace: dispatcher that picks the specialised replacement routine from
 * the lengths of the from and to strings:
 *   from_len == 0                  -> replace_interleave()
 *   deletion, from_len == 1        -> replace_delete_single_character()
 *   deletion, longer from          -> replace_delete_substring()
 *   from_len == to_len == 1        -> replace_single_character_in_place()
 *   from_len == to_len, longer     -> replace_substring_in_place()
 *   from_len == 1, other to_len    -> replace_single_character()
 *   otherwise                      -> replace_substring()
 * Cases with nothing to do go through return_self().
 * NOTE(review): braces and several guards (the test in front of the
 * PY_SSIZE_T_MAX assignment, the test that opens the deletion branch) are
 * missing from this listing; the mapping above follows the visible calls
 * and their comments.
 */
2972 Py_LOCAL(PyStringObject
*)
2973 replace(PyStringObject
*self
,
2974 const char *from_s
, Py_ssize_t from_len
,
2975 const char *to_s
, Py_ssize_t to_len
,
2976 Py_ssize_t maxcount
)
2979 maxcount
= PY_SSIZE_T_MAX
;
2980 } else if (maxcount
== 0 || PyString_GET_SIZE(self
) == 0) {
2981 /* nothing to do; return the original string */
2982 return return_self(self
);
2985 if (maxcount
== 0 ||
2986 (from_len
== 0 && to_len
== 0)) {
2987 /* nothing to do; return the original string */
2988 return return_self(self
);
2991 /* Handle zero-length special cases */
2993 if (from_len
== 0) {
2994 /* insert the 'to' string everywhere. */
2995 /* >>> "Python".replace("", ".") */
2996 /* '.P.y.t.h.o.n.' */
2997 return replace_interleave(self
, to_s
, to_len
, maxcount
);
3000 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3001 /* point for an empty self string to generate a non-empty string */
3002 /* Special case so the remaining code always gets a non-empty string */
3003 if (PyString_GET_SIZE(self
) == 0) {
3004 return return_self(self
);
3008 /* delete all occurances of 'from' string */
3009 if (from_len
== 1) {
3010 return replace_delete_single_character(
3011 self
, from_s
[0], maxcount
);
3013 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
3017 /* Handle special case where both strings have the same length */
3019 if (from_len
== to_len
) {
3020 if (from_len
== 1) {
3021 return replace_single_character_in_place(
3027 return replace_substring_in_place(
3028 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
3032 /* Otherwise use the more generic algorithms */
3033 if (from_len
== 1) {
3034 return replace_single_character(self
, from_s
[0],
3035 to_s
, to_len
, maxcount
);
3037 /* len('from')>=2, len('to')>=1 */
3038 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
/* Docstring and implementation of S.replace(old, new[, count]).
 * Parses two objects and an optional count (default -1).  Each of from/to
 * is accepted as a str (buffer used directly), as a unicode object (the
 * call is delegated to PyUnicode_Replace()) or as any object accepted by
 * PyObject_AsCharBuffer().  The byte case ends in replace().
 * NOTE(review): the return type, braces, the remaining arguments of the
 * PyUnicode_Replace() and replace() calls and the error returns are
 * missing from this listing.
 */
3042 PyDoc_STRVAR(replace__doc__
,
3043 "S.replace (old, new[, count]) -> string\n\
3045 Return a copy of string S with all occurrences of substring\n\
3046 old replaced by new. If the optional argument count is\n\
3047 given, only the first count occurrences are replaced.");
3050 string_replace(PyStringObject
*self
, PyObject
*args
)
3052 Py_ssize_t count
= -1;
3053 PyObject
*from
, *to
;
3054 const char *from_s
, *to_s
;
3055 Py_ssize_t from_len
, to_len
;
3057 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
3060 if (PyString_Check(from
)) {
3061 from_s
= PyString_AS_STRING(from
);
3062 from_len
= PyString_GET_SIZE(from
);
3064 #ifdef Py_USING_UNICODE
/* NOTE(review): this is a plain "if", whereas the parallel test for "to"
 * below is "else if".  As written, a str "from" also reaches the
 * PyObject_AsCharBuffer() branch when unicode support is compiled in;
 * confirm whether "else if" was intended.
 */
3065 if (PyUnicode_Check(from
))
3066 return PyUnicode_Replace((PyObject
*)self
,
3069 else if (PyObject_AsCharBuffer(from
, &from_s
, &from_len
))
3072 if (PyString_Check(to
)) {
3073 to_s
= PyString_AS_STRING(to
);
3074 to_len
= PyString_GET_SIZE(to
);
3076 #ifdef Py_USING_UNICODE
3077 else if (PyUnicode_Check(to
))
3078 return PyUnicode_Replace((PyObject
*)self
,
3081 else if (PyObject_AsCharBuffer(to
, &to_s
, &to_len
))
3084 return (PyObject
*)replace((PyStringObject
*) self
,
3086 to_s
, to_len
, count
);
/* _string_tailmatch (review summary of the visible statements): substr may
 * be a str, a unicode object (delegated to PyUnicode_Tailmatch()) or any
 * object accepted by PyObject_AsCharBuffer().  start/end are passed
 * through string_adjust_indices() with the string length.  The
 * direction < 0 branch tests start+slen against len; the other branch
 * tests end-start against slen and start against len, and may move start
 * when end-slen > start.  The comparison itself is memcmp() of slen bytes
 * at str+start, performed when end-start >= slen, and its negation is
 * returned.
 * NOTE(review): the return type, braces, declarations of slen, sub and
 * str, the statements inside the range checks and the fallback return are
 * missing from this listing, as is the closing line of the comment below.
 */
3091 /* Matches the end (direction >= 0) or start (direction < 0) of self
3092 * against substr, using the start and end arguments. Returns
3093 * -1 on error, 0 if not found and 1 if found.
3096 _string_tailmatch(PyStringObject
*self
, PyObject
*substr
, Py_ssize_t start
,
3097 Py_ssize_t end
, int direction
)
3099 Py_ssize_t len
= PyString_GET_SIZE(self
);
3104 if (PyString_Check(substr
)) {
3105 sub
= PyString_AS_STRING(substr
);
3106 slen
= PyString_GET_SIZE(substr
);
3108 #ifdef Py_USING_UNICODE
3109 else if (PyUnicode_Check(substr
))
3110 return PyUnicode_Tailmatch((PyObject
*)self
,
3111 substr
, start
, end
, direction
);
3113 else if (PyObject_AsCharBuffer(substr
, &sub
, &slen
))
3115 str
= PyString_AS_STRING(self
);
3117 string_adjust_indices(&start
, &end
, len
);
3119 if (direction
< 0) {
3121 if (start
+slen
> len
)
3125 if (end
-start
< slen
|| start
> len
)
3128 if (end
-slen
> start
)
3131 if (end
-start
>= slen
)
3132 return ! memcmp(str
+start
, sub
, slen
);
/* Docstring and implementation of S.startswith(prefix[, start[, end]]).
 * Parses the arguments with _PyEval_SliceIndex converters (start defaults
 * to 0, end to PY_SSIZE_T_MAX).  When prefix is a tuple every element is
 * tried with _string_tailmatch(); otherwise a single call with direction
 * -1 is made.  The result is returned as a bool via PyBool_FromLong().
 * NOTE(review): the return type, braces, the declarations of subobj,
 * result and i, the remaining arguments of the call inside the loop and
 * the handling of its result are missing from this listing.
 */
3137 PyDoc_STRVAR(startswith__doc__
,
3138 "S.startswith(prefix[, start[, end]]) -> bool\n\
3140 Return True if S starts with the specified prefix, False otherwise.\n\
3141 With optional start, test S beginning at that position.\n\
3142 With optional end, stop comparing S at that position.\n\
3143 prefix can also be a tuple of strings to try.");
3146 string_startswith(PyStringObject
*self
, PyObject
*args
)
3148 Py_ssize_t start
= 0;
3149 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3153 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
3154 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
/* Tuple of candidate prefixes. */
3156 if (PyTuple_Check(subobj
)) {
3158 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
3159 result
= _string_tailmatch(self
,
3160 PyTuple_GET_ITEM(subobj
, i
),
/* Single prefix. */
3170 result
= _string_tailmatch(self
, subobj
, start
, end
, -1);
3174 return PyBool_FromLong(result
);
3178 PyDoc_STRVAR(endswith__doc__
,
3179 "S.endswith(suffix[, start[, end]]) -> bool\n\
3181 Return True if S ends with the specified suffix, False otherwise.\n\
3182 With optional start, test S beginning at that position.\n\
3183 With optional end, stop comparing S at that position.\n\
3184 suffix can also be a tuple of strings to try.");
3187 string_endswith(PyStringObject
*self
, PyObject
*args
)
3189 Py_ssize_t start
= 0;
3190 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3194 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
3195 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
3197 if (PyTuple_Check(subobj
)) {
3199 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
3200 result
= _string_tailmatch(self
,
3201 PyTuple_GET_ITEM(subobj
, i
),
3211 result
= _string_tailmatch(self
, subobj
, start
, end
, +1);
3215 return PyBool_FromLong(result
);
3219 PyDoc_STRVAR(encode__doc__
,
3220 "S.encode([encoding[,errors]]) -> object\n\
3222 Encodes S using the codec registered for encoding. encoding defaults\n\
3223 to the default encoding. errors may be given to set a different error\n\
3224 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3225 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3226 'xmlcharrefreplace' as well as any other name registered with\n\
3227 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3230 string_encode(PyStringObject
*self
, PyObject
*args
)
3232 char *encoding
= NULL
;
3233 char *errors
= NULL
;
3236 if (!PyArg_ParseTuple(args
, "|ss:encode", &encoding
, &errors
))
3238 v
= PyString_AsEncodedObject((PyObject
*)self
, encoding
, errors
);
3241 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3242 PyErr_Format(PyExc_TypeError
,
3243 "encoder did not return a string/unicode object "
3245 v
->ob_type
->tp_name
);
3256 PyDoc_STRVAR(decode__doc__
,
3257 "S.decode([encoding[,errors]]) -> object\n\
3259 Decodes S using the codec registered for encoding. encoding defaults\n\
3260 to the default encoding. errors may be given to set a different error\n\
3261 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3262 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3263 as well as any other name registered with codecs.register_error that is\n\
3264 able to handle UnicodeDecodeErrors.");
3267 string_decode(PyStringObject
*self
, PyObject
*args
)
3269 char *encoding
= NULL
;
3270 char *errors
= NULL
;
3273 if (!PyArg_ParseTuple(args
, "|ss:decode", &encoding
, &errors
))
3275 v
= PyString_AsDecodedObject((PyObject
*)self
, encoding
, errors
);
3278 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3279 PyErr_Format(PyExc_TypeError
,
3280 "decoder did not return a string/unicode object "
3282 v
->ob_type
->tp_name
);
3293 PyDoc_STRVAR(expandtabs__doc__
,
3294 "S.expandtabs([tabsize]) -> string\n\
3296 Return a copy of S where all tab characters are expanded using spaces.\n\
3297 If tabsize is not given, a tab size of 8 characters is assumed.");
3300 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
3304 Py_ssize_t i
, j
, old_j
;
3308 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
3311 /* First pass: determine size of output string */
3313 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
);
3314 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3317 j
+= tabsize
- (j
% tabsize
);
3319 PyErr_SetString(PyExc_OverflowError
,
3320 "new string is too long");
3328 if (*p
== '\n' || *p
== '\r') {
3332 PyErr_SetString(PyExc_OverflowError
,
3333 "new string is too long");
3340 PyErr_SetString(PyExc_OverflowError
, "new string is too long");
3344 /* Second pass: create output string and fill it */
3345 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
3350 q
= PyString_AS_STRING(u
);
3352 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3355 i
= tabsize
- (j
% tabsize
);
3364 if (*p
== '\n' || *p
== '\r')
3371 Py_LOCAL_INLINE(PyObject
*)
3372 pad(PyStringObject
*self
, Py_ssize_t left
, Py_ssize_t right
, char fill
)
3381 if (left
== 0 && right
== 0 && PyString_CheckExact(self
)) {
3383 return (PyObject
*)self
;
3386 u
= PyString_FromStringAndSize(NULL
,
3387 left
+ PyString_GET_SIZE(self
) + right
);
3390 memset(PyString_AS_STRING(u
), fill
, left
);
3391 Py_MEMCPY(PyString_AS_STRING(u
) + left
,
3392 PyString_AS_STRING(self
),
3393 PyString_GET_SIZE(self
));
3395 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
3402 PyDoc_STRVAR(ljust__doc__
,
3403 "S.ljust(width[, fillchar]) -> string\n"
3405 "Return S left justified in a string of length width. Padding is\n"
3406 "done using the specified fill character (default is a space).");
3409 string_ljust(PyStringObject
*self
, PyObject
*args
)
3412 char fillchar
= ' ';
3414 if (!PyArg_ParseTuple(args
, "n|c:ljust", &width
, &fillchar
))
3417 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3419 return (PyObject
*) self
;
3422 return pad(self
, 0, width
- PyString_GET_SIZE(self
), fillchar
);
3426 PyDoc_STRVAR(rjust__doc__
,
3427 "S.rjust(width[, fillchar]) -> string\n"
3429 "Return S right justified in a string of length width. Padding is\n"
3430 "done using the specified fill character (default is a space)");
3433 string_rjust(PyStringObject
*self
, PyObject
*args
)
3436 char fillchar
= ' ';
3438 if (!PyArg_ParseTuple(args
, "n|c:rjust", &width
, &fillchar
))
3441 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3443 return (PyObject
*) self
;
3446 return pad(self
, width
- PyString_GET_SIZE(self
), 0, fillchar
);
3450 PyDoc_STRVAR(center__doc__
,
3451 "S.center(width[, fillchar]) -> string\n"
3453 "Return S centered in a string of length width. Padding is\n"
3454 "done using the specified fill character (default is a space)");
3457 string_center(PyStringObject
*self
, PyObject
*args
)
3459 Py_ssize_t marg
, left
;
3461 char fillchar
= ' ';
3463 if (!PyArg_ParseTuple(args
, "n|c:center", &width
, &fillchar
))
3466 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3468 return (PyObject
*) self
;
3471 marg
= width
- PyString_GET_SIZE(self
);
3472 left
= marg
/ 2 + (marg
& width
& 1);
3474 return pad(self
, left
, marg
- left
, fillchar
);
3477 PyDoc_STRVAR(zfill__doc__
,
3478 "S.zfill(width) -> string\n"
3480 "Pad a numeric string S with zeros on the left, to fill a field\n"
3481 "of the specified width. The string S is never truncated.");
3484 string_zfill(PyStringObject
*self
, PyObject
*args
)
3491 if (!PyArg_ParseTuple(args
, "n:zfill", &width
))
3494 if (PyString_GET_SIZE(self
) >= width
) {
3495 if (PyString_CheckExact(self
)) {
3497 return (PyObject
*) self
;
3500 return PyString_FromStringAndSize(
3501 PyString_AS_STRING(self
),
3502 PyString_GET_SIZE(self
)
3506 fill
= width
- PyString_GET_SIZE(self
);
3508 s
= pad(self
, fill
, 0, '0');
3513 p
= PyString_AS_STRING(s
);
3514 if (p
[fill
] == '+' || p
[fill
] == '-') {
3515 /* move sign to beginning of string */
3520 return (PyObject
*) s
;
3523 PyDoc_STRVAR(isspace__doc__
,
3524 "S.isspace() -> bool\n\
3526 Return True if all characters in S are whitespace\n\
3527 and there is at least one character in S, False otherwise.");
3530 string_isspace(PyStringObject
*self
)
3532 register const unsigned char *p
3533 = (unsigned char *) PyString_AS_STRING(self
);
3534 register const unsigned char *e
;
3536 /* Shortcut for single character strings */
3537 if (PyString_GET_SIZE(self
) == 1 &&
3539 return PyBool_FromLong(1);
3541 /* Special case for empty strings */
3542 if (PyString_GET_SIZE(self
) == 0)
3543 return PyBool_FromLong(0);
3545 e
= p
+ PyString_GET_SIZE(self
);
3546 for (; p
< e
; p
++) {
3548 return PyBool_FromLong(0);
3550 return PyBool_FromLong(1);
3554 PyDoc_STRVAR(isalpha__doc__
,
3555 "S.isalpha() -> bool\n\
3557 Return True if all characters in S are alphabetic\n\
3558 and there is at least one character in S, False otherwise.");
3561 string_isalpha(PyStringObject
*self
)
3563 register const unsigned char *p
3564 = (unsigned char *) PyString_AS_STRING(self
);
3565 register const unsigned char *e
;
3567 /* Shortcut for single character strings */
3568 if (PyString_GET_SIZE(self
) == 1 &&
3570 return PyBool_FromLong(1);
3572 /* Special case for empty strings */
3573 if (PyString_GET_SIZE(self
) == 0)
3574 return PyBool_FromLong(0);
3576 e
= p
+ PyString_GET_SIZE(self
);
3577 for (; p
< e
; p
++) {
3579 return PyBool_FromLong(0);
3581 return PyBool_FromLong(1);
3585 PyDoc_STRVAR(isalnum__doc__
,
3586 "S.isalnum() -> bool\n\
3588 Return True if all characters in S are alphanumeric\n\
3589 and there is at least one character in S, False otherwise.");
3592 string_isalnum(PyStringObject
*self
)
3594 register const unsigned char *p
3595 = (unsigned char *) PyString_AS_STRING(self
);
3596 register const unsigned char *e
;
3598 /* Shortcut for single character strings */
3599 if (PyString_GET_SIZE(self
) == 1 &&
3601 return PyBool_FromLong(1);
3603 /* Special case for empty strings */
3604 if (PyString_GET_SIZE(self
) == 0)
3605 return PyBool_FromLong(0);
3607 e
= p
+ PyString_GET_SIZE(self
);
3608 for (; p
< e
; p
++) {
3610 return PyBool_FromLong(0);
3612 return PyBool_FromLong(1);
3616 PyDoc_STRVAR(isdigit__doc__
,
3617 "S.isdigit() -> bool\n\
3619 Return True if all characters in S are digits\n\
3620 and there is at least one character in S, False otherwise.");
3623 string_isdigit(PyStringObject
*self
)
3625 register const unsigned char *p
3626 = (unsigned char *) PyString_AS_STRING(self
);
3627 register const unsigned char *e
;
3629 /* Shortcut for single character strings */
3630 if (PyString_GET_SIZE(self
) == 1 &&
3632 return PyBool_FromLong(1);
3634 /* Special case for empty strings */
3635 if (PyString_GET_SIZE(self
) == 0)
3636 return PyBool_FromLong(0);
3638 e
= p
+ PyString_GET_SIZE(self
);
3639 for (; p
< e
; p
++) {
3641 return PyBool_FromLong(0);
3643 return PyBool_FromLong(1);
3647 PyDoc_STRVAR(islower__doc__
,
3648 "S.islower() -> bool\n\
3650 Return True if all cased characters in S are lowercase and there is\n\
3651 at least one cased character in S, False otherwise.");
3654 string_islower(PyStringObject
*self
)
3656 register const unsigned char *p
3657 = (unsigned char *) PyString_AS_STRING(self
);
3658 register const unsigned char *e
;
3661 /* Shortcut for single character strings */
3662 if (PyString_GET_SIZE(self
) == 1)
3663 return PyBool_FromLong(islower(*p
) != 0);
3665 /* Special case for empty strings */
3666 if (PyString_GET_SIZE(self
) == 0)
3667 return PyBool_FromLong(0);
3669 e
= p
+ PyString_GET_SIZE(self
);
3671 for (; p
< e
; p
++) {
3673 return PyBool_FromLong(0);
3674 else if (!cased
&& islower(*p
))
3677 return PyBool_FromLong(cased
);
3681 PyDoc_STRVAR(isupper__doc__
,
3682 "S.isupper() -> bool\n\
3684 Return True if all cased characters in S are uppercase and there is\n\
3685 at least one cased character in S, False otherwise.");
3688 string_isupper(PyStringObject
*self
)
3690 register const unsigned char *p
3691 = (unsigned char *) PyString_AS_STRING(self
);
3692 register const unsigned char *e
;
3695 /* Shortcut for single character strings */
3696 if (PyString_GET_SIZE(self
) == 1)
3697 return PyBool_FromLong(isupper(*p
) != 0);
3699 /* Special case for empty strings */
3700 if (PyString_GET_SIZE(self
) == 0)
3701 return PyBool_FromLong(0);
3703 e
= p
+ PyString_GET_SIZE(self
);
3705 for (; p
< e
; p
++) {
3707 return PyBool_FromLong(0);
3708 else if (!cased
&& isupper(*p
))
3711 return PyBool_FromLong(cased
);
3715 PyDoc_STRVAR(istitle__doc__
,
3716 "S.istitle() -> bool\n\
3718 Return True if S is a titlecased string and there is at least one\n\
3719 character in S, i.e. uppercase characters may only follow uncased\n\
3720 characters and lowercase characters only cased ones. Return False\n\
3724 string_istitle(PyStringObject
*self
, PyObject
*uncased
)
3726 register const unsigned char *p
3727 = (unsigned char *) PyString_AS_STRING(self
);
3728 register const unsigned char *e
;
3729 int cased
, previous_is_cased
;
3731 /* Shortcut for single character strings */
3732 if (PyString_GET_SIZE(self
) == 1)
3733 return PyBool_FromLong(isupper(*p
) != 0);
3735 /* Special case for empty strings */
3736 if (PyString_GET_SIZE(self
) == 0)
3737 return PyBool_FromLong(0);
3739 e
= p
+ PyString_GET_SIZE(self
);
3741 previous_is_cased
= 0;
3742 for (; p
< e
; p
++) {
3743 register const unsigned char ch
= *p
;
3746 if (previous_is_cased
)
3747 return PyBool_FromLong(0);
3748 previous_is_cased
= 1;
3751 else if (islower(ch
)) {
3752 if (!previous_is_cased
)
3753 return PyBool_FromLong(0);
3754 previous_is_cased
= 1;
3758 previous_is_cased
= 0;
3760 return PyBool_FromLong(cased
);
3764 PyDoc_STRVAR(splitlines__doc__
,
3765 "S.splitlines([keepends]) -> list of strings\n\
3767 Return a list of the lines in S, breaking at line boundaries.\n\
3768 Line breaks are not included in the resulting list unless keepends\n\
3769 is given and true.");
3772 string_splitlines(PyStringObject
*self
, PyObject
*args
)
3774 register Py_ssize_t i
;
3775 register Py_ssize_t j
;
3782 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
3785 data
= PyString_AS_STRING(self
);
3786 len
= PyString_GET_SIZE(self
);
3788 /* This does not use the preallocated list because splitlines is
3789 usually run with hundreds of newlines. The overhead of
3790 switching between PyList_SET_ITEM and append causes about a
3791 2-3% slowdown for that common case. A smarter implementation
3792 could move the if check out, so the SET_ITEMs are done first
3793 and the appends only done when the prealloc buffer is full.
3794 That's too much work for little gain.*/
3796 list
= PyList_New(0);
3800 for (i
= j
= 0; i
< len
; ) {
3803 /* Find a line and append it */
3804 while (i
< len
&& data
[i
] != '\n' && data
[i
] != '\r')
3807 /* Skip the line break reading CRLF as one line break */
3810 if (data
[i
] == '\r' && i
+ 1 < len
&&
3818 SPLIT_APPEND(data
, j
, eol
);
3822 SPLIT_APPEND(data
, j
, len
);
3835 #undef PREALLOC_SIZE
3838 string_getnewargs(PyStringObject
*v
)
3840 return Py_BuildValue("(s#)", v
->ob_sval
, v
->ob_size
);
3845 string_methods
[] = {
3846 /* Counterparts of the obsolete stropmodule functions; except
3847 string.maketrans(). */
3848 {"join", (PyCFunction
)string_join
, METH_O
, join__doc__
},
3849 {"split", (PyCFunction
)string_split
, METH_VARARGS
, split__doc__
},
3850 {"rsplit", (PyCFunction
)string_rsplit
, METH_VARARGS
, rsplit__doc__
},
3851 {"lower", (PyCFunction
)string_lower
, METH_NOARGS
, lower__doc__
},
3852 {"upper", (PyCFunction
)string_upper
, METH_NOARGS
, upper__doc__
},
3853 {"islower", (PyCFunction
)string_islower
, METH_NOARGS
, islower__doc__
},
3854 {"isupper", (PyCFunction
)string_isupper
, METH_NOARGS
, isupper__doc__
},
3855 {"isspace", (PyCFunction
)string_isspace
, METH_NOARGS
, isspace__doc__
},
3856 {"isdigit", (PyCFunction
)string_isdigit
, METH_NOARGS
, isdigit__doc__
},
3857 {"istitle", (PyCFunction
)string_istitle
, METH_NOARGS
, istitle__doc__
},
3858 {"isalpha", (PyCFunction
)string_isalpha
, METH_NOARGS
, isalpha__doc__
},
3859 {"isalnum", (PyCFunction
)string_isalnum
, METH_NOARGS
, isalnum__doc__
},
3860 {"capitalize", (PyCFunction
)string_capitalize
, METH_NOARGS
,
3862 {"count", (PyCFunction
)string_count
, METH_VARARGS
, count__doc__
},
3863 {"endswith", (PyCFunction
)string_endswith
, METH_VARARGS
,
3865 {"partition", (PyCFunction
)string_partition
, METH_O
, partition__doc__
},
3866 {"find", (PyCFunction
)string_find
, METH_VARARGS
, find__doc__
},
3867 {"index", (PyCFunction
)string_index
, METH_VARARGS
, index__doc__
},
3868 {"lstrip", (PyCFunction
)string_lstrip
, METH_VARARGS
, lstrip__doc__
},
3869 {"replace", (PyCFunction
)string_replace
, METH_VARARGS
, replace__doc__
},
3870 {"rfind", (PyCFunction
)string_rfind
, METH_VARARGS
, rfind__doc__
},
3871 {"rindex", (PyCFunction
)string_rindex
, METH_VARARGS
, rindex__doc__
},
3872 {"rstrip", (PyCFunction
)string_rstrip
, METH_VARARGS
, rstrip__doc__
},
3873 {"rpartition", (PyCFunction
)string_rpartition
, METH_O
,
3875 {"startswith", (PyCFunction
)string_startswith
, METH_VARARGS
,
3877 {"strip", (PyCFunction
)string_strip
, METH_VARARGS
, strip__doc__
},
3878 {"swapcase", (PyCFunction
)string_swapcase
, METH_NOARGS
,
3880 {"translate", (PyCFunction
)string_translate
, METH_VARARGS
,
3882 {"title", (PyCFunction
)string_title
, METH_NOARGS
, title__doc__
},
3883 {"ljust", (PyCFunction
)string_ljust
, METH_VARARGS
, ljust__doc__
},
3884 {"rjust", (PyCFunction
)string_rjust
, METH_VARARGS
, rjust__doc__
},
3885 {"center", (PyCFunction
)string_center
, METH_VARARGS
, center__doc__
},
3886 {"zfill", (PyCFunction
)string_zfill
, METH_VARARGS
, zfill__doc__
},
3887 {"encode", (PyCFunction
)string_encode
, METH_VARARGS
, encode__doc__
},
3888 {"decode", (PyCFunction
)string_decode
, METH_VARARGS
, decode__doc__
},
3889 {"expandtabs", (PyCFunction
)string_expandtabs
, METH_VARARGS
,
3891 {"splitlines", (PyCFunction
)string_splitlines
, METH_VARARGS
,
3893 {"__getnewargs__", (PyCFunction
)string_getnewargs
, METH_NOARGS
},
3894 {NULL
, NULL
} /* sentinel */
3898 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
);
3901 string_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3904 static char *kwlist
[] = {"object", 0};
3906 if (type
!= &PyString_Type
)
3907 return str_subtype_new(type
, args
, kwds
);
3908 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|O:str", kwlist
, &x
))
3911 return PyString_FromString("");
3912 return PyObject_Str(x
);
3916 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3918 PyObject
*tmp
, *pnew
;
3921 assert(PyType_IsSubtype(type
, &PyString_Type
));
3922 tmp
= string_new(&PyString_Type
, args
, kwds
);
3925 assert(PyString_CheckExact(tmp
));
3926 n
= PyString_GET_SIZE(tmp
);
3927 pnew
= type
->tp_alloc(type
, n
);
3929 Py_MEMCPY(PyString_AS_STRING(pnew
), PyString_AS_STRING(tmp
), n
+1);
3930 ((PyStringObject
*)pnew
)->ob_shash
=
3931 ((PyStringObject
*)tmp
)->ob_shash
;
3932 ((PyStringObject
*)pnew
)->ob_sstate
= SSTATE_NOT_INTERNED
;
3939 basestring_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3941 PyErr_SetString(PyExc_TypeError
,
3942 "The basestring type cannot be instantiated");
3947 string_mod(PyObject
*v
, PyObject
*w
)
3949 if (!PyString_Check(v
)) {
3950 Py_INCREF(Py_NotImplemented
);
3951 return Py_NotImplemented
;
3953 return PyString_Format(v
, w
);
3956 PyDoc_STRVAR(basestring_doc
,
3957 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3959 static PyNumberMethods string_as_number
= {
3964 string_mod
, /*nb_remainder*/
3968 PyTypeObject PyBaseString_Type
= {
3969 PyObject_HEAD_INIT(&PyType_Type
)
3980 0, /* tp_as_number */
3981 0, /* tp_as_sequence */
3982 0, /* tp_as_mapping */
3986 0, /* tp_getattro */
3987 0, /* tp_setattro */
3988 0, /* tp_as_buffer */
3989 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
3990 basestring_doc
, /* tp_doc */
3991 0, /* tp_traverse */
3993 0, /* tp_richcompare */
3994 0, /* tp_weaklistoffset */
3996 0, /* tp_iternext */
4000 &PyBaseObject_Type
, /* tp_base */
4002 0, /* tp_descr_get */
4003 0, /* tp_descr_set */
4004 0, /* tp_dictoffset */
4007 basestring_new
, /* tp_new */
4011 PyDoc_STRVAR(string_doc
,
4012 "str(object) -> string\n\
4014 Return a nice string representation of the object.\n\
4015 If the argument is a string, the return value is the same object.");
4017 PyTypeObject PyString_Type
= {
4018 PyObject_HEAD_INIT(&PyType_Type
)
4021 sizeof(PyStringObject
),
4023 string_dealloc
, /* tp_dealloc */
4024 (printfunc
)string_print
, /* tp_print */
4028 string_repr
, /* tp_repr */
4029 &string_as_number
, /* tp_as_number */
4030 &string_as_sequence
, /* tp_as_sequence */
4031 &string_as_mapping
, /* tp_as_mapping */
4032 (hashfunc
)string_hash
, /* tp_hash */
4034 string_str
, /* tp_str */
4035 PyObject_GenericGetAttr
, /* tp_getattro */
4036 0, /* tp_setattro */
4037 &string_as_buffer
, /* tp_as_buffer */
4038 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_CHECKTYPES
|
4039 Py_TPFLAGS_BASETYPE
, /* tp_flags */
4040 string_doc
, /* tp_doc */
4041 0, /* tp_traverse */
4043 (richcmpfunc
)string_richcompare
, /* tp_richcompare */
4044 0, /* tp_weaklistoffset */
4046 0, /* tp_iternext */
4047 string_methods
, /* tp_methods */
4050 &PyBaseString_Type
, /* tp_base */
4052 0, /* tp_descr_get */
4053 0, /* tp_descr_set */
4054 0, /* tp_dictoffset */
4057 string_new
, /* tp_new */
4058 PyObject_Del
, /* tp_free */
4062 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
4064 register PyObject
*v
;
4067 if (w
== NULL
|| !PyString_Check(*pv
)) {
4072 v
= string_concat((PyStringObject
*) *pv
, w
);
4078 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
4080 PyString_Concat(pv
, w
);
4085 /* The following function breaks the notion that strings are immutable:
4086 it changes the size of a string. We get away with this only if there
4087 is only one module referencing the object. You can also think of it
4088 as creating a new string object and destroying the old one, only
4089 more efficiently. In any case, don't use this if the string may
4090 already be known to some other part of the code...
4091 Note that if there's not enough memory to resize the string, the original
4092 string object at *pv is deallocated, *pv is set to NULL, an "out of
4093 memory" exception is set, and -1 is returned. Else (on success) 0 is
4094 returned, and the value in *pv may or may not be the same as on input.
4095 As always, an extra byte is allocated for a trailing \0 byte (newsize
4096 does *not* include that), and a trailing \0 byte is stored.
4100 _PyString_Resize(PyObject
**pv
, Py_ssize_t newsize
)
4102 register PyObject
*v
;
4103 register PyStringObject
*sv
;
4105 if (!PyString_Check(v
) || v
->ob_refcnt
!= 1 || newsize
< 0 ||
4106 PyString_CHECK_INTERNED(v
)) {
4109 PyErr_BadInternalCall();
4112 /* XXX UNREF/NEWREF interface should be more symmetrical */
4114 _Py_ForgetReference(v
);
4116 PyObject_REALLOC((char *)v
, sizeof(PyStringObject
) + newsize
);
4122 _Py_NewReference(*pv
);
4123 sv
= (PyStringObject
*) *pv
;
4124 sv
->ob_size
= newsize
;
4125 sv
->ob_sval
[newsize
] = '\0';
4126 sv
->ob_shash
= -1; /* invalidate cached hash value */
4130 /* Helpers for formatstring */
4132 Py_LOCAL_INLINE(PyObject
*)
4133 getnextarg(PyObject
*args
, Py_ssize_t arglen
, Py_ssize_t
*p_argidx
)
4135 Py_ssize_t argidx
= *p_argidx
;
4136 if (argidx
< arglen
) {
4141 return PyTuple_GetItem(args
, argidx
);
4143 PyErr_SetString(PyExc_TypeError
,
4144 "not enough arguments for format string");
4155 #define F_LJUST (1<<0)
4156 #define F_SIGN (1<<1)
4157 #define F_BLANK (1<<2)
4158 #define F_ALT (1<<3)
4159 #define F_ZERO (1<<4)
4161 Py_LOCAL_INLINE(int)
4162 formatfloat(char *buf
, size_t buflen
, int flags
,
4163 int prec
, int type
, PyObject
*v
)
4165 /* fmt = '%#.' + `prec` + `type`
4166 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4169 x
= PyFloat_AsDouble(v
);
4170 if (x
== -1.0 && PyErr_Occurred()) {
4171 PyErr_SetString(PyExc_TypeError
, "float argument required");
4176 if (type
== 'f' && fabs(x
)/1e25
>= 1e25
)
4178 /* Worst case length calc to ensure no buffer overrun:
4182 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4183 for any double rep.)
4184 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4187 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4188 len = 1 + 50 + 1 + prec = 52 + prec
4190 If prec=0 the effective precision is 1 (the leading digit is
4191 always given), therefore increase the length by one.
4194 if (((type
== 'g' || type
== 'G') &&
4195 buflen
<= (size_t)10 + (size_t)prec
) ||
4196 (type
== 'f' && buflen
<= (size_t)53 + (size_t)prec
)) {
4197 PyErr_SetString(PyExc_OverflowError
,
4198 "formatted float is too long (precision too large?)");
4201 PyOS_snprintf(fmt
, sizeof(fmt
), "%%%s.%d%c",
4202 (flags
&F_ALT
) ? "#" : "",
4204 PyOS_ascii_formatd(buf
, buflen
, fmt
, x
);
4205 return (int)strlen(buf
);
4208 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4209 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4210 * Python's regular ints.
4211 * Return value: a new PyString*, or NULL if error.
4212 * . *pbuf is set to point into it,
4213 * *plen set to the # of chars following that.
4214 * Caller must decref it when done using pbuf.
4215 * The string starting at *pbuf is of the form
4216 * "-"? ("0x" | "0X")? digit+
4217 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4218 * set in flags. The case of hex digits will be correct.
4219 * There will be at least prec digits, zero-filled on the left if
4220 * necessary to get that many.
4221 * val object to be converted
4222 * flags bitmask of format flags; only F_ALT is looked at
4223 * prec minimum number of digits; 0-fill on left if needed
4224 * type a character in [duoxX]; u acts the same as d
4226 * CAUTION: o, x and X conversions on regular ints can never
4227 * produce a '-' sign, but can for Python's unbounded ints.
4230 _PyString_FormatLong(PyObject
*val
, int flags
, int prec
, int type
,
4231 char **pbuf
, int *plen
)
4233 PyObject
*result
= NULL
;
4236 int sign
; /* 1 if '-', else 0 */
4237 int len
; /* number of characters */
4239 int numdigits
; /* len == numnondigits + numdigits */
4240 int numnondigits
= 0;
4245 result
= val
->ob_type
->tp_str(val
);
4248 result
= val
->ob_type
->tp_as_number
->nb_oct(val
);
4253 result
= val
->ob_type
->tp_as_number
->nb_hex(val
);
4256 assert(!"'type' not in [duoxX]");
4261 buf
= PyString_AsString(result
);
4267 /* To modify the string in-place, there can only be one reference. */
4268 if (result
->ob_refcnt
!= 1) {
4269 PyErr_BadInternalCall();
4272 llen
= PyString_Size(result
);
4273 if (llen
> INT_MAX
) {
4274 PyErr_SetString(PyExc_ValueError
, "string too large in _PyString_FormatLong");
4278 if (buf
[len
-1] == 'L') {
4282 sign
= buf
[0] == '-';
4283 numnondigits
+= sign
;
4284 numdigits
= len
- numnondigits
;
4285 assert(numdigits
> 0);
4287 /* Get rid of base marker unless F_ALT */
4288 if ((flags
& F_ALT
) == 0) {
4289 /* Need to skip 0x, 0X or 0. */
4293 assert(buf
[sign
] == '0');
4294 /* If 0 is only digit, leave it alone. */
4295 if (numdigits
> 1) {
4302 assert(buf
[sign
] == '0');
4303 assert(buf
[sign
+ 1] == 'x');
4314 assert(len
== numnondigits
+ numdigits
);
4315 assert(numdigits
> 0);
4318 /* Fill with leading zeroes to meet minimum width. */
4319 if (prec
> numdigits
) {
4320 PyObject
*r1
= PyString_FromStringAndSize(NULL
,
4321 numnondigits
+ prec
);
4327 b1
= PyString_AS_STRING(r1
);
4328 for (i
= 0; i
< numnondigits
; ++i
)
4330 for (i
= 0; i
< prec
- numdigits
; i
++)
4332 for (i
= 0; i
< numdigits
; i
++)
4337 buf
= PyString_AS_STRING(result
);
4338 len
= numnondigits
+ prec
;
4341 /* Fix up case for hex conversions. */
4343 /* Need to convert all lower case letters to upper case,
4344 and need to convert 0x to 0X (and -0x to -0X). */
4345 for (i
= 0; i
< len
; i
++)
4346 if (buf
[i
] >= 'a' && buf
[i
] <= 'x')
4354 Py_LOCAL_INLINE(int)
4355 formatint(char *buf
, size_t buflen
, int flags
,
4356 int prec
, int type
, PyObject
*v
)
4358 /* fmt = '%#.' + `prec` + 'l' + `type`
4359 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4361 char fmt
[64]; /* plenty big enough! */
4365 x
= PyInt_AsLong(v
);
4366 if (x
== -1 && PyErr_Occurred()) {
4367 PyErr_SetString(PyExc_TypeError
, "int argument required");
4370 if (x
< 0 && type
== 'u') {
4373 if (x
< 0 && (type
== 'x' || type
== 'X' || type
== 'o'))
4380 if ((flags
& F_ALT
) &&
4381 (type
== 'x' || type
== 'X')) {
4382 /* When converting under %#x or %#X, there are a number
4383 * of issues that cause pain:
4384 * - when 0 is being converted, the C standard leaves off
4385 * the '0x' or '0X', which is inconsistent with other
4386 * %#x/%#X conversions and inconsistent with Python's
4388 * - there are platforms that violate the standard and
4389 * convert 0 with the '0x' or '0X'
4390 * (Metrowerks, Compaq Tru64)
4391 * - there are platforms that give '0x' when converting
4392 * under %#X, but convert 0 in accordance with the
4393 * standard (OS/2 EMX)
4395 * We can achieve the desired consistency by inserting our
4396 * own '0x' or '0X' prefix, and substituting %x/%X in place
4399 * Note that this is the same approach as used in
4400 * formatint() in unicodeobject.c
4402 PyOS_snprintf(fmt
, sizeof(fmt
), "%s0%c%%.%dl%c",
4403 sign
, type
, prec
, type
);
4406 PyOS_snprintf(fmt
, sizeof(fmt
), "%s%%%s.%dl%c",
4407 sign
, (flags
&F_ALT
) ? "#" : "",
4411 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4412 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4414 if (buflen
<= 14 || buflen
<= (size_t)3 + (size_t)prec
) {
4415 PyErr_SetString(PyExc_OverflowError
,
4416 "formatted integer is too long (precision too large?)");
4420 PyOS_snprintf(buf
, buflen
, fmt
, -x
);
4422 PyOS_snprintf(buf
, buflen
, fmt
, x
);
4423 return (int)strlen(buf
);
4426 Py_LOCAL_INLINE(int)
4427 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
4429 /* presume that the buffer is at least 2 characters long */
4430 if (PyString_Check(v
)) {
4431 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
4435 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
4442 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4444 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4445 chars are formatted. XXX This is a magic number. Each formatting
4446 routine does bounds checking to ensure no overflow, but a better
4447 solution may be to malloc a buffer of appropriate size for each
4448 format. For now, the current solution is sufficient.
4450 #define FORMATBUFLEN (size_t)120
4453 PyString_Format(PyObject
*format
, PyObject
*args
)
4456 Py_ssize_t arglen
, argidx
;
4457 Py_ssize_t reslen
, rescnt
, fmtcnt
;
4459 PyObject
*result
, *orig_args
;
4460 #ifdef Py_USING_UNICODE
4463 PyObject
*dict
= NULL
;
4464 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
4465 PyErr_BadInternalCall();
4469 fmt
= PyString_AS_STRING(format
);
4470 fmtcnt
= PyString_GET_SIZE(format
);
4471 reslen
= rescnt
= fmtcnt
+ 100;
4472 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
4475 res
= PyString_AsString(result
);
4476 if (PyTuple_Check(args
)) {
4477 arglen
= PyTuple_GET_SIZE(args
);
4484 if (args
->ob_type
->tp_as_mapping
&& !PyTuple_Check(args
) &&
4485 !PyObject_TypeCheck(args
, &PyBaseString_Type
))
4487 while (--fmtcnt
>= 0) {
4490 rescnt
= fmtcnt
+ 100;
4492 if (_PyString_Resize(&result
, reslen
) < 0)
4494 res
= PyString_AS_STRING(result
)
4501 /* Got a format specifier */
4503 Py_ssize_t width
= -1;
4508 PyObject
*temp
= NULL
;
4512 char formatbuf
[FORMATBUFLEN
];
4513 /* For format{float,int,char}() */
4514 #ifdef Py_USING_UNICODE
4515 char *fmt_start
= fmt
;
4516 Py_ssize_t argidx_start
= argidx
;
4527 PyErr_SetString(PyExc_TypeError
,
4528 "format requires a mapping");
4534 /* Skip over balanced parentheses */
4535 while (pcount
> 0 && --fmtcnt
>= 0) {
4538 else if (*fmt
== '(')
4542 keylen
= fmt
- keystart
- 1;
4543 if (fmtcnt
< 0 || pcount
> 0) {
4544 PyErr_SetString(PyExc_ValueError
,
4545 "incomplete format key");
4548 key
= PyString_FromStringAndSize(keystart
,
4556 args
= PyObject_GetItem(dict
, key
);
4565 while (--fmtcnt
>= 0) {
4566 switch (c
= *fmt
++) {
4567 case '-': flags
|= F_LJUST
; continue;
4568 case '+': flags
|= F_SIGN
; continue;
4569 case ' ': flags
|= F_BLANK
; continue;
4570 case '#': flags
|= F_ALT
; continue;
4571 case '0': flags
|= F_ZERO
; continue;
4576 v
= getnextarg(args
, arglen
, &argidx
);
4579 if (!PyInt_Check(v
)) {
4580 PyErr_SetString(PyExc_TypeError
,
4584 width
= PyInt_AsLong(v
);
4592 else if (c
>= 0 && isdigit(c
)) {
4594 while (--fmtcnt
>= 0) {
4595 c
= Py_CHARMASK(*fmt
++);
4598 if ((width
*10) / 10 != width
) {
4604 width
= width
*10 + (c
- '0');
4612 v
= getnextarg(args
, arglen
, &argidx
);
4615 if (!PyInt_Check(v
)) {
4621 prec
= PyInt_AsLong(v
);
4627 else if (c
>= 0 && isdigit(c
)) {
4629 while (--fmtcnt
>= 0) {
4630 c
= Py_CHARMASK(*fmt
++);
4633 if ((prec
*10) / 10 != prec
) {
4639 prec
= prec
*10 + (c
- '0');
4644 if (c
== 'h' || c
== 'l' || c
== 'L') {
4650 PyErr_SetString(PyExc_ValueError
,
4651 "incomplete format");
4655 v
= getnextarg(args
, arglen
, &argidx
);
4667 #ifdef Py_USING_UNICODE
4668 if (PyUnicode_Check(v
)) {
4670 argidx
= argidx_start
;
4674 temp
= _PyObject_Str(v
);
4675 #ifdef Py_USING_UNICODE
4676 if (temp
!= NULL
&& PyUnicode_Check(temp
)) {
4679 argidx
= argidx_start
;
4686 temp
= PyObject_Repr(v
);
4689 if (!PyString_Check(temp
)) {
4690 PyErr_SetString(PyExc_TypeError
,
4691 "%s argument has non-string str()");
4695 pbuf
= PyString_AS_STRING(temp
);
4696 len
= PyString_GET_SIZE(temp
);
4697 if (prec
>= 0 && len
> prec
)
4708 if (PyLong_Check(v
)) {
4710 temp
= _PyString_FormatLong(v
, flags
,
4711 prec
, c
, &pbuf
, &ilen
);
4719 len
= formatint(pbuf
,
4738 len
= formatfloat(pbuf
, sizeof(formatbuf
),
4747 #ifdef Py_USING_UNICODE
4748 if (PyUnicode_Check(v
)) {
4750 argidx
= argidx_start
;
4755 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
4760 PyErr_Format(PyExc_ValueError
,
4761 "unsupported format character '%c' (0x%x) "
4764 (Py_ssize_t
)(fmt
- 1 -
4765 PyString_AsString(format
)));
4769 if (*pbuf
== '-' || *pbuf
== '+') {
4773 else if (flags
& F_SIGN
)
4775 else if (flags
& F_BLANK
)
4782 if (rescnt
- (sign
!= 0) < width
) {
4784 rescnt
= width
+ fmtcnt
+ 100;
4789 return PyErr_NoMemory();
4791 if (_PyString_Resize(&result
, reslen
) < 0) {
4795 res
= PyString_AS_STRING(result
)
4805 if ((flags
& F_ALT
) && (c
== 'x' || c
== 'X')) {
4806 assert(pbuf
[0] == '0');
4807 assert(pbuf
[1] == c
);
4818 if (width
> len
&& !(flags
& F_LJUST
)) {
4822 } while (--width
> len
);
4827 if ((flags
& F_ALT
) &&
4828 (c
== 'x' || c
== 'X')) {
4829 assert(pbuf
[0] == '0');
4830 assert(pbuf
[1] == c
);
4835 Py_MEMCPY(res
, pbuf
, len
);
4838 while (--width
>= len
) {
4842 if (dict
&& (argidx
< arglen
) && c
!= '%') {
4843 PyErr_SetString(PyExc_TypeError
,
4844 "not all arguments converted during string formatting");
4851 if (argidx
< arglen
&& !dict
) {
4852 PyErr_SetString(PyExc_TypeError
,
4853 "not all arguments converted during string formatting");
4859 _PyString_Resize(&result
, reslen
- rescnt
);
4862 #ifdef Py_USING_UNICODE
4868 /* Fiddle args right (remove the first argidx arguments) */
4869 if (PyTuple_Check(orig_args
) && argidx
> 0) {
4871 Py_ssize_t n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
4876 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
4878 PyTuple_SET_ITEM(v
, n
, w
);
4882 Py_INCREF(orig_args
);
4886 /* Take what we have of the result and let the Unicode formatting
4887 function format the rest of the input. */
4888 rescnt
= res
- PyString_AS_STRING(result
);
4889 if (_PyString_Resize(&result
, rescnt
))
4891 fmtcnt
= PyString_GET_SIZE(format
) - \
4892 (fmt
- PyString_AS_STRING(format
));
4893 format
= PyUnicode_Decode(fmt
, fmtcnt
, NULL
, NULL
);
4896 v
= PyUnicode_Format(format
, args
);
4900 /* Paste what we have (result) to what the Unicode formatting
4901 function returned (v) and return the result (or error) */
4902 w
= PyUnicode_Concat(result
, v
);
4907 #endif /* Py_USING_UNICODE */
/* PyString_InternInPlace: intern the string object that *p refers to, using
   the file-level `interned` dictionary.
   p: address of an object pointer; *p is read here and cast to a string
   object pointer.
   NOTE(review): this listing carries embedded original line numbers and
   skips some lines (e.g. 4919, 4927, 4929), so the return type and several
   branch bodies are not visible.  The comments below describe only what is
   shown. */
4918 PyString_InternInPlace(PyObject
**p
)
4920 register PyStringObject
*s
= (PyStringObject
*)(*p
);
/* A NULL or non-string argument is reported through Py_FatalError. */
4922 if (s
== NULL
|| !PyString_Check(s
))
4923 Py_FatalError("PyString_InternInPlace: strings only please!");
4924 /* If it's a string subclass, we don't really know what putting
4925 it in the interned dict might do. */
4926 if (!PyString_CheckExact(s
))
/* NOTE(review): the action taken for a subclass is on an omitted line. */
/* Test the interned-state field; the branch body is not visible here. */
4928 if (PyString_CHECK_INTERNED(s
))
/* Create the interned dictionary on first use. */
4930 if (interned
== NULL
) {
4931 interned
= PyDict_New();
/* If the dictionary could not be created, the pending exception is cleared;
   the rest of this branch is not visible. */
4932 if (interned
== NULL
) {
4933 PyErr_Clear(); /* Don't leave an exception */
/* Look the string up in the interned dictionary.  Handling of the result t
   is on omitted lines (4938-4944). */
4937 t
= PyDict_GetItem(interned
, (PyObject
*)s
);
/* Store the string as both key and value.  A negative return enters an
   error branch whose body is not visible here. */
4945 if (PyDict_SetItem(interned
, (PyObject
*)s
, (PyObject
*)s
) < 0) {
4949 /* The two references in interned are not counted by refcnt.
4950 The string deallocator will take care of this */
/* Record the string's interned state as mortal. */
4952 PyString_CHECK_INTERNED(s
) = SSTATE_INTERNED_MORTAL
;
/* PyString_InternImmortal: intern *p through PyString_InternInPlace, then
   set its interned state to SSTATE_INTERNED_IMMORTAL if it is not already.
   p: address of an object pointer, passed unchanged to
   PyString_InternInPlace.
   NOTE(review): the lines after 4960 are not visible in this listing, so
   any reference-count adjustment that accompanies the state change cannot
   be confirmed from here. */
4956 PyString_InternImmortal(PyObject
**p
)
4958 PyString_InternInPlace(p
);
/* Only change the state when it differs from the immortal value. */
4959 if (PyString_CHECK_INTERNED(*p
) != SSTATE_INTERNED_IMMORTAL
) {
4960 PyString_CHECK_INTERNED(*p
) = SSTATE_INTERNED_IMMORTAL
;
/* PyString_InternFromString: build a string object from the C string cp
   with PyString_FromString and pass it to PyString_InternInPlace.
   cp: C string; the notes at the head of this file say PyString_FromString
   expects it to be null-terminated.
   NOTE(review): lines 4970-4971 and everything after 4972 are not visible
   in this listing; presumably they check s for NULL and return s.
   Confirm against the full source. */
4967 PyString_InternFromString(const char *cp
)
4969 PyObject
*s
= PyString_FromString(cp
);
/* The address of the local is passed, so the callee can read (and possibly
   replace) s. */
4972 PyString_InternInPlace(&s
);
/* NOTE(review): the header of this definition is on a line omitted from this
   listing; presumably this is the body of the string-type finalizer
   (PyString_Fini).  Confirm against the full source.
   The visible statements drop the references held in the file-level
   `characters` table and in `nullstring`. */
/* Visit all UCHAR_MAX + 1 slots: release each entry (Py_XDECREF tolerates
   NULL) and clear the slot so no stale pointer remains. */
4980 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
4981 Py_XDECREF(characters
[i
]);
4982 characters
[i
] = NULL
;
/* Release the file-level `nullstring` object as well, if set. */
4984 Py_XDECREF(nullstring
);
4988 void _Py_ReleaseInternedStrings(void)
4994 if (interned
== NULL
|| !PyDict_Check(interned
))
4996 keys
= PyDict_Keys(interned
);
4997 if (keys
== NULL
|| !PyList_Check(keys
)) {
5002 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5003 detector, interned strings are not forcibly deallocated; rather, we
5004 give them their stolen references back, and then clear and DECREF
5005 the interned dict. */
5007 fprintf(stderr
, "releasing interned strings\n");
5008 n
= PyList_GET_SIZE(keys
);
5009 for (i
= 0; i
< n
; i
++) {
5010 s
= (PyStringObject
*) PyList_GET_ITEM(keys
, i
);
5011 switch (s
->ob_sstate
) {
5012 case SSTATE_NOT_INTERNED
:
5013 /* XXX Shouldn't happen */
5015 case SSTATE_INTERNED_IMMORTAL
:
5018 case SSTATE_INTERNED_MORTAL
:
5022 Py_FatalError("Inconsistent interned string state.");
5024 s
->ob_sstate
= SSTATE_NOT_INTERNED
;
5027 PyDict_Clear(interned
);
5028 Py_DECREF(interned
);