1 /* String (str/bytes) object implementation */
3 #define PY_SSIZE_T_CLEAN
10 Py_ssize_t null_strings
, one_strings
;
13 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
14 static PyStringObject
*nullstring
;
16 /* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
21 Another way to look at this is to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
24 static PyObject
*interned
;
26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
35 For both PyString_FromString() and PyString_FromStringAndSize(), the
36 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
39 For PyString_FromString(), the parameter `str' points to a null-terminated
40 string containing exactly `size' bytes.
42 For PyString_FromStringAndSize(), the parameter `str' is
43 either NULL or else points to a string containing at least `size' bytes.
44 For PyString_FromStringAndSize(), the string in the `str' parameter does
45 not have to be null-terminated. (Therefore it is safe to construct a
46 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
48 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
50 PyString object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character. It
56 is therefore equal to the `size' parameter (for
57 PyString_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyString_FromString()).
61 PyString_FromStringAndSize(const char *str
, Py_ssize_t size
)
63 register PyStringObject
*op
;
65 PyErr_SetString(PyExc_SystemError
,
66 "Negative size passed to PyString_FromStringAndSize");
69 if (size
== 0 && (op
= nullstring
) != NULL
) {
74 return (PyObject
*)op
;
76 if (size
== 1 && str
!= NULL
&&
77 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
83 return (PyObject
*)op
;
86 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
87 PyErr_SetString(PyExc_OverflowError
, "string is too large");
91 /* Inline PyObject_NewVar */
92 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
94 return PyErr_NoMemory();
95 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
97 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
99 Py_MEMCPY(op
->ob_sval
, str
, size
);
100 op
->ob_sval
[size
] = '\0';
101 /* share short strings */
103 PyObject
*t
= (PyObject
*)op
;
104 PyString_InternInPlace(&t
);
105 op
= (PyStringObject
*)t
;
108 } else if (size
== 1 && str
!= NULL
) {
109 PyObject
*t
= (PyObject
*)op
;
110 PyString_InternInPlace(&t
);
111 op
= (PyStringObject
*)t
;
112 characters
[*str
& UCHAR_MAX
] = op
;
115 return (PyObject
*) op
;
119 PyString_FromString(const char *str
)
121 register size_t size
;
122 register PyStringObject
*op
;
126 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
127 PyErr_SetString(PyExc_OverflowError
,
128 "string is too long for a Python string");
131 if (size
== 0 && (op
= nullstring
) != NULL
) {
136 return (PyObject
*)op
;
138 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
143 return (PyObject
*)op
;
146 /* Inline PyObject_NewVar */
147 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
149 return PyErr_NoMemory();
150 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
152 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
153 Py_MEMCPY(op
->ob_sval
, str
, size
+1);
154 /* share short strings */
156 PyObject
*t
= (PyObject
*)op
;
157 PyString_InternInPlace(&t
);
158 op
= (PyStringObject
*)t
;
161 } else if (size
== 1) {
162 PyObject
*t
= (PyObject
*)op
;
163 PyString_InternInPlace(&t
);
164 op
= (PyStringObject
*)t
;
165 characters
[*str
& UCHAR_MAX
] = op
;
168 return (PyObject
*) op
;
172 PyString_FromFormatV(const char *format
, va_list vargs
)
180 #ifdef VA_LIST_IS_ARRAY
181 Py_MEMCPY(count
, vargs
, sizeof(va_list));
184 __va_copy(count
, vargs
);
189 /* step 1: figure out how large a buffer we need */
190 for (f
= format
; *f
; f
++) {
193 while (*++f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
196 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
197 * they don't affect the amount of space we reserve.
199 if ((*f
== 'l' || *f
== 'z') &&
200 (f
[1] == 'd' || f
[1] == 'u'))
205 (void)va_arg(count
, int);
206 /* fall through... */
210 case 'd': case 'u': case 'i': case 'x':
211 (void) va_arg(count
, int);
212 /* 20 bytes is enough to hold a 64-bit
213 integer. Decimal takes the most space.
214 This isn't enough for octal. */
218 s
= va_arg(count
, char*);
222 (void) va_arg(count
, int);
223 /* maximum 64-bit pointer representation:
225 * so 19 characters is enough.
226 * XXX I count 18 -- what's the extra for?
231 /* if we stumble upon an unknown
232 formatting code, copy the rest of
233 the format string to the output
234 string. (we cannot just skip the
235 code, since there's no way to know
236 what's in the argument list) */
244 /* step 2: fill the buffer */
245 /* Since we've analyzed how much space we need for the worst case,
246 use sprintf directly instead of the slower PyOS_snprintf. */
247 string
= PyString_FromStringAndSize(NULL
, n
);
251 s
= PyString_AsString(string
);
253 for (f
= format
; *f
; f
++) {
259 /* parse the width.precision part (we're only
260 interested in the precision value, if any) */
262 while (isdigit(Py_CHARMASK(*f
)))
263 n
= (n
*10) + *f
++ - '0';
267 while (isdigit(Py_CHARMASK(*f
)))
268 n
= (n
*10) + *f
++ - '0';
270 while (*f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
272 /* handle the long flag, but only for %ld and %lu.
273 others can be added when necessary. */
274 if (*f
== 'l' && (f
[1] == 'd' || f
[1] == 'u')) {
278 /* handle the size_t flag. */
279 if (*f
== 'z' && (f
[1] == 'd' || f
[1] == 'u')) {
286 *s
++ = va_arg(vargs
, int);
290 sprintf(s
, "%ld", va_arg(vargs
, long));
292 sprintf(s
, "%" PY_FORMAT_SIZE_T
"d",
293 va_arg(vargs
, Py_ssize_t
));
295 sprintf(s
, "%d", va_arg(vargs
, int));
301 va_arg(vargs
, unsigned long));
303 sprintf(s
, "%" PY_FORMAT_SIZE_T
"u",
304 va_arg(vargs
, size_t));
307 va_arg(vargs
, unsigned int));
311 sprintf(s
, "%i", va_arg(vargs
, int));
315 sprintf(s
, "%x", va_arg(vargs
, int));
319 p
= va_arg(vargs
, char*);
327 sprintf(s
, "%p", va_arg(vargs
, void*));
328 /* %p is ill-defined: ensure leading 0x. */
331 else if (s
[1] != 'x') {
332 memmove(s
+2, s
, strlen(s
)+1);
351 _PyString_Resize(&string
, s
- PyString_AS_STRING(string
));
356 PyString_FromFormat(const char *format
, ...)
361 #ifdef HAVE_STDARG_PROTOTYPES
362 va_start(vargs
, format
);
366 ret
= PyString_FromFormatV(format
, vargs
);
372 PyObject
*PyString_Decode(const char *s
,
374 const char *encoding
,
379 str
= PyString_FromStringAndSize(s
, size
);
382 v
= PyString_AsDecodedString(str
, encoding
, errors
);
387 PyObject
*PyString_AsDecodedObject(PyObject
*str
,
388 const char *encoding
,
393 if (!PyString_Check(str
)) {
398 if (encoding
== NULL
) {
399 #ifdef Py_USING_UNICODE
400 encoding
= PyUnicode_GetDefaultEncoding();
402 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
407 /* Decode via the codec registry */
408 v
= PyCodec_Decode(str
, encoding
, errors
);
418 PyObject
*PyString_AsDecodedString(PyObject
*str
,
419 const char *encoding
,
424 v
= PyString_AsDecodedObject(str
, encoding
, errors
);
428 #ifdef Py_USING_UNICODE
429 /* Convert Unicode to a string using the default encoding */
430 if (PyUnicode_Check(v
)) {
432 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
438 if (!PyString_Check(v
)) {
439 PyErr_Format(PyExc_TypeError
,
440 "decoder did not return a string object (type=%.400s)",
441 Py_TYPE(v
)->tp_name
);
452 PyObject
*PyString_Encode(const char *s
,
454 const char *encoding
,
459 str
= PyString_FromStringAndSize(s
, size
);
462 v
= PyString_AsEncodedString(str
, encoding
, errors
);
467 PyObject
*PyString_AsEncodedObject(PyObject
*str
,
468 const char *encoding
,
473 if (!PyString_Check(str
)) {
478 if (encoding
== NULL
) {
479 #ifdef Py_USING_UNICODE
480 encoding
= PyUnicode_GetDefaultEncoding();
482 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
487 /* Encode via the codec registry */
488 v
= PyCodec_Encode(str
, encoding
, errors
);
498 PyObject
*PyString_AsEncodedString(PyObject
*str
,
499 const char *encoding
,
504 v
= PyString_AsEncodedObject(str
, encoding
, errors
);
508 #ifdef Py_USING_UNICODE
509 /* Convert Unicode to a string using the default encoding */
510 if (PyUnicode_Check(v
)) {
512 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
518 if (!PyString_Check(v
)) {
519 PyErr_Format(PyExc_TypeError
,
520 "encoder did not return a string object (type=%.400s)",
521 Py_TYPE(v
)->tp_name
);
533 string_dealloc(PyObject
*op
)
535 switch (PyString_CHECK_INTERNED(op
)) {
536 case SSTATE_NOT_INTERNED
:
539 case SSTATE_INTERNED_MORTAL
:
540 /* revive dead object temporarily for DelItem */
542 if (PyDict_DelItem(interned
, op
) != 0)
544 "deletion of interned string failed");
547 case SSTATE_INTERNED_IMMORTAL
:
548 Py_FatalError("Immortal interned string died.");
551 Py_FatalError("Inconsistent interned string state.");
553 Py_TYPE(op
)->tp_free(op
);
556 /* Unescape a backslash-escaped string. If unicode is non-zero,
557 the string is a u-literal. If recode_encoding is non-zero,
558 the string is UTF-8 encoded and should be re-encoded in the
559 specified encoding. */
561 PyObject
*PyString_DecodeEscape(const char *s
,
565 const char *recode_encoding
)
571 Py_ssize_t newlen
= recode_encoding
? 4*len
:len
;
572 v
= PyString_FromStringAndSize((char *)NULL
, newlen
);
575 p
= buf
= PyString_AsString(v
);
580 #ifdef Py_USING_UNICODE
581 if (recode_encoding
&& (*s
& 0x80)) {
587 /* Decode non-ASCII bytes as UTF-8. */
588 while (t
< end
&& (*t
& 0x80)) t
++;
589 u
= PyUnicode_DecodeUTF8(s
, t
- s
, errors
);
592 /* Recode them in target encoding. */
593 w
= PyUnicode_AsEncodedString(
594 u
, recode_encoding
, errors
);
598 /* Append bytes to output buffer. */
599 assert(PyString_Check(w
));
600 r
= PyString_AS_STRING(w
);
601 rn
= PyString_GET_SIZE(w
);
616 PyErr_SetString(PyExc_ValueError
,
617 "Trailing \\ in string");
621 /* XXX This assumes ASCII! */
623 case '\\': *p
++ = '\\'; break;
624 case '\'': *p
++ = '\''; break;
625 case '\"': *p
++ = '\"'; break;
626 case 'b': *p
++ = '\b'; break;
627 case 'f': *p
++ = '\014'; break; /* FF */
628 case 't': *p
++ = '\t'; break;
629 case 'n': *p
++ = '\n'; break;
630 case 'r': *p
++ = '\r'; break;
631 case 'v': *p
++ = '\013'; break; /* VT */
632 case 'a': *p
++ = '\007'; break; /* BEL, not classic C */
633 case '0': case '1': case '2': case '3':
634 case '4': case '5': case '6': case '7':
636 if (s
< end
&& '0' <= *s
&& *s
<= '7') {
637 c
= (c
<<3) + *s
++ - '0';
638 if (s
< end
&& '0' <= *s
&& *s
<= '7')
639 c
= (c
<<3) + *s
++ - '0';
645 isxdigit(Py_CHARMASK(s
[0])) &&
646 isxdigit(Py_CHARMASK(s
[1])))
669 if (!errors
|| strcmp(errors
, "strict") == 0) {
670 PyErr_SetString(PyExc_ValueError
,
671 "invalid \\x escape");
674 if (strcmp(errors
, "replace") == 0) {
676 } else if (strcmp(errors
, "ignore") == 0)
679 PyErr_Format(PyExc_ValueError
,
681 "unknown error handling code: %.400s",
685 #ifndef Py_USING_UNICODE
690 PyErr_SetString(PyExc_ValueError
,
691 "Unicode escapes not legal "
692 "when Unicode disabled");
699 goto non_esc
; /* an arbitrary number of unescaped
700 UTF-8 bytes may follow. */
704 _PyString_Resize(&v
, p
- buf
);
711 /* -------------------------------------------------------------------- */
715 string_getsize(register PyObject
*op
)
719 if (PyString_AsStringAndSize(op
, &s
, &len
))
724 static /*const*/ char *
725 string_getbuffer(register PyObject
*op
)
729 if (PyString_AsStringAndSize(op
, &s
, &len
))
735 PyString_Size(register PyObject
*op
)
737 if (!PyString_Check(op
))
738 return string_getsize(op
);
743 PyString_AsString(register PyObject
*op
)
745 if (!PyString_Check(op
))
746 return string_getbuffer(op
);
747 return ((PyStringObject
*)op
) -> ob_sval
;
751 PyString_AsStringAndSize(register PyObject
*obj
,
753 register Py_ssize_t
*len
)
756 PyErr_BadInternalCall();
760 if (!PyString_Check(obj
)) {
761 #ifdef Py_USING_UNICODE
762 if (PyUnicode_Check(obj
)) {
763 obj
= _PyUnicode_AsDefaultEncodedString(obj
, NULL
);
770 PyErr_Format(PyExc_TypeError
,
771 "expected string or Unicode object, "
772 "%.200s found", Py_TYPE(obj
)->tp_name
);
777 *s
= PyString_AS_STRING(obj
);
779 *len
= PyString_GET_SIZE(obj
);
780 else if (strlen(*s
) != (size_t)PyString_GET_SIZE(obj
)) {
781 PyErr_SetString(PyExc_TypeError
,
782 "expected string without null bytes");
788 /* -------------------------------------------------------------------- */
791 #include "stringlib/stringdefs.h"
792 #include "stringlib/fastsearch.h"
794 #include "stringlib/count.h"
795 #include "stringlib/find.h"
796 #include "stringlib/partition.h"
798 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
799 #include "stringlib/localeutil.h"
804 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
806 Py_ssize_t i
, str_len
;
810 /* XXX Ought to check for interrupts when writing long strings */
811 if (! PyString_CheckExact(op
)) {
813 /* A str subclass may have its own __str__ method. */
814 op
= (PyStringObject
*) PyObject_Str((PyObject
*)op
);
817 ret
= string_print(op
, fp
, flags
);
821 if (flags
& Py_PRINT_RAW
) {
822 char *data
= op
->ob_sval
;
823 Py_ssize_t size
= Py_SIZE(op
);
824 Py_BEGIN_ALLOW_THREADS
825 while (size
> INT_MAX
) {
826 /* Very long strings cannot be written atomically.
827 * But don't write exactly INT_MAX bytes at a time
828 * to avoid memory alignment issues.
830 const int chunk_size
= INT_MAX
& ~0x3FFF;
831 fwrite(data
, 1, chunk_size
, fp
);
836 if (size
) fwrite(data
, (int)size
, 1, fp
);
838 fwrite(data
, 1, (int)size
, fp
);
844 /* figure out which quote to use; single is preferred */
846 if (memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
847 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
850 str_len
= Py_SIZE(op
);
851 Py_BEGIN_ALLOW_THREADS
853 for (i
= 0; i
< str_len
; i
++) {
854 /* Since strings are immutable and the caller should have a
855 reference, accessing the internal buffer should not be an issue
856 with the GIL released. */
858 if (c
== quote
|| c
== '\\')
859 fprintf(fp
, "\\%c", c
);
866 else if (c
< ' ' || c
>= 0x7f)
867 fprintf(fp
, "\\x%02x", c
& 0xff);
877 PyString_Repr(PyObject
*obj
, int smartquotes
)
879 register PyStringObject
* op
= (PyStringObject
*) obj
;
880 size_t newsize
= 2 + 4 * Py_SIZE(op
);
882 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 != Py_SIZE(op
)) {
883 PyErr_SetString(PyExc_OverflowError
,
884 "string is too large to make repr");
887 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
892 register Py_ssize_t i
;
897 /* figure out which quote to use; single is preferred */
900 memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
901 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
904 p
= PyString_AS_STRING(v
);
906 for (i
= 0; i
< Py_SIZE(op
); i
++) {
907 /* There's at least enough room for a hex escape
908 and a closing quote. */
909 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 5);
911 if (c
== quote
|| c
== '\\')
912 *p
++ = '\\', *p
++ = c
;
914 *p
++ = '\\', *p
++ = 't';
916 *p
++ = '\\', *p
++ = 'n';
918 *p
++ = '\\', *p
++ = 'r';
919 else if (c
< ' ' || c
>= 0x7f) {
920 /* For performance, we don't want to call
921 PyOS_snprintf here (extra layers of
923 sprintf(p
, "\\x%02x", c
& 0xff);
929 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 1);
933 &v
, (p
- PyString_AS_STRING(v
)));
939 string_repr(PyObject
*op
)
941 return PyString_Repr(op
, 1);
945 string_str(PyObject
*s
)
947 assert(PyString_Check(s
));
948 if (PyString_CheckExact(s
)) {
953 /* Subtype -- return genuine string with the same value. */
954 PyStringObject
*t
= (PyStringObject
*) s
;
955 return PyString_FromStringAndSize(t
->ob_sval
, Py_SIZE(t
));
960 string_length(PyStringObject
*a
)
966 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
968 register Py_ssize_t size
;
969 register PyStringObject
*op
;
970 if (!PyString_Check(bb
)) {
971 #ifdef Py_USING_UNICODE
972 if (PyUnicode_Check(bb
))
973 return PyUnicode_Concat((PyObject
*)a
, bb
);
975 if (PyByteArray_Check(bb
))
976 return PyByteArray_Concat((PyObject
*)a
, bb
);
977 PyErr_Format(PyExc_TypeError
,
978 "cannot concatenate 'str' and '%.200s' objects",
979 Py_TYPE(bb
)->tp_name
);
982 #define b ((PyStringObject *)bb)
983 /* Optimize cases with empty left or right operand */
984 if ((Py_SIZE(a
) == 0 || Py_SIZE(b
) == 0) &&
985 PyString_CheckExact(a
) && PyString_CheckExact(b
)) {
986 if (Py_SIZE(a
) == 0) {
991 return (PyObject
*)a
;
993 size
= Py_SIZE(a
) + Py_SIZE(b
);
994 /* Check that string sizes are not negative, to prevent an
995 overflow in cases where we are passed incorrectly-created
996 strings with negative lengths (due to a bug in other code).
998 if (Py_SIZE(a
) < 0 || Py_SIZE(b
) < 0 ||
999 Py_SIZE(a
) > PY_SSIZE_T_MAX
- Py_SIZE(b
)) {
1000 PyErr_SetString(PyExc_OverflowError
,
1001 "strings are too large to concat");
1005 /* Inline PyObject_NewVar */
1006 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
1007 PyErr_SetString(PyExc_OverflowError
,
1008 "strings are too large to concat");
1011 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
1013 return PyErr_NoMemory();
1014 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1016 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1017 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1018 Py_MEMCPY(op
->ob_sval
+ Py_SIZE(a
), b
->ob_sval
, Py_SIZE(b
));
1019 op
->ob_sval
[size
] = '\0';
1020 return (PyObject
*) op
;
1025 string_repeat(register PyStringObject
*a
, register Py_ssize_t n
)
1027 register Py_ssize_t i
;
1028 register Py_ssize_t j
;
1029 register Py_ssize_t size
;
1030 register PyStringObject
*op
;
1034 /* watch out for overflows: the size can overflow int,
1035 * and the # of bytes needed can overflow size_t
1037 size
= Py_SIZE(a
) * n
;
1038 if (n
&& size
/ n
!= Py_SIZE(a
)) {
1039 PyErr_SetString(PyExc_OverflowError
,
1040 "repeated string is too long");
1043 if (size
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1045 return (PyObject
*)a
;
1047 nbytes
= (size_t)size
;
1048 if (nbytes
+ PyStringObject_SIZE
<= nbytes
) {
1049 PyErr_SetString(PyExc_OverflowError
,
1050 "repeated string is too long");
1053 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ nbytes
);
1055 return PyErr_NoMemory();
1056 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1058 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1059 op
->ob_sval
[size
] = '\0';
1060 if (Py_SIZE(a
) == 1 && n
> 0) {
1061 memset(op
->ob_sval
, a
->ob_sval
[0] , n
);
1062 return (PyObject
*) op
;
1066 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1070 j
= (i
<= size
-i
) ? i
: size
-i
;
1071 Py_MEMCPY(op
->ob_sval
+i
, op
->ob_sval
, j
);
1074 return (PyObject
*) op
;
1077 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1080 string_slice(register PyStringObject
*a
, register Py_ssize_t i
,
1081 register Py_ssize_t j
)
1082 /* j -- may be negative! */
1087 j
= 0; /* Avoid signed/unsigned bug in next line */
1090 if (i
== 0 && j
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1091 /* It's the same as a */
1093 return (PyObject
*)a
;
1097 return PyString_FromStringAndSize(a
->ob_sval
+ i
, j
-i
);
1101 string_contains(PyObject
*str_obj
, PyObject
*sub_obj
)
1103 if (!PyString_CheckExact(sub_obj
)) {
1104 #ifdef Py_USING_UNICODE
1105 if (PyUnicode_Check(sub_obj
))
1106 return PyUnicode_Contains(str_obj
, sub_obj
);
1108 if (!PyString_Check(sub_obj
)) {
1109 PyErr_Format(PyExc_TypeError
,
1110 "'in <string>' requires string as left operand, "
1111 "not %.200s", Py_TYPE(sub_obj
)->tp_name
);
1116 return stringlib_contains_obj(str_obj
, sub_obj
);
1120 string_item(PyStringObject
*a
, register Py_ssize_t i
)
1124 if (i
< 0 || i
>= Py_SIZE(a
)) {
1125 PyErr_SetString(PyExc_IndexError
, "string index out of range");
1128 pchar
= a
->ob_sval
[i
];
1129 v
= (PyObject
*)characters
[pchar
& UCHAR_MAX
];
1131 v
= PyString_FromStringAndSize(&pchar
, 1);
1142 string_richcompare(PyStringObject
*a
, PyStringObject
*b
, int op
)
1145 Py_ssize_t len_a
, len_b
;
1149 /* Make sure both arguments are strings. */
1150 if (!(PyString_Check(a
) && PyString_Check(b
))) {
1151 result
= Py_NotImplemented
;
1156 case Py_EQ
:case Py_LE
:case Py_GE
:
1159 case Py_NE
:case Py_LT
:case Py_GT
:
1165 /* Supporting Py_NE here as well does not save
1166 much time, since Py_NE is rarely used. */
1167 if (Py_SIZE(a
) == Py_SIZE(b
)
1168 && (a
->ob_sval
[0] == b
->ob_sval
[0]
1169 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0)) {
1176 len_a
= Py_SIZE(a
); len_b
= Py_SIZE(b
);
1177 min_len
= (len_a
< len_b
) ? len_a
: len_b
;
1179 c
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
1181 c
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
1185 c
= (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
1187 case Py_LT
: c
= c
< 0; break;
1188 case Py_LE
: c
= c
<= 0; break;
1189 case Py_EQ
: assert(0); break; /* unreachable */
1190 case Py_NE
: c
= c
!= 0; break;
1191 case Py_GT
: c
= c
> 0; break;
1192 case Py_GE
: c
= c
>= 0; break;
1194 result
= Py_NotImplemented
;
1197 result
= c
? Py_True
: Py_False
;
1204 _PyString_Eq(PyObject
*o1
, PyObject
*o2
)
1206 PyStringObject
*a
= (PyStringObject
*) o1
;
1207 PyStringObject
*b
= (PyStringObject
*) o2
;
1208 return Py_SIZE(a
) == Py_SIZE(b
)
1209 && *a
->ob_sval
== *b
->ob_sval
1210 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0;
1214 string_hash(PyStringObject
*a
)
1216 register Py_ssize_t len
;
1217 register unsigned char *p
;
1220 if (a
->ob_shash
!= -1)
1223 p
= (unsigned char *) a
->ob_sval
;
1226 x
= (1000003*x
) ^ *p
++;
1235 string_subscript(PyStringObject
* self
, PyObject
* item
)
1237 if (PyIndex_Check(item
)) {
1238 Py_ssize_t i
= PyNumber_AsSsize_t(item
, PyExc_IndexError
);
1239 if (i
== -1 && PyErr_Occurred())
1242 i
+= PyString_GET_SIZE(self
);
1243 return string_item(self
, i
);
1245 else if (PySlice_Check(item
)) {
1246 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
1251 if (PySlice_GetIndicesEx((PySliceObject
*)item
,
1252 PyString_GET_SIZE(self
),
1253 &start
, &stop
, &step
, &slicelength
) < 0) {
1257 if (slicelength
<= 0) {
1258 return PyString_FromStringAndSize("", 0);
1260 else if (start
== 0 && step
== 1 &&
1261 slicelength
== PyString_GET_SIZE(self
) &&
1262 PyString_CheckExact(self
)) {
1264 return (PyObject
*)self
;
1266 else if (step
== 1) {
1267 return PyString_FromStringAndSize(
1268 PyString_AS_STRING(self
) + start
,
1272 source_buf
= PyString_AsString((PyObject
*)self
);
1273 result_buf
= (char *)PyMem_Malloc(slicelength
);
1274 if (result_buf
== NULL
)
1275 return PyErr_NoMemory();
1277 for (cur
= start
, i
= 0; i
< slicelength
;
1279 result_buf
[i
] = source_buf
[cur
];
1282 result
= PyString_FromStringAndSize(result_buf
,
1284 PyMem_Free(result_buf
);
1289 PyErr_Format(PyExc_TypeError
,
1290 "string indices must be integers, not %.200s",
1291 Py_TYPE(item
)->tp_name
);
1297 string_buffer_getreadbuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1300 PyErr_SetString(PyExc_SystemError
,
1301 "accessing non-existent string segment");
1304 *ptr
= (void *)self
->ob_sval
;
1305 return Py_SIZE(self
);
1309 string_buffer_getwritebuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1311 PyErr_SetString(PyExc_TypeError
,
1312 "Cannot use string as modifiable buffer");
1317 string_buffer_getsegcount(PyStringObject
*self
, Py_ssize_t
*lenp
)
1320 *lenp
= Py_SIZE(self
);
1325 string_buffer_getcharbuf(PyStringObject
*self
, Py_ssize_t index
, const char **ptr
)
1328 PyErr_SetString(PyExc_SystemError
,
1329 "accessing non-existent string segment");
1332 *ptr
= self
->ob_sval
;
1333 return Py_SIZE(self
);
1337 string_buffer_getbuffer(PyStringObject
*self
, Py_buffer
*view
, int flags
)
1339 return PyBuffer_FillInfo(view
, (PyObject
*)self
,
1340 (void *)self
->ob_sval
, Py_SIZE(self
),
1344 static PySequenceMethods string_as_sequence
= {
1345 (lenfunc
)string_length
, /*sq_length*/
1346 (binaryfunc
)string_concat
, /*sq_concat*/
1347 (ssizeargfunc
)string_repeat
, /*sq_repeat*/
1348 (ssizeargfunc
)string_item
, /*sq_item*/
1349 (ssizessizeargfunc
)string_slice
, /*sq_slice*/
1352 (objobjproc
)string_contains
/*sq_contains*/
1355 static PyMappingMethods string_as_mapping
= {
1356 (lenfunc
)string_length
,
1357 (binaryfunc
)string_subscript
,
1361 static PyBufferProcs string_as_buffer
= {
1362 (readbufferproc
)string_buffer_getreadbuf
,
1363 (writebufferproc
)string_buffer_getwritebuf
,
1364 (segcountproc
)string_buffer_getsegcount
,
1365 (charbufferproc
)string_buffer_getcharbuf
,
1366 (getbufferproc
)string_buffer_getbuffer
,
1373 #define RIGHTSTRIP 1
1376 /* Arrays indexed by above */
1377 static const char *stripformat
[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1379 #define STRIPNAME(i) (stripformat[i]+3)
1382 /* Don't call if length < 2 */
1383 #define Py_STRING_MATCH(target, offset, pattern, length) \
1384 (target[offset] == pattern[0] && \
1385 target[offset+length-1] == pattern[length-1] && \
1386 !memcmp(target+offset+1, pattern+1, length-2) )
1389 /* Overallocate the initial list to reduce the number of reallocs for small
1390 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1391 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1392 text (roughly 11 words per line) and field delimited data (usually 1-10
1393 fields). For large strings the split algorithms are bandwidth limited
1394 so increasing the preallocation likely will not improve things.*/
1396 #define MAX_PREALLOC 12
1398 /* 5 splits gives 6 elements */
1399 #define PREALLOC_SIZE(maxsplit) \
1400 (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
1402 #define SPLIT_APPEND(data, left, right) \
1403 str = PyString_FromStringAndSize((data) + (left), \
1404 (right) - (left)); \
1407 if (PyList_Append(list, str)) { \
1414 #define SPLIT_ADD(data, left, right) { \
1415 str = PyString_FromStringAndSize((data) + (left), \
1416 (right) - (left)); \
1419 if (count < MAX_PREALLOC) { \
1420 PyList_SET_ITEM(list, count, str); \
1422 if (PyList_Append(list, str)) { \
1431 /* Always force the list to the expected size. */
1432 #define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
1434 #define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
1435 #define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
1436 #define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
1437 #define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1439 Py_LOCAL_INLINE(PyObject
*)
1440 split_whitespace(PyStringObject
*self
, Py_ssize_t len
, Py_ssize_t maxsplit
)
1442 const char *s
= PyString_AS_STRING(self
);
1443 Py_ssize_t i
, j
, count
=0;
1445 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1452 while (maxsplit
-- > 0) {
1453 SKIP_SPACE(s
, i
, len
);
1456 SKIP_NONSPACE(s
, i
, len
);
1457 if (j
== 0 && i
== len
&& PyString_CheckExact(self
)) {
1458 /* No whitespace in self, so just use it as list[0] */
1460 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1468 /* Only occurs when maxsplit was reached */
1469 /* Skip any remaining whitespace and copy to end of string */
1470 SKIP_SPACE(s
, i
, len
);
1472 SPLIT_ADD(s
, i
, len
);
1474 FIX_PREALLOC_SIZE(list
);
1481 Py_LOCAL_INLINE(PyObject
*)
1482 split_char(PyStringObject
*self
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
1484 const char *s
= PyString_AS_STRING(self
);
1485 register Py_ssize_t i
, j
, count
=0;
1487 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
1493 while ((j
< len
) && (maxcount
-- > 0)) {
1495 /* I found that using memchr makes no difference */
1503 if (i
== 0 && count
== 0 && PyString_CheckExact(self
)) {
1504 /* ch not in self, so just use self as list[0] */
1506 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1509 else if (i
<= len
) {
1510 SPLIT_ADD(s
, i
, len
);
1512 FIX_PREALLOC_SIZE(list
);
1520 PyDoc_STRVAR(split__doc__
,
1521 "S.split([sep [,maxsplit]]) -> list of strings\n\
1523 Return a list of the words in the string S, using sep as the\n\
1524 delimiter string. If maxsplit is given, at most maxsplit\n\
1525 splits are done. If sep is not specified or is None, any\n\
1526 whitespace string is a separator and empty strings are removed\n\
1530 string_split(PyStringObject
*self
, PyObject
*args
)
1532 Py_ssize_t len
= PyString_GET_SIZE(self
), n
, i
, j
;
1533 Py_ssize_t maxsplit
= -1, count
=0;
1534 const char *s
= PyString_AS_STRING(self
), *sub
;
1535 PyObject
*list
, *str
, *subobj
= Py_None
;
1540 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
1543 maxsplit
= PY_SSIZE_T_MAX
;
1544 if (subobj
== Py_None
)
1545 return split_whitespace(self
, len
, maxsplit
);
1546 if (PyString_Check(subobj
)) {
1547 sub
= PyString_AS_STRING(subobj
);
1548 n
= PyString_GET_SIZE(subobj
);
1550 #ifdef Py_USING_UNICODE
1551 else if (PyUnicode_Check(subobj
))
1552 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
1554 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1558 PyErr_SetString(PyExc_ValueError
, "empty separator");
1562 return split_char(self
, len
, sub
[0], maxsplit
);
1564 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1570 while (maxsplit
-- > 0) {
1571 pos
= fastsearch(s
+i
, len
-i
, sub
, n
, FAST_SEARCH
);
1580 while ((j
+n
<= len
) && (maxsplit
-- > 0)) {
1581 for (; j
+n
<= len
; j
++) {
1582 if (Py_STRING_MATCH(s
, j
, sub
, n
)) {
1590 SPLIT_ADD(s
, i
, len
);
1591 FIX_PREALLOC_SIZE(list
);
1599 PyDoc_STRVAR(partition__doc__
,
1600 "S.partition(sep) -> (head, sep, tail)\n\
1602 Search for the separator sep in S, and return the part before it,\n\
1603 the separator itself, and the part after it. If the separator is not\n\
1604 found, return S and two empty strings.");
1607 string_partition(PyStringObject
*self
, PyObject
*sep_obj
)
1612 if (PyString_Check(sep_obj
)) {
1613 sep
= PyString_AS_STRING(sep_obj
);
1614 sep_len
= PyString_GET_SIZE(sep_obj
);
1616 #ifdef Py_USING_UNICODE
1617 else if (PyUnicode_Check(sep_obj
))
1618 return PyUnicode_Partition((PyObject
*) self
, sep_obj
);
1620 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1623 return stringlib_partition(
1625 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1626 sep_obj
, sep
, sep_len
1630 PyDoc_STRVAR(rpartition__doc__
,
1631 "S.rpartition(sep) -> (tail, sep, head)\n\
1633 Search for the separator sep in S, starting at the end of S, and return\n\
1634 the part before it, the separator itself, and the part after it. If the\n\
1635 separator is not found, return two empty strings and S.");
1638 string_rpartition(PyStringObject
*self
, PyObject
*sep_obj
)
1643 if (PyString_Check(sep_obj
)) {
1644 sep
= PyString_AS_STRING(sep_obj
);
1645 sep_len
= PyString_GET_SIZE(sep_obj
);
1647 #ifdef Py_USING_UNICODE
1648 else if (PyUnicode_Check(sep_obj
))
1649 return PyUnicode_RPartition((PyObject
*) self
, sep_obj
);
1651 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1654 return stringlib_rpartition(
1656 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1657 sep_obj
, sep
, sep_len
1661 Py_LOCAL_INLINE(PyObject
*)
1662 rsplit_whitespace(PyStringObject
*self
, Py_ssize_t len
, Py_ssize_t maxsplit
)
1664 const char *s
= PyString_AS_STRING(self
);
1665 Py_ssize_t i
, j
, count
=0;
1667 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1674 while (maxsplit
-- > 0) {
1678 RSKIP_NONSPACE(s
, i
);
1679 if (j
== len
-1 && i
< 0 && PyString_CheckExact(self
)) {
1680 /* No whitespace in self, so just use it as list[0] */
1682 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1686 SPLIT_ADD(s
, i
+ 1, j
+ 1);
1689 /* Only occurs when maxsplit was reached */
1690 /* Skip any remaining whitespace and copy to beginning of string */
1693 SPLIT_ADD(s
, 0, i
+ 1);
1696 FIX_PREALLOC_SIZE(list
);
1697 if (PyList_Reverse(list
) < 0)
1705 Py_LOCAL_INLINE(PyObject
*)
1706 rsplit_char(PyStringObject
*self
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
1708 const char *s
= PyString_AS_STRING(self
);
1709 register Py_ssize_t i
, j
, count
=0;
1711 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
1717 while ((i
>= 0) && (maxcount
-- > 0)) {
1718 for (; i
>= 0; i
--) {
1720 SPLIT_ADD(s
, i
+ 1, j
+ 1);
1726 if (i
< 0 && count
== 0 && PyString_CheckExact(self
)) {
1727 /* ch not in self, so just use self as list[0] */
1729 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1733 SPLIT_ADD(s
, 0, j
+ 1);
1735 FIX_PREALLOC_SIZE(list
);
1736 if (PyList_Reverse(list
) < 0)
1745 PyDoc_STRVAR(rsplit__doc__
,
1746 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1748 Return a list of the words in the string S, using sep as the\n\
1749 delimiter string, starting at the end of the string and working\n\
1750 to the front. If maxsplit is given, at most maxsplit splits are\n\
1751 done. If sep is not specified or is None, any whitespace string\n\
1755 string_rsplit(PyStringObject
*self
, PyObject
*args
)
1757 Py_ssize_t len
= PyString_GET_SIZE(self
), n
, i
, j
;
1758 Py_ssize_t maxsplit
= -1, count
=0;
1759 const char *s
, *sub
;
1760 PyObject
*list
, *str
, *subobj
= Py_None
;
1762 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
1765 maxsplit
= PY_SSIZE_T_MAX
;
1766 if (subobj
== Py_None
)
1767 return rsplit_whitespace(self
, len
, maxsplit
);
1768 if (PyString_Check(subobj
)) {
1769 sub
= PyString_AS_STRING(subobj
);
1770 n
= PyString_GET_SIZE(subobj
);
1772 #ifdef Py_USING_UNICODE
1773 else if (PyUnicode_Check(subobj
))
1774 return PyUnicode_RSplit((PyObject
*)self
, subobj
, maxsplit
);
1776 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1780 PyErr_SetString(PyExc_ValueError
, "empty separator");
1784 return rsplit_char(self
, len
, sub
[0], maxsplit
);
1786 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1793 s
= PyString_AS_STRING(self
);
1794 while ( (i
>= 0) && (maxsplit
-- > 0) ) {
1796 if (Py_STRING_MATCH(s
, i
, sub
, n
)) {
1797 SPLIT_ADD(s
, i
+ n
, j
);
1805 FIX_PREALLOC_SIZE(list
);
1806 if (PyList_Reverse(list
) < 0)
1816 PyDoc_STRVAR(join__doc__
,
1817 "S.join(iterable) -> string\n\
1819 Return a string which is the concatenation of the strings in the\n\
1820 iterable. The separator between elements is S.");
1823 string_join(PyStringObject
*self
, PyObject
*orig
)
1825 char *sep
= PyString_AS_STRING(self
);
1826 const Py_ssize_t seplen
= PyString_GET_SIZE(self
);
1827 PyObject
*res
= NULL
;
1829 Py_ssize_t seqlen
= 0;
1832 PyObject
*seq
, *item
;
1834 seq
= PySequence_Fast(orig
, "");
1839 seqlen
= PySequence_Size(seq
);
1842 return PyString_FromString("");
1845 item
= PySequence_Fast_GET_ITEM(seq
, 0);
1846 if (PyString_CheckExact(item
) || PyUnicode_CheckExact(item
)) {
1853 /* There are at least two things to join, or else we have a subclass
1854 * of the builtin types in the sequence.
1855 * Do a pre-pass to figure out the total amount of space we'll
1856 * need (sz), see whether any argument is absurd, and defer to
1857 * the Unicode join if appropriate.
1859 for (i
= 0; i
< seqlen
; i
++) {
1860 const size_t old_sz
= sz
;
1861 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1862 if (!PyString_Check(item
)){
1863 #ifdef Py_USING_UNICODE
1864 if (PyUnicode_Check(item
)) {
1865 /* Defer to Unicode join.
1866 * CAUTION: There's no guarantee that the
1867 * original sequence can be iterated over
1868 * again, so we must pass seq here.
1871 result
= PyUnicode_Join((PyObject
*)self
, seq
);
1876 PyErr_Format(PyExc_TypeError
,
1877 "sequence item %zd: expected string,"
1879 i
, Py_TYPE(item
)->tp_name
);
1883 sz
+= PyString_GET_SIZE(item
);
1886 if (sz
< old_sz
|| sz
> PY_SSIZE_T_MAX
) {
1887 PyErr_SetString(PyExc_OverflowError
,
1888 "join() result is too long for a Python string");
1894 /* Allocate result space. */
1895 res
= PyString_FromStringAndSize((char*)NULL
, sz
);
1901 /* Catenate everything. */
1902 p
= PyString_AS_STRING(res
);
1903 for (i
= 0; i
< seqlen
; ++i
) {
1905 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1906 n
= PyString_GET_SIZE(item
);
1907 Py_MEMCPY(p
, PyString_AS_STRING(item
), n
);
1909 if (i
< seqlen
- 1) {
1910 Py_MEMCPY(p
, sep
, seplen
);
1920 _PyString_Join(PyObject
*sep
, PyObject
*x
)
1922 assert(sep
!= NULL
&& PyString_Check(sep
));
1924 return string_join((PyStringObject
*)sep
, x
);
1927 Py_LOCAL_INLINE(void)
1928 string_adjust_indices(Py_ssize_t
*start
, Py_ssize_t
*end
, Py_ssize_t len
)
1942 Py_LOCAL_INLINE(Py_ssize_t
)
1943 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
1948 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1949 PyObject
*obj_start
=Py_None
, *obj_end
=Py_None
;
1951 if (!PyArg_ParseTuple(args
, "O|OO:find/rfind/index/rindex", &subobj
,
1952 &obj_start
, &obj_end
))
1954 /* To support None in "start" and "end" arguments, meaning
1955 the same as if they were not passed.
1957 if (obj_start
!= Py_None
)
1958 if (!_PyEval_SliceIndex(obj_start
, &start
))
1960 if (obj_end
!= Py_None
)
1961 if (!_PyEval_SliceIndex(obj_end
, &end
))
1964 if (PyString_Check(subobj
)) {
1965 sub
= PyString_AS_STRING(subobj
);
1966 sub_len
= PyString_GET_SIZE(subobj
);
1968 #ifdef Py_USING_UNICODE
1969 else if (PyUnicode_Check(subobj
))
1970 return PyUnicode_Find(
1971 (PyObject
*)self
, subobj
, start
, end
, dir
);
1973 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
1974 /* XXX - the "expected a character buffer object" is pretty
1975 confusing for a non-expert. remap to something else ? */
1979 return stringlib_find_slice(
1980 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1981 sub
, sub_len
, start
, end
);
1983 return stringlib_rfind_slice(
1984 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1985 sub
, sub_len
, start
, end
);
1989 PyDoc_STRVAR(find__doc__
,
1990 "S.find(sub [,start [,end]]) -> int\n\
1992 Return the lowest index in S where substring sub is found,\n\
1993 such that sub is contained within s[start:end]. Optional\n\
1994 arguments start and end are interpreted as in slice notation.\n\
1996 Return -1 on failure.");
1999 string_find(PyStringObject
*self
, PyObject
*args
)
2001 Py_ssize_t result
= string_find_internal(self
, args
, +1);
2004 return PyInt_FromSsize_t(result
);
2008 PyDoc_STRVAR(index__doc__
,
2009 "S.index(sub [,start [,end]]) -> int\n\
2011 Like S.find() but raise ValueError when the substring is not found.");
2014 string_index(PyStringObject
*self
, PyObject
*args
)
2016 Py_ssize_t result
= string_find_internal(self
, args
, +1);
2020 PyErr_SetString(PyExc_ValueError
,
2021 "substring not found");
2024 return PyInt_FromSsize_t(result
);
2028 PyDoc_STRVAR(rfind__doc__
,
2029 "S.rfind(sub [,start [,end]]) -> int\n\
2031 Return the highest index in S where substring sub is found,\n\
2032 such that sub is contained within s[start:end]. Optional\n\
2033 arguments start and end are interpreted as in slice notation.\n\
2035 Return -1 on failure.");
2038 string_rfind(PyStringObject
*self
, PyObject
*args
)
2040 Py_ssize_t result
= string_find_internal(self
, args
, -1);
2043 return PyInt_FromSsize_t(result
);
2047 PyDoc_STRVAR(rindex__doc__
,
2048 "S.rindex(sub [,start [,end]]) -> int\n\
2050 Like S.rfind() but raise ValueError when the substring is not found.");
2053 string_rindex(PyStringObject
*self
, PyObject
*args
)
2055 Py_ssize_t result
= string_find_internal(self
, args
, -1);
2059 PyErr_SetString(PyExc_ValueError
,
2060 "substring not found");
2063 return PyInt_FromSsize_t(result
);
2067 Py_LOCAL_INLINE(PyObject
*)
2068 do_xstrip(PyStringObject
*self
, int striptype
, PyObject
*sepobj
)
2070 char *s
= PyString_AS_STRING(self
);
2071 Py_ssize_t len
= PyString_GET_SIZE(self
);
2072 char *sep
= PyString_AS_STRING(sepobj
);
2073 Py_ssize_t seplen
= PyString_GET_SIZE(sepobj
);
2077 if (striptype
!= RIGHTSTRIP
) {
2078 while (i
< len
&& memchr(sep
, Py_CHARMASK(s
[i
]), seplen
)) {
2084 if (striptype
!= LEFTSTRIP
) {
2087 } while (j
>= i
&& memchr(sep
, Py_CHARMASK(s
[j
]), seplen
));
2091 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
2093 return (PyObject
*)self
;
2096 return PyString_FromStringAndSize(s
+i
, j
-i
);
2100 Py_LOCAL_INLINE(PyObject
*)
2101 do_strip(PyStringObject
*self
, int striptype
)
2103 char *s
= PyString_AS_STRING(self
);
2104 Py_ssize_t len
= PyString_GET_SIZE(self
), i
, j
;
2107 if (striptype
!= RIGHTSTRIP
) {
2108 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
2114 if (striptype
!= LEFTSTRIP
) {
2117 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
2121 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
2123 return (PyObject
*)self
;
2126 return PyString_FromStringAndSize(s
+i
, j
-i
);
2130 Py_LOCAL_INLINE(PyObject
*)
2131 do_argstrip(PyStringObject
*self
, int striptype
, PyObject
*args
)
2133 PyObject
*sep
= NULL
;
2135 if (!PyArg_ParseTuple(args
, (char *)stripformat
[striptype
], &sep
))
2138 if (sep
!= NULL
&& sep
!= Py_None
) {
2139 if (PyString_Check(sep
))
2140 return do_xstrip(self
, striptype
, sep
);
2141 #ifdef Py_USING_UNICODE
2142 else if (PyUnicode_Check(sep
)) {
2143 PyObject
*uniself
= PyUnicode_FromObject((PyObject
*)self
);
2147 res
= _PyUnicode_XStrip((PyUnicodeObject
*)uniself
,
2153 PyErr_Format(PyExc_TypeError
,
2154 #ifdef Py_USING_UNICODE
2155 "%s arg must be None, str or unicode",
2157 "%s arg must be None or str",
2159 STRIPNAME(striptype
));
2163 return do_strip(self
, striptype
);
2167 PyDoc_STRVAR(strip__doc__
,
2168 "S.strip([chars]) -> string or unicode\n\
2170 Return a copy of the string S with leading and trailing\n\
2171 whitespace removed.\n\
2172 If chars is given and not None, remove characters in chars instead.\n\
2173 If chars is unicode, S will be converted to unicode before stripping");
2176 string_strip(PyStringObject
*self
, PyObject
*args
)
2178 if (PyTuple_GET_SIZE(args
) == 0)
2179 return do_strip(self
, BOTHSTRIP
); /* Common case */
2181 return do_argstrip(self
, BOTHSTRIP
, args
);
2185 PyDoc_STRVAR(lstrip__doc__
,
2186 "S.lstrip([chars]) -> string or unicode\n\
2188 Return a copy of the string S with leading whitespace removed.\n\
2189 If chars is given and not None, remove characters in chars instead.\n\
2190 If chars is unicode, S will be converted to unicode before stripping");
2193 string_lstrip(PyStringObject
*self
, PyObject
*args
)
2195 if (PyTuple_GET_SIZE(args
) == 0)
2196 return do_strip(self
, LEFTSTRIP
); /* Common case */
2198 return do_argstrip(self
, LEFTSTRIP
, args
);
2202 PyDoc_STRVAR(rstrip__doc__
,
2203 "S.rstrip([chars]) -> string or unicode\n\
2205 Return a copy of the string S with trailing whitespace removed.\n\
2206 If chars is given and not None, remove characters in chars instead.\n\
2207 If chars is unicode, S will be converted to unicode before stripping");
2210 string_rstrip(PyStringObject
*self
, PyObject
*args
)
2212 if (PyTuple_GET_SIZE(args
) == 0)
2213 return do_strip(self
, RIGHTSTRIP
); /* Common case */
2215 return do_argstrip(self
, RIGHTSTRIP
, args
);
2219 PyDoc_STRVAR(lower__doc__
,
2220 "S.lower() -> string\n\
2222 Return a copy of the string S converted to lowercase.");
2224 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2226 #define _tolower tolower
2230 string_lower(PyStringObject
*self
)
2233 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2236 newobj
= PyString_FromStringAndSize(NULL
, n
);
2240 s
= PyString_AS_STRING(newobj
);
2242 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2244 for (i
= 0; i
< n
; i
++) {
2245 int c
= Py_CHARMASK(s
[i
]);
2253 PyDoc_STRVAR(upper__doc__
,
2254 "S.upper() -> string\n\
2256 Return a copy of the string S converted to uppercase.");
2259 #define _toupper toupper
2263 string_upper(PyStringObject
*self
)
2266 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2269 newobj
= PyString_FromStringAndSize(NULL
, n
);
2273 s
= PyString_AS_STRING(newobj
);
2275 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2277 for (i
= 0; i
< n
; i
++) {
2278 int c
= Py_CHARMASK(s
[i
]);
2286 PyDoc_STRVAR(title__doc__
,
2287 "S.title() -> string\n\
2289 Return a titlecased version of S, i.e. words start with uppercase\n\
2290 characters, all remaining cased characters have lowercase.");
2293 string_title(PyStringObject
*self
)
2295 char *s
= PyString_AS_STRING(self
), *s_new
;
2296 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2297 int previous_is_cased
= 0;
2300 newobj
= PyString_FromStringAndSize(NULL
, n
);
2303 s_new
= PyString_AsString(newobj
);
2304 for (i
= 0; i
< n
; i
++) {
2305 int c
= Py_CHARMASK(*s
++);
2307 if (!previous_is_cased
)
2309 previous_is_cased
= 1;
2310 } else if (isupper(c
)) {
2311 if (previous_is_cased
)
2313 previous_is_cased
= 1;
2315 previous_is_cased
= 0;
2321 PyDoc_STRVAR(capitalize__doc__
,
2322 "S.capitalize() -> string\n\
2324 Return a copy of the string S with only its first character\n\
2328 string_capitalize(PyStringObject
*self
)
2330 char *s
= PyString_AS_STRING(self
), *s_new
;
2331 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2334 newobj
= PyString_FromStringAndSize(NULL
, n
);
2337 s_new
= PyString_AsString(newobj
);
2339 int c
= Py_CHARMASK(*s
++);
2341 *s_new
= toupper(c
);
2346 for (i
= 1; i
< n
; i
++) {
2347 int c
= Py_CHARMASK(*s
++);
2349 *s_new
= tolower(c
);
2358 PyDoc_STRVAR(count__doc__
,
2359 "S.count(sub[, start[, end]]) -> int\n\
2361 Return the number of non-overlapping occurrences of substring sub in\n\
2362 string S[start:end]. Optional arguments start and end are interpreted\n\
2363 as in slice notation.");
2366 string_count(PyStringObject
*self
, PyObject
*args
)
2369 const char *str
= PyString_AS_STRING(self
), *sub
;
2371 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
2373 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
2374 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2377 if (PyString_Check(sub_obj
)) {
2378 sub
= PyString_AS_STRING(sub_obj
);
2379 sub_len
= PyString_GET_SIZE(sub_obj
);
2381 #ifdef Py_USING_UNICODE
2382 else if (PyUnicode_Check(sub_obj
)) {
2384 count
= PyUnicode_Count((PyObject
*)self
, sub_obj
, start
, end
);
2388 return PyInt_FromSsize_t(count
);
2391 else if (PyObject_AsCharBuffer(sub_obj
, &sub
, &sub_len
))
2394 string_adjust_indices(&start
, &end
, PyString_GET_SIZE(self
));
2396 return PyInt_FromSsize_t(
2397 stringlib_count(str
+ start
, end
- start
, sub
, sub_len
)
2401 PyDoc_STRVAR(swapcase__doc__
,
2402 "S.swapcase() -> string\n\
2404 Return a copy of the string S with uppercase characters\n\
2405 converted to lowercase and vice versa.");
2408 string_swapcase(PyStringObject
*self
)
2410 char *s
= PyString_AS_STRING(self
), *s_new
;
2411 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2414 newobj
= PyString_FromStringAndSize(NULL
, n
);
2417 s_new
= PyString_AsString(newobj
);
2418 for (i
= 0; i
< n
; i
++) {
2419 int c
= Py_CHARMASK(*s
++);
2421 *s_new
= toupper(c
);
2423 else if (isupper(c
)) {
2424 *s_new
= tolower(c
);
2434 PyDoc_STRVAR(translate__doc__
,
2435 "S.translate(table [,deletechars]) -> string\n\
2437 Return a copy of the string S, where all characters occurring\n\
2438 in the optional argument deletechars are removed, and the\n\
2439 remaining characters have been mapped through the given\n\
2440 translation table, which must be a string of length 256.");
2443 string_translate(PyStringObject
*self
, PyObject
*args
)
2445 register char *input
, *output
;
2447 register Py_ssize_t i
, c
, changed
= 0;
2448 PyObject
*input_obj
= (PyObject
*)self
;
2449 const char *output_start
, *del_table
=NULL
;
2450 Py_ssize_t inlen
, tablen
, dellen
= 0;
2452 int trans_table
[256];
2453 PyObject
*tableobj
, *delobj
= NULL
;
2455 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
2456 &tableobj
, &delobj
))
2459 if (PyString_Check(tableobj
)) {
2460 table
= PyString_AS_STRING(tableobj
);
2461 tablen
= PyString_GET_SIZE(tableobj
);
2463 else if (tableobj
== Py_None
) {
2467 #ifdef Py_USING_UNICODE
2468 else if (PyUnicode_Check(tableobj
)) {
2469 /* Unicode .translate() does not support the deletechars
2470 parameter; instead a mapping to None will cause characters
2472 if (delobj
!= NULL
) {
2473 PyErr_SetString(PyExc_TypeError
,
2474 "deletions are implemented differently for unicode");
2477 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
2480 else if (PyObject_AsCharBuffer(tableobj
, &table
, &tablen
))
2483 if (tablen
!= 256) {
2484 PyErr_SetString(PyExc_ValueError
,
2485 "translation table must be 256 characters long");
2489 if (delobj
!= NULL
) {
2490 if (PyString_Check(delobj
)) {
2491 del_table
= PyString_AS_STRING(delobj
);
2492 dellen
= PyString_GET_SIZE(delobj
);
2494 #ifdef Py_USING_UNICODE
2495 else if (PyUnicode_Check(delobj
)) {
2496 PyErr_SetString(PyExc_TypeError
,
2497 "deletions are implemented differently for unicode");
2501 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
2509 inlen
= PyString_GET_SIZE(input_obj
);
2510 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
2513 output_start
= output
= PyString_AsString(result
);
2514 input
= PyString_AS_STRING(input_obj
);
2516 if (dellen
== 0 && table
!= NULL
) {
2517 /* If no deletions are required, use faster code */
2518 for (i
= inlen
; --i
>= 0; ) {
2519 c
= Py_CHARMASK(*input
++);
2520 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
2523 if (changed
|| !PyString_CheckExact(input_obj
))
2526 Py_INCREF(input_obj
);
2530 if (table
== NULL
) {
2531 for (i
= 0; i
< 256; i
++)
2532 trans_table
[i
] = Py_CHARMASK(i
);
2534 for (i
= 0; i
< 256; i
++)
2535 trans_table
[i
] = Py_CHARMASK(table
[i
]);
2538 for (i
= 0; i
< dellen
; i
++)
2539 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
2541 for (i
= inlen
; --i
>= 0; ) {
2542 c
= Py_CHARMASK(*input
++);
2543 if (trans_table
[c
] != -1)
2544 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
2548 if (!changed
&& PyString_CheckExact(input_obj
)) {
2550 Py_INCREF(input_obj
);
2553 /* Fix the size of the resulting string */
2555 _PyString_Resize(&result
, output
- output_start
);
2563 /* find and count characters and substrings */
2565 #define findchar(target, target_len, c) \
2566 ((char *)memchr((const void *)(target), c, target_len))
2568 /* String ops must return a string. */
2569 /* If the object is an instance of a string subclass, create a copy */
2570 Py_LOCAL(PyStringObject
*)
2571 return_self(PyStringObject
*self
)
2573 if (PyString_CheckExact(self
)) {
2577 return (PyStringObject
*)PyString_FromStringAndSize(
2578 PyString_AS_STRING(self
),
2579 PyString_GET_SIZE(self
));
/* Scan target[0 .. target_len) for the byte c with repeated findchar()
   calls, comparing the running count against maxcount after each hit.
   (The statements that bump the count and return it fall on lines not
   retained in this listing.)
   target_len is a Py_ssize_t, matching the Py_ssize_t lengths the callers
   pass in, so a long buffer's length is not truncated through an int
   before `end' is computed. */
2582 Py_LOCAL_INLINE(Py_ssize_t
)
2583 countchar(const char *target
, Py_ssize_t target_len
, char c
, Py_ssize_t maxcount
)
2586 const char *start
=target
;
2587 const char *end
=target
+target_len
;
2589 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
2591 if (count
>= maxcount
)
2598 Py_LOCAL(Py_ssize_t
)
2599 findstring(const char *target
, Py_ssize_t target_len
,
2600 const char *pattern
, Py_ssize_t pattern_len
,
2606 start
+= target_len
;
2610 if (end
> target_len
) {
2612 } else if (end
< 0) {
2618 /* zero-length substrings always match at the first attempt */
2619 if (pattern_len
== 0)
2620 return (direction
> 0) ? start
: end
;
2624 if (direction
< 0) {
2625 for (; end
>= start
; end
--)
2626 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
))
2629 for (; start
<= end
; start
++)
2630 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
))
2636 Py_LOCAL_INLINE(Py_ssize_t
)
2637 countstring(const char *target
, Py_ssize_t target_len
,
2638 const char *pattern
, Py_ssize_t pattern_len
,
2641 int direction
, Py_ssize_t maxcount
)
2646 start
+= target_len
;
2650 if (end
> target_len
) {
2652 } else if (end
< 0) {
2658 /* zero-length substrings match everywhere */
2659 if (pattern_len
== 0 || maxcount
== 0) {
2660 if (target_len
+1 < maxcount
)
2661 return target_len
+1;
2666 if (direction
< 0) {
2667 for (; (end
>= start
); end
--)
2668 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
)) {
2670 if (--maxcount
<= 0) break;
2671 end
-= pattern_len
-1;
2674 for (; (start
<= end
); start
++)
2675 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
)) {
2677 if (--maxcount
<= 0)
2679 start
+= pattern_len
-1;
2686 /* Algorithms for different cases of string replacement */
2688 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2689 Py_LOCAL(PyStringObject
*)
2690 replace_interleave(PyStringObject
*self
,
2691 const char *to_s
, Py_ssize_t to_len
,
2692 Py_ssize_t maxcount
)
2694 char *self_s
, *result_s
;
2695 Py_ssize_t self_len
, result_len
;
2696 Py_ssize_t count
, i
, product
;
2697 PyStringObject
*result
;
2699 self_len
= PyString_GET_SIZE(self
);
2701 /* 1 at the end plus 1 after every character */
2703 if (maxcount
< count
)
2706 /* Check for overflow */
2707 /* result_len = count * to_len + self_len; */
2708 product
= count
* to_len
;
2709 if (product
/ to_len
!= count
) {
2710 PyErr_SetString(PyExc_OverflowError
,
2711 "replace string is too long");
2714 result_len
= product
+ self_len
;
2715 if (result_len
< 0) {
2716 PyErr_SetString(PyExc_OverflowError
,
2717 "replace string is too long");
2721 if (! (result
= (PyStringObject
*)
2722 PyString_FromStringAndSize(NULL
, result_len
)) )
2725 self_s
= PyString_AS_STRING(self
);
2726 result_s
= PyString_AS_STRING(result
);
2728 /* TODO: special case single character, which doesn't need memcpy */
2730 /* Lay the first one down (guaranteed this will occur) */
2731 Py_MEMCPY(result_s
, to_s
, to_len
);
2735 for (i
=0; i
<count
; i
++) {
2736 *result_s
++ = *self_s
++;
2737 Py_MEMCPY(result_s
, to_s
, to_len
);
2741 /* Copy the rest of the original string */
2742 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
2747 /* Special case for deleting a single character */
2748 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2749 Py_LOCAL(PyStringObject
*)
2750 replace_delete_single_character(PyStringObject
*self
,
2751 char from_c
, Py_ssize_t maxcount
)
2753 char *self_s
, *result_s
;
2754 char *start
, *next
, *end
;
2755 Py_ssize_t self_len
, result_len
;
2757 PyStringObject
*result
;
2759 self_len
= PyString_GET_SIZE(self
);
2760 self_s
= PyString_AS_STRING(self
);
2762 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2764 return return_self(self
);
2767 result_len
= self_len
- count
; /* from_len == 1 */
2768 assert(result_len
>=0);
2770 if ( (result
= (PyStringObject
*)
2771 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2773 result_s
= PyString_AS_STRING(result
);
2776 end
= self_s
+ self_len
;
2777 while (count
-- > 0) {
2778 next
= findchar(start
, end
-start
, from_c
);
2781 Py_MEMCPY(result_s
, start
, next
-start
);
2782 result_s
+= (next
-start
);
2785 Py_MEMCPY(result_s
, start
, end
-start
);
2790 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2792 Py_LOCAL(PyStringObject
*)
2793 replace_delete_substring(PyStringObject
*self
,
2794 const char *from_s
, Py_ssize_t from_len
,
2795 Py_ssize_t maxcount
) {
2796 char *self_s
, *result_s
;
2797 char *start
, *next
, *end
;
2798 Py_ssize_t self_len
, result_len
;
2799 Py_ssize_t count
, offset
;
2800 PyStringObject
*result
;
2802 self_len
= PyString_GET_SIZE(self
);
2803 self_s
= PyString_AS_STRING(self
);
2805 count
= countstring(self_s
, self_len
,
2812 return return_self(self
);
2815 result_len
= self_len
- (count
* from_len
);
2816 assert (result_len
>=0);
2818 if ( (result
= (PyStringObject
*)
2819 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2822 result_s
= PyString_AS_STRING(result
);
2825 end
= self_s
+ self_len
;
2826 while (count
-- > 0) {
2827 offset
= findstring(start
, end
-start
,
2829 0, end
-start
, FORWARD
);
2832 next
= start
+ offset
;
2834 Py_MEMCPY(result_s
, start
, next
-start
);
2836 result_s
+= (next
-start
);
2837 start
= next
+from_len
;
2839 Py_MEMCPY(result_s
, start
, end
-start
);
2843 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2844 Py_LOCAL(PyStringObject
*)
2845 replace_single_character_in_place(PyStringObject
*self
,
2846 char from_c
, char to_c
,
2847 Py_ssize_t maxcount
)
2849 char *self_s
, *result_s
, *start
, *end
, *next
;
2850 Py_ssize_t self_len
;
2851 PyStringObject
*result
;
2853 /* The result string will be the same size */
2854 self_s
= PyString_AS_STRING(self
);
2855 self_len
= PyString_GET_SIZE(self
);
2857 next
= findchar(self_s
, self_len
, from_c
);
2860 /* No matches; return the original string */
2861 return return_self(self
);
2864 /* Need to make a new string */
2865 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2868 result_s
= PyString_AS_STRING(result
);
2869 Py_MEMCPY(result_s
, self_s
, self_len
);
2871 /* change everything in-place, starting with this one */
2872 start
= result_s
+ (next
-self_s
);
2875 end
= result_s
+ self_len
;
2877 while (--maxcount
> 0) {
2878 next
= findchar(start
, end
-start
, from_c
);
2888 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2889 Py_LOCAL(PyStringObject
*)
2890 replace_substring_in_place(PyStringObject
*self
,
2891 const char *from_s
, Py_ssize_t from_len
,
2892 const char *to_s
, Py_ssize_t to_len
,
2893 Py_ssize_t maxcount
)
2895 char *result_s
, *start
, *end
;
2897 Py_ssize_t self_len
, offset
;
2898 PyStringObject
*result
;
2900 /* The result string will be the same size */
2902 self_s
= PyString_AS_STRING(self
);
2903 self_len
= PyString_GET_SIZE(self
);
2905 offset
= findstring(self_s
, self_len
,
2907 0, self_len
, FORWARD
);
2909 /* No matches; return the original string */
2910 return return_self(self
);
2913 /* Need to make a new string */
2914 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2917 result_s
= PyString_AS_STRING(result
);
2918 Py_MEMCPY(result_s
, self_s
, self_len
);
2920 /* change everything in-place, starting with this one */
2921 start
= result_s
+ offset
;
2922 Py_MEMCPY(start
, to_s
, from_len
);
2924 end
= result_s
+ self_len
;
2926 while ( --maxcount
> 0) {
2927 offset
= findstring(start
, end
-start
,
2929 0, end
-start
, FORWARD
);
2932 Py_MEMCPY(start
+offset
, to_s
, from_len
);
2933 start
+= offset
+from_len
;
2939 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2940 Py_LOCAL(PyStringObject
*)
2941 replace_single_character(PyStringObject
*self
,
2943 const char *to_s
, Py_ssize_t to_len
,
2944 Py_ssize_t maxcount
)
2946 char *self_s
, *result_s
;
2947 char *start
, *next
, *end
;
2948 Py_ssize_t self_len
, result_len
;
2949 Py_ssize_t count
, product
;
2950 PyStringObject
*result
;
2952 self_s
= PyString_AS_STRING(self
);
2953 self_len
= PyString_GET_SIZE(self
);
2955 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2957 /* no matches, return unchanged */
2958 return return_self(self
);
2961 /* use the difference between current and new, hence the "-1" */
2962 /* result_len = self_len + count * (to_len-1) */
2963 product
= count
* (to_len
-1);
2964 if (product
/ (to_len
-1) != count
) {
2965 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2968 result_len
= self_len
+ product
;
2969 if (result_len
< 0) {
2970 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2974 if ( (result
= (PyStringObject
*)
2975 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2977 result_s
= PyString_AS_STRING(result
);
2980 end
= self_s
+ self_len
;
2981 while (count
-- > 0) {
2982 next
= findchar(start
, end
-start
, from_c
);
2986 if (next
== start
) {
2987 /* replace with the 'to' */
2988 Py_MEMCPY(result_s
, to_s
, to_len
);
2992 /* copy the unchanged old then the 'to' */
2993 Py_MEMCPY(result_s
, start
, next
-start
);
2994 result_s
+= (next
-start
);
2995 Py_MEMCPY(result_s
, to_s
, to_len
);
3000 /* Copy the remainder of the original string */
3001 Py_MEMCPY(result_s
, start
, end
-start
);
3006 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
3007 Py_LOCAL(PyStringObject
*)
3008 replace_substring(PyStringObject
*self
,
3009 const char *from_s
, Py_ssize_t from_len
,
3010 const char *to_s
, Py_ssize_t to_len
,
3011 Py_ssize_t maxcount
) {
3012 char *self_s
, *result_s
;
3013 char *start
, *next
, *end
;
3014 Py_ssize_t self_len
, result_len
;
3015 Py_ssize_t count
, offset
, product
;
3016 PyStringObject
*result
;
3018 self_s
= PyString_AS_STRING(self
);
3019 self_len
= PyString_GET_SIZE(self
);
3021 count
= countstring(self_s
, self_len
,
3023 0, self_len
, FORWARD
, maxcount
);
3025 /* no matches, return unchanged */
3026 return return_self(self
);
3029 /* Check for overflow */
3030 /* result_len = self_len + count * (to_len-from_len) */
3031 product
= count
* (to_len
-from_len
);
3032 if (product
/ (to_len
-from_len
) != count
) {
3033 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
3036 result_len
= self_len
+ product
;
3037 if (result_len
< 0) {
3038 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
3042 if ( (result
= (PyStringObject
*)
3043 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
3045 result_s
= PyString_AS_STRING(result
);
3048 end
= self_s
+ self_len
;
3049 while (count
-- > 0) {
3050 offset
= findstring(start
, end
-start
,
3052 0, end
-start
, FORWARD
);
3055 next
= start
+offset
;
3056 if (next
== start
) {
3057 /* replace with the 'to' */
3058 Py_MEMCPY(result_s
, to_s
, to_len
);
3062 /* copy the unchanged old then the 'to' */
3063 Py_MEMCPY(result_s
, start
, next
-start
);
3064 result_s
+= (next
-start
);
3065 Py_MEMCPY(result_s
, to_s
, to_len
);
3067 start
= next
+from_len
;
3070 /* Copy the remainder of the original string */
3071 Py_MEMCPY(result_s
, start
, end
-start
);
3077 Py_LOCAL(PyStringObject
*)
3078 replace(PyStringObject
*self
,
3079 const char *from_s
, Py_ssize_t from_len
,
3080 const char *to_s
, Py_ssize_t to_len
,
3081 Py_ssize_t maxcount
)
3084 maxcount
= PY_SSIZE_T_MAX
;
3085 } else if (maxcount
== 0 || PyString_GET_SIZE(self
) == 0) {
3086 /* nothing to do; return the original string */
3087 return return_self(self
);
3090 if (maxcount
== 0 ||
3091 (from_len
== 0 && to_len
== 0)) {
3092 /* nothing to do; return the original string */
3093 return return_self(self
);
3096 /* Handle zero-length special cases */
3098 if (from_len
== 0) {
3099 /* insert the 'to' string everywhere. */
3100 /* >>> "Python".replace("", ".") */
3101 /* '.P.y.t.h.o.n.' */
3102 return replace_interleave(self
, to_s
, to_len
, maxcount
);
3105 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3106 /* point for an empty self string to generate a non-empty string */
3107 /* Special case so the remaining code always gets a non-empty string */
3108 if (PyString_GET_SIZE(self
) == 0) {
3109 return return_self(self
);
3113 /* delete all occurrences of 'from' string */
3114 if (from_len
== 1) {
3115 return replace_delete_single_character(
3116 self
, from_s
[0], maxcount
);
3118 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
3122 /* Handle special case where both strings have the same length */
3124 if (from_len
== to_len
) {
3125 if (from_len
== 1) {
3126 return replace_single_character_in_place(
3132 return replace_substring_in_place(
3133 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
3137 /* Otherwise use the more generic algorithms */
3138 if (from_len
== 1) {
3139 return replace_single_character(self
, from_s
[0],
3140 to_s
, to_len
, maxcount
);
3142 /* len('from')>=2, len('to')>=1 */
3143 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
3147 PyDoc_STRVAR(replace__doc__
,
3148 "S.replace (old, new[, count]) -> string\n\
3150 Return a copy of string S with all occurrences of substring\n\
3151 old replaced by new. If the optional argument count is\n\
3152 given, only the first count occurrences are replaced.");
3155 string_replace(PyStringObject
*self
, PyObject
*args
)
3157 Py_ssize_t count
= -1;
3158 PyObject
*from
, *to
;
3159 const char *from_s
, *to_s
;
3160 Py_ssize_t from_len
, to_len
;
3162 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
3165 if (PyString_Check(from
)) {
3166 from_s
= PyString_AS_STRING(from
);
3167 from_len
= PyString_GET_SIZE(from
);
3169 #ifdef Py_USING_UNICODE
3170 if (PyUnicode_Check(from
))
3171 return PyUnicode_Replace((PyObject
*)self
,
3174 else if (PyObject_AsCharBuffer(from
, &from_s
, &from_len
))
3177 if (PyString_Check(to
)) {
3178 to_s
= PyString_AS_STRING(to
);
3179 to_len
= PyString_GET_SIZE(to
);
3181 #ifdef Py_USING_UNICODE
3182 else if (PyUnicode_Check(to
))
3183 return PyUnicode_Replace((PyObject
*)self
,
3186 else if (PyObject_AsCharBuffer(to
, &to_s
, &to_len
))
3189 return (PyObject
*)replace((PyStringObject
*) self
,
3191 to_s
, to_len
, count
);
3196 /* Matches the end (direction >= 0) or start (direction < 0) of self
3197 * against substr, using the start and end arguments. Returns
3198 * -1 on error, 0 if not found and 1 if found.
3201 _string_tailmatch(PyStringObject
*self
, PyObject
*substr
, Py_ssize_t start
,
3202 Py_ssize_t end
, int direction
)
3204 Py_ssize_t len
= PyString_GET_SIZE(self
);
3209 if (PyString_Check(substr
)) {
3210 sub
= PyString_AS_STRING(substr
);
3211 slen
= PyString_GET_SIZE(substr
);
3213 #ifdef Py_USING_UNICODE
3214 else if (PyUnicode_Check(substr
))
3215 return PyUnicode_Tailmatch((PyObject
*)self
,
3216 substr
, start
, end
, direction
);
3218 else if (PyObject_AsCharBuffer(substr
, &sub
, &slen
))
3220 str
= PyString_AS_STRING(self
);
3222 string_adjust_indices(&start
, &end
, len
);
3224 if (direction
< 0) {
3226 if (start
+slen
> len
)
3230 if (end
-start
< slen
|| start
> len
)
3233 if (end
-slen
> start
)
3236 if (end
-start
>= slen
)
3237 return ! memcmp(str
+start
, sub
, slen
);
3242 PyDoc_STRVAR(startswith__doc__
,
3243 "S.startswith(prefix[, start[, end]]) -> bool\n\
3245 Return True if S starts with the specified prefix, False otherwise.\n\
3246 With optional start, test S beginning at that position.\n\
3247 With optional end, stop comparing S at that position.\n\
3248 prefix can also be a tuple of strings to try.");
3251 string_startswith(PyStringObject
*self
, PyObject
*args
)
3253 Py_ssize_t start
= 0;
3254 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3258 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
3259 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
3261 if (PyTuple_Check(subobj
)) {
3263 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
3264 result
= _string_tailmatch(self
,
3265 PyTuple_GET_ITEM(subobj
, i
),
3275 result
= _string_tailmatch(self
, subobj
, start
, end
, -1);
3279 return PyBool_FromLong(result
);
3283 PyDoc_STRVAR(endswith__doc__
,
3284 "S.endswith(suffix[, start[, end]]) -> bool\n\
3286 Return True if S ends with the specified suffix, False otherwise.\n\
3287 With optional start, test S beginning at that position.\n\
3288 With optional end, stop comparing S at that position.\n\
3289 suffix can also be a tuple of strings to try.");
3292 string_endswith(PyStringObject
*self
, PyObject
*args
)
3294 Py_ssize_t start
= 0;
3295 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3299 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
3300 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
3302 if (PyTuple_Check(subobj
)) {
3304 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
3305 result
= _string_tailmatch(self
,
3306 PyTuple_GET_ITEM(subobj
, i
),
3316 result
= _string_tailmatch(self
, subobj
, start
, end
, +1);
3320 return PyBool_FromLong(result
);
3324 PyDoc_STRVAR(encode__doc__
,
3325 "S.encode([encoding[,errors]]) -> object\n\
3327 Encodes S using the codec registered for encoding. encoding defaults\n\
3328 to the default encoding. errors may be given to set a different error\n\
3329 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3330 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3331 'xmlcharrefreplace' as well as any other name registered with\n\
3332 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3335 string_encode(PyStringObject
*self
, PyObject
*args
, PyObject
*kwargs
)
3337 static char *kwlist
[] = {"encoding", "errors", 0};
3338 char *encoding
= NULL
;
3339 char *errors
= NULL
;
3342 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|ss:encode",
3343 kwlist
, &encoding
, &errors
))
3345 v
= PyString_AsEncodedObject((PyObject
*)self
, encoding
, errors
);
3348 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3349 PyErr_Format(PyExc_TypeError
,
3350 "encoder did not return a string/unicode object "
3352 Py_TYPE(v
)->tp_name
);
3363 PyDoc_STRVAR(decode__doc__
,
3364 "S.decode([encoding[,errors]]) -> object\n\
3366 Decodes S using the codec registered for encoding. encoding defaults\n\
3367 to the default encoding. errors may be given to set a different error\n\
3368 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3369 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3370 as well as any other name registered with codecs.register_error that is\n\
3371 able to handle UnicodeDecodeErrors.");
3374 string_decode(PyStringObject
*self
, PyObject
*args
, PyObject
*kwargs
)
3376 static char *kwlist
[] = {"encoding", "errors", 0};
3377 char *encoding
= NULL
;
3378 char *errors
= NULL
;
3381 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|ss:decode",
3382 kwlist
, &encoding
, &errors
))
3384 v
= PyString_AsDecodedObject((PyObject
*)self
, encoding
, errors
);
3387 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3388 PyErr_Format(PyExc_TypeError
,
3389 "decoder did not return a string/unicode object "
3391 Py_TYPE(v
)->tp_name
);
3402 PyDoc_STRVAR(expandtabs__doc__
,
3403 "S.expandtabs([tabsize]) -> string\n\
3405 Return a copy of S where all tab characters are expanded using spaces.\n\
3406 If tabsize is not given, a tab size of 8 characters is assumed.");
3409 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
3411 const char *e
, *p
, *qe
;
3413 Py_ssize_t i
, j
, incr
;
3417 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
3420 /* First pass: determine size of output string */
3421 i
= 0; /* chars up to and including most recent \n or \r */
3422 j
= 0; /* chars since most recent \n or \r (use in tab calculations) */
3423 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
); /* end of input */
3424 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3427 incr
= tabsize
- (j
% tabsize
);
3428 if (j
> PY_SSIZE_T_MAX
- incr
)
3434 if (j
> PY_SSIZE_T_MAX
- 1)
3437 if (*p
== '\n' || *p
== '\r') {
3438 if (i
> PY_SSIZE_T_MAX
- j
)
3445 if (i
> PY_SSIZE_T_MAX
- j
)
3448 /* Second pass: create output string and fill it */
3449 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
3453 j
= 0; /* same as in first pass */
3454 q
= PyString_AS_STRING(u
); /* next output char */
3455 qe
= PyString_AS_STRING(u
) + PyString_GET_SIZE(u
); /* end of output */
3457 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3460 i
= tabsize
- (j
% tabsize
);
3474 if (*p
== '\n' || *p
== '\r')
3483 PyErr_SetString(PyExc_OverflowError
, "new string is too long");
3487 Py_LOCAL_INLINE(PyObject
*)
3488 pad(PyStringObject
*self
, Py_ssize_t left
, Py_ssize_t right
, char fill
)
3497 if (left
== 0 && right
== 0 && PyString_CheckExact(self
)) {
3499 return (PyObject
*)self
;
3502 u
= PyString_FromStringAndSize(NULL
,
3503 left
+ PyString_GET_SIZE(self
) + right
);
3506 memset(PyString_AS_STRING(u
), fill
, left
);
3507 Py_MEMCPY(PyString_AS_STRING(u
) + left
,
3508 PyString_AS_STRING(self
),
3509 PyString_GET_SIZE(self
));
3511 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
3518 PyDoc_STRVAR(ljust__doc__
,
3519 "S.ljust(width[, fillchar]) -> string\n"
3521 "Return S left-justified in a string of length width. Padding is\n"
3522 "done using the specified fill character (default is a space).");
3525 string_ljust(PyStringObject
*self
, PyObject
*args
)
3528 char fillchar
= ' ';
3530 if (!PyArg_ParseTuple(args
, "n|c:ljust", &width
, &fillchar
))
3533 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3535 return (PyObject
*) self
;
3538 return pad(self
, 0, width
- PyString_GET_SIZE(self
), fillchar
);
3542 PyDoc_STRVAR(rjust__doc__
,
3543 "S.rjust(width[, fillchar]) -> string\n"
3545 "Return S right-justified in a string of length width. Padding is\n"
3546 "done using the specified fill character (default is a space)");
3549 string_rjust(PyStringObject
*self
, PyObject
*args
)
3552 char fillchar
= ' ';
3554 if (!PyArg_ParseTuple(args
, "n|c:rjust", &width
, &fillchar
))
3557 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3559 return (PyObject
*) self
;
3562 return pad(self
, width
- PyString_GET_SIZE(self
), 0, fillchar
);
3566 PyDoc_STRVAR(center__doc__
,
3567 "S.center(width[, fillchar]) -> string\n"
3569 "Return S centered in a string of length width. Padding is\n"
3570 "done using the specified fill character (default is a space)");
3573 string_center(PyStringObject
*self
, PyObject
*args
)
3575 Py_ssize_t marg
, left
;
3577 char fillchar
= ' ';
3579 if (!PyArg_ParseTuple(args
, "n|c:center", &width
, &fillchar
))
3582 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3584 return (PyObject
*) self
;
3587 marg
= width
- PyString_GET_SIZE(self
);
3588 left
= marg
/ 2 + (marg
& width
& 1);
3590 return pad(self
, left
, marg
- left
, fillchar
);
3593 PyDoc_STRVAR(zfill__doc__
,
3594 "S.zfill(width) -> string\n"
3596 "Pad a numeric string S with zeros on the left, to fill a field\n"
3597 "of the specified width. The string S is never truncated.");
3600 string_zfill(PyStringObject
*self
, PyObject
*args
)
3607 if (!PyArg_ParseTuple(args
, "n:zfill", &width
))
3610 if (PyString_GET_SIZE(self
) >= width
) {
3611 if (PyString_CheckExact(self
)) {
3613 return (PyObject
*) self
;
3616 return PyString_FromStringAndSize(
3617 PyString_AS_STRING(self
),
3618 PyString_GET_SIZE(self
)
3622 fill
= width
- PyString_GET_SIZE(self
);
3624 s
= pad(self
, fill
, 0, '0');
3629 p
= PyString_AS_STRING(s
);
3630 if (p
[fill
] == '+' || p
[fill
] == '-') {
3631 /* move sign to beginning of string */
3636 return (PyObject
*) s
;
3639 PyDoc_STRVAR(isspace__doc__
,
3640 "S.isspace() -> bool\n\
3642 Return True if all characters in S are whitespace\n\
3643 and there is at least one character in S, False otherwise.");
3646 string_isspace(PyStringObject
*self
)
3648 register const unsigned char *p
3649 = (unsigned char *) PyString_AS_STRING(self
);
3650 register const unsigned char *e
;
3652 /* Shortcut for single character strings */
3653 if (PyString_GET_SIZE(self
) == 1 &&
3655 return PyBool_FromLong(1);
3657 /* Special case for empty strings */
3658 if (PyString_GET_SIZE(self
) == 0)
3659 return PyBool_FromLong(0);
3661 e
= p
+ PyString_GET_SIZE(self
);
3662 for (; p
< e
; p
++) {
3664 return PyBool_FromLong(0);
3666 return PyBool_FromLong(1);
3670 PyDoc_STRVAR(isalpha__doc__
,
3671 "S.isalpha() -> bool\n\
3673 Return True if all characters in S are alphabetic\n\
3674 and there is at least one character in S, False otherwise.");
3677 string_isalpha(PyStringObject
*self
)
3679 register const unsigned char *p
3680 = (unsigned char *) PyString_AS_STRING(self
);
3681 register const unsigned char *e
;
3683 /* Shortcut for single character strings */
3684 if (PyString_GET_SIZE(self
) == 1 &&
3686 return PyBool_FromLong(1);
3688 /* Special case for empty strings */
3689 if (PyString_GET_SIZE(self
) == 0)
3690 return PyBool_FromLong(0);
3692 e
= p
+ PyString_GET_SIZE(self
);
3693 for (; p
< e
; p
++) {
3695 return PyBool_FromLong(0);
3697 return PyBool_FromLong(1);
3701 PyDoc_STRVAR(isalnum__doc__
,
3702 "S.isalnum() -> bool\n\
3704 Return True if all characters in S are alphanumeric\n\
3705 and there is at least one character in S, False otherwise.");
3708 string_isalnum(PyStringObject
*self
)
3710 register const unsigned char *p
3711 = (unsigned char *) PyString_AS_STRING(self
);
3712 register const unsigned char *e
;
3714 /* Shortcut for single character strings */
3715 if (PyString_GET_SIZE(self
) == 1 &&
3717 return PyBool_FromLong(1);
3719 /* Special case for empty strings */
3720 if (PyString_GET_SIZE(self
) == 0)
3721 return PyBool_FromLong(0);
3723 e
= p
+ PyString_GET_SIZE(self
);
3724 for (; p
< e
; p
++) {
3726 return PyBool_FromLong(0);
3728 return PyBool_FromLong(1);
3732 PyDoc_STRVAR(isdigit__doc__
,
3733 "S.isdigit() -> bool\n\
3735 Return True if all characters in S are digits\n\
3736 and there is at least one character in S, False otherwise.");
3739 string_isdigit(PyStringObject
*self
)
3741 register const unsigned char *p
3742 = (unsigned char *) PyString_AS_STRING(self
);
3743 register const unsigned char *e
;
3745 /* Shortcut for single character strings */
3746 if (PyString_GET_SIZE(self
) == 1 &&
3748 return PyBool_FromLong(1);
3750 /* Special case for empty strings */
3751 if (PyString_GET_SIZE(self
) == 0)
3752 return PyBool_FromLong(0);
3754 e
= p
+ PyString_GET_SIZE(self
);
3755 for (; p
< e
; p
++) {
3757 return PyBool_FromLong(0);
3759 return PyBool_FromLong(1);
3763 PyDoc_STRVAR(islower__doc__
,
3764 "S.islower() -> bool\n\
3766 Return True if all cased characters in S are lowercase and there is\n\
3767 at least one cased character in S, False otherwise.");
3770 string_islower(PyStringObject
*self
)
3772 register const unsigned char *p
3773 = (unsigned char *) PyString_AS_STRING(self
);
3774 register const unsigned char *e
;
3777 /* Shortcut for single character strings */
3778 if (PyString_GET_SIZE(self
) == 1)
3779 return PyBool_FromLong(islower(*p
) != 0);
3781 /* Special case for empty strings */
3782 if (PyString_GET_SIZE(self
) == 0)
3783 return PyBool_FromLong(0);
3785 e
= p
+ PyString_GET_SIZE(self
);
3787 for (; p
< e
; p
++) {
3789 return PyBool_FromLong(0);
3790 else if (!cased
&& islower(*p
))
3793 return PyBool_FromLong(cased
);
3797 PyDoc_STRVAR(isupper__doc__
,
3798 "S.isupper() -> bool\n\
3800 Return True if all cased characters in S are uppercase and there is\n\
3801 at least one cased character in S, False otherwise.");
3804 string_isupper(PyStringObject
*self
)
3806 register const unsigned char *p
3807 = (unsigned char *) PyString_AS_STRING(self
);
3808 register const unsigned char *e
;
3811 /* Shortcut for single character strings */
3812 if (PyString_GET_SIZE(self
) == 1)
3813 return PyBool_FromLong(isupper(*p
) != 0);
3815 /* Special case for empty strings */
3816 if (PyString_GET_SIZE(self
) == 0)
3817 return PyBool_FromLong(0);
3819 e
= p
+ PyString_GET_SIZE(self
);
3821 for (; p
< e
; p
++) {
3823 return PyBool_FromLong(0);
3824 else if (!cased
&& isupper(*p
))
3827 return PyBool_FromLong(cased
);
3831 PyDoc_STRVAR(istitle__doc__
,
3832 "S.istitle() -> bool\n\
3834 Return True if S is a titlecased string and there is at least one\n\
3835 character in S, i.e. uppercase characters may only follow uncased\n\
3836 characters and lowercase characters only cased ones. Return False\n\
3840 string_istitle(PyStringObject
*self
, PyObject
*uncased
)
3842 register const unsigned char *p
3843 = (unsigned char *) PyString_AS_STRING(self
);
3844 register const unsigned char *e
;
3845 int cased
, previous_is_cased
;
3847 /* Shortcut for single character strings */
3848 if (PyString_GET_SIZE(self
) == 1)
3849 return PyBool_FromLong(isupper(*p
) != 0);
3851 /* Special case for empty strings */
3852 if (PyString_GET_SIZE(self
) == 0)
3853 return PyBool_FromLong(0);
3855 e
= p
+ PyString_GET_SIZE(self
);
3857 previous_is_cased
= 0;
3858 for (; p
< e
; p
++) {
3859 register const unsigned char ch
= *p
;
3862 if (previous_is_cased
)
3863 return PyBool_FromLong(0);
3864 previous_is_cased
= 1;
3867 else if (islower(ch
)) {
3868 if (!previous_is_cased
)
3869 return PyBool_FromLong(0);
3870 previous_is_cased
= 1;
3874 previous_is_cased
= 0;
3876 return PyBool_FromLong(cased
);
3880 PyDoc_STRVAR(splitlines__doc__
,
3881 "S.splitlines([keepends]) -> list of strings\n\
3883 Return a list of the lines in S, breaking at line boundaries.\n\
3884 Line breaks are not included in the resulting list unless keepends\n\
3885 is given and true.");
3888 string_splitlines(PyStringObject
*self
, PyObject
*args
)
3890 register Py_ssize_t i
;
3891 register Py_ssize_t j
;
3898 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
3901 data
= PyString_AS_STRING(self
);
3902 len
= PyString_GET_SIZE(self
);
3904 /* This does not use the preallocated list because splitlines is
3905 usually run with hundreds of newlines. The overhead of
3906 switching between PyList_SET_ITEM and append causes about a
3907 2-3% slowdown for that common case. A smarter implementation
3908 could move the if check out, so the SET_ITEMs are done first
3909 and the appends only done when the prealloc buffer is full.
3910 That's too much work for little gain.*/
3912 list
= PyList_New(0);
3916 for (i
= j
= 0; i
< len
; ) {
3919 /* Find a line and append it */
3920 while (i
< len
&& data
[i
] != '\n' && data
[i
] != '\r')
3923 /* Skip the line break reading CRLF as one line break */
3926 if (data
[i
] == '\r' && i
+ 1 < len
&&
3934 SPLIT_APPEND(data
, j
, eol
);
3938 SPLIT_APPEND(data
, j
, len
);
3948 PyDoc_STRVAR(sizeof__doc__
,
3949 "S.__sizeof__() -> size of S in memory, in bytes");
3952 string_sizeof(PyStringObject
*v
)
3955 res
= PyStringObject_SIZE
+ PyString_GET_SIZE(v
) * Py_TYPE(v
)->tp_itemsize
;
3956 return PyInt_FromSsize_t(res
);
3962 #undef PREALLOC_SIZE
3965 string_getnewargs(PyStringObject
*v
)
3967 return Py_BuildValue("(s#)", v
->ob_sval
, Py_SIZE(v
));
3971 #include "stringlib/string_format.h"
3973 PyDoc_STRVAR(format__doc__
,
3974 "S.format(*args, **kwargs) -> unicode\n\
3979 string__format__(PyObject
* self
, PyObject
* args
)
3981 PyObject
*format_spec
;
3982 PyObject
*result
= NULL
;
3983 PyObject
*tmp
= NULL
;
3985 /* If 2.x, convert format_spec to the same type as value */
3986 /* This is to allow things like u''.format('') */
3987 if (!PyArg_ParseTuple(args
, "O:__format__", &format_spec
))
3989 if (!(PyString_Check(format_spec
) || PyUnicode_Check(format_spec
))) {
3990 PyErr_Format(PyExc_TypeError
, "__format__ arg must be str "
3991 "or unicode, not %s", Py_TYPE(format_spec
)->tp_name
);
3994 tmp
= PyObject_Str(format_spec
);
3999 result
= _PyBytes_FormatAdvanced(self
,
4000 PyString_AS_STRING(format_spec
),
4001 PyString_GET_SIZE(format_spec
));
4007 PyDoc_STRVAR(p_format__doc__
,
4008 "S.__format__(format_spec) -> unicode\n\
4014 string_methods
[] = {
4015 /* Counterparts of the obsolete stropmodule functions; except
4016 string.maketrans(). */
4017 {"join", (PyCFunction
)string_join
, METH_O
, join__doc__
},
4018 {"split", (PyCFunction
)string_split
, METH_VARARGS
, split__doc__
},
4019 {"rsplit", (PyCFunction
)string_rsplit
, METH_VARARGS
, rsplit__doc__
},
4020 {"lower", (PyCFunction
)string_lower
, METH_NOARGS
, lower__doc__
},
4021 {"upper", (PyCFunction
)string_upper
, METH_NOARGS
, upper__doc__
},
4022 {"islower", (PyCFunction
)string_islower
, METH_NOARGS
, islower__doc__
},
4023 {"isupper", (PyCFunction
)string_isupper
, METH_NOARGS
, isupper__doc__
},
4024 {"isspace", (PyCFunction
)string_isspace
, METH_NOARGS
, isspace__doc__
},
4025 {"isdigit", (PyCFunction
)string_isdigit
, METH_NOARGS
, isdigit__doc__
},
4026 {"istitle", (PyCFunction
)string_istitle
, METH_NOARGS
, istitle__doc__
},
4027 {"isalpha", (PyCFunction
)string_isalpha
, METH_NOARGS
, isalpha__doc__
},
4028 {"isalnum", (PyCFunction
)string_isalnum
, METH_NOARGS
, isalnum__doc__
},
4029 {"capitalize", (PyCFunction
)string_capitalize
, METH_NOARGS
,
4031 {"count", (PyCFunction
)string_count
, METH_VARARGS
, count__doc__
},
4032 {"endswith", (PyCFunction
)string_endswith
, METH_VARARGS
,
4034 {"partition", (PyCFunction
)string_partition
, METH_O
, partition__doc__
},
4035 {"find", (PyCFunction
)string_find
, METH_VARARGS
, find__doc__
},
4036 {"index", (PyCFunction
)string_index
, METH_VARARGS
, index__doc__
},
4037 {"lstrip", (PyCFunction
)string_lstrip
, METH_VARARGS
, lstrip__doc__
},
4038 {"replace", (PyCFunction
)string_replace
, METH_VARARGS
, replace__doc__
},
4039 {"rfind", (PyCFunction
)string_rfind
, METH_VARARGS
, rfind__doc__
},
4040 {"rindex", (PyCFunction
)string_rindex
, METH_VARARGS
, rindex__doc__
},
4041 {"rstrip", (PyCFunction
)string_rstrip
, METH_VARARGS
, rstrip__doc__
},
4042 {"rpartition", (PyCFunction
)string_rpartition
, METH_O
,
4044 {"startswith", (PyCFunction
)string_startswith
, METH_VARARGS
,
4046 {"strip", (PyCFunction
)string_strip
, METH_VARARGS
, strip__doc__
},
4047 {"swapcase", (PyCFunction
)string_swapcase
, METH_NOARGS
,
4049 {"translate", (PyCFunction
)string_translate
, METH_VARARGS
,
4051 {"title", (PyCFunction
)string_title
, METH_NOARGS
, title__doc__
},
4052 {"ljust", (PyCFunction
)string_ljust
, METH_VARARGS
, ljust__doc__
},
4053 {"rjust", (PyCFunction
)string_rjust
, METH_VARARGS
, rjust__doc__
},
4054 {"center", (PyCFunction
)string_center
, METH_VARARGS
, center__doc__
},
4055 {"zfill", (PyCFunction
)string_zfill
, METH_VARARGS
, zfill__doc__
},
4056 {"format", (PyCFunction
) do_string_format
, METH_VARARGS
| METH_KEYWORDS
, format__doc__
},
4057 {"__format__", (PyCFunction
) string__format__
, METH_VARARGS
, p_format__doc__
},
4058 {"_formatter_field_name_split", (PyCFunction
) formatter_field_name_split
, METH_NOARGS
},
4059 {"_formatter_parser", (PyCFunction
) formatter_parser
, METH_NOARGS
},
4060 {"encode", (PyCFunction
)string_encode
, METH_VARARGS
| METH_KEYWORDS
, encode__doc__
},
4061 {"decode", (PyCFunction
)string_decode
, METH_VARARGS
| METH_KEYWORDS
, decode__doc__
},
4062 {"expandtabs", (PyCFunction
)string_expandtabs
, METH_VARARGS
,
4064 {"splitlines", (PyCFunction
)string_splitlines
, METH_VARARGS
,
4066 {"__sizeof__", (PyCFunction
)string_sizeof
, METH_NOARGS
,
4068 {"__getnewargs__", (PyCFunction
)string_getnewargs
, METH_NOARGS
},
4069 {NULL
, NULL
} /* sentinel */
4073 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
);
4076 string_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4079 static char *kwlist
[] = {"object", 0};
4081 if (type
!= &PyString_Type
)
4082 return str_subtype_new(type
, args
, kwds
);
4083 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|O:str", kwlist
, &x
))
4086 return PyString_FromString("");
4087 return PyObject_Str(x
);
4091 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4093 PyObject
*tmp
, *pnew
;
4096 assert(PyType_IsSubtype(type
, &PyString_Type
));
4097 tmp
= string_new(&PyString_Type
, args
, kwds
);
4100 assert(PyString_CheckExact(tmp
));
4101 n
= PyString_GET_SIZE(tmp
);
4102 pnew
= type
->tp_alloc(type
, n
);
4104 Py_MEMCPY(PyString_AS_STRING(pnew
), PyString_AS_STRING(tmp
), n
+1);
4105 ((PyStringObject
*)pnew
)->ob_shash
=
4106 ((PyStringObject
*)tmp
)->ob_shash
;
4107 ((PyStringObject
*)pnew
)->ob_sstate
= SSTATE_NOT_INTERNED
;
4114 basestring_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4116 PyErr_SetString(PyExc_TypeError
,
4117 "The basestring type cannot be instantiated");
4122 string_mod(PyObject
*v
, PyObject
*w
)
4124 if (!PyString_Check(v
)) {
4125 Py_INCREF(Py_NotImplemented
);
4126 return Py_NotImplemented
;
4128 return PyString_Format(v
, w
);
4131 PyDoc_STRVAR(basestring_doc
,
4132 "Type basestring cannot be instantiated; it is the base for str and unicode.");
4134 static PyNumberMethods string_as_number
= {
4139 string_mod
, /*nb_remainder*/
4143 PyTypeObject PyBaseString_Type
= {
4144 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
4154 0, /* tp_as_number */
4155 0, /* tp_as_sequence */
4156 0, /* tp_as_mapping */
4160 0, /* tp_getattro */
4161 0, /* tp_setattro */
4162 0, /* tp_as_buffer */
4163 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
4164 basestring_doc
, /* tp_doc */
4165 0, /* tp_traverse */
4167 0, /* tp_richcompare */
4168 0, /* tp_weaklistoffset */
4170 0, /* tp_iternext */
4174 &PyBaseObject_Type
, /* tp_base */
4176 0, /* tp_descr_get */
4177 0, /* tp_descr_set */
4178 0, /* tp_dictoffset */
4181 basestring_new
, /* tp_new */
4185 PyDoc_STRVAR(string_doc
,
4186 "str(object) -> string\n\
4188 Return a nice string representation of the object.\n\
4189 If the argument is a string, the return value is the same object.");
4191 PyTypeObject PyString_Type
= {
4192 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
4194 PyStringObject_SIZE
,
4196 string_dealloc
, /* tp_dealloc */
4197 (printfunc
)string_print
, /* tp_print */
4201 string_repr
, /* tp_repr */
4202 &string_as_number
, /* tp_as_number */
4203 &string_as_sequence
, /* tp_as_sequence */
4204 &string_as_mapping
, /* tp_as_mapping */
4205 (hashfunc
)string_hash
, /* tp_hash */
4207 string_str
, /* tp_str */
4208 PyObject_GenericGetAttr
, /* tp_getattro */
4209 0, /* tp_setattro */
4210 &string_as_buffer
, /* tp_as_buffer */
4211 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_CHECKTYPES
|
4212 Py_TPFLAGS_BASETYPE
| Py_TPFLAGS_STRING_SUBCLASS
|
4213 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
4214 string_doc
, /* tp_doc */
4215 0, /* tp_traverse */
4217 (richcmpfunc
)string_richcompare
, /* tp_richcompare */
4218 0, /* tp_weaklistoffset */
4220 0, /* tp_iternext */
4221 string_methods
, /* tp_methods */
4224 &PyBaseString_Type
, /* tp_base */
4226 0, /* tp_descr_get */
4227 0, /* tp_descr_set */
4228 0, /* tp_dictoffset */
4231 string_new
, /* tp_new */
4232 PyObject_Del
, /* tp_free */
4236 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
4238 register PyObject
*v
;
4241 if (w
== NULL
|| !PyString_Check(*pv
)) {
4246 v
= string_concat((PyStringObject
*) *pv
, w
);
4252 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
4254 PyString_Concat(pv
, w
);
4259 /* The following function breaks the notion that strings are immutable:
4260 it changes the size of a string. We get away with this only if there
4261 is only one module referencing the object. You can also think of it
4262 as creating a new string object and destroying the old one, only
4263 more efficiently. In any case, don't use this if the string may
4264 already be known to some other part of the code...
4265 Note that if there's not enough memory to resize the string, the original
4266 string object at *pv is deallocated, *pv is set to NULL, an "out of
4267 memory" exception is set, and -1 is returned. Else (on success) 0 is
4268 returned, and the value in *pv may or may not be the same as on input.
4269 As always, an extra byte is allocated for a trailing \0 byte (newsize
4270 does *not* include that), and a trailing \0 byte is stored.
4274 _PyString_Resize(PyObject
**pv
, Py_ssize_t newsize
)
4276 register PyObject
*v
;
4277 register PyStringObject
*sv
;
4279 if (!PyString_Check(v
) || Py_REFCNT(v
) != 1 || newsize
< 0 ||
4280 PyString_CHECK_INTERNED(v
)) {
4283 PyErr_BadInternalCall();
4286 /* XXX UNREF/NEWREF interface should be more symmetrical */
4288 _Py_ForgetReference(v
);
4290 PyObject_REALLOC((char *)v
, PyStringObject_SIZE
+ newsize
);
4296 _Py_NewReference(*pv
);
4297 sv
= (PyStringObject
*) *pv
;
4298 Py_SIZE(sv
) = newsize
;
4299 sv
->ob_sval
[newsize
] = '\0';
4300 sv
->ob_shash
= -1; /* invalidate cached hash value */
4304 /* Helpers for formatstring */
4306 Py_LOCAL_INLINE(PyObject
*)
4307 getnextarg(PyObject
*args
, Py_ssize_t arglen
, Py_ssize_t
*p_argidx
)
4309 Py_ssize_t argidx
= *p_argidx
;
4310 if (argidx
< arglen
) {
4315 return PyTuple_GetItem(args
, argidx
);
4317 PyErr_SetString(PyExc_TypeError
,
4318 "not enough arguments for format string");
4329 #define F_LJUST (1<<0)
4330 #define F_SIGN (1<<1)
4331 #define F_BLANK (1<<2)
4332 #define F_ALT (1<<3)
4333 #define F_ZERO (1<<4)
4335 Py_LOCAL_INLINE(int)
4336 formatfloat(char *buf
, size_t buflen
, int flags
,
4337 int prec
, int type
, PyObject
*v
)
4343 x
= PyFloat_AsDouble(v
);
4344 if (x
== -1.0 && PyErr_Occurred()) {
4345 PyErr_Format(PyExc_TypeError
, "float argument required, "
4346 "not %.200s", Py_TYPE(v
)->tp_name
);
4352 /* make sure that the decimal representation of precision really does
4353 need at most 10 digits: platforms with sizeof(int) == 8 exist! */
4354 if (prec
> 0x7fffffff) {
4355 PyErr_SetString(PyExc_OverflowError
,
4356 "outrageously large precision "
4357 "for formatted float");
4362 if (type
== 'f' && fabs(x
) >= 1e50
)
4364 /* Worst case length calc to ensure no buffer overrun:
4368 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4369 for any double rep.)
4370 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4373 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4374 len = 1 + 50 + 1 + prec = 52 + prec
4376 If prec=0 the effective precision is 1 (the leading digit is
4377 always given), therefore increase the length by one.
4380 if (((type
== 'g' || type
== 'G') &&
4381 buflen
<= (size_t)10 + (size_t)prec
) ||
4382 (type
== 'f' && buflen
<= (size_t)53 + (size_t)prec
)) {
4383 PyErr_SetString(PyExc_OverflowError
,
4384 "formatted float is too long (precision too large?)");
4387 tmp
= PyOS_double_to_string(x
, type
, prec
,
4388 (flags
&F_ALT
)?Py_DTSF_ALT
:0, NULL
);
4392 if (len
>= buflen
) {
4393 PyErr_SetString(PyExc_OverflowError
,
4394 "formatted float is too long (precision too large?)");
4403 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4404 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4405 * Python's regular ints.
4406 * Return value: a new PyString*, or NULL if error.
4407 * . *pbuf is set to point into it,
4408 * *plen set to the # of chars following that.
4409 * Caller must decref it when done using pbuf.
4410 * The string starting at *pbuf is of the form
4411 * "-"? ("0x" | "0X")? digit+
4412 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4413 * set in flags. The case of hex digits will be correct,
4414 * There will be at least prec digits, zero-filled on the left if
4415 * necessary to get that many.
4416 * val object to be converted
4417 * flags bitmask of format flags; only F_ALT is looked at
4418 * prec minimum number of digits; 0-fill on left if needed
4419 * type a character in [duoxX]; u acts the same as d
4421 * CAUTION: o, x and X conversions on regular ints can never
4422 * produce a '-' sign, but can for Python's unbounded ints.
4425 _PyString_FormatLong(PyObject
*val
, int flags
, int prec
, int type
,
4426 char **pbuf
, int *plen
)
4428 PyObject
*result
= NULL
;
4431 int sign
; /* 1 if '-', else 0 */
4432 int len
; /* number of characters */
4434 int numdigits
; /* len == numnondigits + numdigits */
4435 int numnondigits
= 0;
4440 result
= Py_TYPE(val
)->tp_str(val
);
4443 result
= Py_TYPE(val
)->tp_as_number
->nb_oct(val
);
4448 result
= Py_TYPE(val
)->tp_as_number
->nb_hex(val
);
4451 assert(!"'type' not in [duoxX]");
4456 buf
= PyString_AsString(result
);
4462 /* To modify the string in-place, there can only be one reference. */
4463 if (Py_REFCNT(result
) != 1) {
4464 PyErr_BadInternalCall();
4467 llen
= PyString_Size(result
);
4468 if (llen
> INT_MAX
) {
4469 PyErr_SetString(PyExc_ValueError
, "string too large in _PyString_FormatLong");
4473 if (buf
[len
-1] == 'L') {
4477 sign
= buf
[0] == '-';
4478 numnondigits
+= sign
;
4479 numdigits
= len
- numnondigits
;
4480 assert(numdigits
> 0);
4482 /* Get rid of base marker unless F_ALT */
4483 if ((flags
& F_ALT
) == 0) {
4484 /* Need to skip 0x, 0X or 0. */
4488 assert(buf
[sign
] == '0');
4489 /* If 0 is only digit, leave it alone. */
4490 if (numdigits
> 1) {
4497 assert(buf
[sign
] == '0');
4498 assert(buf
[sign
+ 1] == 'x');
4509 assert(len
== numnondigits
+ numdigits
);
4510 assert(numdigits
> 0);
4513 /* Fill with leading zeroes to meet minimum width. */
4514 if (prec
> numdigits
) {
4515 PyObject
*r1
= PyString_FromStringAndSize(NULL
,
4516 numnondigits
+ prec
);
4522 b1
= PyString_AS_STRING(r1
);
4523 for (i
= 0; i
< numnondigits
; ++i
)
4525 for (i
= 0; i
< prec
- numdigits
; i
++)
4527 for (i
= 0; i
< numdigits
; i
++)
4532 buf
= PyString_AS_STRING(result
);
4533 len
= numnondigits
+ prec
;
4536 /* Fix up case for hex conversions. */
4538 /* Need to convert all lower case letters to upper case.
4539 and need to convert 0x to 0X (and -0x to -0X). */
4540 for (i
= 0; i
< len
; i
++)
4541 if (buf
[i
] >= 'a' && buf
[i
] <= 'x')
4549 Py_LOCAL_INLINE(int)
4550 formatint(char *buf
, size_t buflen
, int flags
,
4551 int prec
, int type
, PyObject
*v
)
4553 /* fmt = '%#.' + `prec` + 'l' + `type`
4554 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4556 char fmt
[64]; /* plenty big enough! */
4560 x
= PyInt_AsLong(v
);
4561 if (x
== -1 && PyErr_Occurred()) {
4562 PyErr_Format(PyExc_TypeError
, "int argument required, not %.200s",
4563 Py_TYPE(v
)->tp_name
);
4566 if (x
< 0 && type
== 'u') {
4569 if (x
< 0 && (type
== 'x' || type
== 'X' || type
== 'o'))
4576 if ((flags
& F_ALT
) &&
4577 (type
== 'x' || type
== 'X')) {
4578 /* When converting under %#x or %#X, there are a number
4579 * of issues that cause pain:
4580 * - when 0 is being converted, the C standard leaves off
4581 * the '0x' or '0X', which is inconsistent with other
4582 * %#x/%#X conversions and inconsistent with Python's
4584 * - there are platforms that violate the standard and
4585 * convert 0 with the '0x' or '0X'
4586 * (Metrowerks, Compaq Tru64)
4587 * - there are platforms that give '0x' when converting
4588 * under %#X, but convert 0 in accordance with the
4589 * standard (OS/2 EMX)
4591 * We can achieve the desired consistency by inserting our
4592 * own '0x' or '0X' prefix, and substituting %x/%X in place
4595 * Note that this is the same approach as used in
4596 * formatint() in unicodeobject.c
4598 PyOS_snprintf(fmt
, sizeof(fmt
), "%s0%c%%.%dl%c",
4599 sign
, type
, prec
, type
);
4602 PyOS_snprintf(fmt
, sizeof(fmt
), "%s%%%s.%dl%c",
4603 sign
, (flags
&F_ALT
) ? "#" : "",
4607 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4608 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4610 if (buflen
<= 14 || buflen
<= (size_t)3 + (size_t)prec
) {
4611 PyErr_SetString(PyExc_OverflowError
,
4612 "formatted integer is too long (precision too large?)");
4616 PyOS_snprintf(buf
, buflen
, fmt
, -x
);
4618 PyOS_snprintf(buf
, buflen
, fmt
, x
);
4619 return (int)strlen(buf
);
4622 Py_LOCAL_INLINE(int)
4623 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
4625 /* presume that the buffer is at least 2 characters long */
4626 if (PyString_Check(v
)) {
4627 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
4631 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
4638 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4640 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4641 chars are formatted. XXX This is a magic number. Each formatting
4642 routine does bounds checking to ensure no overflow, but a better
4643 solution may be to malloc a buffer of appropriate size for each
4644 format. For now, the current solution is sufficient.
4646 #define FORMATBUFLEN (size_t)120
4649 PyString_Format(PyObject
*format
, PyObject
*args
)
4652 Py_ssize_t arglen
, argidx
;
4653 Py_ssize_t reslen
, rescnt
, fmtcnt
;
4655 PyObject
*result
, *orig_args
;
4656 #ifdef Py_USING_UNICODE
4659 PyObject
*dict
= NULL
;
4660 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
4661 PyErr_BadInternalCall();
4665 fmt
= PyString_AS_STRING(format
);
4666 fmtcnt
= PyString_GET_SIZE(format
);
4667 reslen
= rescnt
= fmtcnt
+ 100;
4668 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
4671 res
= PyString_AsString(result
);
4672 if (PyTuple_Check(args
)) {
4673 arglen
= PyTuple_GET_SIZE(args
);
4680 if (Py_TYPE(args
)->tp_as_mapping
&& !PyTuple_Check(args
) &&
4681 !PyObject_TypeCheck(args
, &PyBaseString_Type
))
4683 while (--fmtcnt
>= 0) {
4686 rescnt
= fmtcnt
+ 100;
4688 if (_PyString_Resize(&result
, reslen
) < 0)
4690 res
= PyString_AS_STRING(result
)
4697 /* Got a format specifier */
4699 Py_ssize_t width
= -1;
4705 PyObject
*temp
= NULL
;
4709 char formatbuf
[FORMATBUFLEN
];
4710 /* For format{float,int,char}() */
4711 #ifdef Py_USING_UNICODE
4712 char *fmt_start
= fmt
;
4713 Py_ssize_t argidx_start
= argidx
;
4724 PyErr_SetString(PyExc_TypeError
,
4725 "format requires a mapping");
4731 /* Skip over balanced parentheses */
4732 while (pcount
> 0 && --fmtcnt
>= 0) {
4735 else if (*fmt
== '(')
4739 keylen
= fmt
- keystart
- 1;
4740 if (fmtcnt
< 0 || pcount
> 0) {
4741 PyErr_SetString(PyExc_ValueError
,
4742 "incomplete format key");
4745 key
= PyString_FromStringAndSize(keystart
,
4753 args
= PyObject_GetItem(dict
, key
);
4762 while (--fmtcnt
>= 0) {
4763 switch (c
= *fmt
++) {
4764 case '-': flags
|= F_LJUST
; continue;
4765 case '+': flags
|= F_SIGN
; continue;
4766 case ' ': flags
|= F_BLANK
; continue;
4767 case '#': flags
|= F_ALT
; continue;
4768 case '0': flags
|= F_ZERO
; continue;
4773 v
= getnextarg(args
, arglen
, &argidx
);
4776 if (!PyInt_Check(v
)) {
4777 PyErr_SetString(PyExc_TypeError
,
4781 width
= PyInt_AsLong(v
);
4789 else if (c
>= 0 && isdigit(c
)) {
4791 while (--fmtcnt
>= 0) {
4792 c
= Py_CHARMASK(*fmt
++);
4795 if ((width
*10) / 10 != width
) {
4801 width
= width
*10 + (c
- '0');
4809 v
= getnextarg(args
, arglen
, &argidx
);
4812 if (!PyInt_Check(v
)) {
4818 prec
= PyInt_AsLong(v
);
4824 else if (c
>= 0 && isdigit(c
)) {
4826 while (--fmtcnt
>= 0) {
4827 c
= Py_CHARMASK(*fmt
++);
4830 if ((prec
*10) / 10 != prec
) {
4836 prec
= prec
*10 + (c
- '0');
4841 if (c
== 'h' || c
== 'l' || c
== 'L') {
4847 PyErr_SetString(PyExc_ValueError
,
4848 "incomplete format");
4852 v
= getnextarg(args
, arglen
, &argidx
);
4864 #ifdef Py_USING_UNICODE
4865 if (PyUnicode_Check(v
)) {
4867 argidx
= argidx_start
;
4871 temp
= _PyObject_Str(v
);
4872 #ifdef Py_USING_UNICODE
4873 if (temp
!= NULL
&& PyUnicode_Check(temp
)) {
4876 argidx
= argidx_start
;
4883 temp
= PyObject_Repr(v
);
4886 if (!PyString_Check(temp
)) {
4887 PyErr_SetString(PyExc_TypeError
,
4888 "%s argument has non-string str()");
4892 pbuf
= PyString_AS_STRING(temp
);
4893 len
= PyString_GET_SIZE(temp
);
4894 if (prec
>= 0 && len
> prec
)
4906 if (PyNumber_Check(v
)) {
4907 PyObject
*iobj
=NULL
;
4909 if (PyInt_Check(v
) || (PyLong_Check(v
))) {
4914 iobj
= PyNumber_Int(v
);
4915 if (iobj
==NULL
) iobj
= PyNumber_Long(v
);
4918 if (PyInt_Check(iobj
)) {
4921 len
= formatint(pbuf
,
4923 flags
, prec
, c
, iobj
);
4929 else if (PyLong_Check(iobj
)) {
4933 temp
= _PyString_FormatLong(iobj
, flags
,
4934 prec
, c
, &pbuf
, &ilen
);
4947 PyErr_Format(PyExc_TypeError
,
4948 "%%%c format: a number is required, "
4949 "not %.200s", c
, Py_TYPE(v
)->tp_name
);
4964 len
= formatfloat(pbuf
, sizeof(formatbuf
),
4973 #ifdef Py_USING_UNICODE
4974 if (PyUnicode_Check(v
)) {
4976 argidx
= argidx_start
;
4981 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
4986 PyErr_Format(PyExc_ValueError
,
4987 "unsupported format character '%c' (0x%x) "
4990 (Py_ssize_t
)(fmt
- 1 -
4991 PyString_AsString(format
)));
4995 if (*pbuf
== '-' || *pbuf
== '+') {
4999 else if (flags
& F_SIGN
)
5001 else if (flags
& F_BLANK
)
5008 if (rescnt
- (sign
!= 0) < width
) {
5010 rescnt
= width
+ fmtcnt
+ 100;
5015 return PyErr_NoMemory();
5017 if (_PyString_Resize(&result
, reslen
) < 0) {
5021 res
= PyString_AS_STRING(result
)
5031 if ((flags
& F_ALT
) && (c
== 'x' || c
== 'X')) {
5032 assert(pbuf
[0] == '0');
5033 assert(pbuf
[1] == c
);
5044 if (width
> len
&& !(flags
& F_LJUST
)) {
5048 } while (--width
> len
);
5053 if ((flags
& F_ALT
) &&
5054 (c
== 'x' || c
== 'X')) {
5055 assert(pbuf
[0] == '0');
5056 assert(pbuf
[1] == c
);
5061 Py_MEMCPY(res
, pbuf
, len
);
5064 while (--width
>= len
) {
5068 if (dict
&& (argidx
< arglen
) && c
!= '%') {
5069 PyErr_SetString(PyExc_TypeError
,
5070 "not all arguments converted during string formatting");
5077 if (argidx
< arglen
&& !dict
) {
5078 PyErr_SetString(PyExc_TypeError
,
5079 "not all arguments converted during string formatting");
5085 _PyString_Resize(&result
, reslen
- rescnt
);
5088 #ifdef Py_USING_UNICODE
5094 /* Fiddle args right (remove the first argidx arguments) */
5095 if (PyTuple_Check(orig_args
) && argidx
> 0) {
5097 Py_ssize_t n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
5102 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
5104 PyTuple_SET_ITEM(v
, n
, w
);
5108 Py_INCREF(orig_args
);
5112 /* Take what we have of the result and let the Unicode formatting
5113 function format the rest of the input. */
5114 rescnt
= res
- PyString_AS_STRING(result
);
5115 if (_PyString_Resize(&result
, rescnt
))
5117 fmtcnt
= PyString_GET_SIZE(format
) - \
5118 (fmt
- PyString_AS_STRING(format
));
5119 format
= PyUnicode_Decode(fmt
, fmtcnt
, NULL
, NULL
);
5122 v
= PyUnicode_Format(format
, args
);
5126 /* Paste what we have (result) to what the Unicode formatting
5127 function returned (v) and return the result (or error) */
5128 w
= PyUnicode_Concat(result
, v
);
5133 #endif /* Py_USING_UNICODE */
5144 PyString_InternInPlace(PyObject
**p
)
5146 register PyStringObject
*s
= (PyStringObject
*)(*p
);
5148 if (s
== NULL
|| !PyString_Check(s
))
5149 Py_FatalError("PyString_InternInPlace: strings only please!");
5150 /* If it's a string subclass, we don't really know what putting
5151 it in the interned dict might do. */
5152 if (!PyString_CheckExact(s
))
5154 if (PyString_CHECK_INTERNED(s
))
5156 if (interned
== NULL
) {
5157 interned
= PyDict_New();
5158 if (interned
== NULL
) {
5159 PyErr_Clear(); /* Don't leave an exception */
5163 t
= PyDict_GetItem(interned
, (PyObject
*)s
);
5171 if (PyDict_SetItem(interned
, (PyObject
*)s
, (PyObject
*)s
) < 0) {
5175 /* The two references in interned are not counted by refcnt.
5176 The string deallocator will take care of this */
5178 PyString_CHECK_INTERNED(s
) = SSTATE_INTERNED_MORTAL
;
5182 PyString_InternImmortal(PyObject
**p
)
5184 PyString_InternInPlace(p
);
5185 if (PyString_CHECK_INTERNED(*p
) != SSTATE_INTERNED_IMMORTAL
) {
5186 PyString_CHECK_INTERNED(*p
) = SSTATE_INTERNED_IMMORTAL
;
5193 PyString_InternFromString(const char *cp
)
5195 PyObject
*s
= PyString_FromString(cp
);
5198 PyString_InternInPlace(&s
);
5206 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
5207 Py_XDECREF(characters
[i
]);
5208 characters
[i
] = NULL
;
5210 Py_XDECREF(nullstring
);
5214 void _Py_ReleaseInternedStrings(void)
5219 Py_ssize_t immortal_size
= 0, mortal_size
= 0;
5221 if (interned
== NULL
|| !PyDict_Check(interned
))
5223 keys
= PyDict_Keys(interned
);
5224 if (keys
== NULL
|| !PyList_Check(keys
)) {
5229 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5230 detector, interned strings are not forcibly deallocated; rather, we
5231 give them their stolen references back, and then clear and DECREF
5232 the interned dict. */
5234 n
= PyList_GET_SIZE(keys
);
5235 fprintf(stderr
, "releasing %" PY_FORMAT_SIZE_T
"d interned strings\n",
5237 for (i
= 0; i
< n
; i
++) {
5238 s
= (PyStringObject
*) PyList_GET_ITEM(keys
, i
);
5239 switch (s
->ob_sstate
) {
5240 case SSTATE_NOT_INTERNED
:
5241 /* XXX Shouldn't happen */
5243 case SSTATE_INTERNED_IMMORTAL
:
5245 immortal_size
+= Py_SIZE(s
);
5247 case SSTATE_INTERNED_MORTAL
:
5249 mortal_size
+= Py_SIZE(s
);
5252 Py_FatalError("Inconsistent interned string state.");
5254 s
->ob_sstate
= SSTATE_NOT_INTERNED
;
5256 fprintf(stderr
, "total size of all interned strings: "
5257 "%" PY_FORMAT_SIZE_T
"d/%" PY_FORMAT_SIZE_T
"d "
5258 "mortal/immortal\n", mortal_size
, immortal_size
);
5260 PyDict_Clear(interned
);
5261 Py_DECREF(interned
);