1 /* String (str/bytes) object implementation */
3 #define PY_SSIZE_T_CLEAN
9 int null_strings
, one_strings
;
12 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
13 static PyStringObject
*nullstring
;
15 /* This dictionary holds all interned strings. Note that references to
16 strings in this dictionary are *not* counted in the string's ob_refcnt.
17 When the interned string reaches a refcnt of 0 the string deallocation
18 function will delete the reference from this dictionary.
20 Another way to look at this is to say that the actual reference
21 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23 static PyObject
*interned
;
26 For both PyString_FromString() and PyString_FromStringAndSize(), the
27 parameter `size' denotes number of characters to allocate, not counting any
28 null terminating character.
30 For PyString_FromString(), the parameter `str' points to a null-terminated
31 string containing exactly `size' bytes.
33 For PyString_FromStringAndSize(), the parameter `str' is
34 either NULL or else points to a string containing at least `size' bytes.
35 For PyString_FromStringAndSize(), the string in the `str' parameter does
36 not have to be null-terminated. (Therefore it is safe to construct a
37 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
38 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
39 bytes (setting the last byte to the null terminating character) and you can
40 fill in the data yourself. If `str' is non-NULL then the resulting
41 PyString object must be treated as immutable and you must not fill in nor
42 alter the data yourself, since the strings may be shared.
44 The PyObject member `op->ob_size', which denotes the number of "extra
45 items" in a variable-size object, will contain the number of bytes
46 allocated for string data, not counting the null terminating character. It
47 is therefore equal to the `size' parameter (for
48 PyString_FromStringAndSize()) or the length of the string in the `str'
49 parameter (for PyString_FromString()).
52 PyString_FromStringAndSize(const char *str
, Py_ssize_t size
)
54 register PyStringObject
*op
;
56 PyErr_SetString(PyExc_SystemError
,
57 "Negative size passed to PyString_FromStringAndSize");
60 if (size
== 0 && (op
= nullstring
) != NULL
) {
65 return (PyObject
*)op
;
67 if (size
== 1 && str
!= NULL
&&
68 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
74 return (PyObject
*)op
;
77 if (size
> PY_SSIZE_T_MAX
- sizeof(PyStringObject
)) {
78 PyErr_SetString(PyExc_OverflowError
, "string is too large");
82 /* Inline PyObject_NewVar */
83 op
= (PyStringObject
*)PyObject_MALLOC(sizeof(PyStringObject
) + size
);
85 return PyErr_NoMemory();
86 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
88 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
90 Py_MEMCPY(op
->ob_sval
, str
, size
);
91 op
->ob_sval
[size
] = '\0';
92 /* share short strings */
94 PyObject
*t
= (PyObject
*)op
;
95 PyString_InternInPlace(&t
);
96 op
= (PyStringObject
*)t
;
99 } else if (size
== 1 && str
!= NULL
) {
100 PyObject
*t
= (PyObject
*)op
;
101 PyString_InternInPlace(&t
);
102 op
= (PyStringObject
*)t
;
103 characters
[*str
& UCHAR_MAX
] = op
;
106 return (PyObject
*) op
;
110 PyString_FromString(const char *str
)
112 register size_t size
;
113 register PyStringObject
*op
;
117 if (size
> PY_SSIZE_T_MAX
- sizeof(PyStringObject
)) {
118 PyErr_SetString(PyExc_OverflowError
,
119 "string is too long for a Python string");
122 if (size
== 0 && (op
= nullstring
) != NULL
) {
127 return (PyObject
*)op
;
129 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
134 return (PyObject
*)op
;
137 /* Inline PyObject_NewVar */
138 op
= (PyStringObject
*)PyObject_MALLOC(sizeof(PyStringObject
) + size
);
140 return PyErr_NoMemory();
141 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
143 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
144 Py_MEMCPY(op
->ob_sval
, str
, size
+1);
145 /* share short strings */
147 PyObject
*t
= (PyObject
*)op
;
148 PyString_InternInPlace(&t
);
149 op
= (PyStringObject
*)t
;
152 } else if (size
== 1) {
153 PyObject
*t
= (PyObject
*)op
;
154 PyString_InternInPlace(&t
);
155 op
= (PyStringObject
*)t
;
156 characters
[*str
& UCHAR_MAX
] = op
;
159 return (PyObject
*) op
;
163 PyString_FromFormatV(const char *format
, va_list vargs
)
171 #ifdef VA_LIST_IS_ARRAY
172 Py_MEMCPY(count
, vargs
, sizeof(va_list));
175 __va_copy(count
, vargs
);
180 /* step 1: figure out how large a buffer we need */
181 for (f
= format
; *f
; f
++) {
184 while (*++f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
187 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
188 * they don't affect the amount of space we reserve.
190 if ((*f
== 'l' || *f
== 'z') &&
191 (f
[1] == 'd' || f
[1] == 'u'))
196 (void)va_arg(count
, int);
197 /* fall through... */
201 case 'd': case 'u': case 'i': case 'x':
202 (void) va_arg(count
, int);
203 /* 20 bytes is enough to hold a 64-bit
204 integer. Decimal takes the most space.
205 This isn't enough for octal. */
209 s
= va_arg(count
, char*);
213 (void) va_arg(count
, int);
214 /* maximum 64-bit pointer representation:
216 * so 19 characters is enough.
217 * XXX I count 18 -- what's the extra for?
222 /* if we stumble upon an unknown
223 formatting code, copy the rest of
224 the format string to the output
225 string. (we cannot just skip the
226 code, since there's no way to know
227 what's in the argument list) */
235 /* step 2: fill the buffer */
236 /* Since we've analyzed how much space we need for the worst case,
237 use sprintf directly instead of the slower PyOS_snprintf. */
238 string
= PyString_FromStringAndSize(NULL
, n
);
242 s
= PyString_AsString(string
);
244 for (f
= format
; *f
; f
++) {
250 /* parse the width.precision part (we're only
251 interested in the precision value, if any) */
253 while (isdigit(Py_CHARMASK(*f
)))
254 n
= (n
*10) + *f
++ - '0';
258 while (isdigit(Py_CHARMASK(*f
)))
259 n
= (n
*10) + *f
++ - '0';
261 while (*f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
263 /* handle the long flag, but only for %ld and %lu.
264 others can be added when necessary. */
265 if (*f
== 'l' && (f
[1] == 'd' || f
[1] == 'u')) {
269 /* handle the size_t flag. */
270 if (*f
== 'z' && (f
[1] == 'd' || f
[1] == 'u')) {
277 *s
++ = va_arg(vargs
, int);
281 sprintf(s
, "%ld", va_arg(vargs
, long));
283 sprintf(s
, "%" PY_FORMAT_SIZE_T
"d",
284 va_arg(vargs
, Py_ssize_t
));
286 sprintf(s
, "%d", va_arg(vargs
, int));
292 va_arg(vargs
, unsigned long));
294 sprintf(s
, "%" PY_FORMAT_SIZE_T
"u",
295 va_arg(vargs
, size_t));
298 va_arg(vargs
, unsigned int));
302 sprintf(s
, "%i", va_arg(vargs
, int));
306 sprintf(s
, "%x", va_arg(vargs
, int));
310 p
= va_arg(vargs
, char*);
318 sprintf(s
, "%p", va_arg(vargs
, void*));
319 /* %p is ill-defined: ensure leading 0x. */
322 else if (s
[1] != 'x') {
323 memmove(s
+2, s
, strlen(s
)+1);
342 _PyString_Resize(&string
, s
- PyString_AS_STRING(string
));
347 PyString_FromFormat(const char *format
, ...)
352 #ifdef HAVE_STDARG_PROTOTYPES
353 va_start(vargs
, format
);
357 ret
= PyString_FromFormatV(format
, vargs
);
363 PyObject
*PyString_Decode(const char *s
,
365 const char *encoding
,
370 str
= PyString_FromStringAndSize(s
, size
);
373 v
= PyString_AsDecodedString(str
, encoding
, errors
);
378 PyObject
*PyString_AsDecodedObject(PyObject
*str
,
379 const char *encoding
,
384 if (!PyString_Check(str
)) {
389 if (encoding
== NULL
) {
390 #ifdef Py_USING_UNICODE
391 encoding
= PyUnicode_GetDefaultEncoding();
393 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
398 /* Decode via the codec registry */
399 v
= PyCodec_Decode(str
, encoding
, errors
);
409 PyObject
*PyString_AsDecodedString(PyObject
*str
,
410 const char *encoding
,
415 v
= PyString_AsDecodedObject(str
, encoding
, errors
);
419 #ifdef Py_USING_UNICODE
420 /* Convert Unicode to a string using the default encoding */
421 if (PyUnicode_Check(v
)) {
423 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
429 if (!PyString_Check(v
)) {
430 PyErr_Format(PyExc_TypeError
,
431 "decoder did not return a string object (type=%.400s)",
432 Py_TYPE(v
)->tp_name
);
443 PyObject
*PyString_Encode(const char *s
,
445 const char *encoding
,
450 str
= PyString_FromStringAndSize(s
, size
);
453 v
= PyString_AsEncodedString(str
, encoding
, errors
);
458 PyObject
*PyString_AsEncodedObject(PyObject
*str
,
459 const char *encoding
,
464 if (!PyString_Check(str
)) {
469 if (encoding
== NULL
) {
470 #ifdef Py_USING_UNICODE
471 encoding
= PyUnicode_GetDefaultEncoding();
473 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
478 /* Encode via the codec registry */
479 v
= PyCodec_Encode(str
, encoding
, errors
);
489 PyObject
*PyString_AsEncodedString(PyObject
*str
,
490 const char *encoding
,
495 v
= PyString_AsEncodedObject(str
, encoding
, errors
);
499 #ifdef Py_USING_UNICODE
500 /* Convert Unicode to a string using the default encoding */
501 if (PyUnicode_Check(v
)) {
503 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
509 if (!PyString_Check(v
)) {
510 PyErr_Format(PyExc_TypeError
,
511 "encoder did not return a string object (type=%.400s)",
512 Py_TYPE(v
)->tp_name
);
524 string_dealloc(PyObject
*op
)
526 switch (PyString_CHECK_INTERNED(op
)) {
527 case SSTATE_NOT_INTERNED
:
530 case SSTATE_INTERNED_MORTAL
:
531 /* revive dead object temporarily for DelItem */
533 if (PyDict_DelItem(interned
, op
) != 0)
535 "deletion of interned string failed");
538 case SSTATE_INTERNED_IMMORTAL
:
539 Py_FatalError("Immortal interned string died.");
542 Py_FatalError("Inconsistent interned string state.");
544 Py_TYPE(op
)->tp_free(op
);
547 /* Unescape a backslash-escaped string. If unicode is non-zero,
548 the string is a u-literal. If recode_encoding is non-zero,
549 the string is UTF-8 encoded and should be re-encoded in the
550 specified encoding. */
552 PyObject
*PyString_DecodeEscape(const char *s
,
556 const char *recode_encoding
)
562 Py_ssize_t newlen
= recode_encoding
? 4*len
:len
;
563 v
= PyString_FromStringAndSize((char *)NULL
, newlen
);
566 p
= buf
= PyString_AsString(v
);
571 #ifdef Py_USING_UNICODE
572 if (recode_encoding
&& (*s
& 0x80)) {
578 /* Decode non-ASCII bytes as UTF-8. */
579 while (t
< end
&& (*t
& 0x80)) t
++;
580 u
= PyUnicode_DecodeUTF8(s
, t
- s
, errors
);
583 /* Recode them in target encoding. */
584 w
= PyUnicode_AsEncodedString(
585 u
, recode_encoding
, errors
);
589 /* Append bytes to output buffer. */
590 assert(PyString_Check(w
));
591 r
= PyString_AS_STRING(w
);
592 rn
= PyString_GET_SIZE(w
);
607 PyErr_SetString(PyExc_ValueError
,
608 "Trailing \\ in string");
612 /* XXX This assumes ASCII! */
614 case '\\': *p
++ = '\\'; break;
615 case '\'': *p
++ = '\''; break;
616 case '\"': *p
++ = '\"'; break;
617 case 'b': *p
++ = '\b'; break;
618 case 'f': *p
++ = '\014'; break; /* FF */
619 case 't': *p
++ = '\t'; break;
620 case 'n': *p
++ = '\n'; break;
621 case 'r': *p
++ = '\r'; break;
622 case 'v': *p
++ = '\013'; break; /* VT */
623 case 'a': *p
++ = '\007'; break; /* BEL, not classic C */
624 case '0': case '1': case '2': case '3':
625 case '4': case '5': case '6': case '7':
627 if (s
< end
&& '0' <= *s
&& *s
<= '7') {
628 c
= (c
<<3) + *s
++ - '0';
629 if (s
< end
&& '0' <= *s
&& *s
<= '7')
630 c
= (c
<<3) + *s
++ - '0';
636 isxdigit(Py_CHARMASK(s
[0])) &&
637 isxdigit(Py_CHARMASK(s
[1])))
660 if (!errors
|| strcmp(errors
, "strict") == 0) {
661 PyErr_SetString(PyExc_ValueError
,
662 "invalid \\x escape");
665 if (strcmp(errors
, "replace") == 0) {
667 } else if (strcmp(errors
, "ignore") == 0)
670 PyErr_Format(PyExc_ValueError
,
672 "unknown error handling code: %.400s",
676 #ifndef Py_USING_UNICODE
681 PyErr_SetString(PyExc_ValueError
,
682 "Unicode escapes not legal "
683 "when Unicode disabled");
690 goto non_esc
; /* an arbitrary number of unescaped
691 UTF-8 bytes may follow. */
695 _PyString_Resize(&v
, p
- buf
);
702 /* -------------------------------------------------------------------- */
706 string_getsize(register PyObject
*op
)
710 if (PyString_AsStringAndSize(op
, &s
, &len
))
715 static /*const*/ char *
716 string_getbuffer(register PyObject
*op
)
720 if (PyString_AsStringAndSize(op
, &s
, &len
))
726 PyString_Size(register PyObject
*op
)
728 if (!PyString_Check(op
))
729 return string_getsize(op
);
734 PyString_AsString(register PyObject
*op
)
736 if (!PyString_Check(op
))
737 return string_getbuffer(op
);
738 return ((PyStringObject
*)op
) -> ob_sval
;
742 PyString_AsStringAndSize(register PyObject
*obj
,
744 register Py_ssize_t
*len
)
747 PyErr_BadInternalCall();
751 if (!PyString_Check(obj
)) {
752 #ifdef Py_USING_UNICODE
753 if (PyUnicode_Check(obj
)) {
754 obj
= _PyUnicode_AsDefaultEncodedString(obj
, NULL
);
761 PyErr_Format(PyExc_TypeError
,
762 "expected string or Unicode object, "
763 "%.200s found", Py_TYPE(obj
)->tp_name
);
768 *s
= PyString_AS_STRING(obj
);
770 *len
= PyString_GET_SIZE(obj
);
771 else if (strlen(*s
) != (size_t)PyString_GET_SIZE(obj
)) {
772 PyErr_SetString(PyExc_TypeError
,
773 "expected string without null bytes");
779 /* -------------------------------------------------------------------- */
782 #include "stringlib/stringdefs.h"
783 #include "stringlib/fastsearch.h"
785 #include "stringlib/count.h"
786 #include "stringlib/find.h"
787 #include "stringlib/partition.h"
789 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
790 #include "stringlib/localeutil.h"
795 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
797 Py_ssize_t i
, str_len
;
801 /* XXX Ought to check for interrupts when writing long strings */
802 if (! PyString_CheckExact(op
)) {
804 /* A str subclass may have its own __str__ method. */
805 op
= (PyStringObject
*) PyObject_Str((PyObject
*)op
);
808 ret
= string_print(op
, fp
, flags
);
812 if (flags
& Py_PRINT_RAW
) {
813 char *data
= op
->ob_sval
;
814 Py_ssize_t size
= Py_SIZE(op
);
815 Py_BEGIN_ALLOW_THREADS
816 while (size
> INT_MAX
) {
817 /* Very long strings cannot be written atomically.
818 * But don't write exactly INT_MAX bytes at a time
819 * to avoid memory alignment issues.
821 const int chunk_size
= INT_MAX
& ~0x3FFF;
822 fwrite(data
, 1, chunk_size
, fp
);
827 if (size
) fwrite(data
, (int)size
, 1, fp
);
829 fwrite(data
, 1, (int)size
, fp
);
835 /* figure out which quote to use; single is preferred */
837 if (memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
838 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
841 str_len
= Py_SIZE(op
);
842 Py_BEGIN_ALLOW_THREADS
844 for (i
= 0; i
< str_len
; i
++) {
845 /* Since strings are immutable and the caller should have a
846 reference, accessing the internal buffer should not be an issue
847 with the GIL released. */
849 if (c
== quote
|| c
== '\\')
850 fprintf(fp
, "\\%c", c
);
857 else if (c
< ' ' || c
>= 0x7f)
858 fprintf(fp
, "\\x%02x", c
& 0xff);
868 PyString_Repr(PyObject
*obj
, int smartquotes
)
870 register PyStringObject
* op
= (PyStringObject
*) obj
;
871 size_t newsize
= 2 + 4 * Py_SIZE(op
);
873 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 != Py_SIZE(op
)) {
874 PyErr_SetString(PyExc_OverflowError
,
875 "string is too large to make repr");
878 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
883 register Py_ssize_t i
;
888 /* figure out which quote to use; single is preferred */
891 memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
892 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
895 p
= PyString_AS_STRING(v
);
897 for (i
= 0; i
< Py_SIZE(op
); i
++) {
898 /* There's at least enough room for a hex escape
899 and a closing quote. */
900 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 5);
902 if (c
== quote
|| c
== '\\')
903 *p
++ = '\\', *p
++ = c
;
905 *p
++ = '\\', *p
++ = 't';
907 *p
++ = '\\', *p
++ = 'n';
909 *p
++ = '\\', *p
++ = 'r';
910 else if (c
< ' ' || c
>= 0x7f) {
911 /* For performance, we don't want to call
912 PyOS_snprintf here (extra layers of
914 sprintf(p
, "\\x%02x", c
& 0xff);
920 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 1);
924 &v
, (p
- PyString_AS_STRING(v
)));
930 string_repr(PyObject
*op
)
932 return PyString_Repr(op
, 1);
936 string_str(PyObject
*s
)
938 assert(PyString_Check(s
));
939 if (PyString_CheckExact(s
)) {
944 /* Subtype -- return genuine string with the same value. */
945 PyStringObject
*t
= (PyStringObject
*) s
;
946 return PyString_FromStringAndSize(t
->ob_sval
, Py_SIZE(t
));
951 string_length(PyStringObject
*a
)
957 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
959 register Py_ssize_t size
;
960 register PyStringObject
*op
;
961 if (!PyString_Check(bb
)) {
962 #ifdef Py_USING_UNICODE
963 if (PyUnicode_Check(bb
))
964 return PyUnicode_Concat((PyObject
*)a
, bb
);
966 if (PyByteArray_Check(bb
))
967 return PyByteArray_Concat((PyObject
*)a
, bb
);
968 PyErr_Format(PyExc_TypeError
,
969 "cannot concatenate 'str' and '%.200s' objects",
970 Py_TYPE(bb
)->tp_name
);
973 #define b ((PyStringObject *)bb)
974 /* Optimize cases with empty left or right operand */
975 if ((Py_SIZE(a
) == 0 || Py_SIZE(b
) == 0) &&
976 PyString_CheckExact(a
) && PyString_CheckExact(b
)) {
977 if (Py_SIZE(a
) == 0) {
982 return (PyObject
*)a
;
984 size
= Py_SIZE(a
) + Py_SIZE(b
);
985 /* Check that string sizes are not negative, to prevent an
986 overflow in cases where we are passed incorrectly-created
987 strings with negative lengths (due to a bug in other code).
989 if (Py_SIZE(a
) < 0 || Py_SIZE(b
) < 0 ||
990 Py_SIZE(a
) > PY_SSIZE_T_MAX
- Py_SIZE(b
)) {
991 PyErr_SetString(PyExc_OverflowError
,
992 "strings are too large to concat");
996 /* Inline PyObject_NewVar */
997 if (size
> PY_SSIZE_T_MAX
- sizeof(PyStringObject
)) {
998 PyErr_SetString(PyExc_OverflowError
,
999 "strings are too large to concat");
1002 op
= (PyStringObject
*)PyObject_MALLOC(sizeof(PyStringObject
) + size
);
1004 return PyErr_NoMemory();
1005 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1007 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1008 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1009 Py_MEMCPY(op
->ob_sval
+ Py_SIZE(a
), b
->ob_sval
, Py_SIZE(b
));
1010 op
->ob_sval
[size
] = '\0';
1011 return (PyObject
*) op
;
1016 string_repeat(register PyStringObject
*a
, register Py_ssize_t n
)
1018 register Py_ssize_t i
;
1019 register Py_ssize_t j
;
1020 register Py_ssize_t size
;
1021 register PyStringObject
*op
;
1025 /* watch out for overflows: the size can overflow int,
1026 * and the # of bytes needed can overflow size_t
1028 size
= Py_SIZE(a
) * n
;
1029 if (n
&& size
/ n
!= Py_SIZE(a
)) {
1030 PyErr_SetString(PyExc_OverflowError
,
1031 "repeated string is too long");
1034 if (size
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1036 return (PyObject
*)a
;
1038 nbytes
= (size_t)size
;
1039 if (nbytes
+ sizeof(PyStringObject
) <= nbytes
) {
1040 PyErr_SetString(PyExc_OverflowError
,
1041 "repeated string is too long");
1044 op
= (PyStringObject
*)
1045 PyObject_MALLOC(sizeof(PyStringObject
) + nbytes
);
1047 return PyErr_NoMemory();
1048 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1050 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1051 op
->ob_sval
[size
] = '\0';
1052 if (Py_SIZE(a
) == 1 && n
> 0) {
1053 memset(op
->ob_sval
, a
->ob_sval
[0] , n
);
1054 return (PyObject
*) op
;
1058 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1062 j
= (i
<= size
-i
) ? i
: size
-i
;
1063 Py_MEMCPY(op
->ob_sval
+i
, op
->ob_sval
, j
);
1066 return (PyObject
*) op
;
1069 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1072 string_slice(register PyStringObject
*a
, register Py_ssize_t i
,
1073 register Py_ssize_t j
)
1074 /* j -- may be negative! */
1079 j
= 0; /* Avoid signed/unsigned bug in next line */
1082 if (i
== 0 && j
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1083 /* It's the same as a */
1085 return (PyObject
*)a
;
1089 return PyString_FromStringAndSize(a
->ob_sval
+ i
, j
-i
);
1093 string_contains(PyObject
*str_obj
, PyObject
*sub_obj
)
1095 if (!PyString_CheckExact(sub_obj
)) {
1096 #ifdef Py_USING_UNICODE
1097 if (PyUnicode_Check(sub_obj
))
1098 return PyUnicode_Contains(str_obj
, sub_obj
);
1100 if (!PyString_Check(sub_obj
)) {
1101 PyErr_Format(PyExc_TypeError
,
1102 "'in <string>' requires string as left operand, "
1103 "not %.200s", Py_TYPE(sub_obj
)->tp_name
);
1108 return stringlib_contains_obj(str_obj
, sub_obj
);
1112 string_item(PyStringObject
*a
, register Py_ssize_t i
)
1116 if (i
< 0 || i
>= Py_SIZE(a
)) {
1117 PyErr_SetString(PyExc_IndexError
, "string index out of range");
1120 pchar
= a
->ob_sval
[i
];
1121 v
= (PyObject
*)characters
[pchar
& UCHAR_MAX
];
1123 v
= PyString_FromStringAndSize(&pchar
, 1);
1134 string_richcompare(PyStringObject
*a
, PyStringObject
*b
, int op
)
1137 Py_ssize_t len_a
, len_b
;
1141 /* Make sure both arguments are strings. */
1142 if (!(PyString_Check(a
) && PyString_Check(b
))) {
1143 result
= Py_NotImplemented
;
1148 case Py_EQ
:case Py_LE
:case Py_GE
:
1151 case Py_NE
:case Py_LT
:case Py_GT
:
1157 /* Supporting Py_NE here as well does not save
1158 much time, since Py_NE is rarely used. */
1159 if (Py_SIZE(a
) == Py_SIZE(b
)
1160 && (a
->ob_sval
[0] == b
->ob_sval
[0]
1161 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0)) {
1168 len_a
= Py_SIZE(a
); len_b
= Py_SIZE(b
);
1169 min_len
= (len_a
< len_b
) ? len_a
: len_b
;
1171 c
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
1173 c
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
1177 c
= (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
1179 case Py_LT
: c
= c
< 0; break;
1180 case Py_LE
: c
= c
<= 0; break;
1181 case Py_EQ
: assert(0); break; /* unreachable */
1182 case Py_NE
: c
= c
!= 0; break;
1183 case Py_GT
: c
= c
> 0; break;
1184 case Py_GE
: c
= c
>= 0; break;
1186 result
= Py_NotImplemented
;
1189 result
= c
? Py_True
: Py_False
;
1196 _PyString_Eq(PyObject
*o1
, PyObject
*o2
)
1198 PyStringObject
*a
= (PyStringObject
*) o1
;
1199 PyStringObject
*b
= (PyStringObject
*) o2
;
1200 return Py_SIZE(a
) == Py_SIZE(b
)
1201 && *a
->ob_sval
== *b
->ob_sval
1202 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0;
1206 string_hash(PyStringObject
*a
)
1208 register Py_ssize_t len
;
1209 register unsigned char *p
;
1212 if (a
->ob_shash
!= -1)
1215 p
= (unsigned char *) a
->ob_sval
;
1218 x
= (1000003*x
) ^ *p
++;
1227 string_subscript(PyStringObject
* self
, PyObject
* item
)
1229 if (PyIndex_Check(item
)) {
1230 Py_ssize_t i
= PyNumber_AsSsize_t(item
, PyExc_IndexError
);
1231 if (i
== -1 && PyErr_Occurred())
1234 i
+= PyString_GET_SIZE(self
);
1235 return string_item(self
, i
);
1237 else if (PySlice_Check(item
)) {
1238 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
1243 if (PySlice_GetIndicesEx((PySliceObject
*)item
,
1244 PyString_GET_SIZE(self
),
1245 &start
, &stop
, &step
, &slicelength
) < 0) {
1249 if (slicelength
<= 0) {
1250 return PyString_FromStringAndSize("", 0);
1252 else if (start
== 0 && step
== 1 &&
1253 slicelength
== PyString_GET_SIZE(self
) &&
1254 PyString_CheckExact(self
)) {
1256 return (PyObject
*)self
;
1258 else if (step
== 1) {
1259 return PyString_FromStringAndSize(
1260 PyString_AS_STRING(self
) + start
,
1264 source_buf
= PyString_AsString((PyObject
*)self
);
1265 result_buf
= (char *)PyMem_Malloc(slicelength
);
1266 if (result_buf
== NULL
)
1267 return PyErr_NoMemory();
1269 for (cur
= start
, i
= 0; i
< slicelength
;
1271 result_buf
[i
] = source_buf
[cur
];
1274 result
= PyString_FromStringAndSize(result_buf
,
1276 PyMem_Free(result_buf
);
1281 PyErr_Format(PyExc_TypeError
,
1282 "string indices must be integers, not %.200s",
1283 Py_TYPE(item
)->tp_name
);
1289 string_buffer_getreadbuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1292 PyErr_SetString(PyExc_SystemError
,
1293 "accessing non-existent string segment");
1296 *ptr
= (void *)self
->ob_sval
;
1297 return Py_SIZE(self
);
1301 string_buffer_getwritebuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1303 PyErr_SetString(PyExc_TypeError
,
1304 "Cannot use string as modifiable buffer");
1309 string_buffer_getsegcount(PyStringObject
*self
, Py_ssize_t
*lenp
)
1312 *lenp
= Py_SIZE(self
);
1317 string_buffer_getcharbuf(PyStringObject
*self
, Py_ssize_t index
, const char **ptr
)
1320 PyErr_SetString(PyExc_SystemError
,
1321 "accessing non-existent string segment");
1324 *ptr
= self
->ob_sval
;
1325 return Py_SIZE(self
);
1329 string_buffer_getbuffer(PyStringObject
*self
, Py_buffer
*view
, int flags
)
1331 return PyBuffer_FillInfo(view
, (void *)self
->ob_sval
, Py_SIZE(self
),
1335 static PySequenceMethods string_as_sequence
= {
1336 (lenfunc
)string_length
, /*sq_length*/
1337 (binaryfunc
)string_concat
, /*sq_concat*/
1338 (ssizeargfunc
)string_repeat
, /*sq_repeat*/
1339 (ssizeargfunc
)string_item
, /*sq_item*/
1340 (ssizessizeargfunc
)string_slice
, /*sq_slice*/
1343 (objobjproc
)string_contains
/*sq_contains*/
1346 static PyMappingMethods string_as_mapping
= {
1347 (lenfunc
)string_length
,
1348 (binaryfunc
)string_subscript
,
1352 static PyBufferProcs string_as_buffer
= {
1353 (readbufferproc
)string_buffer_getreadbuf
,
1354 (writebufferproc
)string_buffer_getwritebuf
,
1355 (segcountproc
)string_buffer_getsegcount
,
1356 (charbufferproc
)string_buffer_getcharbuf
,
1357 (getbufferproc
)string_buffer_getbuffer
,
1364 #define RIGHTSTRIP 1
1367 /* Arrays indexed by above */
1368 static const char *stripformat
[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1370 #define STRIPNAME(i) (stripformat[i]+3)
1373 /* Don't call if length < 2 */
1374 #define Py_STRING_MATCH(target, offset, pattern, length) \
1375 (target[offset] == pattern[0] && \
1376 target[offset+length-1] == pattern[length-1] && \
1377 !memcmp(target+offset+1, pattern+1, length-2) )
1380 /* Overallocate the initial list to reduce the number of reallocs for small
1381 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1382 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1383 text (roughly 11 words per line) and field delimited data (usually 1-10
1384 fields). For large strings the split algorithms are bandwidth limited
1385 so increasing the preallocation likely will not improve things.*/
1387 #define MAX_PREALLOC 12
1389 /* 5 splits gives 6 elements */
1390 #define PREALLOC_SIZE(maxsplit) \
1391 (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
1393 #define SPLIT_APPEND(data, left, right) \
1394 str = PyString_FromStringAndSize((data) + (left), \
1395 (right) - (left)); \
1398 if (PyList_Append(list, str)) { \
1405 #define SPLIT_ADD(data, left, right) { \
1406 str = PyString_FromStringAndSize((data) + (left), \
1407 (right) - (left)); \
1410 if (count < MAX_PREALLOC) { \
1411 PyList_SET_ITEM(list, count, str); \
1413 if (PyList_Append(list, str)) { \
1422 /* Always force the list to the expected size. */
1423 #define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
1425 #define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
1426 #define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
1427 #define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
1428 #define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1430 Py_LOCAL_INLINE(PyObject
*)
1431 split_whitespace(PyStringObject
*self
, Py_ssize_t len
, Py_ssize_t maxsplit
)
1433 const char *s
= PyString_AS_STRING(self
);
1434 Py_ssize_t i
, j
, count
=0;
1436 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1443 while (maxsplit
-- > 0) {
1444 SKIP_SPACE(s
, i
, len
);
1447 SKIP_NONSPACE(s
, i
, len
);
1448 if (j
== 0 && i
== len
&& PyString_CheckExact(self
)) {
1449 /* No whitespace in self, so just use it as list[0] */
1451 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1459 /* Only occurs when maxsplit was reached */
1460 /* Skip any remaining whitespace and copy to end of string */
1461 SKIP_SPACE(s
, i
, len
);
1463 SPLIT_ADD(s
, i
, len
);
1465 FIX_PREALLOC_SIZE(list
);
1472 Py_LOCAL_INLINE(PyObject
*)
1473 split_char(PyStringObject
*self
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
1475 const char *s
= PyString_AS_STRING(self
);
1476 register Py_ssize_t i
, j
, count
=0;
1478 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
1484 while ((j
< len
) && (maxcount
-- > 0)) {
1486 /* I found that using memchr makes no difference */
1494 if (i
== 0 && count
== 0 && PyString_CheckExact(self
)) {
1495 /* ch not in self, so just use self as list[0] */
1497 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1500 else if (i
<= len
) {
1501 SPLIT_ADD(s
, i
, len
);
1503 FIX_PREALLOC_SIZE(list
);
1511 PyDoc_STRVAR(split__doc__
,
1512 "S.split([sep [,maxsplit]]) -> list of strings\n\
1514 Return a list of the words in the string S, using sep as the\n\
1515 delimiter string. If maxsplit is given, at most maxsplit\n\
1516 splits are done. If sep is not specified or is None, any\n\
1517 whitespace string is a separator and empty strings are removed\n\
1521 string_split(PyStringObject
*self
, PyObject
*args
)
1523 Py_ssize_t len
= PyString_GET_SIZE(self
), n
, i
, j
;
1524 Py_ssize_t maxsplit
= -1, count
=0;
1525 const char *s
= PyString_AS_STRING(self
), *sub
;
1526 PyObject
*list
, *str
, *subobj
= Py_None
;
1531 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
1534 maxsplit
= PY_SSIZE_T_MAX
;
1535 if (subobj
== Py_None
)
1536 return split_whitespace(self
, len
, maxsplit
);
1537 if (PyString_Check(subobj
)) {
1538 sub
= PyString_AS_STRING(subobj
);
1539 n
= PyString_GET_SIZE(subobj
);
1541 #ifdef Py_USING_UNICODE
1542 else if (PyUnicode_Check(subobj
))
1543 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
1545 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1549 PyErr_SetString(PyExc_ValueError
, "empty separator");
1553 return split_char(self
, len
, sub
[0], maxsplit
);
1555 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1561 while (maxsplit
-- > 0) {
1562 pos
= fastsearch(s
+i
, len
-i
, sub
, n
, FAST_SEARCH
);
1571 while ((j
+n
<= len
) && (maxsplit
-- > 0)) {
1572 for (; j
+n
<= len
; j
++) {
1573 if (Py_STRING_MATCH(s
, j
, sub
, n
)) {
1581 SPLIT_ADD(s
, i
, len
);
1582 FIX_PREALLOC_SIZE(list
);
1590 PyDoc_STRVAR(partition__doc__
,
1591 "S.partition(sep) -> (head, sep, tail)\n\
1593 Searches for the separator sep in S, and returns the part before it,\n\
1594 the separator itself, and the part after it. If the separator is not\n\
1595 found, returns S and two empty strings.");
1598 string_partition(PyStringObject
*self
, PyObject
*sep_obj
)
1603 if (PyString_Check(sep_obj
)) {
1604 sep
= PyString_AS_STRING(sep_obj
);
1605 sep_len
= PyString_GET_SIZE(sep_obj
);
1607 #ifdef Py_USING_UNICODE
1608 else if (PyUnicode_Check(sep_obj
))
1609 return PyUnicode_Partition((PyObject
*) self
, sep_obj
);
1611 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1614 return stringlib_partition(
1616 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1617 sep_obj
, sep
, sep_len
1621 PyDoc_STRVAR(rpartition__doc__
,
1622 "S.rpartition(sep) -> (tail, sep, head)\n\
1624 Searches for the separator sep in S, starting at the end of S, and returns\n\
1625 the part before it, the separator itself, and the part after it. If the\n\
1626 separator is not found, returns two empty strings and S.");
/* Implementation of S.rpartition(sep): split S around the separator,
   searching from the end of S.
   The separator is handled according to its type:
     - a str: its internal buffer and size are used directly;
     - a unicode object: the call is delegated to the unicode
       implementation;
     - anything else: a character buffer is requested through
       PyObject_AsCharBuffer().
   The byte-string cases are then handed to stringlib_rpartition(). */
1629 string_rpartition(PyStringObject
*self
, PyObject
*sep_obj
)
1634 if (PyString_Check(sep_obj
)) {
1635 sep
= PyString_AS_STRING(sep_obj
);
1636 sep_len
= PyString_GET_SIZE(sep_obj
);
1638 #ifdef Py_USING_UNICODE
/* Unicode separator: must use the right-to-left variant.  Calling
   PyUnicode_Partition() here made rpartition() behave like partition()
   whenever the separator was a unicode object. */
1639 else if (PyUnicode_Check(sep_obj
))
1640 return PyUnicode_RPartition((PyObject
*) self
, sep_obj
);
1642 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1645 return stringlib_rpartition(
1647 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1648 sep_obj
, sep
, sep_len
1652 Py_LOCAL_INLINE(PyObject
*)
1653 rsplit_whitespace(PyStringObject
*self
, Py_ssize_t len
, Py_ssize_t maxsplit
)
1655 const char *s
= PyString_AS_STRING(self
);
1656 Py_ssize_t i
, j
, count
=0;
1658 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1665 while (maxsplit
-- > 0) {
1669 RSKIP_NONSPACE(s
, i
);
1670 if (j
== len
-1 && i
< 0 && PyString_CheckExact(self
)) {
1671 /* No whitespace in self, so just use it as list[0] */
1673 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1677 SPLIT_ADD(s
, i
+ 1, j
+ 1);
1680 /* Only occurs when maxsplit was reached */
1681 /* Skip any remaining whitespace and copy to beginning of string */
1684 SPLIT_ADD(s
, 0, i
+ 1);
1687 FIX_PREALLOC_SIZE(list
);
1688 if (PyList_Reverse(list
) < 0)
1696 Py_LOCAL_INLINE(PyObject
*)
1697 rsplit_char(PyStringObject
*self
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
1699 const char *s
= PyString_AS_STRING(self
);
1700 register Py_ssize_t i
, j
, count
=0;
1702 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
1708 while ((i
>= 0) && (maxcount
-- > 0)) {
1709 for (; i
>= 0; i
--) {
1711 SPLIT_ADD(s
, i
+ 1, j
+ 1);
1717 if (i
< 0 && count
== 0 && PyString_CheckExact(self
)) {
1718 /* ch not in self, so just use self as list[0] */
1720 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1724 SPLIT_ADD(s
, 0, j
+ 1);
1726 FIX_PREALLOC_SIZE(list
);
1727 if (PyList_Reverse(list
) < 0)
1736 PyDoc_STRVAR(rsplit__doc__
,
1737 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1739 Return a list of the words in the string S, using sep as the\n\
1740 delimiter string, starting at the end of the string and working\n\
1741 to the front. If maxsplit is given, at most maxsplit splits are\n\
1742 done. If sep is not specified or is None, any whitespace string\n\
1746 string_rsplit(PyStringObject
*self
, PyObject
*args
)
1748 Py_ssize_t len
= PyString_GET_SIZE(self
), n
, i
, j
;
1749 Py_ssize_t maxsplit
= -1, count
=0;
1750 const char *s
, *sub
;
1751 PyObject
*list
, *str
, *subobj
= Py_None
;
1753 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
1756 maxsplit
= PY_SSIZE_T_MAX
;
1757 if (subobj
== Py_None
)
1758 return rsplit_whitespace(self
, len
, maxsplit
);
1759 if (PyString_Check(subobj
)) {
1760 sub
= PyString_AS_STRING(subobj
);
1761 n
= PyString_GET_SIZE(subobj
);
1763 #ifdef Py_USING_UNICODE
1764 else if (PyUnicode_Check(subobj
))
1765 return PyUnicode_RSplit((PyObject
*)self
, subobj
, maxsplit
);
1767 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1771 PyErr_SetString(PyExc_ValueError
, "empty separator");
1775 return rsplit_char(self
, len
, sub
[0], maxsplit
);
1777 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1784 s
= PyString_AS_STRING(self
);
1785 while ( (i
>= 0) && (maxsplit
-- > 0) ) {
1787 if (Py_STRING_MATCH(s
, i
, sub
, n
)) {
1788 SPLIT_ADD(s
, i
+ n
, j
);
1796 FIX_PREALLOC_SIZE(list
);
1797 if (PyList_Reverse(list
) < 0)
1807 PyDoc_STRVAR(join__doc__
,
1808 "S.join(sequence) -> string\n\
1810 Return a string which is the concatenation of the strings in the\n\
1811 sequence. The separator between elements is S.");
1814 string_join(PyStringObject
*self
, PyObject
*orig
)
1816 char *sep
= PyString_AS_STRING(self
);
1817 const Py_ssize_t seplen
= PyString_GET_SIZE(self
);
1818 PyObject
*res
= NULL
;
1820 Py_ssize_t seqlen
= 0;
1823 PyObject
*seq
, *item
;
1825 seq
= PySequence_Fast(orig
, "");
1830 seqlen
= PySequence_Size(seq
);
1833 return PyString_FromString("");
1836 item
= PySequence_Fast_GET_ITEM(seq
, 0);
1837 if (PyString_CheckExact(item
) || PyUnicode_CheckExact(item
)) {
1844 /* There are at least two things to join, or else we have a subclass
1845 * of the builtin types in the sequence.
1846 * Do a pre-pass to figure out the total amount of space we'll
1847 * need (sz), see whether any argument is absurd, and defer to
1848 * the Unicode join if appropriate.
1850 for (i
= 0; i
< seqlen
; i
++) {
1851 const size_t old_sz
= sz
;
1852 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1853 if (!PyString_Check(item
)){
1854 #ifdef Py_USING_UNICODE
1855 if (PyUnicode_Check(item
)) {
1856 /* Defer to Unicode join.
1857 * CAUTION: There's no gurantee that the
1858 * original sequence can be iterated over
1859 * again, so we must pass seq here.
1862 result
= PyUnicode_Join((PyObject
*)self
, seq
);
1867 PyErr_Format(PyExc_TypeError
,
1868 "sequence item %zd: expected string,"
1870 i
, Py_TYPE(item
)->tp_name
);
1874 sz
+= PyString_GET_SIZE(item
);
1877 if (sz
< old_sz
|| sz
> PY_SSIZE_T_MAX
) {
1878 PyErr_SetString(PyExc_OverflowError
,
1879 "join() result is too long for a Python string");
1885 /* Allocate result space. */
1886 res
= PyString_FromStringAndSize((char*)NULL
, sz
);
1892 /* Catenate everything. */
1893 p
= PyString_AS_STRING(res
);
1894 for (i
= 0; i
< seqlen
; ++i
) {
1896 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1897 n
= PyString_GET_SIZE(item
);
1898 Py_MEMCPY(p
, PyString_AS_STRING(item
), n
);
1900 if (i
< seqlen
- 1) {
1901 Py_MEMCPY(p
, sep
, seplen
);
1911 _PyString_Join(PyObject
*sep
, PyObject
*x
)
1913 assert(sep
!= NULL
&& PyString_Check(sep
));
1915 return string_join((PyStringObject
*)sep
, x
);
1918 Py_LOCAL_INLINE(void)
1919 string_adjust_indices(Py_ssize_t
*start
, Py_ssize_t
*end
, Py_ssize_t len
)
1933 Py_LOCAL_INLINE(Py_ssize_t
)
1934 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
1939 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1940 PyObject
*obj_start
=Py_None
, *obj_end
=Py_None
;
1942 if (!PyArg_ParseTuple(args
, "O|OO:find/rfind/index/rindex", &subobj
,
1943 &obj_start
, &obj_end
))
1945 /* To support None in "start" and "end" arguments, meaning
1946 the same as if they were not passed.
1948 if (obj_start
!= Py_None
)
1949 if (!_PyEval_SliceIndex(obj_start
, &start
))
1951 if (obj_end
!= Py_None
)
1952 if (!_PyEval_SliceIndex(obj_end
, &end
))
1955 if (PyString_Check(subobj
)) {
1956 sub
= PyString_AS_STRING(subobj
);
1957 sub_len
= PyString_GET_SIZE(subobj
);
1959 #ifdef Py_USING_UNICODE
1960 else if (PyUnicode_Check(subobj
))
1961 return PyUnicode_Find(
1962 (PyObject
*)self
, subobj
, start
, end
, dir
);
1964 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
1965 /* XXX - the "expected a character buffer object" is pretty
1966 confusing for a non-expert. remap to something else ? */
1970 return stringlib_find_slice(
1971 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1972 sub
, sub_len
, start
, end
);
1974 return stringlib_rfind_slice(
1975 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1976 sub
, sub_len
, start
, end
);
1980 PyDoc_STRVAR(find__doc__
,
1981 "S.find(sub [,start [,end]]) -> int\n\
1983 Return the lowest index in S where substring sub is found,\n\
1984 such that sub is contained within s[start:end]. Optional\n\
1985 arguments start and end are interpreted as in slice notation.\n\
1987 Return -1 on failure.");
1990 string_find(PyStringObject
*self
, PyObject
*args
)
1992 Py_ssize_t result
= string_find_internal(self
, args
, +1);
1995 return PyInt_FromSsize_t(result
);
1999 PyDoc_STRVAR(index__doc__
,
2000 "S.index(sub [,start [,end]]) -> int\n\
2002 Like S.find() but raise ValueError when the substring is not found.");
2005 string_index(PyStringObject
*self
, PyObject
*args
)
2007 Py_ssize_t result
= string_find_internal(self
, args
, +1);
2011 PyErr_SetString(PyExc_ValueError
,
2012 "substring not found");
2015 return PyInt_FromSsize_t(result
);
2019 PyDoc_STRVAR(rfind__doc__
,
2020 "S.rfind(sub [,start [,end]]) -> int\n\
2022 Return the highest index in S where substring sub is found,\n\
2023 such that sub is contained within s[start:end]. Optional\n\
2024 arguments start and end are interpreted as in slice notation.\n\
2026 Return -1 on failure.");
2029 string_rfind(PyStringObject
*self
, PyObject
*args
)
2031 Py_ssize_t result
= string_find_internal(self
, args
, -1);
2034 return PyInt_FromSsize_t(result
);
2038 PyDoc_STRVAR(rindex__doc__
,
2039 "S.rindex(sub [,start [,end]]) -> int\n\
2041 Like S.rfind() but raise ValueError when the substring is not found.");
2044 string_rindex(PyStringObject
*self
, PyObject
*args
)
2046 Py_ssize_t result
= string_find_internal(self
, args
, -1);
2050 PyErr_SetString(PyExc_ValueError
,
2051 "substring not found");
2054 return PyInt_FromSsize_t(result
);
2058 Py_LOCAL_INLINE(PyObject
*)
2059 do_xstrip(PyStringObject
*self
, int striptype
, PyObject
*sepobj
)
2061 char *s
= PyString_AS_STRING(self
);
2062 Py_ssize_t len
= PyString_GET_SIZE(self
);
2063 char *sep
= PyString_AS_STRING(sepobj
);
2064 Py_ssize_t seplen
= PyString_GET_SIZE(sepobj
);
2068 if (striptype
!= RIGHTSTRIP
) {
2069 while (i
< len
&& memchr(sep
, Py_CHARMASK(s
[i
]), seplen
)) {
2075 if (striptype
!= LEFTSTRIP
) {
2078 } while (j
>= i
&& memchr(sep
, Py_CHARMASK(s
[j
]), seplen
));
2082 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
2084 return (PyObject
*)self
;
2087 return PyString_FromStringAndSize(s
+i
, j
-i
);
2091 Py_LOCAL_INLINE(PyObject
*)
2092 do_strip(PyStringObject
*self
, int striptype
)
2094 char *s
= PyString_AS_STRING(self
);
2095 Py_ssize_t len
= PyString_GET_SIZE(self
), i
, j
;
2098 if (striptype
!= RIGHTSTRIP
) {
2099 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
2105 if (striptype
!= LEFTSTRIP
) {
2108 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
2112 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
2114 return (PyObject
*)self
;
2117 return PyString_FromStringAndSize(s
+i
, j
-i
);
2121 Py_LOCAL_INLINE(PyObject
*)
2122 do_argstrip(PyStringObject
*self
, int striptype
, PyObject
*args
)
2124 PyObject
*sep
= NULL
;
2126 if (!PyArg_ParseTuple(args
, (char *)stripformat
[striptype
], &sep
))
2129 if (sep
!= NULL
&& sep
!= Py_None
) {
2130 if (PyString_Check(sep
))
2131 return do_xstrip(self
, striptype
, sep
);
2132 #ifdef Py_USING_UNICODE
2133 else if (PyUnicode_Check(sep
)) {
2134 PyObject
*uniself
= PyUnicode_FromObject((PyObject
*)self
);
2138 res
= _PyUnicode_XStrip((PyUnicodeObject
*)uniself
,
2144 PyErr_Format(PyExc_TypeError
,
2145 #ifdef Py_USING_UNICODE
2146 "%s arg must be None, str or unicode",
2148 "%s arg must be None or str",
2150 STRIPNAME(striptype
));
2154 return do_strip(self
, striptype
);
2158 PyDoc_STRVAR(strip__doc__
,
2159 "S.strip([chars]) -> string or unicode\n\
2161 Return a copy of the string S with leading and trailing\n\
2162 whitespace removed.\n\
2163 If chars is given and not None, remove characters in chars instead.\n\
2164 If chars is unicode, S will be converted to unicode before stripping");
2167 string_strip(PyStringObject
*self
, PyObject
*args
)
2169 if (PyTuple_GET_SIZE(args
) == 0)
2170 return do_strip(self
, BOTHSTRIP
); /* Common case */
2172 return do_argstrip(self
, BOTHSTRIP
, args
);
2176 PyDoc_STRVAR(lstrip__doc__
,
2177 "S.lstrip([chars]) -> string or unicode\n\
2179 Return a copy of the string S with leading whitespace removed.\n\
2180 If chars is given and not None, remove characters in chars instead.\n\
2181 If chars is unicode, S will be converted to unicode before stripping");
2184 string_lstrip(PyStringObject
*self
, PyObject
*args
)
2186 if (PyTuple_GET_SIZE(args
) == 0)
2187 return do_strip(self
, LEFTSTRIP
); /* Common case */
2189 return do_argstrip(self
, LEFTSTRIP
, args
);
2193 PyDoc_STRVAR(rstrip__doc__
,
2194 "S.rstrip([chars]) -> string or unicode\n\
2196 Return a copy of the string S with trailing whitespace removed.\n\
2197 If chars is given and not None, remove characters in chars instead.\n\
2198 If chars is unicode, S will be converted to unicode before stripping");
2201 string_rstrip(PyStringObject
*self
, PyObject
*args
)
2203 if (PyTuple_GET_SIZE(args
) == 0)
2204 return do_strip(self
, RIGHTSTRIP
); /* Common case */
2206 return do_argstrip(self
, RIGHTSTRIP
, args
);
2210 PyDoc_STRVAR(lower__doc__
,
2211 "S.lower() -> string\n\
2213 Return a copy of the string S converted to lowercase.");
2215 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2217 #define _tolower tolower
2221 string_lower(PyStringObject
*self
)
2224 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2227 newobj
= PyString_FromStringAndSize(NULL
, n
);
2231 s
= PyString_AS_STRING(newobj
);
2233 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2235 for (i
= 0; i
< n
; i
++) {
2236 int c
= Py_CHARMASK(s
[i
]);
2244 PyDoc_STRVAR(upper__doc__
,
2245 "S.upper() -> string\n\
2247 Return a copy of the string S converted to uppercase.");
2250 #define _toupper toupper
2254 string_upper(PyStringObject
*self
)
2257 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2260 newobj
= PyString_FromStringAndSize(NULL
, n
);
2264 s
= PyString_AS_STRING(newobj
);
2266 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2268 for (i
= 0; i
< n
; i
++) {
2269 int c
= Py_CHARMASK(s
[i
]);
2277 PyDoc_STRVAR(title__doc__
,
2278 "S.title() -> string\n\
2280 Return a titlecased version of S, i.e. words start with uppercase\n\
2281 characters, all remaining cased characters have lowercase.");
2284 string_title(PyStringObject
*self
)
2286 char *s
= PyString_AS_STRING(self
), *s_new
;
2287 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2288 int previous_is_cased
= 0;
2291 newobj
= PyString_FromStringAndSize(NULL
, n
);
2294 s_new
= PyString_AsString(newobj
);
2295 for (i
= 0; i
< n
; i
++) {
2296 int c
= Py_CHARMASK(*s
++);
2298 if (!previous_is_cased
)
2300 previous_is_cased
= 1;
2301 } else if (isupper(c
)) {
2302 if (previous_is_cased
)
2304 previous_is_cased
= 1;
2306 previous_is_cased
= 0;
2312 PyDoc_STRVAR(capitalize__doc__
,
2313 "S.capitalize() -> string\n\
2315 Return a copy of the string S with only its first character\n\
2319 string_capitalize(PyStringObject
*self
)
2321 char *s
= PyString_AS_STRING(self
), *s_new
;
2322 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2325 newobj
= PyString_FromStringAndSize(NULL
, n
);
2328 s_new
= PyString_AsString(newobj
);
2330 int c
= Py_CHARMASK(*s
++);
2332 *s_new
= toupper(c
);
2337 for (i
= 1; i
< n
; i
++) {
2338 int c
= Py_CHARMASK(*s
++);
2340 *s_new
= tolower(c
);
2349 PyDoc_STRVAR(count__doc__
,
2350 "S.count(sub[, start[, end]]) -> int\n\
2352 Return the number of non-overlapping occurrences of substring sub in\n\
2353 string S[start:end]. Optional arguments start and end are interpreted\n\
2354 as in slice notation.");
2357 string_count(PyStringObject
*self
, PyObject
*args
)
2360 const char *str
= PyString_AS_STRING(self
), *sub
;
2362 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
2364 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
2365 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2368 if (PyString_Check(sub_obj
)) {
2369 sub
= PyString_AS_STRING(sub_obj
);
2370 sub_len
= PyString_GET_SIZE(sub_obj
);
2372 #ifdef Py_USING_UNICODE
2373 else if (PyUnicode_Check(sub_obj
)) {
2375 count
= PyUnicode_Count((PyObject
*)self
, sub_obj
, start
, end
);
2379 return PyInt_FromSsize_t(count
);
2382 else if (PyObject_AsCharBuffer(sub_obj
, &sub
, &sub_len
))
2385 string_adjust_indices(&start
, &end
, PyString_GET_SIZE(self
));
2387 return PyInt_FromSsize_t(
2388 stringlib_count(str
+ start
, end
- start
, sub
, sub_len
)
2392 PyDoc_STRVAR(swapcase__doc__
,
2393 "S.swapcase() -> string\n\
2395 Return a copy of the string S with uppercase characters\n\
2396 converted to lowercase and vice versa.");
2399 string_swapcase(PyStringObject
*self
)
2401 char *s
= PyString_AS_STRING(self
), *s_new
;
2402 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2405 newobj
= PyString_FromStringAndSize(NULL
, n
);
2408 s_new
= PyString_AsString(newobj
);
2409 for (i
= 0; i
< n
; i
++) {
2410 int c
= Py_CHARMASK(*s
++);
2412 *s_new
= toupper(c
);
2414 else if (isupper(c
)) {
2415 *s_new
= tolower(c
);
2425 PyDoc_STRVAR(translate__doc__
,
2426 "S.translate(table [,deletechars]) -> string\n\
2428 Return a copy of the string S, where all characters occurring\n\
2429 in the optional argument deletechars are removed, and the\n\
2430 remaining characters have been mapped through the given\n\
2431 translation table, which must be a string of length 256.");
2434 string_translate(PyStringObject
*self
, PyObject
*args
)
2436 register char *input
, *output
;
2438 register Py_ssize_t i
, c
, changed
= 0;
2439 PyObject
*input_obj
= (PyObject
*)self
;
2440 const char *output_start
, *del_table
=NULL
;
2441 Py_ssize_t inlen
, tablen
, dellen
= 0;
2443 int trans_table
[256];
2444 PyObject
*tableobj
, *delobj
= NULL
;
2446 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
2447 &tableobj
, &delobj
))
2450 if (PyString_Check(tableobj
)) {
2451 table
= PyString_AS_STRING(tableobj
);
2452 tablen
= PyString_GET_SIZE(tableobj
);
2454 else if (tableobj
== Py_None
) {
2458 #ifdef Py_USING_UNICODE
2459 else if (PyUnicode_Check(tableobj
)) {
2460 /* Unicode .translate() does not support the deletechars
2461 parameter; instead a mapping to None will cause characters
2463 if (delobj
!= NULL
) {
2464 PyErr_SetString(PyExc_TypeError
,
2465 "deletions are implemented differently for unicode");
2468 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
2471 else if (PyObject_AsCharBuffer(tableobj
, &table
, &tablen
))
2474 if (tablen
!= 256) {
2475 PyErr_SetString(PyExc_ValueError
,
2476 "translation table must be 256 characters long");
2480 if (delobj
!= NULL
) {
2481 if (PyString_Check(delobj
)) {
2482 del_table
= PyString_AS_STRING(delobj
);
2483 dellen
= PyString_GET_SIZE(delobj
);
2485 #ifdef Py_USING_UNICODE
2486 else if (PyUnicode_Check(delobj
)) {
2487 PyErr_SetString(PyExc_TypeError
,
2488 "deletions are implemented differently for unicode");
2492 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
2500 inlen
= PyString_GET_SIZE(input_obj
);
2501 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
2504 output_start
= output
= PyString_AsString(result
);
2505 input
= PyString_AS_STRING(input_obj
);
2507 if (dellen
== 0 && table
!= NULL
) {
2508 /* If no deletions are required, use faster code */
2509 for (i
= inlen
; --i
>= 0; ) {
2510 c
= Py_CHARMASK(*input
++);
2511 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
2514 if (changed
|| !PyString_CheckExact(input_obj
))
2517 Py_INCREF(input_obj
);
2521 if (table
== NULL
) {
2522 for (i
= 0; i
< 256; i
++)
2523 trans_table
[i
] = Py_CHARMASK(i
);
2525 for (i
= 0; i
< 256; i
++)
2526 trans_table
[i
] = Py_CHARMASK(table
[i
]);
2529 for (i
= 0; i
< dellen
; i
++)
2530 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
2532 for (i
= inlen
; --i
>= 0; ) {
2533 c
= Py_CHARMASK(*input
++);
2534 if (trans_table
[c
] != -1)
2535 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
2539 if (!changed
&& PyString_CheckExact(input_obj
)) {
2541 Py_INCREF(input_obj
);
2544 /* Fix the size of the resulting string */
2546 _PyString_Resize(&result
, output
- output_start
);
2554 /* find and count characters and substrings */
2556 #define findchar(target, target_len, c) \
2557 ((char *)memchr((const void *)(target), c, target_len))
2559 /* String ops must return a string. */
2560 /* If the object is subclass of string, create a copy */
2561 Py_LOCAL(PyStringObject
*)
2562 return_self(PyStringObject
*self
)
2564 if (PyString_CheckExact(self
)) {
2568 return (PyStringObject
*)PyString_FromStringAndSize(
2569 PyString_AS_STRING(self
),
2570 PyString_GET_SIZE(self
));
2573 Py_LOCAL_INLINE(Py_ssize_t
)
2574 countchar(const char *target
, int target_len
, char c
, Py_ssize_t maxcount
)
2577 const char *start
=target
;
2578 const char *end
=target
+target_len
;
2580 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
2582 if (count
>= maxcount
)
2589 Py_LOCAL(Py_ssize_t
)
2590 findstring(const char *target
, Py_ssize_t target_len
,
2591 const char *pattern
, Py_ssize_t pattern_len
,
2597 start
+= target_len
;
2601 if (end
> target_len
) {
2603 } else if (end
< 0) {
2609 /* zero-length substrings always match at the first attempt */
2610 if (pattern_len
== 0)
2611 return (direction
> 0) ? start
: end
;
2615 if (direction
< 0) {
2616 for (; end
>= start
; end
--)
2617 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
))
2620 for (; start
<= end
; start
++)
2621 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
))
2627 Py_LOCAL_INLINE(Py_ssize_t
)
2628 countstring(const char *target
, Py_ssize_t target_len
,
2629 const char *pattern
, Py_ssize_t pattern_len
,
2632 int direction
, Py_ssize_t maxcount
)
2637 start
+= target_len
;
2641 if (end
> target_len
) {
2643 } else if (end
< 0) {
2649 /* zero-length substrings match everywhere */
2650 if (pattern_len
== 0 || maxcount
== 0) {
2651 if (target_len
+1 < maxcount
)
2652 return target_len
+1;
2657 if (direction
< 0) {
2658 for (; (end
>= start
); end
--)
2659 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
)) {
2661 if (--maxcount
<= 0) break;
2662 end
-= pattern_len
-1;
2665 for (; (start
<= end
); start
++)
2666 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
)) {
2668 if (--maxcount
<= 0)
2670 start
+= pattern_len
-1;
2677 /* Algorithms for different cases of string replacement */
2679 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2680 Py_LOCAL(PyStringObject
*)
2681 replace_interleave(PyStringObject
*self
,
2682 const char *to_s
, Py_ssize_t to_len
,
2683 Py_ssize_t maxcount
)
2685 char *self_s
, *result_s
;
2686 Py_ssize_t self_len
, result_len
;
2687 Py_ssize_t count
, i
, product
;
2688 PyStringObject
*result
;
2690 self_len
= PyString_GET_SIZE(self
);
2692 /* 1 at the end plus 1 after every character */
2694 if (maxcount
< count
)
2697 /* Check for overflow */
2698 /* result_len = count * to_len + self_len; */
2699 product
= count
* to_len
;
2700 if (product
/ to_len
!= count
) {
2701 PyErr_SetString(PyExc_OverflowError
,
2702 "replace string is too long");
2705 result_len
= product
+ self_len
;
2706 if (result_len
< 0) {
2707 PyErr_SetString(PyExc_OverflowError
,
2708 "replace string is too long");
2712 if (! (result
= (PyStringObject
*)
2713 PyString_FromStringAndSize(NULL
, result_len
)) )
2716 self_s
= PyString_AS_STRING(self
);
2717 result_s
= PyString_AS_STRING(result
);
2719 /* TODO: special case single character, which doesn't need memcpy */
2721 /* Lay the first one down (guaranteed this will occur) */
2722 Py_MEMCPY(result_s
, to_s
, to_len
);
2726 for (i
=0; i
<count
; i
++) {
2727 *result_s
++ = *self_s
++;
2728 Py_MEMCPY(result_s
, to_s
, to_len
);
2732 /* Copy the rest of the original string */
2733 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
2738 /* Special case for deleting a single character */
2739 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2740 Py_LOCAL(PyStringObject
*)
2741 replace_delete_single_character(PyStringObject
*self
,
2742 char from_c
, Py_ssize_t maxcount
)
2744 char *self_s
, *result_s
;
2745 char *start
, *next
, *end
;
2746 Py_ssize_t self_len
, result_len
;
2748 PyStringObject
*result
;
2750 self_len
= PyString_GET_SIZE(self
);
2751 self_s
= PyString_AS_STRING(self
);
2753 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2755 return return_self(self
);
2758 result_len
= self_len
- count
; /* from_len == 1 */
2759 assert(result_len
>=0);
2761 if ( (result
= (PyStringObject
*)
2762 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2764 result_s
= PyString_AS_STRING(result
);
2767 end
= self_s
+ self_len
;
2768 while (count
-- > 0) {
2769 next
= findchar(start
, end
-start
, from_c
);
2772 Py_MEMCPY(result_s
, start
, next
-start
);
2773 result_s
+= (next
-start
);
2776 Py_MEMCPY(result_s
, start
, end
-start
);
2781 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2783 Py_LOCAL(PyStringObject
*)
2784 replace_delete_substring(PyStringObject
*self
,
2785 const char *from_s
, Py_ssize_t from_len
,
2786 Py_ssize_t maxcount
) {
2787 char *self_s
, *result_s
;
2788 char *start
, *next
, *end
;
2789 Py_ssize_t self_len
, result_len
;
2790 Py_ssize_t count
, offset
;
2791 PyStringObject
*result
;
2793 self_len
= PyString_GET_SIZE(self
);
2794 self_s
= PyString_AS_STRING(self
);
2796 count
= countstring(self_s
, self_len
,
2803 return return_self(self
);
2806 result_len
= self_len
- (count
* from_len
);
2807 assert (result_len
>=0);
2809 if ( (result
= (PyStringObject
*)
2810 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2813 result_s
= PyString_AS_STRING(result
);
2816 end
= self_s
+ self_len
;
2817 while (count
-- > 0) {
2818 offset
= findstring(start
, end
-start
,
2820 0, end
-start
, FORWARD
);
2823 next
= start
+ offset
;
2825 Py_MEMCPY(result_s
, start
, next
-start
);
2827 result_s
+= (next
-start
);
2828 start
= next
+from_len
;
2830 Py_MEMCPY(result_s
, start
, end
-start
);
2834 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2835 Py_LOCAL(PyStringObject
*)
2836 replace_single_character_in_place(PyStringObject
*self
,
2837 char from_c
, char to_c
,
2838 Py_ssize_t maxcount
)
2840 char *self_s
, *result_s
, *start
, *end
, *next
;
2841 Py_ssize_t self_len
;
2842 PyStringObject
*result
;
2844 /* The result string will be the same size */
2845 self_s
= PyString_AS_STRING(self
);
2846 self_len
= PyString_GET_SIZE(self
);
2848 next
= findchar(self_s
, self_len
, from_c
);
2851 /* No matches; return the original string */
2852 return return_self(self
);
2855 /* Need to make a new string */
2856 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2859 result_s
= PyString_AS_STRING(result
);
2860 Py_MEMCPY(result_s
, self_s
, self_len
);
2862 /* change everything in-place, starting with this one */
2863 start
= result_s
+ (next
-self_s
);
2866 end
= result_s
+ self_len
;
2868 while (--maxcount
> 0) {
2869 next
= findchar(start
, end
-start
, from_c
);
2879 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2880 Py_LOCAL(PyStringObject
*)
2881 replace_substring_in_place(PyStringObject
*self
,
2882 const char *from_s
, Py_ssize_t from_len
,
2883 const char *to_s
, Py_ssize_t to_len
,
2884 Py_ssize_t maxcount
)
2886 char *result_s
, *start
, *end
;
2888 Py_ssize_t self_len
, offset
;
2889 PyStringObject
*result
;
2891 /* The result string will be the same size */
2893 self_s
= PyString_AS_STRING(self
);
2894 self_len
= PyString_GET_SIZE(self
);
2896 offset
= findstring(self_s
, self_len
,
2898 0, self_len
, FORWARD
);
2900 /* No matches; return the original string */
2901 return return_self(self
);
2904 /* Need to make a new string */
2905 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2908 result_s
= PyString_AS_STRING(result
);
2909 Py_MEMCPY(result_s
, self_s
, self_len
);
2911 /* change everything in-place, starting with this one */
2912 start
= result_s
+ offset
;
2913 Py_MEMCPY(start
, to_s
, from_len
);
2915 end
= result_s
+ self_len
;
2917 while ( --maxcount
> 0) {
2918 offset
= findstring(start
, end
-start
,
2920 0, end
-start
, FORWARD
);
2923 Py_MEMCPY(start
+offset
, to_s
, from_len
);
2924 start
+= offset
+from_len
;
2930 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2931 Py_LOCAL(PyStringObject
*)
2932 replace_single_character(PyStringObject
*self
,
2934 const char *to_s
, Py_ssize_t to_len
,
2935 Py_ssize_t maxcount
)
2937 char *self_s
, *result_s
;
2938 char *start
, *next
, *end
;
2939 Py_ssize_t self_len
, result_len
;
2940 Py_ssize_t count
, product
;
2941 PyStringObject
*result
;
2943 self_s
= PyString_AS_STRING(self
);
2944 self_len
= PyString_GET_SIZE(self
);
2946 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2948 /* no matches, return unchanged */
2949 return return_self(self
);
2952 /* use the difference between current and new, hence the "-1" */
2953 /* result_len = self_len + count * (to_len-1) */
2954 product
= count
* (to_len
-1);
2955 if (product
/ (to_len
-1) != count
) {
2956 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2959 result_len
= self_len
+ product
;
2960 if (result_len
< 0) {
2961 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2965 if ( (result
= (PyStringObject
*)
2966 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2968 result_s
= PyString_AS_STRING(result
);
2971 end
= self_s
+ self_len
;
2972 while (count
-- > 0) {
2973 next
= findchar(start
, end
-start
, from_c
);
2977 if (next
== start
) {
2978 /* replace with the 'to' */
2979 Py_MEMCPY(result_s
, to_s
, to_len
);
2983 /* copy the unchanged old then the 'to' */
2984 Py_MEMCPY(result_s
, start
, next
-start
);
2985 result_s
+= (next
-start
);
2986 Py_MEMCPY(result_s
, to_s
, to_len
);
2991 /* Copy the remainder of the remaining string */
2992 Py_MEMCPY(result_s
, start
, end
-start
);
2997 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2998 Py_LOCAL(PyStringObject
*)
2999 replace_substring(PyStringObject
*self
,
3000 const char *from_s
, Py_ssize_t from_len
,
3001 const char *to_s
, Py_ssize_t to_len
,
3002 Py_ssize_t maxcount
) {
3003 char *self_s
, *result_s
;
3004 char *start
, *next
, *end
;
3005 Py_ssize_t self_len
, result_len
;
3006 Py_ssize_t count
, offset
, product
;
3007 PyStringObject
*result
;
3009 self_s
= PyString_AS_STRING(self
);
3010 self_len
= PyString_GET_SIZE(self
);
3012 count
= countstring(self_s
, self_len
,
3014 0, self_len
, FORWARD
, maxcount
);
3016 /* no matches, return unchanged */
3017 return return_self(self
);
3020 /* Check for overflow */
3021 /* result_len = self_len + count * (to_len-from_len) */
3022 product
= count
* (to_len
-from_len
);
3023 if (product
/ (to_len
-from_len
) != count
) {
3024 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
3027 result_len
= self_len
+ product
;
3028 if (result_len
< 0) {
3029 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
3033 if ( (result
= (PyStringObject
*)
3034 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
3036 result_s
= PyString_AS_STRING(result
);
3039 end
= self_s
+ self_len
;
3040 while (count
-- > 0) {
3041 offset
= findstring(start
, end
-start
,
3043 0, end
-start
, FORWARD
);
3046 next
= start
+offset
;
3047 if (next
== start
) {
3048 /* replace with the 'to' */
3049 Py_MEMCPY(result_s
, to_s
, to_len
);
3053 /* copy the unchanged old then the 'to' */
3054 Py_MEMCPY(result_s
, start
, next
-start
);
3055 result_s
+= (next
-start
);
3056 Py_MEMCPY(result_s
, to_s
, to_len
);
3058 start
= next
+from_len
;
3061 /* Copy the remainder of the remaining string */
3062 Py_MEMCPY(result_s
, start
, end
-start
);
3068 Py_LOCAL(PyStringObject
*)
3069 replace(PyStringObject
*self
,
3070 const char *from_s
, Py_ssize_t from_len
,
3071 const char *to_s
, Py_ssize_t to_len
,
3072 Py_ssize_t maxcount
)
3075 maxcount
= PY_SSIZE_T_MAX
;
3076 } else if (maxcount
== 0 || PyString_GET_SIZE(self
) == 0) {
3077 /* nothing to do; return the original string */
3078 return return_self(self
);
3081 if (maxcount
== 0 ||
3082 (from_len
== 0 && to_len
== 0)) {
3083 /* nothing to do; return the original string */
3084 return return_self(self
);
3087 /* Handle zero-length special cases */
3089 if (from_len
== 0) {
3090 /* insert the 'to' string everywhere. */
3091 /* >>> "Python".replace("", ".") */
3092 /* '.P.y.t.h.o.n.' */
3093 return replace_interleave(self
, to_s
, to_len
, maxcount
);
3096 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3097 /* point for an empty self string to generate a non-empty string */
3098 /* Special case so the remaining code always gets a non-empty string */
3099 if (PyString_GET_SIZE(self
) == 0) {
3100 return return_self(self
);
3104 /* delete all occurances of 'from' string */
3105 if (from_len
== 1) {
3106 return replace_delete_single_character(
3107 self
, from_s
[0], maxcount
);
3109 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
3113 /* Handle special case where both strings have the same length */
3115 if (from_len
== to_len
) {
3116 if (from_len
== 1) {
3117 return replace_single_character_in_place(
3123 return replace_substring_in_place(
3124 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
3128 /* Otherwise use the more generic algorithms */
3129 if (from_len
== 1) {
3130 return replace_single_character(self
, from_s
[0],
3131 to_s
, to_len
, maxcount
);
3133 /* len('from')>=2, len('to')>=1 */
3134 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
3138 PyDoc_STRVAR(replace__doc__
,
3139 "S.replace (old, new[, count]) -> string\n\
3141 Return a copy of string S with all occurrences of substring\n\
3142 old replaced by new. If the optional argument count is\n\
3143 given, only the first count occurrences are replaced.");
/* string_replace: C implementation registered as "replace" in the method
 * table and documented by replace__doc__.
 *
 * Arguments are unpacked with "OO|n:replace"; count stays at -1 when the
 * caller omits it.  For `from` and `to` alike: a str supplies its buffer and
 * length directly, a unicode object (only when Py_USING_UNICODE is defined)
 * hands the whole call over to PyUnicode_Replace(), and anything else is
 * read through PyObject_AsCharBuffer().  The two byte ranges and count are
 * then forwarded to replace().
 *
 * NOTE(review): the unicode test for `from` is a bare `if`, while the one
 * for `to` is an `else if`.  With Py_USING_UNICODE defined, the buffer
 * fallback for `from` therefore hangs off the unicode test rather than the
 * str test -- confirm this asymmetry is intended.
 */
3146 string_replace(PyStringObject
*self
, PyObject
*args
)
3148 Py_ssize_t count
= -1;
3149 PyObject
*from
, *to
;
3150 const char *from_s
, *to_s
;
3151 Py_ssize_t from_len
, to_len
;
3153 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
3156 if (PyString_Check(from
)) {
3157 from_s
= PyString_AS_STRING(from
);
3158 from_len
= PyString_GET_SIZE(from
);
3160 #ifdef Py_USING_UNICODE
3161 if (PyUnicode_Check(from
))
3162 return PyUnicode_Replace((PyObject
*)self
,
3165 else if (PyObject_AsCharBuffer(from
, &from_s
, &from_len
))
3168 if (PyString_Check(to
)) {
3169 to_s
= PyString_AS_STRING(to
);
3170 to_len
= PyString_GET_SIZE(to
);
3172 #ifdef Py_USING_UNICODE
3173 else if (PyUnicode_Check(to
))
3174 return PyUnicode_Replace((PyObject
*)self
,
3177 else if (PyObject_AsCharBuffer(to
, &to_s
, &to_len
))
3180 return (PyObject
*)replace((PyStringObject
*) self
,
3182 to_s
, to_len
, count
);
3187 /* Matches the end (direction >= 0) or start (direction < 0) of self
3188 * against substr, using the start and end arguments. Returns
3189 * -1 on error, 0 if not found and 1 if found. */
/* _string_tailmatch: shared worker for the startswith/endswith methods.
 *
 * substr may be a str (buffer and size taken directly), a unicode object
 * (delegated to PyUnicode_Tailmatch() when Py_USING_UNICODE is defined), or
 * any other object readable through PyObject_AsCharBuffer().  start and end
 * are passed through string_adjust_indices() together with len(self) before
 * the bounds tests.  The `direction < 0` branch tests `start+slen > len`;
 * the other branch tests `end-start < slen || start > len` and then
 * `end-slen > start`.  When the window holds at least slen bytes the
 * answer is !memcmp(str+start, sub, slen).
 */
3192 _string_tailmatch(PyStringObject
*self
, PyObject
*substr
, Py_ssize_t start
,
3193 Py_ssize_t end
, int direction
)
3195 Py_ssize_t len
= PyString_GET_SIZE(self
);
3200 if (PyString_Check(substr
)) {
3201 sub
= PyString_AS_STRING(substr
);
3202 slen
= PyString_GET_SIZE(substr
);
3204 #ifdef Py_USING_UNICODE
3205 else if (PyUnicode_Check(substr
))
3206 return PyUnicode_Tailmatch((PyObject
*)self
,
3207 substr
, start
, end
, direction
);
3209 else if (PyObject_AsCharBuffer(substr
, &sub
, &slen
))
3211 str
= PyString_AS_STRING(self
);
3213 string_adjust_indices(&start
, &end
, len
);
3215 if (direction
< 0) {
3217 if (start
+slen
> len
)
3221 if (end
-start
< slen
|| start
> len
)
3224 if (end
-slen
> start
)
3227 if (end
-start
>= slen
)
3228 return ! memcmp(str
+start
, sub
, slen
);
3233 PyDoc_STRVAR(startswith__doc__
,
3234 "S.startswith(prefix[, start[, end]]) -> bool\n\
3236 Return True if S starts with the specified prefix, False otherwise.\n\
3237 With optional start, test S beginning at that position.\n\
3238 With optional end, stop comparing S at that position.\n\
3239 prefix can also be a tuple of strings to try.");
/* string_startswith: C implementation registered as "startswith".
 *
 * Parses "O|O&O&:startswith"; the optional bounds are converted by
 * _PyEval_SliceIndex and default to start = 0, end = PY_SSIZE_T_MAX.  A
 * tuple argument is tried item by item through _string_tailmatch(); any
 * other argument gets a single _string_tailmatch(self, subobj, start, end,
 * -1) call.  The final result is returned through PyBool_FromLong().
 */
3242 string_startswith(PyStringObject
*self
, PyObject
*args
)
3244 Py_ssize_t start
= 0;
3245 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3249 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
3250 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
3252 if (PyTuple_Check(subobj
)) {
3254 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
3255 result
= _string_tailmatch(self
,
3256 PyTuple_GET_ITEM(subobj
, i
),
3266 result
= _string_tailmatch(self
, subobj
, start
, end
, -1);
3270 return PyBool_FromLong(result
);
3274 PyDoc_STRVAR(endswith__doc__
,
3275 "S.endswith(suffix[, start[, end]]) -> bool\n\
3277 Return True if S ends with the specified suffix, False otherwise.\n\
3278 With optional start, test S beginning at that position.\n\
3279 With optional end, stop comparing S at that position.\n\
3280 suffix can also be a tuple of strings to try.");
/* string_endswith: C implementation registered as "endswith".
 *
 * Parses "O|O&O&:endswith"; the optional bounds are converted by
 * _PyEval_SliceIndex and default to start = 0, end = PY_SSIZE_T_MAX.  A
 * tuple argument is tried item by item through _string_tailmatch(); any
 * other argument gets a single _string_tailmatch(self, subobj, start, end,
 * +1) call.  The final result is returned through PyBool_FromLong().
 */
3283 string_endswith(PyStringObject
*self
, PyObject
*args
)
3285 Py_ssize_t start
= 0;
3286 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3290 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
3291 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
3293 if (PyTuple_Check(subobj
)) {
3295 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
3296 result
= _string_tailmatch(self
,
3297 PyTuple_GET_ITEM(subobj
, i
),
3307 result
= _string_tailmatch(self
, subobj
, start
, end
, +1);
3311 return PyBool_FromLong(result
);
3315 PyDoc_STRVAR(encode__doc__
,
3316 "S.encode([encoding[,errors]]) -> object\n\
3318 Encodes S using the codec registered for encoding. encoding defaults\n\
3319 to the default encoding. errors may be given to set a different error\n\
3320 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3321 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3322 'xmlcharrefreplace' as well as any other name registered with\n\
3323 codecs.register_error that is able to handle UnicodeEncodeErrors.");
/* string_encode: C implementation registered as "encode".
 *
 * encoding and errors are optional C strings ("|ss:encode") and stay NULL
 * when omitted.  The conversion itself is PyString_AsEncodedObject(); a
 * result that is neither str nor unicode is reported as a TypeError whose
 * message includes the result's type name.
 */
3326 string_encode(PyStringObject
*self
, PyObject
*args
)
3328 char *encoding
= NULL
;
3329 char *errors
= NULL
;
3332 if (!PyArg_ParseTuple(args
, "|ss:encode", &encoding
, &errors
))
3334 v
= PyString_AsEncodedObject((PyObject
*)self
, encoding
, errors
);
3337 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3338 PyErr_Format(PyExc_TypeError
,
3339 "encoder did not return a string/unicode object "
3341 Py_TYPE(v
)->tp_name
);
3352 PyDoc_STRVAR(decode__doc__
,
3353 "S.decode([encoding[,errors]]) -> object\n\
3355 Decodes S using the codec registered for encoding. encoding defaults\n\
3356 to the default encoding. errors may be given to set a different error\n\
3357 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3358 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3359 as well as any other name registerd with codecs.register_error that is\n\
3360 able to handle UnicodeDecodeErrors.");
/* string_decode: C implementation registered as "decode".
 *
 * encoding and errors are optional C strings ("|ss:decode") and stay NULL
 * when omitted.  The conversion itself is PyString_AsDecodedObject(); a
 * result that is neither str nor unicode is reported as a TypeError whose
 * message includes the result's type name.
 */
3363 string_decode(PyStringObject
*self
, PyObject
*args
)
3365 char *encoding
= NULL
;
3366 char *errors
= NULL
;
3369 if (!PyArg_ParseTuple(args
, "|ss:decode", &encoding
, &errors
))
3371 v
= PyString_AsDecodedObject((PyObject
*)self
, encoding
, errors
);
3374 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3375 PyErr_Format(PyExc_TypeError
,
3376 "decoder did not return a string/unicode object "
3378 Py_TYPE(v
)->tp_name
);
3389 PyDoc_STRVAR(expandtabs__doc__
,
3390 "S.expandtabs([tabsize]) -> string\n\
3392 Return a copy of S where all tab characters are expanded using spaces.\n\
3393 If tabsize is not given, a tab size of 8 characters is assumed.");
/* string_expandtabs: C implementation registered as "expandtabs"
 * ("|i:expandtabs").
 *
 * Pass 1 walks the input to size the result: i counts bytes up to and
 * including the latest \n or \r, j counts bytes since then, and each
 * increment is preceded by a comparison against PY_SSIZE_T_MAX.  Pass 2
 * allocates i + j bytes with PyString_FromStringAndSize(NULL, ...) and walks
 * the input again, reusing i as the per-tab fill width
 * (tabsize - (j % tabsize)).  Overflow is reported as
 * OverflowError("new string is too long").
 *
 * NOTE(review): both passes evaluate `j % tabsize`; confirm that a tabsize
 * of zero or less is excluded before those expressions are reached.
 */
3396 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
3398 const char *e
, *p
, *qe
;
3400 Py_ssize_t i
, j
, incr
;
3404 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
3407 /* First pass: determine size of output string */
3408 i
= 0; /* chars up to and including most recent \n or \r */
3409 j
= 0; /* chars since most recent \n or \r (use in tab calculations) */
3410 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
); /* end of input */
3411 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3414 incr
= tabsize
- (j
% tabsize
);
3415 if (j
> PY_SSIZE_T_MAX
- incr
)
3421 if (j
> PY_SSIZE_T_MAX
- 1)
3424 if (*p
== '\n' || *p
== '\r') {
3425 if (i
> PY_SSIZE_T_MAX
- j
)
3432 if (i
> PY_SSIZE_T_MAX
- j
)
3435 /* Second pass: create output string and fill it */
3436 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
3440 j
= 0; /* same as in first pass */
3441 q
= PyString_AS_STRING(u
); /* next output char */
3442 qe
= PyString_AS_STRING(u
) + PyString_GET_SIZE(u
); /* end of output */
3444 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3447 i
= tabsize
- (j
% tabsize
);
3461 if (*p
== '\n' || *p
== '\r')
3470 PyErr_SetString(PyExc_OverflowError
, "new string is too long");
/* pad: build a copy of self with `left` fill bytes in front of it and
 * `right` fill bytes behind it.  Used by the ljust/rjust/center/zfill
 * methods below.
 *
 * With no padding on either side and an exact str, self itself is returned.
 * Otherwise a fresh string of left + len(self) + right bytes is requested
 * from PyString_FromStringAndSize(NULL, ...); the leading run is memset to
 * `fill`, self's bytes are copied right after it, and a second memset
 * starts immediately behind the copied bytes.
 *
 * NOTE(review): the size sum is not range-checked in the lines shown here;
 * verify that callers cannot make it overflow Py_ssize_t.
 */
3474 Py_LOCAL_INLINE(PyObject
*)
3475 pad(PyStringObject
*self
, Py_ssize_t left
, Py_ssize_t right
, char fill
)
3484 if (left
== 0 && right
== 0 && PyString_CheckExact(self
)) {
3486 return (PyObject
*)self
;
3489 u
= PyString_FromStringAndSize(NULL
,
3490 left
+ PyString_GET_SIZE(self
) + right
);
3493 memset(PyString_AS_STRING(u
), fill
, left
);
3494 Py_MEMCPY(PyString_AS_STRING(u
) + left
,
3495 PyString_AS_STRING(self
),
3496 PyString_GET_SIZE(self
));
3498 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
3505 PyDoc_STRVAR(ljust__doc__
,
3506 "S.ljust(width[, fillchar]) -> string\n"
3508 "Return S left justified in a string of length width. Padding is\n"
3509 "done using the specified fill character (default is a space).");
/* string_ljust: C implementation registered as "ljust".
 *
 * Parses "n|c:ljust"; fillchar defaults to ' '.  An exact str that is
 * already at least `width` bytes long is returned as is; every other case
 * goes to pad() with no left padding and width - len(self) bytes on the
 * right.
 */
3512 string_ljust(PyStringObject
*self
, PyObject
*args
)
3515 char fillchar
= ' ';
3517 if (!PyArg_ParseTuple(args
, "n|c:ljust", &width
, &fillchar
))
3520 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3522 return (PyObject
*) self
;
3525 return pad(self
, 0, width
- PyString_GET_SIZE(self
), fillchar
);
3529 PyDoc_STRVAR(rjust__doc__
,
3530 "S.rjust(width[, fillchar]) -> string\n"
3532 "Return S right justified in a string of length width. Padding is\n"
3533 "done using the specified fill character (default is a space)");
/* string_rjust: C implementation registered as "rjust".
 *
 * Parses "n|c:rjust"; fillchar defaults to ' '.  An exact str that is
 * already at least `width` bytes long is returned as is; every other case
 * goes to pad() with width - len(self) bytes on the left and none on the
 * right.
 */
3536 string_rjust(PyStringObject
*self
, PyObject
*args
)
3539 char fillchar
= ' ';
3541 if (!PyArg_ParseTuple(args
, "n|c:rjust", &width
, &fillchar
))
3544 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3546 return (PyObject
*) self
;
3549 return pad(self
, width
- PyString_GET_SIZE(self
), 0, fillchar
);
3553 PyDoc_STRVAR(center__doc__
,
3554 "S.center(width[, fillchar]) -> string\n"
3556 "Return S centered in a string of length width. Padding is\n"
3557 "done using the specified fill character (default is a space)");
/* string_center: C implementation registered as "center".
 *
 * Parses "n|c:center"; fillchar defaults to ' '.  An exact str that is
 * already at least `width` bytes long is returned as is.  Otherwise marg is
 * the total padding (width - len(self)); the left share is marg / 2, plus
 * one extra byte when marg and width are both odd (marg & width & 1), and
 * the remainder (marg - left) goes on the right via pad().
 */
3560 string_center(PyStringObject
*self
, PyObject
*args
)
3562 Py_ssize_t marg
, left
;
3564 char fillchar
= ' ';
3566 if (!PyArg_ParseTuple(args
, "n|c:center", &width
, &fillchar
))
3569 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3571 return (PyObject
*) self
;
3574 marg
= width
- PyString_GET_SIZE(self
);
3575 left
= marg
/ 2 + (marg
& width
& 1);
3577 return pad(self
, left
, marg
- left
, fillchar
);
3580 PyDoc_STRVAR(zfill__doc__
,
3581 "S.zfill(width) -> string\n"
3583 "Pad a numeric string S with zeros on the left, to fill a field\n"
3584 "of the specified width. The string S is never truncated.");
/* string_zfill: C implementation registered as "zfill" ("n:zfill").
 *
 * When self is already at least `width` bytes long, an exact str is
 * returned as is and any other instance is copied with
 * PyString_FromStringAndSize().  Otherwise pad() prepends
 * fill = width - len(self) '0' bytes; the byte that used to lead the string
 * now sits at index `fill`, and when it is '+' or '-' the sign is moved to
 * the beginning of the padded result.
 */
3587 string_zfill(PyStringObject
*self
, PyObject
*args
)
3594 if (!PyArg_ParseTuple(args
, "n:zfill", &width
))
3597 if (PyString_GET_SIZE(self
) >= width
) {
3598 if (PyString_CheckExact(self
)) {
3600 return (PyObject
*) self
;
3603 return PyString_FromStringAndSize(
3604 PyString_AS_STRING(self
),
3605 PyString_GET_SIZE(self
)
3609 fill
= width
- PyString_GET_SIZE(self
);
3611 s
= pad(self
, fill
, 0, '0');
3616 p
= PyString_AS_STRING(s
);
3617 if (p
[fill
] == '+' || p
[fill
] == '-') {
3618 /* move sign to beginning of string */
3623 return (PyObject
*) s
;
3626 PyDoc_STRVAR(isspace__doc__
,
3627 "S.isspace() -> bool\n\
3629 Return True if all characters in S are whitespace\n\
3630 and there is at least one character in S, False otherwise.");
/* string_isspace: C implementation registered as "isspace".
 *
 * The buffer is viewed as unsigned char.  A one-byte string takes a
 * shortcut that can answer True right away; an empty string answers False.
 * Otherwise the bytes from p up to e are scanned: a failing byte answers
 * False, and reaching the end answers True.
 * NOTE(review): the per-byte test is presumably isspace() -- TODO confirm.
 */
3633 string_isspace(PyStringObject
*self
)
3635 register const unsigned char *p
3636 = (unsigned char *) PyString_AS_STRING(self
);
3637 register const unsigned char *e
;
3639 /* Shortcut for single character strings */
3640 if (PyString_GET_SIZE(self
) == 1 &&
3642 return PyBool_FromLong(1);
3644 /* Special case for empty strings */
3645 if (PyString_GET_SIZE(self
) == 0)
3646 return PyBool_FromLong(0);
3648 e
= p
+ PyString_GET_SIZE(self
);
3649 for (; p
< e
; p
++) {
3651 return PyBool_FromLong(0);
3653 return PyBool_FromLong(1);
3657 PyDoc_STRVAR(isalpha__doc__
,
3658 "S.isalpha() -> bool\n\
3660 Return True if all characters in S are alphabetic\n\
3661 and there is at least one character in S, False otherwise.");
/* string_isalpha: C implementation registered as "isalpha".
 *
 * The buffer is viewed as unsigned char.  A one-byte string takes a
 * shortcut that can answer True right away; an empty string answers False.
 * Otherwise the bytes from p up to e are scanned: a failing byte answers
 * False, and reaching the end answers True.
 * NOTE(review): the per-byte test is presumably isalpha() -- TODO confirm.
 */
3664 string_isalpha(PyStringObject
*self
)
3666 register const unsigned char *p
3667 = (unsigned char *) PyString_AS_STRING(self
);
3668 register const unsigned char *e
;
3670 /* Shortcut for single character strings */
3671 if (PyString_GET_SIZE(self
) == 1 &&
3673 return PyBool_FromLong(1);
3675 /* Special case for empty strings */
3676 if (PyString_GET_SIZE(self
) == 0)
3677 return PyBool_FromLong(0);
3679 e
= p
+ PyString_GET_SIZE(self
);
3680 for (; p
< e
; p
++) {
3682 return PyBool_FromLong(0);
3684 return PyBool_FromLong(1);
3688 PyDoc_STRVAR(isalnum__doc__
,
3689 "S.isalnum() -> bool\n\
3691 Return True if all characters in S are alphanumeric\n\
3692 and there is at least one character in S, False otherwise.");
/* string_isalnum: C implementation registered as "isalnum".
 *
 * The buffer is viewed as unsigned char.  A one-byte string takes a
 * shortcut that can answer True right away; an empty string answers False.
 * Otherwise the bytes from p up to e are scanned: a failing byte answers
 * False, and reaching the end answers True.
 * NOTE(review): the per-byte test is presumably isalnum() -- TODO confirm.
 */
3695 string_isalnum(PyStringObject
*self
)
3697 register const unsigned char *p
3698 = (unsigned char *) PyString_AS_STRING(self
);
3699 register const unsigned char *e
;
3701 /* Shortcut for single character strings */
3702 if (PyString_GET_SIZE(self
) == 1 &&
3704 return PyBool_FromLong(1);
3706 /* Special case for empty strings */
3707 if (PyString_GET_SIZE(self
) == 0)
3708 return PyBool_FromLong(0);
3710 e
= p
+ PyString_GET_SIZE(self
);
3711 for (; p
< e
; p
++) {
3713 return PyBool_FromLong(0);
3715 return PyBool_FromLong(1);
3719 PyDoc_STRVAR(isdigit__doc__
,
3720 "S.isdigit() -> bool\n\
3722 Return True if all characters in S are digits\n\
3723 and there is at least one character in S, False otherwise.");
/* string_isdigit: C implementation registered as "isdigit".
 *
 * The buffer is viewed as unsigned char.  A one-byte string takes a
 * shortcut that can answer True right away; an empty string answers False.
 * Otherwise the bytes from p up to e are scanned: a failing byte answers
 * False, and reaching the end answers True.
 * NOTE(review): the per-byte test is presumably isdigit() -- TODO confirm.
 */
3726 string_isdigit(PyStringObject
*self
)
3728 register const unsigned char *p
3729 = (unsigned char *) PyString_AS_STRING(self
);
3730 register const unsigned char *e
;
3732 /* Shortcut for single character strings */
3733 if (PyString_GET_SIZE(self
) == 1 &&
3735 return PyBool_FromLong(1);
3737 /* Special case for empty strings */
3738 if (PyString_GET_SIZE(self
) == 0)
3739 return PyBool_FromLong(0);
3741 e
= p
+ PyString_GET_SIZE(self
);
3742 for (; p
< e
; p
++) {
3744 return PyBool_FromLong(0);
3746 return PyBool_FromLong(1);
3750 PyDoc_STRVAR(islower__doc__
,
3751 "S.islower() -> bool\n\
3753 Return True if all cased characters in S are lowercase and there is\n\
3754 at least one cased character in S, False otherwise.");
/* string_islower: C implementation registered as "islower".
 *
 * The buffer is viewed as unsigned char.  A one-byte string is answered
 * directly from islower(*p); an empty string answers False.  Otherwise the
 * scan answers False on a disqualifying byte, records in `cased` that a
 * lowercase byte was seen, and finally returns PyBool_FromLong(cased), so a
 * string with no cased byte at all is False.
 */
3757 string_islower(PyStringObject
*self
)
3759 register const unsigned char *p
3760 = (unsigned char *) PyString_AS_STRING(self
);
3761 register const unsigned char *e
;
3764 /* Shortcut for single character strings */
3765 if (PyString_GET_SIZE(self
) == 1)
3766 return PyBool_FromLong(islower(*p
) != 0);
3768 /* Special case for empty strings */
3769 if (PyString_GET_SIZE(self
) == 0)
3770 return PyBool_FromLong(0);
3772 e
= p
+ PyString_GET_SIZE(self
);
3774 for (; p
< e
; p
++) {
3776 return PyBool_FromLong(0);
3777 else if (!cased
&& islower(*p
))
3780 return PyBool_FromLong(cased
);
3784 PyDoc_STRVAR(isupper__doc__
,
3785 "S.isupper() -> bool\n\
3787 Return True if all cased characters in S are uppercase and there is\n\
3788 at least one cased character in S, False otherwise.");
/* string_isupper: C implementation registered as "isupper".
 *
 * The buffer is viewed as unsigned char.  A one-byte string is answered
 * directly from isupper(*p); an empty string answers False.  Otherwise the
 * scan answers False on a disqualifying byte, records in `cased` that an
 * uppercase byte was seen, and finally returns PyBool_FromLong(cased), so a
 * string with no cased byte at all is False.
 */
3791 string_isupper(PyStringObject
*self
)
3793 register const unsigned char *p
3794 = (unsigned char *) PyString_AS_STRING(self
);
3795 register const unsigned char *e
;
3798 /* Shortcut for single character strings */
3799 if (PyString_GET_SIZE(self
) == 1)
3800 return PyBool_FromLong(isupper(*p
) != 0);
3802 /* Special case for empty strings */
3803 if (PyString_GET_SIZE(self
) == 0)
3804 return PyBool_FromLong(0);
3806 e
= p
+ PyString_GET_SIZE(self
);
3808 for (; p
< e
; p
++) {
3810 return PyBool_FromLong(0);
3811 else if (!cased
&& isupper(*p
))
3814 return PyBool_FromLong(cased
);
3818 PyDoc_STRVAR(istitle__doc__
,
3819 "S.istitle() -> bool\n\
3821 Return True if S is a titlecased string and there is at least one\n\
3822 character in S, i.e. uppercase characters may only follow uncased\n\
3823 characters and lowercase characters only cased ones. Return False\n\
/* string_istitle: C implementation registered as "istitle".
 *
 * A one-byte string is answered from isupper(*p); an empty string answers
 * False.  Otherwise each byte is examined while previous_is_cased remembers
 * whether the byte before it was cased: the first branch answers False when
 * the previous byte was cased, the islower() branch answers False when it
 * was not, and any other byte resets previous_is_cased to 0.  The final
 * answer is PyBool_FromLong(cased).
 *
 * NOTE(review): the second parameter `uncased` is not referenced in the
 * lines shown, and the method table registers this function as METH_NOARGS.
 */
3827 string_istitle(PyStringObject
*self
, PyObject
*uncased
)
3829 register const unsigned char *p
3830 = (unsigned char *) PyString_AS_STRING(self
);
3831 register const unsigned char *e
;
3832 int cased
, previous_is_cased
;
3834 /* Shortcut for single character strings */
3835 if (PyString_GET_SIZE(self
) == 1)
3836 return PyBool_FromLong(isupper(*p
) != 0);
3838 /* Special case for empty strings */
3839 if (PyString_GET_SIZE(self
) == 0)
3840 return PyBool_FromLong(0);
3842 e
= p
+ PyString_GET_SIZE(self
);
3844 previous_is_cased
= 0;
3845 for (; p
< e
; p
++) {
3846 register const unsigned char ch
= *p
;
3849 if (previous_is_cased
)
3850 return PyBool_FromLong(0);
3851 previous_is_cased
= 1;
3854 else if (islower(ch
)) {
3855 if (!previous_is_cased
)
3856 return PyBool_FromLong(0);
3857 previous_is_cased
= 1;
3861 previous_is_cased
= 0;
3863 return PyBool_FromLong(cased
);
3867 PyDoc_STRVAR(splitlines__doc__
,
3868 "S.splitlines([keepends]) -> list of strings\n\
3870 Return a list of the lines in S, breaking at line boundaries.\n\
3871 Line breaks are not included in the resulting list unless keepends\n\
3872 is given and true.");
/* string_splitlines: C implementation registered as "splitlines"
 * ("|i:splitlines").
 *
 * Starts from an empty list (PyList_New(0)) and scans the buffer with two
 * indices: j marks the start of the current line and i advances to the next
 * '\n' or '\r' (or to len).  A "\r" followed by another byte is examined so
 * that CRLF counts as a single break.  Each line is added with
 * SPLIT_APPEND(data, j, eol); a final SPLIT_APPEND(data, j, len) covers the
 * text after the last break.
 */
3875 string_splitlines(PyStringObject
*self
, PyObject
*args
)
3877 register Py_ssize_t i
;
3878 register Py_ssize_t j
;
3885 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
3888 data
= PyString_AS_STRING(self
);
3889 len
= PyString_GET_SIZE(self
);
3891 /* This does not use the preallocated list because splitlines is
3892 usually run with hundreds of newlines. The overhead of
3893 switching between PyList_SET_ITEM and append causes about a
3894 2-3% slowdown for that common case. A smarter implementation
3895 could move the if check out, so the SET_ITEMs are done first
3896 and the appends only done when the prealloc buffer is full.
3897 That's too much work for little gain.*/
3899 list
= PyList_New(0);
3903 for (i
= j
= 0; i
< len
; ) {
3906 /* Find a line and append it */
3907 while (i
< len
&& data
[i
] != '\n' && data
[i
] != '\r')
3910 /* Skip the line break reading CRLF as one line break */
3913 if (data
[i
] == '\r' && i
+ 1 < len
&&
3921 SPLIT_APPEND(data
, j
, eol
);
3925 SPLIT_APPEND(data
, j
, len
);
3935 PyDoc_STRVAR(sizeof__doc__
,
3936 "S.__sizeof__() -> size of S in memory, in bytes");
/* string_sizeof: C implementation registered as "__sizeof__".
 *
 * Reports sizeof(PyStringObject) plus ob_size items of the type's
 * tp_itemsize each, returned as a Python int via PyInt_FromSsize_t().
 */
3939 string_sizeof(PyStringObject
*v
)
3942 res
= sizeof(PyStringObject
) + v
->ob_size
* v
->ob_type
->tp_itemsize
;
3943 return PyInt_FromSsize_t(res
);
3949 #undef PREALLOC_SIZE
/* string_getnewargs: C implementation registered as "__getnewargs__".
 *
 * Builds a 1-tuple holding a str made from the object's ob_sval buffer and
 * its Py_SIZE() length ("(s#)").  The file defines PY_SSIZE_T_CLEAN, so the
 * length argument of "s#" is a Py_ssize_t.
 */
3952 string_getnewargs(PyStringObject
*v
)
3954 return Py_BuildValue("(s#)", v
->ob_sval
, Py_SIZE(v
));
3958 #include "stringlib/string_format.h"
3960 PyDoc_STRVAR(format__doc__
,
3961 "S.format(*args, **kwargs) -> unicode\n\
/* string__format__: C implementation registered as "__format__".
 *
 * Parses one object ("O:__format__").  A format_spec that is neither str
 * nor unicode raises TypeError naming the offending type.  The spec is then
 * converted with PyObject_Str() into tmp, and the formatting itself is
 * _PyBytes_FormatAdvanced(self, <spec buffer>, <spec size>).
 *
 * NOTE(review): the buffer and size handed to _PyBytes_FormatAdvanced() are
 * read from format_spec with the PyString_* macros; presumably format_spec
 * is rebound to tmp first -- confirm, since a unicode spec must not reach
 * those macros.
 */
3966 string__format__(PyObject
* self
, PyObject
* args
)
3968 PyObject
*format_spec
;
3969 PyObject
*result
= NULL
;
3970 PyObject
*tmp
= NULL
;
3972 /* If 2.x, convert format_spec to the same type as value */
3973 /* This is to allow things like u''.format('') */
3974 if (!PyArg_ParseTuple(args
, "O:__format__", &format_spec
))
3976 if (!(PyString_Check(format_spec
) || PyUnicode_Check(format_spec
))) {
3977 PyErr_Format(PyExc_TypeError
, "__format__ arg must be str "
3978 "or unicode, not %s", Py_TYPE(format_spec
)->tp_name
);
3981 tmp
= PyObject_Str(format_spec
);
3986 result
= _PyBytes_FormatAdvanced(self
,
3987 PyString_AS_STRING(format_spec
),
3988 PyString_GET_SIZE(format_spec
));
3994 PyDoc_STRVAR(p_format__doc__
,
3995 "S.__format__(format_spec) -> unicode\n\
/* string_methods: method table of the str type, installed as tp_methods of
 * PyString_Type.
 *
 * Each entry gives the Python-level name, the C function (cast to
 * PyCFunction), the calling-convention flags and, for most entries, the
 * docstring object.  METH_NOARGS entries take only self, METH_O entries
 * take exactly one argument, METH_VARARGS entries receive an argument
 * tuple, and "format" additionally accepts keywords.  The table ends with
 * a {NULL, NULL} sentinel.
 */
4001 string_methods
[] = {
4002 /* Counterparts of the obsolete stropmodule functions; except
4003 string.maketrans(). */
4004 {"join", (PyCFunction
)string_join
, METH_O
, join__doc__
},
4005 {"split", (PyCFunction
)string_split
, METH_VARARGS
, split__doc__
},
4006 {"rsplit", (PyCFunction
)string_rsplit
, METH_VARARGS
, rsplit__doc__
},
4007 {"lower", (PyCFunction
)string_lower
, METH_NOARGS
, lower__doc__
},
4008 {"upper", (PyCFunction
)string_upper
, METH_NOARGS
, upper__doc__
},
4009 {"islower", (PyCFunction
)string_islower
, METH_NOARGS
, islower__doc__
},
4010 {"isupper", (PyCFunction
)string_isupper
, METH_NOARGS
, isupper__doc__
},
4011 {"isspace", (PyCFunction
)string_isspace
, METH_NOARGS
, isspace__doc__
},
4012 {"isdigit", (PyCFunction
)string_isdigit
, METH_NOARGS
, isdigit__doc__
},
4013 {"istitle", (PyCFunction
)string_istitle
, METH_NOARGS
, istitle__doc__
},
4014 {"isalpha", (PyCFunction
)string_isalpha
, METH_NOARGS
, isalpha__doc__
},
4015 {"isalnum", (PyCFunction
)string_isalnum
, METH_NOARGS
, isalnum__doc__
},
4016 {"capitalize", (PyCFunction
)string_capitalize
, METH_NOARGS
,
4018 {"count", (PyCFunction
)string_count
, METH_VARARGS
, count__doc__
},
4019 {"endswith", (PyCFunction
)string_endswith
, METH_VARARGS
,
4021 {"partition", (PyCFunction
)string_partition
, METH_O
, partition__doc__
},
4022 {"find", (PyCFunction
)string_find
, METH_VARARGS
, find__doc__
},
4023 {"index", (PyCFunction
)string_index
, METH_VARARGS
, index__doc__
},
4024 {"lstrip", (PyCFunction
)string_lstrip
, METH_VARARGS
, lstrip__doc__
},
4025 {"replace", (PyCFunction
)string_replace
, METH_VARARGS
, replace__doc__
},
4026 {"rfind", (PyCFunction
)string_rfind
, METH_VARARGS
, rfind__doc__
},
4027 {"rindex", (PyCFunction
)string_rindex
, METH_VARARGS
, rindex__doc__
},
4028 {"rstrip", (PyCFunction
)string_rstrip
, METH_VARARGS
, rstrip__doc__
},
4029 {"rpartition", (PyCFunction
)string_rpartition
, METH_O
,
4031 {"startswith", (PyCFunction
)string_startswith
, METH_VARARGS
,
4033 {"strip", (PyCFunction
)string_strip
, METH_VARARGS
, strip__doc__
},
4034 {"swapcase", (PyCFunction
)string_swapcase
, METH_NOARGS
,
4036 {"translate", (PyCFunction
)string_translate
, METH_VARARGS
,
4038 {"title", (PyCFunction
)string_title
, METH_NOARGS
, title__doc__
},
4039 {"ljust", (PyCFunction
)string_ljust
, METH_VARARGS
, ljust__doc__
},
4040 {"rjust", (PyCFunction
)string_rjust
, METH_VARARGS
, rjust__doc__
},
4041 {"center", (PyCFunction
)string_center
, METH_VARARGS
, center__doc__
},
4042 {"zfill", (PyCFunction
)string_zfill
, METH_VARARGS
, zfill__doc__
},
4043 {"format", (PyCFunction
) do_string_format
, METH_VARARGS
| METH_KEYWORDS
, format__doc__
},
4044 {"__format__", (PyCFunction
) string__format__
, METH_VARARGS
, p_format__doc__
},
4045 {"_formatter_field_name_split", (PyCFunction
) formatter_field_name_split
, METH_NOARGS
},
4046 {"_formatter_parser", (PyCFunction
) formatter_parser
, METH_NOARGS
},
4047 {"encode", (PyCFunction
)string_encode
, METH_VARARGS
, encode__doc__
},
4048 {"decode", (PyCFunction
)string_decode
, METH_VARARGS
, decode__doc__
},
4049 {"expandtabs", (PyCFunction
)string_expandtabs
, METH_VARARGS
,
4051 {"splitlines", (PyCFunction
)string_splitlines
, METH_VARARGS
,
4053 {"__sizeof__", (PyCFunction
)string_sizeof
, METH_NOARGS
,
4055 {"__getnewargs__", (PyCFunction
)string_getnewargs
, METH_NOARGS
},
4056 {NULL
, NULL
} /* sentinel */
/* Forward declaration: string_new() below hands subclass construction to
 * str_subtype_new(), which is defined after it.
 */
4060 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
);
/* string_new: tp_new slot of PyString_Type.
 *
 * Any type other than PyString_Type itself is delegated to
 * str_subtype_new().  For the exact type the single optional argument is
 * parsed with "|O:str" (keyword name "object"); the function then returns
 * either PyString_FromString("") or PyObject_Str(x).
 * NOTE(review): the empty-string return is presumably taken when no
 * argument was supplied -- confirm against the condition guarding it.
 */
4063 string_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4066 static char *kwlist
[] = {"object", 0};
4068 if (type
!= &PyString_Type
)
4069 return str_subtype_new(type
, args
, kwds
);
4070 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|O:str", kwlist
, &x
))
4073 return PyString_FromString("");
4074 return PyObject_Str(x
);
/* str_subtype_new: construct an instance of a str subclass.
 *
 * Asserts that `type` is a subtype of PyString_Type, builds a plain str
 * first by calling string_new(&PyString_Type, args, kwds), and asserts the
 * result is an exact str.  A new object with room for n = len(tmp) items is
 * then obtained from type->tp_alloc(); n + 1 bytes are copied so the
 * trailing NUL comes along, the cached hash (ob_shash) is carried over, and
 * the new object is marked SSTATE_NOT_INTERNED.
 */
4078 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4080 PyObject
*tmp
, *pnew
;
4083 assert(PyType_IsSubtype(type
, &PyString_Type
));
4084 tmp
= string_new(&PyString_Type
, args
, kwds
);
4087 assert(PyString_CheckExact(tmp
));
4088 n
= PyString_GET_SIZE(tmp
);
4089 pnew
= type
->tp_alloc(type
, n
);
4091 Py_MEMCPY(PyString_AS_STRING(pnew
), PyString_AS_STRING(tmp
), n
+1);
4092 ((PyStringObject
*)pnew
)->ob_shash
=
4093 ((PyStringObject
*)tmp
)->ob_shash
;
4094 ((PyStringObject
*)pnew
)->ob_sstate
= SSTATE_NOT_INTERNED
;
/* basestring_new: tp_new slot of PyBaseString_Type.
 *
 * Ignores its arguments and sets TypeError("The basestring type cannot be
 * instantiated").
 */
4101 basestring_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4103 PyErr_SetString(PyExc_TypeError
,
4104 "The basestring type cannot be instantiated");
/* string_mod: nb_remainder slot of the str number table.
 *
 * When the left operand v is not a str, a new reference to
 * Py_NotImplemented is returned; otherwise the work is done by
 * PyString_Format(v, w).
 */
4109 string_mod(PyObject
*v
, PyObject
*w
)
4111 if (!PyString_Check(v
)) {
4112 Py_INCREF(Py_NotImplemented
);
4113 return Py_NotImplemented
;
4115 return PyString_Format(v
, w
);
4118 PyDoc_STRVAR(basestring_doc
,
4119 "Type basestring cannot be instantiated; it is the base for str and unicode.");
/* string_as_number: number-protocol table referenced by PyString_Type's
 * tp_as_number slot.  The entry shown installs string_mod as nb_remainder.
 */
4121 static PyNumberMethods string_as_number
= {
4126 string_mod
, /*nb_remainder*/
/* PyBaseString_Type: type object for basestring.
 *
 * Among the slots shown: no number/sequence/mapping/buffer protocols, flags
 * Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE (so it can be subclassed),
 * basestring_doc as tp_doc, PyBaseObject_Type as tp_base, and
 * basestring_new as tp_new.
 */
4130 PyTypeObject PyBaseString_Type
= {
4131 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
4141 0, /* tp_as_number */
4142 0, /* tp_as_sequence */
4143 0, /* tp_as_mapping */
4147 0, /* tp_getattro */
4148 0, /* tp_setattro */
4149 0, /* tp_as_buffer */
4150 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
4151 basestring_doc
, /* tp_doc */
4152 0, /* tp_traverse */
4154 0, /* tp_richcompare */
4155 0, /* tp_weaklistoffset */
4157 0, /* tp_iternext */
4161 &PyBaseObject_Type
, /* tp_base */
4163 0, /* tp_descr_get */
4164 0, /* tp_descr_set */
4165 0, /* tp_dictoffset */
4168 basestring_new
, /* tp_new */
4172 PyDoc_STRVAR(string_doc
,
4173 "str(object) -> string\n\
4175 Return a nice string representation of the object.\n\
4176 If the argument is a string, the return value is the same object.");
/* PyString_Type: type object for str.
 *
 * Among the slots shown: sizeof(PyStringObject) as a size field,
 * string_dealloc / string_print / string_repr / string_hash / string_str,
 * the number, sequence, mapping and buffer tables defined in this file,
 * PyObject_GenericGetAttr for attribute lookup, string_richcompare,
 * string_methods as tp_methods, PyBaseString_Type as tp_base, string_new as
 * tp_new and PyObject_Del as tp_free.  The flags combine
 * Py_TPFLAGS_DEFAULT, CHECKTYPES, BASETYPE, STRING_SUBCLASS and
 * HAVE_NEWBUFFER.
 */
4178 PyTypeObject PyString_Type
= {
4179 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
4181 sizeof(PyStringObject
),
4183 string_dealloc
, /* tp_dealloc */
4184 (printfunc
)string_print
, /* tp_print */
4188 string_repr
, /* tp_repr */
4189 &string_as_number
, /* tp_as_number */
4190 &string_as_sequence
, /* tp_as_sequence */
4191 &string_as_mapping
, /* tp_as_mapping */
4192 (hashfunc
)string_hash
, /* tp_hash */
4194 string_str
, /* tp_str */
4195 PyObject_GenericGetAttr
, /* tp_getattro */
4196 0, /* tp_setattro */
4197 &string_as_buffer
, /* tp_as_buffer */
4198 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_CHECKTYPES
|
4199 Py_TPFLAGS_BASETYPE
| Py_TPFLAGS_STRING_SUBCLASS
|
4200 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
4201 string_doc
, /* tp_doc */
4202 0, /* tp_traverse */
4204 (richcmpfunc
)string_richcompare
, /* tp_richcompare */
4205 0, /* tp_weaklistoffset */
4207 0, /* tp_iternext */
4208 string_methods
, /* tp_methods */
4211 &PyBaseString_Type
, /* tp_base */
4213 0, /* tp_descr_get */
4214 0, /* tp_descr_set */
4215 0, /* tp_dictoffset */
4218 string_new
, /* tp_new */
4219 PyObject_Del
, /* tp_free */
/* PyString_Concat: concatenate w onto the string referenced by *pv.
 *
 * A NULL w, or a *pv that is not a str, takes a separate early branch.
 * Otherwise the concatenation is computed by
 * string_concat((PyStringObject *)*pv, w) into v.
 * NOTE(review): what happens to the old *pv and how v is stored back is not
 * shown in these lines -- check before relying on reference ownership.
 */
4223 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
4225 register PyObject
*v
;
4228 if (w
== NULL
|| !PyString_Check(*pv
)) {
4233 v
= string_concat((PyStringObject
*) *pv
, w
);
/* PyString_ConcatAndDel: wrapper that performs PyString_Concat(pv, w).
 * NOTE(review): by its name it presumably also releases the caller's
 * reference to w afterwards -- TODO confirm.
 */
4239 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
4241 PyString_Concat(pv
, w
);
4246 /* The following function breaks the notion that strings are immutable:
4247 it changes the size of a string. We get away with this only if there
4248 is only one module referencing the object. You can also think of it
4249 as creating a new string object and destroying the old one, only
4250 more efficiently. In any case, don't use this if the string may
4251 already be known to some other part of the code...
4252 Note that if there's not enough memory to resize the string, the original
4253 string object at *pv is deallocated, *pv is set to NULL, an "out of
4254 memory" exception is set, and -1 is returned. Else (on success) 0 is
4255 returned, and the value in *pv may or may not be the same as on input.
4256 As always, an extra byte is allocated for a trailing \0 byte (newsize
4257 does *not* include that), and a trailing \0 byte is stored. */
/* _PyString_Resize: change the length of the string at *pv in place.
 *
 * Preconditions checked here: the object must be a str, hold exactly one
 * reference, not be interned, and newsize must be non-negative; otherwise
 * PyErr_BadInternalCall() is raised.  The object is then dropped from
 * reference tracking (_Py_ForgetReference), reallocated with
 * PyObject_REALLOC to sizeof(PyStringObject) + newsize bytes, and
 * re-registered (_Py_NewReference).  Finally the size is updated, a NUL is
 * stored at ob_sval[newsize], and the cached hash is reset to -1.
 */
4261 _PyString_Resize(PyObject
**pv
, Py_ssize_t newsize
)
4263 register PyObject
*v
;
4264 register PyStringObject
*sv
;
4266 if (!PyString_Check(v
) || Py_REFCNT(v
) != 1 || newsize
< 0 ||
4267 PyString_CHECK_INTERNED(v
)) {
4270 PyErr_BadInternalCall();
4273 /* XXX UNREF/NEWREF interface should be more symmetrical */
4275 _Py_ForgetReference(v
);
4277 PyObject_REALLOC((char *)v
, sizeof(PyStringObject
) + newsize
);
4283 _Py_NewReference(*pv
);
4284 sv
= (PyStringObject
*) *pv
;
4285 Py_SIZE(sv
) = newsize
;
4286 sv
->ob_sval
[newsize
] = '\0';
4287 sv
->ob_shash
= -1; /* invalidate cached hash value */
4291 /* Helpers for formatstring */
/* getnextarg: fetch the next format argument.
 *
 * Reads the current index from *p_argidx.  While it is below arglen the
 * argument is taken from the tuple with PyTuple_GetItem(args, argidx);
 * once the arguments are exhausted, TypeError("not enough arguments for
 * format string") is set.
 */
4293 Py_LOCAL_INLINE(PyObject
*)
4294 getnextarg(PyObject
*args
, Py_ssize_t arglen
, Py_ssize_t
*p_argidx
)
4296 Py_ssize_t argidx
= *p_argidx
;
4297 if (argidx
< arglen
) {
4302 return PyTuple_GetItem(args
, argidx
);
4304 PyErr_SetString(PyExc_TypeError
,
4305 "not enough arguments for format string");
/* Single-bit flags carried in the `flags` argument of the format helpers
 * below; F_ALT is the one tested by formatfloat(), formatint() and
 * _PyString_FormatLong().
 * NOTE(review): the others presumably mirror the printf-style '-', '+',
 * ' ' and '0' conversion flags -- confirm at the parsing site.
 */
4316 #define F_LJUST (1<<0)
4317 #define F_SIGN (1<<1)
4318 #define F_BLANK (1<<2)
4319 #define F_ALT (1<<3)
4320 #define F_ZERO (1<<4)
/* formatfloat: render v as a float into buf for a float conversion.
 *
 * v is converted with PyFloat_AsDouble(); a failure is re-reported as
 * TypeError("float argument required, not <type>").  For type 'f' there is
 * a separate test on values whose magnitude reaches 1e50
 * (fabs(x)/1e25 >= 1e25).  Before formatting, buflen is checked against
 * the worst-case output length for the requested precision; too small a
 * buffer raises OverflowError.  The printf-style format is assembled with
 * PyOS_snprintf() (adding '#' when F_ALT is set) and applied with
 * PyOS_ascii_formatd().  Returns strlen(buf).
 */
4322 Py_LOCAL_INLINE(int)
4323 formatfloat(char *buf
, size_t buflen
, int flags
,
4324 int prec
, int type
, PyObject
*v
)
4326 /* fmt = '%#.' + `prec` + `type`
4327 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4330 x
= PyFloat_AsDouble(v
);
4331 if (x
== -1.0 && PyErr_Occurred()) {
4332 PyErr_Format(PyExc_TypeError
, "float argument required, "
4333 "not %.200s", Py_TYPE(v
)->tp_name
);
4338 if (type
== 'f' && fabs(x
)/1e25
>= 1e25
)
4340 /* Worst case length calc to ensure no buffer overrun:
4344 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4345 for any double rep.)
4346 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4349 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4350 len = 1 + 50 + 1 + prec = 52 + prec
4352 If prec=0 the effective precision is 1 (the leading digit is
4353 always given), therefore increase the length by one.
4356 if (((type
== 'g' || type
== 'G') &&
4357 buflen
<= (size_t)10 + (size_t)prec
) ||
4358 (type
== 'f' && buflen
<= (size_t)53 + (size_t)prec
)) {
4359 PyErr_SetString(PyExc_OverflowError
,
4360 "formatted float is too long (precision too large?)");
4363 PyOS_snprintf(fmt
, sizeof(fmt
), "%%%s.%d%c",
4364 (flags
&F_ALT
) ? "#" : "",
4366 PyOS_ascii_formatd(buf
, buflen
, fmt
, x
);
4367 return (int)strlen(buf
);
4370 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4371 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4372 * Python's regular ints.
4373 * Return value: a new PyString*, or NULL if error.
4374 * . *pbuf is set to point into it,
4375 * *plen set to the # of chars following that.
4376 * Caller must decref it when done using pbuf.
4377 * The string starting at *pbuf is of the form
4378 * "-"? ("0x" | "0X")? digit+
4379 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4380 * set in flags. The case of hex digits will be correct,
4381 * There will be at least prec digits, zero-filled on the left if
4382 * necessary to get that many.
4383 * val object to be converted
4384 * flags bitmask of format flags; only F_ALT is looked at
4385 * prec minimum number of digits; 0-fill on left if needed
4386 * type a character in [duoxX]; u acts the same as d
4388 * CAUTION: o, x and X conversions on regular ints can never
4389 * produce a '-' sign, but can for Python's unbounded ints. */
/* _PyString_FormatLong: implementation outline.
 *
 *  1. Produce the raw text with the value's own tp_str, nb_oct or nb_hex
 *     slot, depending on `type`.
 *  2. Fetch its buffer (PyString_AsString) and insist on
 *     Py_REFCNT(result) == 1, because the text is edited in place.
 *  3. Reject lengths above INT_MAX with ValueError; a trailing 'L' is
 *     detected through buf[len-1].
 *  4. Split the text into sign/prefix (numnondigits) and digits
 *     (numdigits); without F_ALT the base marker is removed, except that a
 *     lone 0 digit is left alone.
 *  5. If prec exceeds numdigits, build a new string of numnondigits + prec
 *     bytes: prefix, then prec - numdigits fill characters, then the
 *     digits.
 *  6. For upper-case hex, map the letters 'a'..'x' to upper case, which
 *     also turns "0x" into "0X".
 */
4392 _PyString_FormatLong(PyObject
*val
, int flags
, int prec
, int type
,
4393 char **pbuf
, int *plen
)
4395 PyObject
*result
= NULL
;
4398 int sign
; /* 1 if '-', else 0 */
4399 int len
; /* number of characters */
4401 int numdigits
; /* len == numnondigits + numdigits */
4402 int numnondigits
= 0;
4407 result
= Py_TYPE(val
)->tp_str(val
);
4410 result
= Py_TYPE(val
)->tp_as_number
->nb_oct(val
);
4415 result
= Py_TYPE(val
)->tp_as_number
->nb_hex(val
);
4418 assert(!"'type' not in [duoxX]");
4423 buf
= PyString_AsString(result
);
4429 /* To modify the string in-place, there can only be one reference. */
4430 if (Py_REFCNT(result
) != 1) {
4431 PyErr_BadInternalCall();
4434 llen
= PyString_Size(result
);
4435 if (llen
> INT_MAX
) {
4436 PyErr_SetString(PyExc_ValueError
, "string too large in _PyString_FormatLong");
4440 if (buf
[len
-1] == 'L') {
4444 sign
= buf
[0] == '-';
4445 numnondigits
+= sign
;
4446 numdigits
= len
- numnondigits
;
4447 assert(numdigits
> 0);
4449 /* Get rid of base marker unless F_ALT */
4450 if ((flags
& F_ALT
) == 0) {
4451 /* Need to skip 0x, 0X or 0. */
4455 assert(buf
[sign
] == '0');
4456 /* If 0 is only digit, leave it alone. */
4457 if (numdigits
> 1) {
4464 assert(buf
[sign
] == '0');
4465 assert(buf
[sign
+ 1] == 'x');
4476 assert(len
== numnondigits
+ numdigits
);
4477 assert(numdigits
> 0);
4480 /* Fill with leading zeroes to meet minimum width. */
4481 if (prec
> numdigits
) {
4482 PyObject
*r1
= PyString_FromStringAndSize(NULL
,
4483 numnondigits
+ prec
);
4489 b1
= PyString_AS_STRING(r1
);
4490 for (i
= 0; i
< numnondigits
; ++i
)
4492 for (i
= 0; i
< prec
- numdigits
; i
++)
4494 for (i
= 0; i
< numdigits
; i
++)
4499 buf
= PyString_AS_STRING(result
);
4500 len
= numnondigits
+ prec
;
4503 /* Fix up case for hex conversions. */
4505 /* Need to convert all lower case letters to upper case.
4506 and need to convert 0x to 0X (and -0x to -0X). */
4507 for (i
= 0; i
< len
; i
++)
4508 if (buf
[i
] >= 'a' && buf
[i
] <= 'x')
/* formatint: render a regular Python int into buf for an integer
 * conversion.
 *
 * v is converted with PyInt_AsLong(); a failure is re-reported as
 * TypeError("int argument required, not <type>").  Negative values get
 * special handling for 'u' and for 'x'/'X'/'o', where the sign is emitted
 * as text and the magnitude is formatted.  With F_ALT and a hex type, the
 * "0x"/"0X" prefix is written by this function itself (format
 * "%s0%c%%.%dl%c") so that the result does not depend on the platform's
 * handling of "%#x"; otherwise the generic "%s%%%s.%dl%c" pattern is used.
 * buflen must exceed both 14 and 3 + prec, else OverflowError.  Returns
 * strlen(buf).
 *
 * NOTE(review): the negative path passes `-x` to PyOS_snprintf(); if x can
 * be the most negative long, that negation overflows -- confirm whether
 * that value can reach this branch.
 */
4516 Py_LOCAL_INLINE(int)
4517 formatint(char *buf
, size_t buflen
, int flags
,
4518 int prec
, int type
, PyObject
*v
)
4520 /* fmt = '%#.' + `prec` + 'l' + `type`
4521 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4523 char fmt
[64]; /* plenty big enough! */
4527 x
= PyInt_AsLong(v
);
4528 if (x
== -1 && PyErr_Occurred()) {
4529 PyErr_Format(PyExc_TypeError
, "int argument required, not %.200s",
4530 Py_TYPE(v
)->tp_name
);
4533 if (x
< 0 && type
== 'u') {
4536 if (x
< 0 && (type
== 'x' || type
== 'X' || type
== 'o'))
4543 if ((flags
& F_ALT
) &&
4544 (type
== 'x' || type
== 'X')) {
4545 /* When converting under %#x or %#X, there are a number
4546 * of issues that cause pain:
4547 * - when 0 is being converted, the C standard leaves off
4548 * the '0x' or '0X', which is inconsistent with other
4549 * %#x/%#X conversions and inconsistent with Python's
4551 * - there are platforms that violate the standard and
4552 * convert 0 with the '0x' or '0X'
4553 * (Metrowerks, Compaq Tru64)
4554 * - there are platforms that give '0x' when converting
4555 * under %#X, but convert 0 in accordance with the
4556 * standard (OS/2 EMX)
4558 * We can achieve the desired consistency by inserting our
4559 * own '0x' or '0X' prefix, and substituting %x/%X in place
4562 * Note that this is the same approach as used in
4563 * formatint() in unicodeobject.c
4565 PyOS_snprintf(fmt
, sizeof(fmt
), "%s0%c%%.%dl%c",
4566 sign
, type
, prec
, type
);
4569 PyOS_snprintf(fmt
, sizeof(fmt
), "%s%%%s.%dl%c",
4570 sign
, (flags
&F_ALT
) ? "#" : "",
4574 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4575 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4577 if (buflen
<= 14 || buflen
<= (size_t)3 + (size_t)prec
) {
4578 PyErr_SetString(PyExc_OverflowError
,
4579 "formatted integer is too long (precision too large?)");
4583 PyOS_snprintf(buf
, buflen
, fmt
, -x
);
4585 PyOS_snprintf(buf
, buflen
, fmt
, x
);
4586 return (int)strlen(buf
);
/* formatchar: store the single character described by `v` in buf[0].
 *
 * A string object is parsed with the PyArg_Parse() format unit c.  The
 * listing also shows a second PyArg_Parse() call that uses the format unit
 * b; presumably that is the branch taken for non-string (integer)
 * arguments -- confirm against the upstream file.  Both calls carry the
 * same error text.  `buflen` is not referenced in the lines shown here.
 *
 * NOTE(review): lines are missing from this listing (braces, the keyword
 * joining the two branches, the return statements), so the return value
 * cannot be read off from this block.
 */
4589 Py_LOCAL_INLINE(int)
4590 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
4592 /* presume that the buffer is at least 2 characters long */
4593 if (PyString_Check(v
)) {
4594 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
4598 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
4605 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4607 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4608 chars are formatted. XXX This is a magic number. Each formatting
4609 routine does bounds checking to ensure no overflow, but a better
4610 solution may be to malloc a buffer of appropriate size for each
4611 format. For now, the current solution is sufficient.
/* Capacity, as a size_t, of the local `formatbuf` array declared inside
   PyString_Format(). */
4613 #define FORMATBUFLEN (size_t)120
/* PyString_Format: apply the %-style format string `format` to `args`,
 * building the output in a string object that is grown with
 * _PyString_Resize() when more room is needed.
 *
 * What the listed lines show:
 *   - `format` must be a non-NULL string object and `args` non-NULL,
 *     otherwise PyErr_BadInternalCall() is raised;
 *   - the result starts with room for the format length plus 100 bytes;
 *   - a specifier may carry a parenthesised mapping key (looked up with
 *     PyObject_GetItem), the flag characters - + space # 0, a width and a
 *     precision (written as digits or fetched with getnextarg), followed by
 *     a test for an h, l or L character;
 *   - values are converted through _PyObject_Str(), PyObject_Repr(),
 *     formatint(), _PyString_FormatLong(), formatfloat() or formatchar();
 *   - the error texts raised include "format requires a mapping",
 *     "incomplete format key", "incomplete format", the unsupported format
 *     character ValueError and "not all arguments converted during string
 *     formatting";
 *   - under Py_USING_UNICODE a unicode argument resets argidx to
 *     argidx_start, and the tail of the function decodes the remaining
 *     format text, passes it to PyUnicode_Format() and joins both parts
 *     with PyUnicode_Concat().
 *
 * NOTE(review): this listing is lossy.  Many source lines are absent
 * (declarations, braces, case labels, goto/return statements, the return
 * type), so control flow and the return value cannot be confirmed from this
 * block alone.  The statements are reproduced exactly as listed.
 */
4616 PyString_Format(PyObject
*format
, PyObject
*args
)
4619 Py_ssize_t arglen
, argidx
;
4620 Py_ssize_t reslen
, rescnt
, fmtcnt
;
4622 PyObject
*result
, *orig_args
;
4623 #ifdef Py_USING_UNICODE
4626 PyObject
*dict
= NULL
;
/* Reject a missing or non-string format and a missing argument object. */
4627 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
4628 PyErr_BadInternalCall();
4632 fmt
= PyString_AS_STRING(format
);
4633 fmtcnt
= PyString_GET_SIZE(format
);
/* Initial capacity: the length of the format plus 100 spare bytes. */
4634 reslen
= rescnt
= fmtcnt
+ 100;
4635 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
4638 res
= PyString_AsString(result
);
4639 if (PyTuple_Check(args
)) {
4640 arglen
= PyTuple_GET_SIZE(args
);
/* Argument that supports the mapping protocol and is neither a tuple nor a
   basestring; the statement guarded by this test is not in the listing. */
4647 if (Py_TYPE(args
)->tp_as_mapping
&& !PyTuple_Check(args
) &&
4648 !PyObject_TypeCheck(args
, &PyBaseString_Type
))
4650 while (--fmtcnt
>= 0) {
4653 rescnt
= fmtcnt
+ 100;
4655 if (_PyString_Resize(&result
, reslen
) < 0)
4657 res
= PyString_AS_STRING(result
)
4664 /* Got a format specifier */
4666 Py_ssize_t width
= -1;
4672 PyObject
*temp
= NULL
;
4676 char formatbuf
[FORMATBUFLEN
];
4677 /* For format{float,int,char}() */
4678 #ifdef Py_USING_UNICODE
4679 char *fmt_start
= fmt
;
4680 Py_ssize_t argidx_start
= argidx
;
4691 PyErr_SetString(PyExc_TypeError
,
4692 "format requires a mapping");
4698 /* Skip over balanced parentheses */
4699 while (pcount
> 0 && --fmtcnt
>= 0) {
4702 else if (*fmt
== '(')
4706 keylen
= fmt
- keystart
- 1;
4707 if (fmtcnt
< 0 || pcount
> 0) {
4708 PyErr_SetString(PyExc_ValueError
,
4709 "incomplete format key");
4712 key
= PyString_FromStringAndSize(keystart
,
4720 args
= PyObject_GetItem(dict
, key
);
/* Collect the conversion flags. */
4729 while (--fmtcnt
>= 0) {
4730 switch (c
= *fmt
++) {
4731 case '-': flags
|= F_LJUST
; continue;
4732 case '+': flags
|= F_SIGN
; continue;
4733 case ' ': flags
|= F_BLANK
; continue;
4734 case '#': flags
|= F_ALT
; continue;
4735 case '0': flags
|= F_ZERO
; continue;
/* Width supplied through the argument list: the argument must be an int. */
4740 v
= getnextarg(args
, arglen
, &argidx
);
4743 if (!PyInt_Check(v
)) {
4744 PyErr_SetString(PyExc_TypeError
,
4748 width
= PyInt_AsLong(v
);
/* Width written as decimal digits; the multiply-then-divide comparison is
   presumably an overflow guard (its error branch is not listed). */
4756 else if (c
>= 0 && isdigit(c
)) {
4758 while (--fmtcnt
>= 0) {
4759 c
= Py_CHARMASK(*fmt
++);
4762 if ((width
*10) / 10 != width
) {
4768 width
= width
*10 + (c
- '0');
/* Precision supplied through the argument list, checked like the width. */
4776 v
= getnextarg(args
, arglen
, &argidx
);
4779 if (!PyInt_Check(v
)) {
4785 prec
= PyInt_AsLong(v
);
4791 else if (c
>= 0 && isdigit(c
)) {
4793 while (--fmtcnt
>= 0) {
4794 c
= Py_CHARMASK(*fmt
++);
4797 if ((prec
*10) / 10 != prec
) {
4803 prec
= prec
*10 + (c
- '0');
4808 if (c
== 'h' || c
== 'l' || c
== 'L') {
4814 PyErr_SetString(PyExc_ValueError
,
4815 "incomplete format");
4819 v
= getnextarg(args
, arglen
, &argidx
);
4831 #ifdef Py_USING_UNICODE
4832 if (PyUnicode_Check(v
)) {
4834 argidx
= argidx_start
;
4838 temp
= _PyObject_Str(v
);
4839 #ifdef Py_USING_UNICODE
4840 if (temp
!= NULL
&& PyUnicode_Check(temp
)) {
4843 argidx
= argidx_start
;
4850 temp
= PyObject_Repr(v
);
4853 if (!PyString_Check(temp
)) {
4854 PyErr_SetString(PyExc_TypeError
,
4855 "%s argument has non-string str()");
4859 pbuf
= PyString_AS_STRING(temp
);
4860 len
= PyString_GET_SIZE(temp
);
4861 if (prec
>= 0 && len
> prec
)
/* Integer conversions: obtain an int or long object, then format it with
   formatint() or _PyString_FormatLong(). */
4873 if (PyNumber_Check(v
)) {
4874 PyObject
*iobj
=NULL
;
4876 if (PyInt_Check(v
) || (PyLong_Check(v
))) {
4881 iobj
= PyNumber_Int(v
);
4882 if (iobj
==NULL
) iobj
= PyNumber_Long(v
);
4885 if (PyInt_Check(iobj
)) {
4888 len
= formatint(pbuf
,
4890 flags
, prec
, c
, iobj
);
4896 else if (PyLong_Check(iobj
)) {
4900 temp
= _PyString_FormatLong(iobj
, flags
,
4901 prec
, c
, &pbuf
, &ilen
);
4914 PyErr_Format(PyExc_TypeError
,
4915 "%%%c format: a number is required, "
4916 "not %.200s", c
, Py_TYPE(v
)->tp_name
);
4931 len
= formatfloat(pbuf
, sizeof(formatbuf
),
4940 #ifdef Py_USING_UNICODE
4941 if (PyUnicode_Check(v
)) {
4943 argidx
= argidx_start
;
4948 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
4953 PyErr_Format(PyExc_ValueError
,
4954 "unsupported format character '%c' (0x%x) "
4957 (Py_ssize_t
)(fmt
- 1 -
4958 PyString_AsString(format
)));
4962 if (*pbuf
== '-' || *pbuf
== '+') {
4966 else if (flags
& F_SIGN
)
4968 else if (flags
& F_BLANK
)
4975 if (rescnt
- (sign
!= 0) < width
) {
4977 rescnt
= width
+ fmtcnt
+ 100;
4982 return PyErr_NoMemory();
4984 if (_PyString_Resize(&result
, reslen
) < 0) {
4988 res
= PyString_AS_STRING(result
)
4998 if ((flags
& F_ALT
) && (c
== 'x' || c
== 'X')) {
4999 assert(pbuf
[0] == '0');
5000 assert(pbuf
[1] == c
);
5011 if (width
> len
&& !(flags
& F_LJUST
)) {
5015 } while (--width
> len
);
5020 if ((flags
& F_ALT
) &&
5021 (c
== 'x' || c
== 'X')) {
5022 assert(pbuf
[0] == '0');
5023 assert(pbuf
[1] == c
);
5028 Py_MEMCPY(res
, pbuf
, len
);
5031 while (--width
>= len
) {
5035 if (dict
&& (argidx
< arglen
) && c
!= '%') {
5036 PyErr_SetString(PyExc_TypeError
,
5037 "not all arguments converted during string formatting");
/* Unused positional arguments with no mapping in use raise TypeError. */
5044 if (argidx
< arglen
&& !dict
) {
5045 PyErr_SetString(PyExc_TypeError
,
5046 "not all arguments converted during string formatting");
/* Shrink the result by rescnt bytes, presumably the room left unused. */
5052 _PyString_Resize(&result
, reslen
- rescnt
);
5055 #ifdef Py_USING_UNICODE
5061 /* Fiddle args right (remove the first argidx arguments) */
5062 if (PyTuple_Check(orig_args
) && argidx
> 0) {
5064 Py_ssize_t n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
5069 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
5071 PyTuple_SET_ITEM(v
, n
, w
);
5075 Py_INCREF(orig_args
);
5079 /* Take what we have of the result and let the Unicode formatting
5080 function format the rest of the input. */
5081 rescnt
= res
- PyString_AS_STRING(result
);
5082 if (_PyString_Resize(&result
, rescnt
))
5084 fmtcnt
= PyString_GET_SIZE(format
) - \
5085 (fmt
- PyString_AS_STRING(format
));
5086 format
= PyUnicode_Decode(fmt
, fmtcnt
, NULL
, NULL
);
5089 v
= PyUnicode_Format(format
, args
);
5093 /* Paste what we have (result) to what the Unicode formatting
5094 function returned (v) and return the result (or error) */
5095 w
= PyUnicode_Concat(result
, v
);
5100 #endif /* Py_USING_UNICODE */
/* PyString_InternInPlace: intern the string object that *p refers to.
 *
 * Behaviour visible in this listing:
 *   - a NULL or non-string *p is a fatal error (Py_FatalError);
 *   - there are early tests for string subclasses and for strings that are
 *     already interned (the statements they guard are not listed);
 *   - the `interned` dictionary is created on first use; if that creation
 *     fails the pending exception is cleared;
 *   - the string is looked up in `interned` and, in the call shown, stored
 *     there with itself as both key and value;
 *   - finally the string state is set to SSTATE_INTERNED_MORTAL.
 *
 * NOTE(review): lines are missing from this listing (the declaration of
 * `t`, what happens when the lookup finds an entry, the error handling
 * after PyDict_SetItem, and any reference-count adjustment announced by the
 * comment near the end).  Confirm those against the upstream file.
 */
5111 PyString_InternInPlace(PyObject
**p
)
5113 register PyStringObject
*s
= (PyStringObject
*)(*p
);
5115 if (s
== NULL
|| !PyString_Check(s
))
5116 Py_FatalError("PyString_InternInPlace: strings only please!");
5117 /* If it's a string subclass, we don't really know what putting
5118 it in the interned dict might do. */
5119 if (!PyString_CheckExact(s
))
5121 if (PyString_CHECK_INTERNED(s
))
5123 if (interned
== NULL
) {
5124 interned
= PyDict_New();
5125 if (interned
== NULL
) {
5126 PyErr_Clear(); /* Don't leave an exception */
5130 t
= PyDict_GetItem(interned
, (PyObject
*)s
);
5138 if (PyDict_SetItem(interned
, (PyObject
*)s
, (PyObject
*)s
) < 0) {
5142 /* The two references in interned are not counted by refcnt.
5143 The string deallocator will take care of this */
5145 PyString_CHECK_INTERNED(s
) = SSTATE_INTERNED_MORTAL
;
/* PyString_InternImmortal: intern *p through PyString_InternInPlace() and
 * then, unless its state already is SSTATE_INTERNED_IMMORTAL, set the state
 * to SSTATE_INTERNED_IMMORTAL.
 *
 * NOTE(review): this listing omits lines (braces and possibly a statement
 * inside the if-body ahead of the assignment); confirm against upstream.
 */
5149 PyString_InternImmortal(PyObject
**p
)
5151 PyString_InternInPlace(p
);
5152 if (PyString_CHECK_INTERNED(*p
) != SSTATE_INTERNED_IMMORTAL
) {
5153 PyString_CHECK_INTERNED(*p
) = SSTATE_INTERNED_IMMORTAL
;
/* PyString_InternFromString: build a string object from the C string `cp`
 * with PyString_FromString() and pass it to PyString_InternInPlace().
 *
 * NOTE(review): the return type, any check of the PyString_FromString()
 * result and the return statement are not part of this listing; presumably
 * the interned object is returned -- confirm against upstream.
 */
5160 PyString_InternFromString(const char *cp
)
5162 PyObject
*s
= PyString_FromString(cp
);
5165 PyString_InternInPlace(&s
);
/* Cleanup fragment: drops the reference held by every slot of the
 * `characters` array (indices 0 through UCHAR_MAX), resets each slot to
 * NULL, then drops the reference held in `nullstring`.
 *
 * NOTE(review): the header of the enclosing function and the declaration of
 * `i` are missing from this listing; presumably this is the body of the
 * string type finalizer (PyString_Fini) -- confirm against upstream.
 */
5173 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
5174 Py_XDECREF(characters
[i
]);
5175 characters
[i
] = NULL
;
5177 Py_XDECREF(nullstring
);
5181 void _Py_ReleaseInternedStrings(void)
5186 Py_ssize_t immortal_size
= 0, mortal_size
= 0;
5188 if (interned
== NULL
|| !PyDict_Check(interned
))
5190 keys
= PyDict_Keys(interned
);
5191 if (keys
== NULL
|| !PyList_Check(keys
)) {
5196 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5197 detector, interned strings are not forcibly deallocated; rather, we
5198 give them their stolen references back, and then clear and DECREF
5199 the interned dict. */
5201 n
= PyList_GET_SIZE(keys
);
5202 fprintf(stderr
, "releasing %" PY_FORMAT_SIZE_T
"d interned strings\n",
5204 for (i
= 0; i
< n
; i
++) {
5205 s
= (PyStringObject
*) PyList_GET_ITEM(keys
, i
);
5206 switch (s
->ob_sstate
) {
5207 case SSTATE_NOT_INTERNED
:
5208 /* XXX Shouldn't happen */
5210 case SSTATE_INTERNED_IMMORTAL
:
5212 immortal_size
+= Py_SIZE(s
);
5214 case SSTATE_INTERNED_MORTAL
:
5216 mortal_size
+= Py_SIZE(s
);
5219 Py_FatalError("Inconsistent interned string state.");
5221 s
->ob_sstate
= SSTATE_NOT_INTERNED
;
5223 fprintf(stderr
, "total size of all interned strings: "
5224 "%" PY_FORMAT_SIZE_T
"d/%" PY_FORMAT_SIZE_T
"d "
5225 "mortal/immortal\n", mortal_size
, immortal_size
);
5227 PyDict_Clear(interned
);
5228 Py_DECREF(interned
);