1 /* String (str/bytes) object implementation */
3 #define PY_SSIZE_T_CLEAN
9 int null_strings
, one_strings
;
12 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
13 static PyStringObject
*nullstring
;
15 /* This dictionary holds all interned strings. Note that references to
16 strings in this dictionary are *not* counted in the string's ob_refcnt.
17 When the interned string reaches a refcnt of 0 the string deallocation
18 function will delete the reference from this dictionary.
20 Another way to look at this is to say that the actual reference
21 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23 static PyObject
*interned
;
26 For both PyString_FromString() and PyString_FromStringAndSize(), the
27 parameter `size' denotes number of characters to allocate, not counting any
28 null terminating character.
30 For PyString_FromString(), the parameter `str' points to a null-terminated
31 string containing exactly `size' bytes.
33 For PyString_FromStringAndSize(), the parameter `str' is
34 either NULL or else points to a string containing at least `size' bytes.
35 For PyString_FromStringAndSize(), the string in the `str' parameter does
36 not have to be null-terminated. (Therefore it is safe to construct a
37 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
38 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
39 bytes (setting the last byte to the null terminating character) and you can
40 fill in the data yourself. If `str' is non-NULL then the resulting
41 PyString object must be treated as immutable and you must not fill in nor
42 alter the data yourself, since the strings may be shared.
44 The PyObject member `op->ob_size', which denotes the number of "extra
45 items" in a variable-size object, will contain the number of bytes
46 allocated for string data, not counting the null terminating character. It
47 is therefore equal to the `size' parameter (for
48 PyString_FromStringAndSize()) or the length of the string in the `str'
49 parameter (for PyString_FromString()).
52 PyString_FromStringAndSize(const char *str
, Py_ssize_t size
)
54 register PyStringObject
*op
;
56 PyErr_SetString(PyExc_SystemError
,
57 "Negative size passed to PyString_FromStringAndSize");
60 if (size
== 0 && (op
= nullstring
) != NULL
) {
65 return (PyObject
*)op
;
67 if (size
== 1 && str
!= NULL
&&
68 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
74 return (PyObject
*)op
;
77 if (size
> PY_SSIZE_T_MAX
- sizeof(PyStringObject
)) {
78 PyErr_SetString(PyExc_OverflowError
, "string is too large");
82 /* Inline PyObject_NewVar */
83 op
= (PyStringObject
*)PyObject_MALLOC(sizeof(PyStringObject
) + size
);
85 return PyErr_NoMemory();
86 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
88 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
90 Py_MEMCPY(op
->ob_sval
, str
, size
);
91 op
->ob_sval
[size
] = '\0';
92 /* share short strings */
94 PyObject
*t
= (PyObject
*)op
;
95 PyString_InternInPlace(&t
);
96 op
= (PyStringObject
*)t
;
99 } else if (size
== 1 && str
!= NULL
) {
100 PyObject
*t
= (PyObject
*)op
;
101 PyString_InternInPlace(&t
);
102 op
= (PyStringObject
*)t
;
103 characters
[*str
& UCHAR_MAX
] = op
;
106 return (PyObject
*) op
;
110 PyString_FromString(const char *str
)
112 register size_t size
;
113 register PyStringObject
*op
;
117 if (size
> PY_SSIZE_T_MAX
- sizeof(PyStringObject
)) {
118 PyErr_SetString(PyExc_OverflowError
,
119 "string is too long for a Python string");
122 if (size
== 0 && (op
= nullstring
) != NULL
) {
127 return (PyObject
*)op
;
129 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
134 return (PyObject
*)op
;
137 /* Inline PyObject_NewVar */
138 op
= (PyStringObject
*)PyObject_MALLOC(sizeof(PyStringObject
) + size
);
140 return PyErr_NoMemory();
141 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
143 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
144 Py_MEMCPY(op
->ob_sval
, str
, size
+1);
145 /* share short strings */
147 PyObject
*t
= (PyObject
*)op
;
148 PyString_InternInPlace(&t
);
149 op
= (PyStringObject
*)t
;
152 } else if (size
== 1) {
153 PyObject
*t
= (PyObject
*)op
;
154 PyString_InternInPlace(&t
);
155 op
= (PyStringObject
*)t
;
156 characters
[*str
& UCHAR_MAX
] = op
;
159 return (PyObject
*) op
;
163 PyString_FromFormatV(const char *format
, va_list vargs
)
171 #ifdef VA_LIST_IS_ARRAY
172 Py_MEMCPY(count
, vargs
, sizeof(va_list));
175 __va_copy(count
, vargs
);
180 /* step 1: figure out how large a buffer we need */
181 for (f
= format
; *f
; f
++) {
184 while (*++f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
187 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
188 * they don't affect the amount of space we reserve.
190 if ((*f
== 'l' || *f
== 'z') &&
191 (f
[1] == 'd' || f
[1] == 'u'))
196 (void)va_arg(count
, int);
197 /* fall through... */
201 case 'd': case 'u': case 'i': case 'x':
202 (void) va_arg(count
, int);
203 /* 20 bytes is enough to hold a 64-bit
204 integer. Decimal takes the most space.
205 This isn't enough for octal. */
209 s
= va_arg(count
, char*);
213 (void) va_arg(count
, int);
214 /* maximum 64-bit pointer representation:
216 * so 19 characters is enough.
217 * XXX I count 18 -- what's the extra for?
222 /* if we stumble upon an unknown
223 formatting code, copy the rest of
224 the format string to the output
225 string. (we cannot just skip the
226 code, since there's no way to know
227 what's in the argument list) */
235 /* step 2: fill the buffer */
236 /* Since we've analyzed how much space we need for the worst case,
237 use sprintf directly instead of the slower PyOS_snprintf. */
238 string
= PyString_FromStringAndSize(NULL
, n
);
242 s
= PyString_AsString(string
);
244 for (f
= format
; *f
; f
++) {
250 /* parse the width.precision part (we're only
251 interested in the precision value, if any) */
253 while (isdigit(Py_CHARMASK(*f
)))
254 n
= (n
*10) + *f
++ - '0';
258 while (isdigit(Py_CHARMASK(*f
)))
259 n
= (n
*10) + *f
++ - '0';
261 while (*f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
263 /* handle the long flag, but only for %ld and %lu.
264 others can be added when necessary. */
265 if (*f
== 'l' && (f
[1] == 'd' || f
[1] == 'u')) {
269 /* handle the size_t flag. */
270 if (*f
== 'z' && (f
[1] == 'd' || f
[1] == 'u')) {
277 *s
++ = va_arg(vargs
, int);
281 sprintf(s
, "%ld", va_arg(vargs
, long));
283 sprintf(s
, "%" PY_FORMAT_SIZE_T
"d",
284 va_arg(vargs
, Py_ssize_t
));
286 sprintf(s
, "%d", va_arg(vargs
, int));
292 va_arg(vargs
, unsigned long));
294 sprintf(s
, "%" PY_FORMAT_SIZE_T
"u",
295 va_arg(vargs
, size_t));
298 va_arg(vargs
, unsigned int));
302 sprintf(s
, "%i", va_arg(vargs
, int));
306 sprintf(s
, "%x", va_arg(vargs
, int));
310 p
= va_arg(vargs
, char*);
318 sprintf(s
, "%p", va_arg(vargs
, void*));
319 /* %p is ill-defined: ensure leading 0x. */
322 else if (s
[1] != 'x') {
323 memmove(s
+2, s
, strlen(s
)+1);
342 _PyString_Resize(&string
, s
- PyString_AS_STRING(string
));
347 PyString_FromFormat(const char *format
, ...)
352 #ifdef HAVE_STDARG_PROTOTYPES
353 va_start(vargs
, format
);
357 ret
= PyString_FromFormatV(format
, vargs
);
363 PyObject
*PyString_Decode(const char *s
,
365 const char *encoding
,
370 str
= PyString_FromStringAndSize(s
, size
);
373 v
= PyString_AsDecodedString(str
, encoding
, errors
);
378 PyObject
*PyString_AsDecodedObject(PyObject
*str
,
379 const char *encoding
,
384 if (!PyString_Check(str
)) {
389 if (encoding
== NULL
) {
390 #ifdef Py_USING_UNICODE
391 encoding
= PyUnicode_GetDefaultEncoding();
393 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
398 /* Decode via the codec registry */
399 v
= PyCodec_Decode(str
, encoding
, errors
);
409 PyObject
*PyString_AsDecodedString(PyObject
*str
,
410 const char *encoding
,
415 v
= PyString_AsDecodedObject(str
, encoding
, errors
);
419 #ifdef Py_USING_UNICODE
420 /* Convert Unicode to a string using the default encoding */
421 if (PyUnicode_Check(v
)) {
423 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
429 if (!PyString_Check(v
)) {
430 PyErr_Format(PyExc_TypeError
,
431 "decoder did not return a string object (type=%.400s)",
432 Py_TYPE(v
)->tp_name
);
443 PyObject
*PyString_Encode(const char *s
,
445 const char *encoding
,
450 str
= PyString_FromStringAndSize(s
, size
);
453 v
= PyString_AsEncodedString(str
, encoding
, errors
);
458 PyObject
*PyString_AsEncodedObject(PyObject
*str
,
459 const char *encoding
,
464 if (!PyString_Check(str
)) {
469 if (encoding
== NULL
) {
470 #ifdef Py_USING_UNICODE
471 encoding
= PyUnicode_GetDefaultEncoding();
473 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
478 /* Encode via the codec registry */
479 v
= PyCodec_Encode(str
, encoding
, errors
);
489 PyObject
*PyString_AsEncodedString(PyObject
*str
,
490 const char *encoding
,
495 v
= PyString_AsEncodedObject(str
, encoding
, errors
);
499 #ifdef Py_USING_UNICODE
500 /* Convert Unicode to a string using the default encoding */
501 if (PyUnicode_Check(v
)) {
503 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
509 if (!PyString_Check(v
)) {
510 PyErr_Format(PyExc_TypeError
,
511 "encoder did not return a string object (type=%.400s)",
512 Py_TYPE(v
)->tp_name
);
524 string_dealloc(PyObject
*op
)
526 switch (PyString_CHECK_INTERNED(op
)) {
527 case SSTATE_NOT_INTERNED
:
530 case SSTATE_INTERNED_MORTAL
:
531 /* revive dead object temporarily for DelItem */
533 if (PyDict_DelItem(interned
, op
) != 0)
535 "deletion of interned string failed");
538 case SSTATE_INTERNED_IMMORTAL
:
539 Py_FatalError("Immortal interned string died.");
542 Py_FatalError("Inconsistent interned string state.");
544 Py_TYPE(op
)->tp_free(op
);
547 /* Unescape a backslash-escaped string. If unicode is non-zero,
548 the string is a u-literal. If recode_encoding is non-zero,
549 the string is UTF-8 encoded and should be re-encoded in the
550 specified encoding. */
552 PyObject
*PyString_DecodeEscape(const char *s
,
556 const char *recode_encoding
)
562 Py_ssize_t newlen
= recode_encoding
? 4*len
:len
;
563 v
= PyString_FromStringAndSize((char *)NULL
, newlen
);
566 p
= buf
= PyString_AsString(v
);
571 #ifdef Py_USING_UNICODE
572 if (recode_encoding
&& (*s
& 0x80)) {
578 /* Decode non-ASCII bytes as UTF-8. */
579 while (t
< end
&& (*t
& 0x80)) t
++;
580 u
= PyUnicode_DecodeUTF8(s
, t
- s
, errors
);
583 /* Recode them in target encoding. */
584 w
= PyUnicode_AsEncodedString(
585 u
, recode_encoding
, errors
);
589 /* Append bytes to output buffer. */
590 assert(PyString_Check(w
));
591 r
= PyString_AS_STRING(w
);
592 rn
= PyString_GET_SIZE(w
);
607 PyErr_SetString(PyExc_ValueError
,
608 "Trailing \\ in string");
612 /* XXX This assumes ASCII! */
614 case '\\': *p
++ = '\\'; break;
615 case '\'': *p
++ = '\''; break;
616 case '\"': *p
++ = '\"'; break;
617 case 'b': *p
++ = '\b'; break;
618 case 'f': *p
++ = '\014'; break; /* FF */
619 case 't': *p
++ = '\t'; break;
620 case 'n': *p
++ = '\n'; break;
621 case 'r': *p
++ = '\r'; break;
622 case 'v': *p
++ = '\013'; break; /* VT */
623 case 'a': *p
++ = '\007'; break; /* BEL, not classic C */
624 case '0': case '1': case '2': case '3':
625 case '4': case '5': case '6': case '7':
627 if (s
< end
&& '0' <= *s
&& *s
<= '7') {
628 c
= (c
<<3) + *s
++ - '0';
629 if (s
< end
&& '0' <= *s
&& *s
<= '7')
630 c
= (c
<<3) + *s
++ - '0';
636 isxdigit(Py_CHARMASK(s
[0])) &&
637 isxdigit(Py_CHARMASK(s
[1])))
660 if (!errors
|| strcmp(errors
, "strict") == 0) {
661 PyErr_SetString(PyExc_ValueError
,
662 "invalid \\x escape");
665 if (strcmp(errors
, "replace") == 0) {
667 } else if (strcmp(errors
, "ignore") == 0)
670 PyErr_Format(PyExc_ValueError
,
672 "unknown error handling code: %.400s",
676 #ifndef Py_USING_UNICODE
681 PyErr_SetString(PyExc_ValueError
,
682 "Unicode escapes not legal "
683 "when Unicode disabled");
690 goto non_esc
; /* an arbitrary number of unescaped
691 UTF-8 bytes may follow. */
695 _PyString_Resize(&v
, p
- buf
);
702 /* -------------------------------------------------------------------- */
706 string_getsize(register PyObject
*op
)
710 if (PyString_AsStringAndSize(op
, &s
, &len
))
715 static /*const*/ char *
716 string_getbuffer(register PyObject
*op
)
720 if (PyString_AsStringAndSize(op
, &s
, &len
))
726 PyString_Size(register PyObject
*op
)
728 if (!PyString_Check(op
))
729 return string_getsize(op
);
734 PyString_AsString(register PyObject
*op
)
736 if (!PyString_Check(op
))
737 return string_getbuffer(op
);
738 return ((PyStringObject
*)op
) -> ob_sval
;
742 PyString_AsStringAndSize(register PyObject
*obj
,
744 register Py_ssize_t
*len
)
747 PyErr_BadInternalCall();
751 if (!PyString_Check(obj
)) {
752 #ifdef Py_USING_UNICODE
753 if (PyUnicode_Check(obj
)) {
754 obj
= _PyUnicode_AsDefaultEncodedString(obj
, NULL
);
761 PyErr_Format(PyExc_TypeError
,
762 "expected string or Unicode object, "
763 "%.200s found", Py_TYPE(obj
)->tp_name
);
768 *s
= PyString_AS_STRING(obj
);
770 *len
= PyString_GET_SIZE(obj
);
771 else if (strlen(*s
) != (size_t)PyString_GET_SIZE(obj
)) {
772 PyErr_SetString(PyExc_TypeError
,
773 "expected string without null bytes");
779 /* -------------------------------------------------------------------- */
782 #include "stringlib/stringdefs.h"
783 #include "stringlib/fastsearch.h"
785 #include "stringlib/count.h"
786 #include "stringlib/find.h"
787 #include "stringlib/partition.h"
789 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
790 #include "stringlib/localeutil.h"
795 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
797 Py_ssize_t i
, str_len
;
801 /* XXX Ought to check for interrupts when writing long strings */
802 if (! PyString_CheckExact(op
)) {
804 /* A str subclass may have its own __str__ method. */
805 op
= (PyStringObject
*) PyObject_Str((PyObject
*)op
);
808 ret
= string_print(op
, fp
, flags
);
812 if (flags
& Py_PRINT_RAW
) {
813 char *data
= op
->ob_sval
;
814 Py_ssize_t size
= Py_SIZE(op
);
815 Py_BEGIN_ALLOW_THREADS
816 while (size
> INT_MAX
) {
817 /* Very long strings cannot be written atomically.
818 * But don't write exactly INT_MAX bytes at a time
819 * to avoid memory alignment issues.
821 const int chunk_size
= INT_MAX
& ~0x3FFF;
822 fwrite(data
, 1, chunk_size
, fp
);
827 if (size
) fwrite(data
, (int)size
, 1, fp
);
829 fwrite(data
, 1, (int)size
, fp
);
835 /* figure out which quote to use; single is preferred */
837 if (memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
838 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
841 str_len
= Py_SIZE(op
);
842 Py_BEGIN_ALLOW_THREADS
844 for (i
= 0; i
< str_len
; i
++) {
845 /* Since strings are immutable and the caller should have a
846 reference, accessing the internal buffer should not be an issue
847 with the GIL released. */
849 if (c
== quote
|| c
== '\\')
850 fprintf(fp
, "\\%c", c
);
857 else if (c
< ' ' || c
>= 0x7f)
858 fprintf(fp
, "\\x%02x", c
& 0xff);
868 PyString_Repr(PyObject
*obj
, int smartquotes
)
870 register PyStringObject
* op
= (PyStringObject
*) obj
;
871 size_t newsize
= 2 + 4 * Py_SIZE(op
);
873 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 != Py_SIZE(op
)) {
874 PyErr_SetString(PyExc_OverflowError
,
875 "string is too large to make repr");
878 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
883 register Py_ssize_t i
;
888 /* figure out which quote to use; single is preferred */
891 memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
892 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
895 p
= PyString_AS_STRING(v
);
897 for (i
= 0; i
< Py_SIZE(op
); i
++) {
898 /* There's at least enough room for a hex escape
899 and a closing quote. */
900 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 5);
902 if (c
== quote
|| c
== '\\')
903 *p
++ = '\\', *p
++ = c
;
905 *p
++ = '\\', *p
++ = 't';
907 *p
++ = '\\', *p
++ = 'n';
909 *p
++ = '\\', *p
++ = 'r';
910 else if (c
< ' ' || c
>= 0x7f) {
911 /* For performance, we don't want to call
912 PyOS_snprintf here (extra layers of
914 sprintf(p
, "\\x%02x", c
& 0xff);
920 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 1);
924 &v
, (p
- PyString_AS_STRING(v
)));
930 string_repr(PyObject
*op
)
932 return PyString_Repr(op
, 1);
936 string_str(PyObject
*s
)
938 assert(PyString_Check(s
));
939 if (PyString_CheckExact(s
)) {
944 /* Subtype -- return genuine string with the same value. */
945 PyStringObject
*t
= (PyStringObject
*) s
;
946 return PyString_FromStringAndSize(t
->ob_sval
, Py_SIZE(t
));
951 string_length(PyStringObject
*a
)
957 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
959 register Py_ssize_t size
;
960 register PyStringObject
*op
;
961 if (!PyString_Check(bb
)) {
962 #ifdef Py_USING_UNICODE
963 if (PyUnicode_Check(bb
))
964 return PyUnicode_Concat((PyObject
*)a
, bb
);
966 if (PyByteArray_Check(bb
))
967 return PyByteArray_Concat((PyObject
*)a
, bb
);
968 PyErr_Format(PyExc_TypeError
,
969 "cannot concatenate 'str' and '%.200s' objects",
970 Py_TYPE(bb
)->tp_name
);
973 #define b ((PyStringObject *)bb)
974 /* Optimize cases with empty left or right operand */
975 if ((Py_SIZE(a
) == 0 || Py_SIZE(b
) == 0) &&
976 PyString_CheckExact(a
) && PyString_CheckExact(b
)) {
977 if (Py_SIZE(a
) == 0) {
982 return (PyObject
*)a
;
984 size
= Py_SIZE(a
) + Py_SIZE(b
);
985 /* Check that string sizes are not negative, to prevent an
986 overflow in cases where we are passed incorrectly-created
987 strings with negative lengths (due to a bug in other code).
989 if (Py_SIZE(a
) < 0 || Py_SIZE(b
) < 0 ||
990 Py_SIZE(a
) > PY_SSIZE_T_MAX
- Py_SIZE(b
)) {
991 PyErr_SetString(PyExc_OverflowError
,
992 "strings are too large to concat");
996 /* Inline PyObject_NewVar */
997 if (size
> PY_SSIZE_T_MAX
- sizeof(PyStringObject
)) {
998 PyErr_SetString(PyExc_OverflowError
,
999 "strings are too large to concat");
1002 op
= (PyStringObject
*)PyObject_MALLOC(sizeof(PyStringObject
) + size
);
1004 return PyErr_NoMemory();
1005 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1007 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1008 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1009 Py_MEMCPY(op
->ob_sval
+ Py_SIZE(a
), b
->ob_sval
, Py_SIZE(b
));
1010 op
->ob_sval
[size
] = '\0';
1011 return (PyObject
*) op
;
1016 string_repeat(register PyStringObject
*a
, register Py_ssize_t n
)
1018 register Py_ssize_t i
;
1019 register Py_ssize_t j
;
1020 register Py_ssize_t size
;
1021 register PyStringObject
*op
;
1025 /* watch out for overflows: the size can overflow int,
1026 * and the # of bytes needed can overflow size_t
1028 size
= Py_SIZE(a
) * n
;
1029 if (n
&& size
/ n
!= Py_SIZE(a
)) {
1030 PyErr_SetString(PyExc_OverflowError
,
1031 "repeated string is too long");
1034 if (size
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1036 return (PyObject
*)a
;
1038 nbytes
= (size_t)size
;
1039 if (nbytes
+ sizeof(PyStringObject
) <= nbytes
) {
1040 PyErr_SetString(PyExc_OverflowError
,
1041 "repeated string is too long");
1044 op
= (PyStringObject
*)
1045 PyObject_MALLOC(sizeof(PyStringObject
) + nbytes
);
1047 return PyErr_NoMemory();
1048 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1050 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1051 op
->ob_sval
[size
] = '\0';
1052 if (Py_SIZE(a
) == 1 && n
> 0) {
1053 memset(op
->ob_sval
, a
->ob_sval
[0] , n
);
1054 return (PyObject
*) op
;
1058 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1062 j
= (i
<= size
-i
) ? i
: size
-i
;
1063 Py_MEMCPY(op
->ob_sval
+i
, op
->ob_sval
, j
);
1066 return (PyObject
*) op
;
1069 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1072 string_slice(register PyStringObject
*a
, register Py_ssize_t i
,
1073 register Py_ssize_t j
)
1074 /* j -- may be negative! */
1079 j
= 0; /* Avoid signed/unsigned bug in next line */
1082 if (i
== 0 && j
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1083 /* It's the same as a */
1085 return (PyObject
*)a
;
1089 return PyString_FromStringAndSize(a
->ob_sval
+ i
, j
-i
);
1093 string_contains(PyObject
*str_obj
, PyObject
*sub_obj
)
1095 if (!PyString_CheckExact(sub_obj
)) {
1096 #ifdef Py_USING_UNICODE
1097 if (PyUnicode_Check(sub_obj
))
1098 return PyUnicode_Contains(str_obj
, sub_obj
);
1100 if (!PyString_Check(sub_obj
)) {
1101 PyErr_Format(PyExc_TypeError
,
1102 "'in <string>' requires string as left operand, "
1103 "not %.200s", Py_TYPE(sub_obj
)->tp_name
);
1108 return stringlib_contains_obj(str_obj
, sub_obj
);
1112 string_item(PyStringObject
*a
, register Py_ssize_t i
)
1116 if (i
< 0 || i
>= Py_SIZE(a
)) {
1117 PyErr_SetString(PyExc_IndexError
, "string index out of range");
1120 pchar
= a
->ob_sval
[i
];
1121 v
= (PyObject
*)characters
[pchar
& UCHAR_MAX
];
1123 v
= PyString_FromStringAndSize(&pchar
, 1);
1134 string_richcompare(PyStringObject
*a
, PyStringObject
*b
, int op
)
1137 Py_ssize_t len_a
, len_b
;
1141 /* Make sure both arguments are strings. */
1142 if (!(PyString_Check(a
) && PyString_Check(b
))) {
1143 result
= Py_NotImplemented
;
1148 case Py_EQ
:case Py_LE
:case Py_GE
:
1151 case Py_NE
:case Py_LT
:case Py_GT
:
1157 /* Supporting Py_NE here as well does not save
1158 much time, since Py_NE is rarely used. */
1159 if (Py_SIZE(a
) == Py_SIZE(b
)
1160 && (a
->ob_sval
[0] == b
->ob_sval
[0]
1161 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0)) {
1168 len_a
= Py_SIZE(a
); len_b
= Py_SIZE(b
);
1169 min_len
= (len_a
< len_b
) ? len_a
: len_b
;
1171 c
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
1173 c
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
1177 c
= (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
1179 case Py_LT
: c
= c
< 0; break;
1180 case Py_LE
: c
= c
<= 0; break;
1181 case Py_EQ
: assert(0); break; /* unreachable */
1182 case Py_NE
: c
= c
!= 0; break;
1183 case Py_GT
: c
= c
> 0; break;
1184 case Py_GE
: c
= c
>= 0; break;
1186 result
= Py_NotImplemented
;
1189 result
= c
? Py_True
: Py_False
;
1196 _PyString_Eq(PyObject
*o1
, PyObject
*o2
)
1198 PyStringObject
*a
= (PyStringObject
*) o1
;
1199 PyStringObject
*b
= (PyStringObject
*) o2
;
1200 return Py_SIZE(a
) == Py_SIZE(b
)
1201 && *a
->ob_sval
== *b
->ob_sval
1202 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0;
1206 string_hash(PyStringObject
*a
)
1208 register Py_ssize_t len
;
1209 register unsigned char *p
;
1212 if (a
->ob_shash
!= -1)
1215 p
= (unsigned char *) a
->ob_sval
;
1218 x
= (1000003*x
) ^ *p
++;
1227 string_subscript(PyStringObject
* self
, PyObject
* item
)
1229 if (PyIndex_Check(item
)) {
1230 Py_ssize_t i
= PyNumber_AsSsize_t(item
, PyExc_IndexError
);
1231 if (i
== -1 && PyErr_Occurred())
1234 i
+= PyString_GET_SIZE(self
);
1235 return string_item(self
, i
);
1237 else if (PySlice_Check(item
)) {
1238 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
1243 if (PySlice_GetIndicesEx((PySliceObject
*)item
,
1244 PyString_GET_SIZE(self
),
1245 &start
, &stop
, &step
, &slicelength
) < 0) {
1249 if (slicelength
<= 0) {
1250 return PyString_FromStringAndSize("", 0);
1252 else if (start
== 0 && step
== 1 &&
1253 slicelength
== PyString_GET_SIZE(self
) &&
1254 PyString_CheckExact(self
)) {
1256 return (PyObject
*)self
;
1258 else if (step
== 1) {
1259 return PyString_FromStringAndSize(
1260 PyString_AS_STRING(self
) + start
,
1264 source_buf
= PyString_AsString((PyObject
*)self
);
1265 result_buf
= (char *)PyMem_Malloc(slicelength
);
1266 if (result_buf
== NULL
)
1267 return PyErr_NoMemory();
1269 for (cur
= start
, i
= 0; i
< slicelength
;
1271 result_buf
[i
] = source_buf
[cur
];
1274 result
= PyString_FromStringAndSize(result_buf
,
1276 PyMem_Free(result_buf
);
1281 PyErr_Format(PyExc_TypeError
,
1282 "string indices must be integers, not %.200s",
1283 Py_TYPE(item
)->tp_name
);
1289 string_buffer_getreadbuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1292 PyErr_SetString(PyExc_SystemError
,
1293 "accessing non-existent string segment");
1296 *ptr
= (void *)self
->ob_sval
;
1297 return Py_SIZE(self
);
1301 string_buffer_getwritebuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1303 PyErr_SetString(PyExc_TypeError
,
1304 "Cannot use string as modifiable buffer");
1309 string_buffer_getsegcount(PyStringObject
*self
, Py_ssize_t
*lenp
)
1312 *lenp
= Py_SIZE(self
);
1317 string_buffer_getcharbuf(PyStringObject
*self
, Py_ssize_t index
, const char **ptr
)
1320 PyErr_SetString(PyExc_SystemError
,
1321 "accessing non-existent string segment");
1324 *ptr
= self
->ob_sval
;
1325 return Py_SIZE(self
);
1329 string_buffer_getbuffer(PyStringObject
*self
, Py_buffer
*view
, int flags
)
1331 return PyBuffer_FillInfo(view
, (PyObject
*)self
,
1332 (void *)self
->ob_sval
, Py_SIZE(self
),
1336 static PySequenceMethods string_as_sequence
= {
1337 (lenfunc
)string_length
, /*sq_length*/
1338 (binaryfunc
)string_concat
, /*sq_concat*/
1339 (ssizeargfunc
)string_repeat
, /*sq_repeat*/
1340 (ssizeargfunc
)string_item
, /*sq_item*/
1341 (ssizessizeargfunc
)string_slice
, /*sq_slice*/
1344 (objobjproc
)string_contains
/*sq_contains*/
1347 static PyMappingMethods string_as_mapping
= {
1348 (lenfunc
)string_length
,
1349 (binaryfunc
)string_subscript
,
1353 static PyBufferProcs string_as_buffer
= {
1354 (readbufferproc
)string_buffer_getreadbuf
,
1355 (writebufferproc
)string_buffer_getwritebuf
,
1356 (segcountproc
)string_buffer_getsegcount
,
1357 (charbufferproc
)string_buffer_getcharbuf
,
1358 (getbufferproc
)string_buffer_getbuffer
,
1365 #define RIGHTSTRIP 1
1368 /* Arrays indexed by above */
1369 static const char *stripformat
[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1371 #define STRIPNAME(i) (stripformat[i]+3)
1374 /* Don't call if length < 2 */
1375 #define Py_STRING_MATCH(target, offset, pattern, length) \
1376 (target[offset] == pattern[0] && \
1377 target[offset+length-1] == pattern[length-1] && \
1378 !memcmp(target+offset+1, pattern+1, length-2) )
1381 /* Overallocate the initial list to reduce the number of reallocs for small
1382 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1383 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1384 text (roughly 11 words per line) and field delimited data (usually 1-10
1385 fields). For large strings the split algorithms are bandwidth limited
1386 so increasing the preallocation likely will not improve things.*/
1388 #define MAX_PREALLOC 12
1390 /* 5 splits gives 6 elements */
1391 #define PREALLOC_SIZE(maxsplit) \
1392 (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
1394 #define SPLIT_APPEND(data, left, right) \
1395 str = PyString_FromStringAndSize((data) + (left), \
1396 (right) - (left)); \
1399 if (PyList_Append(list, str)) { \
1406 #define SPLIT_ADD(data, left, right) { \
1407 str = PyString_FromStringAndSize((data) + (left), \
1408 (right) - (left)); \
1411 if (count < MAX_PREALLOC) { \
1412 PyList_SET_ITEM(list, count, str); \
1414 if (PyList_Append(list, str)) { \
1423 /* Always force the list to the expected size. */
1424 #define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
1426 #define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
1427 #define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
1428 #define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
1429 #define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1431 Py_LOCAL_INLINE(PyObject
*)
1432 split_whitespace(PyStringObject
*self
, Py_ssize_t len
, Py_ssize_t maxsplit
)
1434 const char *s
= PyString_AS_STRING(self
);
1435 Py_ssize_t i
, j
, count
=0;
1437 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1444 while (maxsplit
-- > 0) {
1445 SKIP_SPACE(s
, i
, len
);
1448 SKIP_NONSPACE(s
, i
, len
);
1449 if (j
== 0 && i
== len
&& PyString_CheckExact(self
)) {
1450 /* No whitespace in self, so just use it as list[0] */
1452 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1460 /* Only occurs when maxsplit was reached */
1461 /* Skip any remaining whitespace and copy to end of string */
1462 SKIP_SPACE(s
, i
, len
);
1464 SPLIT_ADD(s
, i
, len
);
1466 FIX_PREALLOC_SIZE(list
);
1473 Py_LOCAL_INLINE(PyObject
*)
1474 split_char(PyStringObject
*self
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
1476 const char *s
= PyString_AS_STRING(self
);
1477 register Py_ssize_t i
, j
, count
=0;
1479 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
1485 while ((j
< len
) && (maxcount
-- > 0)) {
1487 /* I found that using memchr makes no difference */
1495 if (i
== 0 && count
== 0 && PyString_CheckExact(self
)) {
1496 /* ch not in self, so just use self as list[0] */
1498 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1501 else if (i
<= len
) {
1502 SPLIT_ADD(s
, i
, len
);
1504 FIX_PREALLOC_SIZE(list
);
1512 PyDoc_STRVAR(split__doc__
,
1513 "S.split([sep [,maxsplit]]) -> list of strings\n\
1515 Return a list of the words in the string S, using sep as the\n\
1516 delimiter string. If maxsplit is given, at most maxsplit\n\
1517 splits are done. If sep is not specified or is None, any\n\
1518 whitespace string is a separator and empty strings are removed\n\
1522 string_split(PyStringObject
*self
, PyObject
*args
)
1524 Py_ssize_t len
= PyString_GET_SIZE(self
), n
, i
, j
;
1525 Py_ssize_t maxsplit
= -1, count
=0;
1526 const char *s
= PyString_AS_STRING(self
), *sub
;
1527 PyObject
*list
, *str
, *subobj
= Py_None
;
1532 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
1535 maxsplit
= PY_SSIZE_T_MAX
;
1536 if (subobj
== Py_None
)
1537 return split_whitespace(self
, len
, maxsplit
);
1538 if (PyString_Check(subobj
)) {
1539 sub
= PyString_AS_STRING(subobj
);
1540 n
= PyString_GET_SIZE(subobj
);
1542 #ifdef Py_USING_UNICODE
1543 else if (PyUnicode_Check(subobj
))
1544 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
1546 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1550 PyErr_SetString(PyExc_ValueError
, "empty separator");
1554 return split_char(self
, len
, sub
[0], maxsplit
);
1556 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1562 while (maxsplit
-- > 0) {
1563 pos
= fastsearch(s
+i
, len
-i
, sub
, n
, FAST_SEARCH
);
1572 while ((j
+n
<= len
) && (maxsplit
-- > 0)) {
1573 for (; j
+n
<= len
; j
++) {
1574 if (Py_STRING_MATCH(s
, j
, sub
, n
)) {
1582 SPLIT_ADD(s
, i
, len
);
1583 FIX_PREALLOC_SIZE(list
);
1591 PyDoc_STRVAR(partition__doc__
,
1592 "S.partition(sep) -> (head, sep, tail)\n\
1594 Searches for the separator sep in S, and returns the part before it,\n\
1595 the separator itself, and the part after it. If the separator is not\n\
1596 found, returns S and two empty strings.");
1599 string_partition(PyStringObject
*self
, PyObject
*sep_obj
)
1604 if (PyString_Check(sep_obj
)) {
1605 sep
= PyString_AS_STRING(sep_obj
);
1606 sep_len
= PyString_GET_SIZE(sep_obj
);
1608 #ifdef Py_USING_UNICODE
1609 else if (PyUnicode_Check(sep_obj
))
1610 return PyUnicode_Partition((PyObject
*) self
, sep_obj
);
1612 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1615 return stringlib_partition(
1617 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1618 sep_obj
, sep
, sep_len
1622 PyDoc_STRVAR(rpartition__doc__
,
1623 "S.rpartition(sep) -> (tail, sep, head)\n\
1625 Searches for the separator sep in S, starting at the end of S, and returns\n\
1626 the part before it, the separator itself, and the part after it. If the\n\
1627 separator is not found, returns two empty strings and S.");
1630 string_rpartition(PyStringObject
*self
, PyObject
*sep_obj
)
1635 if (PyString_Check(sep_obj
)) {
1636 sep
= PyString_AS_STRING(sep_obj
);
1637 sep_len
= PyString_GET_SIZE(sep_obj
);
1639 #ifdef Py_USING_UNICODE
1640 else if (PyUnicode_Check(sep_obj
))
1641 return PyUnicode_Partition((PyObject
*) self
, sep_obj
);
1643 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1646 return stringlib_rpartition(
1648 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1649 sep_obj
, sep
, sep_len
1653 Py_LOCAL_INLINE(PyObject
*)
1654 rsplit_whitespace(PyStringObject
*self
, Py_ssize_t len
, Py_ssize_t maxsplit
)
1656 const char *s
= PyString_AS_STRING(self
);
1657 Py_ssize_t i
, j
, count
=0;
1659 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1666 while (maxsplit
-- > 0) {
1670 RSKIP_NONSPACE(s
, i
);
1671 if (j
== len
-1 && i
< 0 && PyString_CheckExact(self
)) {
1672 /* No whitespace in self, so just use it as list[0] */
1674 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1678 SPLIT_ADD(s
, i
+ 1, j
+ 1);
1681 /* Only occurs when maxsplit was reached */
1682 /* Skip any remaining whitespace and copy to beginning of string */
1685 SPLIT_ADD(s
, 0, i
+ 1);
1688 FIX_PREALLOC_SIZE(list
);
1689 if (PyList_Reverse(list
) < 0)
1697 Py_LOCAL_INLINE(PyObject
*)
1698 rsplit_char(PyStringObject
*self
, Py_ssize_t len
, char ch
, Py_ssize_t maxcount
)
1700 const char *s
= PyString_AS_STRING(self
);
1701 register Py_ssize_t i
, j
, count
=0;
1703 PyObject
*list
= PyList_New(PREALLOC_SIZE(maxcount
));
1709 while ((i
>= 0) && (maxcount
-- > 0)) {
1710 for (; i
>= 0; i
--) {
1712 SPLIT_ADD(s
, i
+ 1, j
+ 1);
1718 if (i
< 0 && count
== 0 && PyString_CheckExact(self
)) {
1719 /* ch not in self, so just use self as list[0] */
1721 PyList_SET_ITEM(list
, 0, (PyObject
*)self
);
1725 SPLIT_ADD(s
, 0, j
+ 1);
1727 FIX_PREALLOC_SIZE(list
);
1728 if (PyList_Reverse(list
) < 0)
1737 PyDoc_STRVAR(rsplit__doc__
,
1738 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1740 Return a list of the words in the string S, using sep as the\n\
1741 delimiter string, starting at the end of the string and working\n\
1742 to the front. If maxsplit is given, at most maxsplit splits are\n\
1743 done. If sep is not specified or is None, any whitespace string\n\
1747 string_rsplit(PyStringObject
*self
, PyObject
*args
)
1749 Py_ssize_t len
= PyString_GET_SIZE(self
), n
, i
, j
;
1750 Py_ssize_t maxsplit
= -1, count
=0;
1751 const char *s
, *sub
;
1752 PyObject
*list
, *str
, *subobj
= Py_None
;
1754 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
1757 maxsplit
= PY_SSIZE_T_MAX
;
1758 if (subobj
== Py_None
)
1759 return rsplit_whitespace(self
, len
, maxsplit
);
1760 if (PyString_Check(subobj
)) {
1761 sub
= PyString_AS_STRING(subobj
);
1762 n
= PyString_GET_SIZE(subobj
);
1764 #ifdef Py_USING_UNICODE
1765 else if (PyUnicode_Check(subobj
))
1766 return PyUnicode_RSplit((PyObject
*)self
, subobj
, maxsplit
);
1768 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1772 PyErr_SetString(PyExc_ValueError
, "empty separator");
1776 return rsplit_char(self
, len
, sub
[0], maxsplit
);
1778 list
= PyList_New(PREALLOC_SIZE(maxsplit
));
1785 s
= PyString_AS_STRING(self
);
1786 while ( (i
>= 0) && (maxsplit
-- > 0) ) {
1788 if (Py_STRING_MATCH(s
, i
, sub
, n
)) {
1789 SPLIT_ADD(s
, i
+ n
, j
);
1797 FIX_PREALLOC_SIZE(list
);
1798 if (PyList_Reverse(list
) < 0)
1808 PyDoc_STRVAR(join__doc__
,
1809 "S.join(sequence) -> string\n\
1811 Return a string which is the concatenation of the strings in the\n\
1812 sequence. The separator between elements is S.");
1815 string_join(PyStringObject
*self
, PyObject
*orig
)
1817 char *sep
= PyString_AS_STRING(self
);
1818 const Py_ssize_t seplen
= PyString_GET_SIZE(self
);
1819 PyObject
*res
= NULL
;
1821 Py_ssize_t seqlen
= 0;
1824 PyObject
*seq
, *item
;
1826 seq
= PySequence_Fast(orig
, "");
1831 seqlen
= PySequence_Size(seq
);
1834 return PyString_FromString("");
1837 item
= PySequence_Fast_GET_ITEM(seq
, 0);
1838 if (PyString_CheckExact(item
) || PyUnicode_CheckExact(item
)) {
1845 /* There are at least two things to join, or else we have a subclass
1846 * of the builtin types in the sequence.
1847 * Do a pre-pass to figure out the total amount of space we'll
1848 * need (sz), see whether any argument is absurd, and defer to
1849 * the Unicode join if appropriate.
1851 for (i
= 0; i
< seqlen
; i
++) {
1852 const size_t old_sz
= sz
;
1853 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1854 if (!PyString_Check(item
)){
1855 #ifdef Py_USING_UNICODE
1856 if (PyUnicode_Check(item
)) {
1857 /* Defer to Unicode join.
1858 * CAUTION: There's no guarantee that the
1859 * original sequence can be iterated over
1860 * again, so we must pass seq here.
1863 result
= PyUnicode_Join((PyObject
*)self
, seq
);
1868 PyErr_Format(PyExc_TypeError
,
1869 "sequence item %zd: expected string,"
1871 i
, Py_TYPE(item
)->tp_name
);
1875 sz
+= PyString_GET_SIZE(item
);
1878 if (sz
< old_sz
|| sz
> PY_SSIZE_T_MAX
) {
1879 PyErr_SetString(PyExc_OverflowError
,
1880 "join() result is too long for a Python string");
1886 /* Allocate result space. */
1887 res
= PyString_FromStringAndSize((char*)NULL
, sz
);
1893 /* Catenate everything. */
1894 p
= PyString_AS_STRING(res
);
1895 for (i
= 0; i
< seqlen
; ++i
) {
1897 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1898 n
= PyString_GET_SIZE(item
);
1899 Py_MEMCPY(p
, PyString_AS_STRING(item
), n
);
1901 if (i
< seqlen
- 1) {
1902 Py_MEMCPY(p
, sep
, seplen
);
1912 _PyString_Join(PyObject
*sep
, PyObject
*x
)
1914 assert(sep
!= NULL
&& PyString_Check(sep
));
1916 return string_join((PyStringObject
*)sep
, x
);
1919 Py_LOCAL_INLINE(void)
1920 string_adjust_indices(Py_ssize_t
*start
, Py_ssize_t
*end
, Py_ssize_t len
)
1934 Py_LOCAL_INLINE(Py_ssize_t
)
1935 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
1940 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1941 PyObject
*obj_start
=Py_None
, *obj_end
=Py_None
;
1943 if (!PyArg_ParseTuple(args
, "O|OO:find/rfind/index/rindex", &subobj
,
1944 &obj_start
, &obj_end
))
1946 /* To support None in "start" and "end" arguments, meaning
1947 the same as if they were not passed.
1949 if (obj_start
!= Py_None
)
1950 if (!_PyEval_SliceIndex(obj_start
, &start
))
1952 if (obj_end
!= Py_None
)
1953 if (!_PyEval_SliceIndex(obj_end
, &end
))
1956 if (PyString_Check(subobj
)) {
1957 sub
= PyString_AS_STRING(subobj
);
1958 sub_len
= PyString_GET_SIZE(subobj
);
1960 #ifdef Py_USING_UNICODE
1961 else if (PyUnicode_Check(subobj
))
1962 return PyUnicode_Find(
1963 (PyObject
*)self
, subobj
, start
, end
, dir
);
1965 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
1966 /* XXX - the "expected a character buffer object" is pretty
1967 confusing for a non-expert. remap to something else ? */
1971 return stringlib_find_slice(
1972 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1973 sub
, sub_len
, start
, end
);
1975 return stringlib_rfind_slice(
1976 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1977 sub
, sub_len
, start
, end
);
1981 PyDoc_STRVAR(find__doc__
,
1982 "S.find(sub [,start [,end]]) -> int\n\
1984 Return the lowest index in S where substring sub is found,\n\
1985 such that sub is contained within s[start:end]. Optional\n\
1986 arguments start and end are interpreted as in slice notation.\n\
1988 Return -1 on failure.");
1991 string_find(PyStringObject
*self
, PyObject
*args
)
1993 Py_ssize_t result
= string_find_internal(self
, args
, +1);
1996 return PyInt_FromSsize_t(result
);
2000 PyDoc_STRVAR(index__doc__
,
2001 "S.index(sub [,start [,end]]) -> int\n\
2003 Like S.find() but raise ValueError when the substring is not found.");
2006 string_index(PyStringObject
*self
, PyObject
*args
)
2008 Py_ssize_t result
= string_find_internal(self
, args
, +1);
2012 PyErr_SetString(PyExc_ValueError
,
2013 "substring not found");
2016 return PyInt_FromSsize_t(result
);
2020 PyDoc_STRVAR(rfind__doc__
,
2021 "S.rfind(sub [,start [,end]]) -> int\n\
2023 Return the highest index in S where substring sub is found,\n\
2024 such that sub is contained within s[start:end]. Optional\n\
2025 arguments start and end are interpreted as in slice notation.\n\
2027 Return -1 on failure.");
2030 string_rfind(PyStringObject
*self
, PyObject
*args
)
2032 Py_ssize_t result
= string_find_internal(self
, args
, -1);
2035 return PyInt_FromSsize_t(result
);
2039 PyDoc_STRVAR(rindex__doc__
,
2040 "S.rindex(sub [,start [,end]]) -> int\n\
2042 Like S.rfind() but raise ValueError when the substring is not found.");
2045 string_rindex(PyStringObject
*self
, PyObject
*args
)
2047 Py_ssize_t result
= string_find_internal(self
, args
, -1);
2051 PyErr_SetString(PyExc_ValueError
,
2052 "substring not found");
2055 return PyInt_FromSsize_t(result
);
2059 Py_LOCAL_INLINE(PyObject
*)
2060 do_xstrip(PyStringObject
*self
, int striptype
, PyObject
*sepobj
)
2062 char *s
= PyString_AS_STRING(self
);
2063 Py_ssize_t len
= PyString_GET_SIZE(self
);
2064 char *sep
= PyString_AS_STRING(sepobj
);
2065 Py_ssize_t seplen
= PyString_GET_SIZE(sepobj
);
2069 if (striptype
!= RIGHTSTRIP
) {
2070 while (i
< len
&& memchr(sep
, Py_CHARMASK(s
[i
]), seplen
)) {
2076 if (striptype
!= LEFTSTRIP
) {
2079 } while (j
>= i
&& memchr(sep
, Py_CHARMASK(s
[j
]), seplen
));
2083 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
2085 return (PyObject
*)self
;
2088 return PyString_FromStringAndSize(s
+i
, j
-i
);
2092 Py_LOCAL_INLINE(PyObject
*)
2093 do_strip(PyStringObject
*self
, int striptype
)
2095 char *s
= PyString_AS_STRING(self
);
2096 Py_ssize_t len
= PyString_GET_SIZE(self
), i
, j
;
2099 if (striptype
!= RIGHTSTRIP
) {
2100 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
2106 if (striptype
!= LEFTSTRIP
) {
2109 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
2113 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
2115 return (PyObject
*)self
;
2118 return PyString_FromStringAndSize(s
+i
, j
-i
);
2122 Py_LOCAL_INLINE(PyObject
*)
2123 do_argstrip(PyStringObject
*self
, int striptype
, PyObject
*args
)
2125 PyObject
*sep
= NULL
;
2127 if (!PyArg_ParseTuple(args
, (char *)stripformat
[striptype
], &sep
))
2130 if (sep
!= NULL
&& sep
!= Py_None
) {
2131 if (PyString_Check(sep
))
2132 return do_xstrip(self
, striptype
, sep
);
2133 #ifdef Py_USING_UNICODE
2134 else if (PyUnicode_Check(sep
)) {
2135 PyObject
*uniself
= PyUnicode_FromObject((PyObject
*)self
);
2139 res
= _PyUnicode_XStrip((PyUnicodeObject
*)uniself
,
2145 PyErr_Format(PyExc_TypeError
,
2146 #ifdef Py_USING_UNICODE
2147 "%s arg must be None, str or unicode",
2149 "%s arg must be None or str",
2151 STRIPNAME(striptype
));
2155 return do_strip(self
, striptype
);
2159 PyDoc_STRVAR(strip__doc__
,
2160 "S.strip([chars]) -> string or unicode\n\
2162 Return a copy of the string S with leading and trailing\n\
2163 whitespace removed.\n\
2164 If chars is given and not None, remove characters in chars instead.\n\
2165 If chars is unicode, S will be converted to unicode before stripping");
2168 string_strip(PyStringObject
*self
, PyObject
*args
)
2170 if (PyTuple_GET_SIZE(args
) == 0)
2171 return do_strip(self
, BOTHSTRIP
); /* Common case */
2173 return do_argstrip(self
, BOTHSTRIP
, args
);
2177 PyDoc_STRVAR(lstrip__doc__
,
2178 "S.lstrip([chars]) -> string or unicode\n\
2180 Return a copy of the string S with leading whitespace removed.\n\
2181 If chars is given and not None, remove characters in chars instead.\n\
2182 If chars is unicode, S will be converted to unicode before stripping");
2185 string_lstrip(PyStringObject
*self
, PyObject
*args
)
2187 if (PyTuple_GET_SIZE(args
) == 0)
2188 return do_strip(self
, LEFTSTRIP
); /* Common case */
2190 return do_argstrip(self
, LEFTSTRIP
, args
);
2194 PyDoc_STRVAR(rstrip__doc__
,
2195 "S.rstrip([chars]) -> string or unicode\n\
2197 Return a copy of the string S with trailing whitespace removed.\n\
2198 If chars is given and not None, remove characters in chars instead.\n\
2199 If chars is unicode, S will be converted to unicode before stripping");
2202 string_rstrip(PyStringObject
*self
, PyObject
*args
)
2204 if (PyTuple_GET_SIZE(args
) == 0)
2205 return do_strip(self
, RIGHTSTRIP
); /* Common case */
2207 return do_argstrip(self
, RIGHTSTRIP
, args
);
2211 PyDoc_STRVAR(lower__doc__
,
2212 "S.lower() -> string\n\
2214 Return a copy of the string S converted to lowercase.");
2216 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2218 #define _tolower tolower
2222 string_lower(PyStringObject
*self
)
2225 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2228 newobj
= PyString_FromStringAndSize(NULL
, n
);
2232 s
= PyString_AS_STRING(newobj
);
2234 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2236 for (i
= 0; i
< n
; i
++) {
2237 int c
= Py_CHARMASK(s
[i
]);
2245 PyDoc_STRVAR(upper__doc__
,
2246 "S.upper() -> string\n\
2248 Return a copy of the string S converted to uppercase.");
2251 #define _toupper toupper
2255 string_upper(PyStringObject
*self
)
2258 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2261 newobj
= PyString_FromStringAndSize(NULL
, n
);
2265 s
= PyString_AS_STRING(newobj
);
2267 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2269 for (i
= 0; i
< n
; i
++) {
2270 int c
= Py_CHARMASK(s
[i
]);
2278 PyDoc_STRVAR(title__doc__
,
2279 "S.title() -> string\n\
2281 Return a titlecased version of S, i.e. words start with uppercase\n\
2282 characters, all remaining cased characters have lowercase.");
2285 string_title(PyStringObject
*self
)
2287 char *s
= PyString_AS_STRING(self
), *s_new
;
2288 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2289 int previous_is_cased
= 0;
2292 newobj
= PyString_FromStringAndSize(NULL
, n
);
2295 s_new
= PyString_AsString(newobj
);
2296 for (i
= 0; i
< n
; i
++) {
2297 int c
= Py_CHARMASK(*s
++);
2299 if (!previous_is_cased
)
2301 previous_is_cased
= 1;
2302 } else if (isupper(c
)) {
2303 if (previous_is_cased
)
2305 previous_is_cased
= 1;
2307 previous_is_cased
= 0;
2313 PyDoc_STRVAR(capitalize__doc__
,
2314 "S.capitalize() -> string\n\
2316 Return a copy of the string S with only its first character\n\
2320 string_capitalize(PyStringObject
*self
)
2322 char *s
= PyString_AS_STRING(self
), *s_new
;
2323 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2326 newobj
= PyString_FromStringAndSize(NULL
, n
);
2329 s_new
= PyString_AsString(newobj
);
2331 int c
= Py_CHARMASK(*s
++);
2333 *s_new
= toupper(c
);
2338 for (i
= 1; i
< n
; i
++) {
2339 int c
= Py_CHARMASK(*s
++);
2341 *s_new
= tolower(c
);
2350 PyDoc_STRVAR(count__doc__
,
2351 "S.count(sub[, start[, end]]) -> int\n\
2353 Return the number of non-overlapping occurrences of substring sub in\n\
2354 string S[start:end]. Optional arguments start and end are interpreted\n\
2355 as in slice notation.");
2358 string_count(PyStringObject
*self
, PyObject
*args
)
2361 const char *str
= PyString_AS_STRING(self
), *sub
;
2363 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
2365 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
2366 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2369 if (PyString_Check(sub_obj
)) {
2370 sub
= PyString_AS_STRING(sub_obj
);
2371 sub_len
= PyString_GET_SIZE(sub_obj
);
2373 #ifdef Py_USING_UNICODE
2374 else if (PyUnicode_Check(sub_obj
)) {
2376 count
= PyUnicode_Count((PyObject
*)self
, sub_obj
, start
, end
);
2380 return PyInt_FromSsize_t(count
);
2383 else if (PyObject_AsCharBuffer(sub_obj
, &sub
, &sub_len
))
2386 string_adjust_indices(&start
, &end
, PyString_GET_SIZE(self
));
2388 return PyInt_FromSsize_t(
2389 stringlib_count(str
+ start
, end
- start
, sub
, sub_len
)
2393 PyDoc_STRVAR(swapcase__doc__
,
2394 "S.swapcase() -> string\n\
2396 Return a copy of the string S with uppercase characters\n\
2397 converted to lowercase and vice versa.");
2400 string_swapcase(PyStringObject
*self
)
2402 char *s
= PyString_AS_STRING(self
), *s_new
;
2403 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2406 newobj
= PyString_FromStringAndSize(NULL
, n
);
2409 s_new
= PyString_AsString(newobj
);
2410 for (i
= 0; i
< n
; i
++) {
2411 int c
= Py_CHARMASK(*s
++);
2413 *s_new
= toupper(c
);
2415 else if (isupper(c
)) {
2416 *s_new
= tolower(c
);
2426 PyDoc_STRVAR(translate__doc__
,
2427 "S.translate(table [,deletechars]) -> string\n\
2429 Return a copy of the string S, where all characters occurring\n\
2430 in the optional argument deletechars are removed, and the\n\
2431 remaining characters have been mapped through the given\n\
2432 translation table, which must be a string of length 256.");
2435 string_translate(PyStringObject
*self
, PyObject
*args
)
2437 register char *input
, *output
;
2439 register Py_ssize_t i
, c
, changed
= 0;
2440 PyObject
*input_obj
= (PyObject
*)self
;
2441 const char *output_start
, *del_table
=NULL
;
2442 Py_ssize_t inlen
, tablen
, dellen
= 0;
2444 int trans_table
[256];
2445 PyObject
*tableobj
, *delobj
= NULL
;
2447 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
2448 &tableobj
, &delobj
))
2451 if (PyString_Check(tableobj
)) {
2452 table
= PyString_AS_STRING(tableobj
);
2453 tablen
= PyString_GET_SIZE(tableobj
);
2455 else if (tableobj
== Py_None
) {
2459 #ifdef Py_USING_UNICODE
2460 else if (PyUnicode_Check(tableobj
)) {
2461 /* Unicode .translate() does not support the deletechars
2462 parameter; instead a mapping to None will cause characters
2464 if (delobj
!= NULL
) {
2465 PyErr_SetString(PyExc_TypeError
,
2466 "deletions are implemented differently for unicode");
2469 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
2472 else if (PyObject_AsCharBuffer(tableobj
, &table
, &tablen
))
2475 if (tablen
!= 256) {
2476 PyErr_SetString(PyExc_ValueError
,
2477 "translation table must be 256 characters long");
2481 if (delobj
!= NULL
) {
2482 if (PyString_Check(delobj
)) {
2483 del_table
= PyString_AS_STRING(delobj
);
2484 dellen
= PyString_GET_SIZE(delobj
);
2486 #ifdef Py_USING_UNICODE
2487 else if (PyUnicode_Check(delobj
)) {
2488 PyErr_SetString(PyExc_TypeError
,
2489 "deletions are implemented differently for unicode");
2493 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
2501 inlen
= PyString_GET_SIZE(input_obj
);
2502 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
2505 output_start
= output
= PyString_AsString(result
);
2506 input
= PyString_AS_STRING(input_obj
);
2508 if (dellen
== 0 && table
!= NULL
) {
2509 /* If no deletions are required, use faster code */
2510 for (i
= inlen
; --i
>= 0; ) {
2511 c
= Py_CHARMASK(*input
++);
2512 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
2515 if (changed
|| !PyString_CheckExact(input_obj
))
2518 Py_INCREF(input_obj
);
2522 if (table
== NULL
) {
2523 for (i
= 0; i
< 256; i
++)
2524 trans_table
[i
] = Py_CHARMASK(i
);
2526 for (i
= 0; i
< 256; i
++)
2527 trans_table
[i
] = Py_CHARMASK(table
[i
]);
2530 for (i
= 0; i
< dellen
; i
++)
2531 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
2533 for (i
= inlen
; --i
>= 0; ) {
2534 c
= Py_CHARMASK(*input
++);
2535 if (trans_table
[c
] != -1)
2536 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
2540 if (!changed
&& PyString_CheckExact(input_obj
)) {
2542 Py_INCREF(input_obj
);
2545 /* Fix the size of the resulting string */
2547 _PyString_Resize(&result
, output
- output_start
);
2555 /* find and count characters and substrings */
2557 #define findchar(target, target_len, c) \
2558 ((char *)memchr((const void *)(target), c, target_len))
2560 /* String ops must return a string. */
2561 /* If the object is subclass of string, create a copy */
2562 Py_LOCAL(PyStringObject
*)
2563 return_self(PyStringObject
*self
)
2565 if (PyString_CheckExact(self
)) {
2569 return (PyStringObject
*)PyString_FromStringAndSize(
2570 PyString_AS_STRING(self
),
2571 PyString_GET_SIZE(self
));
2574 Py_LOCAL_INLINE(Py_ssize_t
)
2575 countchar(const char *target
, int target_len
, char c
, Py_ssize_t maxcount
)
2578 const char *start
=target
;
2579 const char *end
=target
+target_len
;
2581 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
2583 if (count
>= maxcount
)
2590 Py_LOCAL(Py_ssize_t
)
2591 findstring(const char *target
, Py_ssize_t target_len
,
2592 const char *pattern
, Py_ssize_t pattern_len
,
2598 start
+= target_len
;
2602 if (end
> target_len
) {
2604 } else if (end
< 0) {
2610 /* zero-length substrings always match at the first attempt */
2611 if (pattern_len
== 0)
2612 return (direction
> 0) ? start
: end
;
2616 if (direction
< 0) {
2617 for (; end
>= start
; end
--)
2618 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
))
2621 for (; start
<= end
; start
++)
2622 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
))
2628 Py_LOCAL_INLINE(Py_ssize_t
)
2629 countstring(const char *target
, Py_ssize_t target_len
,
2630 const char *pattern
, Py_ssize_t pattern_len
,
2633 int direction
, Py_ssize_t maxcount
)
2638 start
+= target_len
;
2642 if (end
> target_len
) {
2644 } else if (end
< 0) {
2650 /* zero-length substrings match everywhere */
2651 if (pattern_len
== 0 || maxcount
== 0) {
2652 if (target_len
+1 < maxcount
)
2653 return target_len
+1;
2658 if (direction
< 0) {
2659 for (; (end
>= start
); end
--)
2660 if (Py_STRING_MATCH(target
, end
, pattern
, pattern_len
)) {
2662 if (--maxcount
<= 0) break;
2663 end
-= pattern_len
-1;
2666 for (; (start
<= end
); start
++)
2667 if (Py_STRING_MATCH(target
, start
, pattern
, pattern_len
)) {
2669 if (--maxcount
<= 0)
2671 start
+= pattern_len
-1;
2678 /* Algorithms for different cases of string replacement */
2680 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2681 Py_LOCAL(PyStringObject
*)
2682 replace_interleave(PyStringObject
*self
,
2683 const char *to_s
, Py_ssize_t to_len
,
2684 Py_ssize_t maxcount
)
2686 char *self_s
, *result_s
;
2687 Py_ssize_t self_len
, result_len
;
2688 Py_ssize_t count
, i
, product
;
2689 PyStringObject
*result
;
2691 self_len
= PyString_GET_SIZE(self
);
2693 /* 1 at the end plus 1 after every character */
2695 if (maxcount
< count
)
2698 /* Check for overflow */
2699 /* result_len = count * to_len + self_len; */
2700 product
= count
* to_len
;
2701 if (product
/ to_len
!= count
) {
2702 PyErr_SetString(PyExc_OverflowError
,
2703 "replace string is too long");
2706 result_len
= product
+ self_len
;
2707 if (result_len
< 0) {
2708 PyErr_SetString(PyExc_OverflowError
,
2709 "replace string is too long");
2713 if (! (result
= (PyStringObject
*)
2714 PyString_FromStringAndSize(NULL
, result_len
)) )
2717 self_s
= PyString_AS_STRING(self
);
2718 result_s
= PyString_AS_STRING(result
);
2720 /* TODO: special case single character, which doesn't need memcpy */
2722 /* Lay the first one down (guaranteed this will occur) */
2723 Py_MEMCPY(result_s
, to_s
, to_len
);
2727 for (i
=0; i
<count
; i
++) {
2728 *result_s
++ = *self_s
++;
2729 Py_MEMCPY(result_s
, to_s
, to_len
);
2733 /* Copy the rest of the original string */
2734 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
2739 /* Special case for deleting a single character */
2740 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2741 Py_LOCAL(PyStringObject
*)
2742 replace_delete_single_character(PyStringObject
*self
,
2743 char from_c
, Py_ssize_t maxcount
)
2745 char *self_s
, *result_s
;
2746 char *start
, *next
, *end
;
2747 Py_ssize_t self_len
, result_len
;
2749 PyStringObject
*result
;
2751 self_len
= PyString_GET_SIZE(self
);
2752 self_s
= PyString_AS_STRING(self
);
2754 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2756 return return_self(self
);
2759 result_len
= self_len
- count
; /* from_len == 1 */
2760 assert(result_len
>=0);
2762 if ( (result
= (PyStringObject
*)
2763 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2765 result_s
= PyString_AS_STRING(result
);
2768 end
= self_s
+ self_len
;
2769 while (count
-- > 0) {
2770 next
= findchar(start
, end
-start
, from_c
);
2773 Py_MEMCPY(result_s
, start
, next
-start
);
2774 result_s
+= (next
-start
);
2777 Py_MEMCPY(result_s
, start
, end
-start
);
2782 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2784 Py_LOCAL(PyStringObject
*)
2785 replace_delete_substring(PyStringObject
*self
,
2786 const char *from_s
, Py_ssize_t from_len
,
2787 Py_ssize_t maxcount
) {
2788 char *self_s
, *result_s
;
2789 char *start
, *next
, *end
;
2790 Py_ssize_t self_len
, result_len
;
2791 Py_ssize_t count
, offset
;
2792 PyStringObject
*result
;
2794 self_len
= PyString_GET_SIZE(self
);
2795 self_s
= PyString_AS_STRING(self
);
2797 count
= countstring(self_s
, self_len
,
2804 return return_self(self
);
2807 result_len
= self_len
- (count
* from_len
);
2808 assert (result_len
>=0);
2810 if ( (result
= (PyStringObject
*)
2811 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2814 result_s
= PyString_AS_STRING(result
);
2817 end
= self_s
+ self_len
;
2818 while (count
-- > 0) {
2819 offset
= findstring(start
, end
-start
,
2821 0, end
-start
, FORWARD
);
2824 next
= start
+ offset
;
2826 Py_MEMCPY(result_s
, start
, next
-start
);
2828 result_s
+= (next
-start
);
2829 start
= next
+from_len
;
2831 Py_MEMCPY(result_s
, start
, end
-start
);
2835 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2836 Py_LOCAL(PyStringObject
*)
2837 replace_single_character_in_place(PyStringObject
*self
,
2838 char from_c
, char to_c
,
2839 Py_ssize_t maxcount
)
2841 char *self_s
, *result_s
, *start
, *end
, *next
;
2842 Py_ssize_t self_len
;
2843 PyStringObject
*result
;
2845 /* The result string will be the same size */
2846 self_s
= PyString_AS_STRING(self
);
2847 self_len
= PyString_GET_SIZE(self
);
2849 next
= findchar(self_s
, self_len
, from_c
);
2852 /* No matches; return the original string */
2853 return return_self(self
);
2856 /* Need to make a new string */
2857 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2860 result_s
= PyString_AS_STRING(result
);
2861 Py_MEMCPY(result_s
, self_s
, self_len
);
2863 /* change everything in-place, starting with this one */
2864 start
= result_s
+ (next
-self_s
);
2867 end
= result_s
+ self_len
;
2869 while (--maxcount
> 0) {
2870 next
= findchar(start
, end
-start
, from_c
);
2880 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2881 Py_LOCAL(PyStringObject
*)
2882 replace_substring_in_place(PyStringObject
*self
,
2883 const char *from_s
, Py_ssize_t from_len
,
2884 const char *to_s
, Py_ssize_t to_len
,
2885 Py_ssize_t maxcount
)
2887 char *result_s
, *start
, *end
;
2889 Py_ssize_t self_len
, offset
;
2890 PyStringObject
*result
;
2892 /* The result string will be the same size */
2894 self_s
= PyString_AS_STRING(self
);
2895 self_len
= PyString_GET_SIZE(self
);
2897 offset
= findstring(self_s
, self_len
,
2899 0, self_len
, FORWARD
);
2901 /* No matches; return the original string */
2902 return return_self(self
);
2905 /* Need to make a new string */
2906 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2909 result_s
= PyString_AS_STRING(result
);
2910 Py_MEMCPY(result_s
, self_s
, self_len
);
2912 /* change everything in-place, starting with this one */
2913 start
= result_s
+ offset
;
2914 Py_MEMCPY(start
, to_s
, from_len
);
2916 end
= result_s
+ self_len
;
2918 while ( --maxcount
> 0) {
2919 offset
= findstring(start
, end
-start
,
2921 0, end
-start
, FORWARD
);
2924 Py_MEMCPY(start
+offset
, to_s
, from_len
);
2925 start
+= offset
+from_len
;
2931 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2932 Py_LOCAL(PyStringObject
*)
2933 replace_single_character(PyStringObject
*self
,
2935 const char *to_s
, Py_ssize_t to_len
,
2936 Py_ssize_t maxcount
)
2938 char *self_s
, *result_s
;
2939 char *start
, *next
, *end
;
2940 Py_ssize_t self_len
, result_len
;
2941 Py_ssize_t count
, product
;
2942 PyStringObject
*result
;
2944 self_s
= PyString_AS_STRING(self
);
2945 self_len
= PyString_GET_SIZE(self
);
2947 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2949 /* no matches, return unchanged */
2950 return return_self(self
);
2953 /* use the difference between current and new, hence the "-1" */
2954 /* result_len = self_len + count * (to_len-1) */
2955 product
= count
* (to_len
-1);
2956 if (product
/ (to_len
-1) != count
) {
2957 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2960 result_len
= self_len
+ product
;
2961 if (result_len
< 0) {
2962 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2966 if ( (result
= (PyStringObject
*)
2967 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2969 result_s
= PyString_AS_STRING(result
);
2972 end
= self_s
+ self_len
;
2973 while (count
-- > 0) {
2974 next
= findchar(start
, end
-start
, from_c
);
2978 if (next
== start
) {
2979 /* replace with the 'to' */
2980 Py_MEMCPY(result_s
, to_s
, to_len
);
2984 /* copy the unchanged old then the 'to' */
2985 Py_MEMCPY(result_s
, start
, next
-start
);
2986 result_s
+= (next
-start
);
2987 Py_MEMCPY(result_s
, to_s
, to_len
);
2992 /* Copy the remainder of the remaining string */
2993 Py_MEMCPY(result_s
, start
, end
-start
);
2998 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2999 Py_LOCAL(PyStringObject
*)
3000 replace_substring(PyStringObject
*self
,
3001 const char *from_s
, Py_ssize_t from_len
,
3002 const char *to_s
, Py_ssize_t to_len
,
3003 Py_ssize_t maxcount
) {
3004 char *self_s
, *result_s
;
3005 char *start
, *next
, *end
;
3006 Py_ssize_t self_len
, result_len
;
3007 Py_ssize_t count
, offset
, product
;
3008 PyStringObject
*result
;
3010 self_s
= PyString_AS_STRING(self
);
3011 self_len
= PyString_GET_SIZE(self
);
3013 count
= countstring(self_s
, self_len
,
3015 0, self_len
, FORWARD
, maxcount
);
3017 /* no matches, return unchanged */
3018 return return_self(self
);
3021 /* Check for overflow */
3022 /* result_len = self_len + count * (to_len-from_len) */
3023 product
= count
* (to_len
-from_len
);
3024 if (product
/ (to_len
-from_len
) != count
) {
3025 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
3028 result_len
= self_len
+ product
;
3029 if (result_len
< 0) {
3030 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
3034 if ( (result
= (PyStringObject
*)
3035 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
3037 result_s
= PyString_AS_STRING(result
);
3040 end
= self_s
+ self_len
;
3041 while (count
-- > 0) {
3042 offset
= findstring(start
, end
-start
,
3044 0, end
-start
, FORWARD
);
3047 next
= start
+offset
;
3048 if (next
== start
) {
3049 /* replace with the 'to' */
3050 Py_MEMCPY(result_s
, to_s
, to_len
);
3054 /* copy the unchanged old then the 'to' */
3055 Py_MEMCPY(result_s
, start
, next
-start
);
3056 result_s
+= (next
-start
);
3057 Py_MEMCPY(result_s
, to_s
, to_len
);
3059 start
= next
+from_len
;
3062 /* Copy the remainder of the remaining string */
3063 Py_MEMCPY(result_s
, start
, end
-start
);
3069 Py_LOCAL(PyStringObject
*)
3070 replace(PyStringObject
*self
,
3071 const char *from_s
, Py_ssize_t from_len
,
3072 const char *to_s
, Py_ssize_t to_len
,
3073 Py_ssize_t maxcount
)
3076 maxcount
= PY_SSIZE_T_MAX
;
3077 } else if (maxcount
== 0 || PyString_GET_SIZE(self
) == 0) {
3078 /* nothing to do; return the original string */
3079 return return_self(self
);
3082 if (maxcount
== 0 ||
3083 (from_len
== 0 && to_len
== 0)) {
3084 /* nothing to do; return the original string */
3085 return return_self(self
);
3088 /* Handle zero-length special cases */
3090 if (from_len
== 0) {
3091 /* insert the 'to' string everywhere. */
3092 /* >>> "Python".replace("", ".") */
3093 /* '.P.y.t.h.o.n.' */
3094 return replace_interleave(self
, to_s
, to_len
, maxcount
);
3097 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3098 /* point for an empty self string to generate a non-empty string */
3099 /* Special case so the remaining code always gets a non-empty string */
3100 if (PyString_GET_SIZE(self
) == 0) {
3101 return return_self(self
);
3105 /* delete all occurrences of 'from' string */
3106 if (from_len
== 1) {
3107 return replace_delete_single_character(
3108 self
, from_s
[0], maxcount
);
3110 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
3114 /* Handle special case where both strings have the same length */
3116 if (from_len
== to_len
) {
3117 if (from_len
== 1) {
3118 return replace_single_character_in_place(
3124 return replace_substring_in_place(
3125 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
3129 /* Otherwise use the more generic algorithms */
3130 if (from_len
== 1) {
3131 return replace_single_character(self
, from_s
[0],
3132 to_s
, to_len
, maxcount
);
3134 /* len('from')>=2, len('to')>=1 */
3135 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
/* str.replace(old, new[, count]): docstring and method implementation.
 *
 * The arguments are parsed with the format "OO|n:replace" into `from`,
 * `to` and the optional `count`, which is initialised to -1.  For each of
 * `from` and `to`: a str object supplies its buffer and length through
 * PyString_AS_STRING / PyString_GET_SIZE; when Py_USING_UNICODE is defined
 * a unicode object makes the call return PyUnicode_Replace(...); any other
 * object is read through PyObject_AsCharBuffer().  The result is whatever
 * the file-local replace() helper returns for these buffers and `count`.
 *
 * NOTE(review): the statements executed when PyArg_ParseTuple or
 * PyObject_AsCharBuffer fail are not visible in this listing; presumably
 * they return NULL -- confirm against the full source.
 */
3139 PyDoc_STRVAR(replace__doc__
,
3140 "S.replace (old, new[, count]) -> string\n\
3142 Return a copy of string S with all occurrences of substring\n\
3143 old replaced by new. If the optional argument count is\n\
3144 given, only the first count occurrences are replaced.");
3147 string_replace(PyStringObject
*self
, PyObject
*args
)
3149 Py_ssize_t count
= -1;
3150 PyObject
*from
, *to
;
3151 const char *from_s
, *to_s
;
3152 Py_ssize_t from_len
, to_len
;
3154 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
3157 if (PyString_Check(from
)) {
3158 from_s
= PyString_AS_STRING(from
);
3159 from_len
= PyString_GET_SIZE(from
);
3161 #ifdef Py_USING_UNICODE
3162 if (PyUnicode_Check(from
))
3163 return PyUnicode_Replace((PyObject
*)self
,
3166 else if (PyObject_AsCharBuffer(from
, &from_s
, &from_len
))
3169 if (PyString_Check(to
)) {
3170 to_s
= PyString_AS_STRING(to
);
3171 to_len
= PyString_GET_SIZE(to
);
3173 #ifdef Py_USING_UNICODE
3174 else if (PyUnicode_Check(to
))
3175 return PyUnicode_Replace((PyObject
*)self
,
3178 else if (PyObject_AsCharBuffer(to
, &to_s
, &to_len
))
3181 return (PyObject
*)replace((PyStringObject
*) self
,
3183 to_s
, to_len
, count
);
3188 /* Matches the end (direction >= 0) or start (direction < 0) of self
3189 * against substr, using the start and end arguments. Returns
3190 * -1 on error, 0 if not found and 1 if found.
/* Additional notes on _string_tailmatch (used by string_startswith and
 * string_endswith below):
 *  - a str `substr` is read with PyString_AS_STRING / PyString_GET_SIZE;
 *    with Py_USING_UNICODE a unicode `substr` is delegated to
 *    PyUnicode_Tailmatch(); anything else goes through
 *    PyObject_AsCharBuffer();
 *  - start/end are passed to string_adjust_indices() together with the
 *    length of self before the bounds tests;
 *  - the visible comparison is memcmp() of slen bytes at str+start, guarded
 *    by end-start >= slen.
 * NOTE(review): the declarations of str/sub/slen and the bodies of the
 * bounds tests are not visible in this listing.
 */
3193 _string_tailmatch(PyStringObject
*self
, PyObject
*substr
, Py_ssize_t start
,
3194 Py_ssize_t end
, int direction
)
3196 Py_ssize_t len
= PyString_GET_SIZE(self
);
3201 if (PyString_Check(substr
)) {
3202 sub
= PyString_AS_STRING(substr
);
3203 slen
= PyString_GET_SIZE(substr
);
3205 #ifdef Py_USING_UNICODE
3206 else if (PyUnicode_Check(substr
))
3207 return PyUnicode_Tailmatch((PyObject
*)self
,
3208 substr
, start
, end
, direction
);
3210 else if (PyObject_AsCharBuffer(substr
, &sub
, &slen
))
3212 str
= PyString_AS_STRING(self
);
3214 string_adjust_indices(&start
, &end
, len
);
3216 if (direction
< 0) {
3218 if (start
+slen
> len
)
3222 if (end
-start
< slen
|| start
> len
)
3225 if (end
-slen
> start
)
3228 if (end
-start
>= slen
)
3229 return ! memcmp(str
+start
, sub
, slen
);
/* str.startswith(prefix[, start[, end]]): docstring and implementation.
 *
 * `start` defaults to 0 and `end` to PY_SSIZE_T_MAX; both optional
 * arguments are converted with _PyEval_SliceIndex (format
 * "O|O&O&:startswith").  When the first argument is a tuple, each element
 * is tried in turn through _string_tailmatch(); otherwise a single
 * _string_tailmatch() call is made with direction -1.  The visible exit
 * converts `result` with PyBool_FromLong().
 *
 * NOTE(review): the handling of a -1 (error) result and the early exit from
 * the tuple loop are on lines not visible in this listing.
 */
3234 PyDoc_STRVAR(startswith__doc__
,
3235 "S.startswith(prefix[, start[, end]]) -> bool\n\
3237 Return True if S starts with the specified prefix, False otherwise.\n\
3238 With optional start, test S beginning at that position.\n\
3239 With optional end, stop comparing S at that position.\n\
3240 prefix can also be a tuple of strings to try.");
3243 string_startswith(PyStringObject
*self
, PyObject
*args
)
3245 Py_ssize_t start
= 0;
3246 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3250 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
3251 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
3253 if (PyTuple_Check(subobj
)) {
3255 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
3256 result
= _string_tailmatch(self
,
3257 PyTuple_GET_ITEM(subobj
, i
),
3267 result
= _string_tailmatch(self
, subobj
, start
, end
, -1);
3271 return PyBool_FromLong(result
);
/* str.endswith(suffix[, start[, end]]): docstring and implementation.
 *
 * Mirrors string_startswith: `start` defaults to 0, `end` to
 * PY_SSIZE_T_MAX, both converted with _PyEval_SliceIndex (format
 * "O|O&O&:endswith").  A tuple argument is tried element by element
 * through _string_tailmatch(); otherwise a single call is made with
 * direction +1.  The visible exit converts `result` with PyBool_FromLong().
 *
 * NOTE(review): the handling of a -1 (error) result and the early exit from
 * the tuple loop are on lines not visible in this listing.
 */
3275 PyDoc_STRVAR(endswith__doc__
,
3276 "S.endswith(suffix[, start[, end]]) -> bool\n\
3278 Return True if S ends with the specified suffix, False otherwise.\n\
3279 With optional start, test S beginning at that position.\n\
3280 With optional end, stop comparing S at that position.\n\
3281 suffix can also be a tuple of strings to try.");
3284 string_endswith(PyStringObject
*self
, PyObject
*args
)
3286 Py_ssize_t start
= 0;
3287 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3291 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
3292 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
3294 if (PyTuple_Check(subobj
)) {
3296 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
3297 result
= _string_tailmatch(self
,
3298 PyTuple_GET_ITEM(subobj
, i
),
3308 result
= _string_tailmatch(self
, subobj
, start
, end
, +1);
3312 return PyBool_FromLong(result
);
/* str.encode([encoding[, errors]]): docstring and implementation.
 *
 * Both arguments are optional C strings (format "|ss:encode") and stay
 * NULL when omitted.  The work is done by PyString_AsEncodedObject(); if
 * the codec hands back something that is neither str nor unicode, a
 * TypeError naming the returned type (Py_TYPE(v)->tp_name) is raised with
 * PyErr_Format().
 *
 * NOTE(review): the NULL check on `v`, the release of `v` on the error
 * path and the final return are on lines not visible in this listing.
 */
3316 PyDoc_STRVAR(encode__doc__
,
3317 "S.encode([encoding[,errors]]) -> object\n\
3319 Encodes S using the codec registered for encoding. encoding defaults\n\
3320 to the default encoding. errors may be given to set a different error\n\
3321 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3322 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3323 'xmlcharrefreplace' as well as any other name registered with\n\
3324 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3327 string_encode(PyStringObject
*self
, PyObject
*args
)
3329 char *encoding
= NULL
;
3330 char *errors
= NULL
;
3333 if (!PyArg_ParseTuple(args
, "|ss:encode", &encoding
, &errors
))
3335 v
= PyString_AsEncodedObject((PyObject
*)self
, encoding
, errors
);
3338 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3339 PyErr_Format(PyExc_TypeError
,
3340 "encoder did not return a string/unicode object "
3342 Py_TYPE(v
)->tp_name
);
/* str.decode([encoding[, errors]]): docstring and implementation.
 *
 * Both arguments are optional C strings (format "|ss:decode") and stay
 * NULL when omitted.  The work is done by PyString_AsDecodedObject(); if
 * the codec hands back something that is neither str nor unicode, a
 * TypeError naming the returned type (Py_TYPE(v)->tp_name) is raised with
 * PyErr_Format().
 *
 * NOTE(review): the NULL check on `v`, the release of `v` on the error
 * path and the final return are on lines not visible in this listing.
 */
3353 PyDoc_STRVAR(decode__doc__
,
3354 "S.decode([encoding[,errors]]) -> object\n\
3356 Decodes S using the codec registered for encoding. encoding defaults\n\
3357 to the default encoding. errors may be given to set a different error\n\
3358 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3359 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3360 as well as any other name registerd with codecs.register_error that is\n\
3361 able to handle UnicodeDecodeErrors.");
3364 string_decode(PyStringObject
*self
, PyObject
*args
)
3366 char *encoding
= NULL
;
3367 char *errors
= NULL
;
3370 if (!PyArg_ParseTuple(args
, "|ss:decode", &encoding
, &errors
))
3372 v
= PyString_AsDecodedObject((PyObject
*)self
, encoding
, errors
);
3375 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3376 PyErr_Format(PyExc_TypeError
,
3377 "decoder did not return a string/unicode object "
3379 Py_TYPE(v
)->tp_name
);
/* str.expandtabs([tabsize]): docstring and implementation.
 *
 * `tabsize` is parsed as an optional int (format "|i:expandtabs").  The
 * function walks the bytes of self twice:
 *   1. a sizing pass that keeps `i` (chars up to and including the most
 *      recent \n or \r) and `j` (chars since then), comparing each counter
 *      against PY_SSIZE_T_MAX before it grows;
 *   2. a fill pass over a fresh string of i + j bytes obtained from
 *      PyString_FromStringAndSize(NULL, i + j), where `q` is the next
 *      output position and `qe` the end of the output.
 * The tab step is tabsize - (j % tabsize) in both passes.  The visible
 * overflow report is an OverflowError with "new string is too long".
 *
 * NOTE(review): the default value of tabsize, the tests that select the
 * tab branch, and the jumps to the overflow handler are on lines not
 * visible in this listing.
 */
3390 PyDoc_STRVAR(expandtabs__doc__
,
3391 "S.expandtabs([tabsize]) -> string\n\
3393 Return a copy of S where all tab characters are expanded using spaces.\n\
3394 If tabsize is not given, a tab size of 8 characters is assumed.");
3397 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
3399 const char *e
, *p
, *qe
;
3401 Py_ssize_t i
, j
, incr
;
3405 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
3408 /* First pass: determine size of output string */
3409 i
= 0; /* chars up to and including most recent \n or \r */
3410 j
= 0; /* chars since most recent \n or \r (use in tab calculations) */
3411 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
); /* end of input */
3412 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3415 incr
= tabsize
- (j
% tabsize
);
3416 if (j
> PY_SSIZE_T_MAX
- incr
)
3422 if (j
> PY_SSIZE_T_MAX
- 1)
3425 if (*p
== '\n' || *p
== '\r') {
3426 if (i
> PY_SSIZE_T_MAX
- j
)
3433 if (i
> PY_SSIZE_T_MAX
- j
)
3436 /* Second pass: create output string and fill it */
3437 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
3441 j
= 0; /* same as in first pass */
3442 q
= PyString_AS_STRING(u
); /* next output char */
3443 qe
= PyString_AS_STRING(u
) + PyString_GET_SIZE(u
); /* end of output */
3445 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3448 i
= tabsize
- (j
% tabsize
);
3462 if (*p
== '\n' || *p
== '\r')
3471 PyErr_SetString(PyExc_OverflowError
, "new string is too long");
/* pad: common helper behind ljust/rjust/center/zfill (all of which call it
 * below).
 *
 * When left == 0, right == 0 and self is exactly a str, self itself is
 * returned.  Otherwise a new string of left + len(self) + right bytes is
 * requested from PyString_FromStringAndSize(NULL, ...); the first `left`
 * bytes are set to `fill`, the bytes of self are copied in at offset
 * `left`, and a second memset starts at offset left + len(self).
 *
 * NOTE(review): the reference-count adjustment before returning self, the
 * NULL check on `u`, the arguments of the trailing memset and the final
 * return are on lines not visible in this listing.
 */
3475 Py_LOCAL_INLINE(PyObject
*)
3476 pad(PyStringObject
*self
, Py_ssize_t left
, Py_ssize_t right
, char fill
)
3485 if (left
== 0 && right
== 0 && PyString_CheckExact(self
)) {
3487 return (PyObject
*)self
;
3490 u
= PyString_FromStringAndSize(NULL
,
3491 left
+ PyString_GET_SIZE(self
) + right
);
3494 memset(PyString_AS_STRING(u
), fill
, left
);
3495 Py_MEMCPY(PyString_AS_STRING(u
) + left
,
3496 PyString_AS_STRING(self
),
3497 PyString_GET_SIZE(self
));
3499 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
/* str.ljust(width[, fillchar]): docstring and implementation.
 *
 * Parses a Py_ssize_t width and an optional single fill character
 * (format "n|c:ljust", fill defaults to ' ').  If self is already at least
 * `width` long and is exactly a str, self is returned; otherwise pad() is
 * called with no left padding and width - len(self) bytes on the right.
 *
 * NOTE(review): the declaration of `width`, the parse-failure return and
 * the reference-count adjustment before returning self are not visible in
 * this listing.
 */
3506 PyDoc_STRVAR(ljust__doc__
,
3507 "S.ljust(width[, fillchar]) -> string\n"
3509 "Return S left justified in a string of length width. Padding is\n"
3510 "done using the specified fill character (default is a space).");
3513 string_ljust(PyStringObject
*self
, PyObject
*args
)
3516 char fillchar
= ' ';
3518 if (!PyArg_ParseTuple(args
, "n|c:ljust", &width
, &fillchar
))
3521 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3523 return (PyObject
*) self
;
3526 return pad(self
, 0, width
- PyString_GET_SIZE(self
), fillchar
);
/* str.rjust(width[, fillchar]): docstring and implementation.
 *
 * Parses a Py_ssize_t width and an optional single fill character
 * (format "n|c:rjust", fill defaults to ' ').  If self is already at least
 * `width` long and is exactly a str, self is returned; otherwise pad() is
 * called with width - len(self) bytes on the left and none on the right.
 *
 * NOTE(review): the declaration of `width`, the parse-failure return and
 * the reference-count adjustment before returning self are not visible in
 * this listing.
 */
3530 PyDoc_STRVAR(rjust__doc__
,
3531 "S.rjust(width[, fillchar]) -> string\n"
3533 "Return S right justified in a string of length width. Padding is\n"
3534 "done using the specified fill character (default is a space)");
3537 string_rjust(PyStringObject
*self
, PyObject
*args
)
3540 char fillchar
= ' ';
3542 if (!PyArg_ParseTuple(args
, "n|c:rjust", &width
, &fillchar
))
3545 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3547 return (PyObject
*) self
;
3550 return pad(self
, width
- PyString_GET_SIZE(self
), 0, fillchar
);
/* str.center(width[, fillchar]): docstring and implementation.
 *
 * Parses a Py_ssize_t width and an optional single fill character
 * (format "n|c:center", fill defaults to ' ').  If self is already at
 * least `width` long and is exactly a str, self is returned.  Otherwise
 * marg = width - len(self) is split between the two sides:
 * left = marg / 2, plus one extra byte only when both marg and width are
 * odd (the `marg & width & 1` term); the right side receives marg - left.
 *
 * NOTE(review): the declaration of `width`, the parse-failure return and
 * the reference-count adjustment before returning self are not visible in
 * this listing.
 */
3554 PyDoc_STRVAR(center__doc__
,
3555 "S.center(width[, fillchar]) -> string\n"
3557 "Return S centered in a string of length width. Padding is\n"
3558 "done using the specified fill character (default is a space)");
3561 string_center(PyStringObject
*self
, PyObject
*args
)
3563 Py_ssize_t marg
, left
;
3565 char fillchar
= ' ';
3567 if (!PyArg_ParseTuple(args
, "n|c:center", &width
, &fillchar
))
3570 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3572 return (PyObject
*) self
;
3575 marg
= width
- PyString_GET_SIZE(self
);
3576 left
= marg
/ 2 + (marg
& width
& 1);
3578 return pad(self
, left
, marg
- left
, fillchar
);
/* str.zfill(width): docstring and implementation.
 *
 * Parses a Py_ssize_t width (format "n:zfill").  When self is already at
 * least `width` long: an exact str is returned as-is, anything else is
 * copied with PyString_FromStringAndSize().  Otherwise
 * fill = width - len(self) and pad(self, fill, 0, '0') builds the
 * zero-padded string `s`.  If the byte at index `fill` of the result (the
 * first byte of the original text) is '+' or '-', the sign is moved to the
 * beginning of the string, as the in-line comment states.
 *
 * NOTE(review): the statements that perform the sign move and the NULL
 * check on `s` are on lines not visible in this listing.
 */
3581 PyDoc_STRVAR(zfill__doc__
,
3582 "S.zfill(width) -> string\n"
3584 "Pad a numeric string S with zeros on the left, to fill a field\n"
3585 "of the specified width. The string S is never truncated.");
3588 string_zfill(PyStringObject
*self
, PyObject
*args
)
3595 if (!PyArg_ParseTuple(args
, "n:zfill", &width
))
3598 if (PyString_GET_SIZE(self
) >= width
) {
3599 if (PyString_CheckExact(self
)) {
3601 return (PyObject
*) self
;
3604 return PyString_FromStringAndSize(
3605 PyString_AS_STRING(self
),
3606 PyString_GET_SIZE(self
)
3610 fill
= width
- PyString_GET_SIZE(self
);
3612 s
= pad(self
, fill
, 0, '0');
3617 p
= PyString_AS_STRING(s
);
3618 if (p
[fill
] == '+' || p
[fill
] == '-') {
3619 /* move sign to beginning of string */
3624 return (PyObject
*) s
;
/* str.isspace(): docstring and implementation.
 *
 * The bytes of self are viewed as unsigned char.  Control flow visible
 * here: a one-byte string that passes the classification test returns
 * True immediately; an empty string returns False; otherwise every byte
 * in [p, e) is examined, the first failing byte returns False, and True
 * is returned once the loop completes.
 *
 * NOTE(review): the per-byte classification call itself is on lines not
 * visible in this listing.
 */
3627 PyDoc_STRVAR(isspace__doc__
,
3628 "S.isspace() -> bool\n\
3630 Return True if all characters in S are whitespace\n\
3631 and there is at least one character in S, False otherwise.");
3634 string_isspace(PyStringObject
*self
)
3636 register const unsigned char *p
3637 = (unsigned char *) PyString_AS_STRING(self
);
3638 register const unsigned char *e
;
3640 /* Shortcut for single character strings */
3641 if (PyString_GET_SIZE(self
) == 1 &&
3643 return PyBool_FromLong(1);
3645 /* Special case for empty strings */
3646 if (PyString_GET_SIZE(self
) == 0)
3647 return PyBool_FromLong(0);
3649 e
= p
+ PyString_GET_SIZE(self
);
3650 for (; p
< e
; p
++) {
3652 return PyBool_FromLong(0);
3654 return PyBool_FromLong(1);
/* str.isalpha(): docstring and implementation.
 *
 * The bytes of self are viewed as unsigned char.  Control flow visible
 * here: a one-byte string that passes the classification test returns
 * True immediately; an empty string returns False; otherwise every byte
 * in [p, e) is examined, the first failing byte returns False, and True
 * is returned once the loop completes.
 *
 * NOTE(review): the per-byte classification call itself is on lines not
 * visible in this listing.
 */
3658 PyDoc_STRVAR(isalpha__doc__
,
3659 "S.isalpha() -> bool\n\
3661 Return True if all characters in S are alphabetic\n\
3662 and there is at least one character in S, False otherwise.");
3665 string_isalpha(PyStringObject
*self
)
3667 register const unsigned char *p
3668 = (unsigned char *) PyString_AS_STRING(self
);
3669 register const unsigned char *e
;
3671 /* Shortcut for single character strings */
3672 if (PyString_GET_SIZE(self
) == 1 &&
3674 return PyBool_FromLong(1);
3676 /* Special case for empty strings */
3677 if (PyString_GET_SIZE(self
) == 0)
3678 return PyBool_FromLong(0);
3680 e
= p
+ PyString_GET_SIZE(self
);
3681 for (; p
< e
; p
++) {
3683 return PyBool_FromLong(0);
3685 return PyBool_FromLong(1);
/* str.isalnum(): docstring and implementation.
 *
 * The bytes of self are viewed as unsigned char.  Control flow visible
 * here: a one-byte string that passes the classification test returns
 * True immediately; an empty string returns False; otherwise every byte
 * in [p, e) is examined, the first failing byte returns False, and True
 * is returned once the loop completes.
 *
 * NOTE(review): the per-byte classification call itself is on lines not
 * visible in this listing.
 */
3689 PyDoc_STRVAR(isalnum__doc__
,
3690 "S.isalnum() -> bool\n\
3692 Return True if all characters in S are alphanumeric\n\
3693 and there is at least one character in S, False otherwise.");
3696 string_isalnum(PyStringObject
*self
)
3698 register const unsigned char *p
3699 = (unsigned char *) PyString_AS_STRING(self
);
3700 register const unsigned char *e
;
3702 /* Shortcut for single character strings */
3703 if (PyString_GET_SIZE(self
) == 1 &&
3705 return PyBool_FromLong(1);
3707 /* Special case for empty strings */
3708 if (PyString_GET_SIZE(self
) == 0)
3709 return PyBool_FromLong(0);
3711 e
= p
+ PyString_GET_SIZE(self
);
3712 for (; p
< e
; p
++) {
3714 return PyBool_FromLong(0);
3716 return PyBool_FromLong(1);
/* str.isdigit(): docstring and implementation.
 *
 * The bytes of self are viewed as unsigned char.  Control flow visible
 * here: a one-byte string that passes the classification test returns
 * True immediately; an empty string returns False; otherwise every byte
 * in [p, e) is examined, the first failing byte returns False, and True
 * is returned once the loop completes.
 *
 * NOTE(review): the per-byte classification call itself is on lines not
 * visible in this listing.
 */
3720 PyDoc_STRVAR(isdigit__doc__
,
3721 "S.isdigit() -> bool\n\
3723 Return True if all characters in S are digits\n\
3724 and there is at least one character in S, False otherwise.");
3727 string_isdigit(PyStringObject
*self
)
3729 register const unsigned char *p
3730 = (unsigned char *) PyString_AS_STRING(self
);
3731 register const unsigned char *e
;
3733 /* Shortcut for single character strings */
3734 if (PyString_GET_SIZE(self
) == 1 &&
3736 return PyBool_FromLong(1);
3738 /* Special case for empty strings */
3739 if (PyString_GET_SIZE(self
) == 0)
3740 return PyBool_FromLong(0);
3742 e
= p
+ PyString_GET_SIZE(self
);
3743 for (; p
< e
; p
++) {
3745 return PyBool_FromLong(0);
3747 return PyBool_FromLong(1);
/* str.islower(): docstring and implementation.
 *
 * A one-byte string is answered directly with islower(*p) != 0 and an
 * empty string returns False.  For longer strings the loop over [p, e)
 * returns False from its first branch, and its second branch fires on the
 * first islower() byte while `cased` is still zero; the function finally
 * returns PyBool_FromLong(cased).
 *
 * NOTE(review): the declaration/initialisation of `cased`, the condition of
 * the first loop branch and the body of the second are on lines not visible
 * in this listing.
 */
3751 PyDoc_STRVAR(islower__doc__
,
3752 "S.islower() -> bool\n\
3754 Return True if all cased characters in S are lowercase and there is\n\
3755 at least one cased character in S, False otherwise.");
3758 string_islower(PyStringObject
*self
)
3760 register const unsigned char *p
3761 = (unsigned char *) PyString_AS_STRING(self
);
3762 register const unsigned char *e
;
3765 /* Shortcut for single character strings */
3766 if (PyString_GET_SIZE(self
) == 1)
3767 return PyBool_FromLong(islower(*p
) != 0);
3769 /* Special case for empty strings */
3770 if (PyString_GET_SIZE(self
) == 0)
3771 return PyBool_FromLong(0);
3773 e
= p
+ PyString_GET_SIZE(self
);
3775 for (; p
< e
; p
++) {
3777 return PyBool_FromLong(0);
3778 else if (!cased
&& islower(*p
))
3781 return PyBool_FromLong(cased
);
/* str.isupper(): docstring and implementation.
 *
 * A one-byte string is answered directly with isupper(*p) != 0 and an
 * empty string returns False.  For longer strings the loop over [p, e)
 * returns False from its first branch, and its second branch fires on the
 * first isupper() byte while `cased` is still zero; the function finally
 * returns PyBool_FromLong(cased).
 *
 * NOTE(review): the declaration/initialisation of `cased`, the condition of
 * the first loop branch and the body of the second are on lines not visible
 * in this listing.
 */
3785 PyDoc_STRVAR(isupper__doc__
,
3786 "S.isupper() -> bool\n\
3788 Return True if all cased characters in S are uppercase and there is\n\
3789 at least one cased character in S, False otherwise.");
3792 string_isupper(PyStringObject
*self
)
3794 register const unsigned char *p
3795 = (unsigned char *) PyString_AS_STRING(self
);
3796 register const unsigned char *e
;
3799 /* Shortcut for single character strings */
3800 if (PyString_GET_SIZE(self
) == 1)
3801 return PyBool_FromLong(isupper(*p
) != 0);
3803 /* Special case for empty strings */
3804 if (PyString_GET_SIZE(self
) == 0)
3805 return PyBool_FromLong(0);
3807 e
= p
+ PyString_GET_SIZE(self
);
3809 for (; p
< e
; p
++) {
3811 return PyBool_FromLong(0);
3812 else if (!cased
&& isupper(*p
))
3815 return PyBool_FromLong(cased
);
/* str.istitle(): docstring and implementation.
 *
 * A one-byte string is answered with isupper(*p) != 0 and an empty string
 * returns False.  The loop tracks `previous_is_cased` (initially 0):
 *   - in the first branch a byte arriving while previous_is_cased is set
 *     returns False, otherwise previous_is_cased becomes 1;
 *   - a lowercase byte arriving while previous_is_cased is clear returns
 *     False, otherwise previous_is_cased becomes 1;
 *   - in the remaining branch previous_is_cased is reset to 0.
 * The result is PyBool_FromLong(cased).  The second parameter `uncased`
 * is not referenced on any visible line; the method is registered as
 * METH_NOARGS in string_methods.
 *
 * NOTE(review): the condition of the first branch (presumably an isupper
 * test -- confirm), every assignment to `cased`, and the end of the
 * docstring are on lines not visible in this listing.
 */
3819 PyDoc_STRVAR(istitle__doc__
,
3820 "S.istitle() -> bool\n\
3822 Return True if S is a titlecased string and there is at least one\n\
3823 character in S, i.e. uppercase characters may only follow uncased\n\
3824 characters and lowercase characters only cased ones. Return False\n\
3828 string_istitle(PyStringObject
*self
, PyObject
*uncased
)
3830 register const unsigned char *p
3831 = (unsigned char *) PyString_AS_STRING(self
);
3832 register const unsigned char *e
;
3833 int cased
, previous_is_cased
;
3835 /* Shortcut for single character strings */
3836 if (PyString_GET_SIZE(self
) == 1)
3837 return PyBool_FromLong(isupper(*p
) != 0);
3839 /* Special case for empty strings */
3840 if (PyString_GET_SIZE(self
) == 0)
3841 return PyBool_FromLong(0);
3843 e
= p
+ PyString_GET_SIZE(self
);
3845 previous_is_cased
= 0;
3846 for (; p
< e
; p
++) {
3847 register const unsigned char ch
= *p
;
3850 if (previous_is_cased
)
3851 return PyBool_FromLong(0);
3852 previous_is_cased
= 1;
3855 else if (islower(ch
)) {
3856 if (!previous_is_cased
)
3857 return PyBool_FromLong(0);
3858 previous_is_cased
= 1;
3862 previous_is_cased
= 0;
3864 return PyBool_FromLong(cased
);
/* str.splitlines([keepends]): docstring and implementation.
 *
 * `keepends` is parsed as an optional int (format "|i:splitlines").  The
 * result list starts empty (PyList_New(0)).  The scan uses two indices
 * over the `len` bytes at `data`: `j` is passed to SPLIT_APPEND as the
 * start of the current line and `i` advances until it reaches a '\n' or
 * '\r' (or the end of the data).  A '\r' that has a following byte is
 * examined for the CRLF case so the pair counts as one line break.  Lines
 * are appended with SPLIT_APPEND(data, j, eol); a final
 * SPLIT_APPEND(data, j, len) is also visible.
 *
 * NOTE(review): the declarations of data/len/list/eol/keepends, the way
 * `eol` is derived from `keepends`, and the error/cleanup path are on lines
 * not visible in this listing.
 */
3868 PyDoc_STRVAR(splitlines__doc__
,
3869 "S.splitlines([keepends]) -> list of strings\n\
3871 Return a list of the lines in S, breaking at line boundaries.\n\
3872 Line breaks are not included in the resulting list unless keepends\n\
3873 is given and true.");
3876 string_splitlines(PyStringObject
*self
, PyObject
*args
)
3878 register Py_ssize_t i
;
3879 register Py_ssize_t j
;
3886 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
3889 data
= PyString_AS_STRING(self
);
3890 len
= PyString_GET_SIZE(self
);
3892 /* This does not use the preallocated list because splitlines is
3893 usually run with hundreds of newlines. The overhead of
3894 switching between PyList_SET_ITEM and append causes about a
3895 2-3% slowdown for that common case. A smarter implementation
3896 could move the if check out, so the SET_ITEMs are done first
3897 and the appends only done when the prealloc buffer is full.
3898 That's too much work for little gain.*/
3900 list
= PyList_New(0);
3904 for (i
= j
= 0; i
< len
; ) {
3907 /* Find a line and append it */
3908 while (i
< len
&& data
[i
] != '\n' && data
[i
] != '\r')
3911 /* Skip the line break reading CRLF as one line break */
3914 if (data
[i
] == '\r' && i
+ 1 < len
&&
3922 SPLIT_APPEND(data
, j
, eol
);
3926 SPLIT_APPEND(data
, j
, len
);
/* str.__sizeof__(): docstring and implementation.
 *
 * The reported size is sizeof(PyStringObject) plus ob_size items of the
 * type's tp_itemsize each, returned as a Python int through
 * PyInt_FromSsize_t().
 */
3936 PyDoc_STRVAR(sizeof__doc__
,
3937 "S.__sizeof__() -> size of S in memory, in bytes");
3940 string_sizeof(PyStringObject
*v
)
3943 res
= sizeof(PyStringObject
) + v
->ob_size
* v
->ob_type
->tp_itemsize
;
3944 return PyInt_FromSsize_t(res
);
3950 #undef PREALLOC_SIZE
/* str.__getnewargs__(): builds a one-element tuple with Py_BuildValue and
 * the "(s#)" format, passing the raw buffer v->ob_sval and its length
 * Py_SIZE(v).
 */
3953 string_getnewargs(PyStringObject
*v
)
3955 return Py_BuildValue("(s#)", v
->ob_sval
, Py_SIZE(v
));
3959 #include "stringlib/string_format.h"
/* Docstring for the "format" entry of string_methods, which is bound to
 * do_string_format.
 * NOTE(review): the remainder of this string literal is on lines not
 * visible in this listing.
 */
3961 PyDoc_STRVAR(format__doc__
,
3962 "S.format(*args, **kwargs) -> unicode\n\
/* str.__format__(format_spec) implementation, followed by its docstring
 * (p_format__doc__).
 *
 * The single argument is parsed with "O:__format__".  A format_spec that
 * is neither str nor unicode raises TypeError naming the offending type.
 * PyObject_Str(format_spec) is stored in `tmp`, and the formatted result
 * comes from _PyBytes_FormatAdvanced() given the character buffer and
 * length of format_spec.
 *
 * NOTE(review): the reassignment of format_spec from `tmp`, the cleanup of
 * `tmp` and the final return are on lines not visible in this listing.
 */
3967 string__format__(PyObject
* self
, PyObject
* args
)
3969 PyObject
*format_spec
;
3970 PyObject
*result
= NULL
;
3971 PyObject
*tmp
= NULL
;
3973 /* If 2.x, convert format_spec to the same type as value */
3974 /* This is to allow things like u''.format('') */
3975 if (!PyArg_ParseTuple(args
, "O:__format__", &format_spec
))
3977 if (!(PyString_Check(format_spec
) || PyUnicode_Check(format_spec
))) {
3978 PyErr_Format(PyExc_TypeError
, "__format__ arg must be str "
3979 "or unicode, not %s", Py_TYPE(format_spec
)->tp_name
);
3982 tmp
= PyObject_Str(format_spec
);
3987 result
= _PyBytes_FormatAdvanced(self
,
3988 PyString_AS_STRING(format_spec
),
3989 PyString_GET_SIZE(format_spec
));
3995 PyDoc_STRVAR(p_format__doc__
,
3996 "S.__format__(format_spec) -> unicode\n\
/* Method table for the str type (installed through the tp_methods slot of
 * PyString_Type).  Each entry gives the Python-level method name, the C
 * function implementing it, its calling-convention flags (METH_NOARGS,
 * METH_O, METH_VARARGS, or METH_VARARGS | METH_KEYWORDS for "format") and,
 * where present, its docstring.  The table ends with a {NULL, NULL}
 * sentinel.
 * NOTE(review): for several entries the docstring argument sits on a
 * continuation line that is not visible in this listing.
 */
4002 string_methods
[] = {
4003 /* Counterparts of the obsolete stropmodule functions; except
4004 string.maketrans(). */
4005 {"join", (PyCFunction
)string_join
, METH_O
, join__doc__
},
4006 {"split", (PyCFunction
)string_split
, METH_VARARGS
, split__doc__
},
4007 {"rsplit", (PyCFunction
)string_rsplit
, METH_VARARGS
, rsplit__doc__
},
4008 {"lower", (PyCFunction
)string_lower
, METH_NOARGS
, lower__doc__
},
4009 {"upper", (PyCFunction
)string_upper
, METH_NOARGS
, upper__doc__
},
4010 {"islower", (PyCFunction
)string_islower
, METH_NOARGS
, islower__doc__
},
4011 {"isupper", (PyCFunction
)string_isupper
, METH_NOARGS
, isupper__doc__
},
4012 {"isspace", (PyCFunction
)string_isspace
, METH_NOARGS
, isspace__doc__
},
4013 {"isdigit", (PyCFunction
)string_isdigit
, METH_NOARGS
, isdigit__doc__
},
4014 {"istitle", (PyCFunction
)string_istitle
, METH_NOARGS
, istitle__doc__
},
4015 {"isalpha", (PyCFunction
)string_isalpha
, METH_NOARGS
, isalpha__doc__
},
4016 {"isalnum", (PyCFunction
)string_isalnum
, METH_NOARGS
, isalnum__doc__
},
4017 {"capitalize", (PyCFunction
)string_capitalize
, METH_NOARGS
,
4019 {"count", (PyCFunction
)string_count
, METH_VARARGS
, count__doc__
},
4020 {"endswith", (PyCFunction
)string_endswith
, METH_VARARGS
,
4022 {"partition", (PyCFunction
)string_partition
, METH_O
, partition__doc__
},
4023 {"find", (PyCFunction
)string_find
, METH_VARARGS
, find__doc__
},
4024 {"index", (PyCFunction
)string_index
, METH_VARARGS
, index__doc__
},
4025 {"lstrip", (PyCFunction
)string_lstrip
, METH_VARARGS
, lstrip__doc__
},
4026 {"replace", (PyCFunction
)string_replace
, METH_VARARGS
, replace__doc__
},
4027 {"rfind", (PyCFunction
)string_rfind
, METH_VARARGS
, rfind__doc__
},
4028 {"rindex", (PyCFunction
)string_rindex
, METH_VARARGS
, rindex__doc__
},
4029 {"rstrip", (PyCFunction
)string_rstrip
, METH_VARARGS
, rstrip__doc__
},
4030 {"rpartition", (PyCFunction
)string_rpartition
, METH_O
,
4032 {"startswith", (PyCFunction
)string_startswith
, METH_VARARGS
,
4034 {"strip", (PyCFunction
)string_strip
, METH_VARARGS
, strip__doc__
},
4035 {"swapcase", (PyCFunction
)string_swapcase
, METH_NOARGS
,
4037 {"translate", (PyCFunction
)string_translate
, METH_VARARGS
,
4039 {"title", (PyCFunction
)string_title
, METH_NOARGS
, title__doc__
},
4040 {"ljust", (PyCFunction
)string_ljust
, METH_VARARGS
, ljust__doc__
},
4041 {"rjust", (PyCFunction
)string_rjust
, METH_VARARGS
, rjust__doc__
},
4042 {"center", (PyCFunction
)string_center
, METH_VARARGS
, center__doc__
},
4043 {"zfill", (PyCFunction
)string_zfill
, METH_VARARGS
, zfill__doc__
},
4044 {"format", (PyCFunction
) do_string_format
, METH_VARARGS
| METH_KEYWORDS
, format__doc__
},
4045 {"__format__", (PyCFunction
) string__format__
, METH_VARARGS
, p_format__doc__
},
4046 {"_formatter_field_name_split", (PyCFunction
) formatter_field_name_split
, METH_NOARGS
},
4047 {"_formatter_parser", (PyCFunction
) formatter_parser
, METH_NOARGS
},
4048 {"encode", (PyCFunction
)string_encode
, METH_VARARGS
, encode__doc__
},
4049 {"decode", (PyCFunction
)string_decode
, METH_VARARGS
, decode__doc__
},
4050 {"expandtabs", (PyCFunction
)string_expandtabs
, METH_VARARGS
,
4052 {"splitlines", (PyCFunction
)string_splitlines
, METH_VARARGS
,
4054 {"__sizeof__", (PyCFunction
)string_sizeof
, METH_NOARGS
,
4056 {"__getnewargs__", (PyCFunction
)string_getnewargs
, METH_NOARGS
},
4057 {NULL
, NULL
} /* sentinel */
/* Forward declaration of str_subtype_new() followed by string_new(), the
 * tp_new slot of PyString_Type.
 *
 * string_new hands any type other than &PyString_Type to
 * str_subtype_new().  For str itself it parses one optional argument,
 * accepted positionally or as the keyword "object" (format "|O:str"), and
 * has two visible exits: PyString_FromString("") and PyObject_Str(x).
 *
 * NOTE(review): the declaration of `x` and the test that selects the
 * empty-string exit (presumably x == NULL -- confirm) are on lines not
 * visible in this listing.
 */
4061 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
);
4064 string_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4067 static char *kwlist
[] = {"object", 0};
4069 if (type
!= &PyString_Type
)
4070 return str_subtype_new(type
, args
, kwds
);
4071 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|O:str", kwlist
, &x
))
4074 return PyString_FromString("");
4075 return PyObject_Str(x
);
/* Creates an instance of a str subclass.
 *
 * Asserts that `type` is a subtype of PyString_Type, builds a plain str
 * `tmp` by calling string_new(&PyString_Type, args, kwds), then allocates
 * the subclass instance with type->tp_alloc(type, n), where n is the
 * length of tmp.  n+1 bytes are copied so the terminating NUL travels with
 * the data; the cached hash (ob_shash) is copied from tmp and the new
 * object is marked SSTATE_NOT_INTERNED.
 *
 * NOTE(review): the NULL checks, the release of `tmp` and the final return
 * are on lines not visible in this listing.
 */
4079 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4081 PyObject
*tmp
, *pnew
;
4084 assert(PyType_IsSubtype(type
, &PyString_Type
));
4085 tmp
= string_new(&PyString_Type
, args
, kwds
);
4088 assert(PyString_CheckExact(tmp
));
4089 n
= PyString_GET_SIZE(tmp
);
4090 pnew
= type
->tp_alloc(type
, n
);
4092 Py_MEMCPY(PyString_AS_STRING(pnew
), PyString_AS_STRING(tmp
), n
+1);
4093 ((PyStringObject
*)pnew
)->ob_shash
=
4094 ((PyStringObject
*)tmp
)->ob_shash
;
4095 ((PyStringObject
*)pnew
)->ob_sstate
= SSTATE_NOT_INTERNED
;
/* tp_new slot of PyBaseString_Type: sets a TypeError stating that
 * basestring cannot be instantiated.  None of the arguments is used on the
 * visible lines.
 * NOTE(review): the return statement is not visible in this listing.
 */
4102 basestring_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
4104 PyErr_SetString(PyExc_TypeError
,
4105 "The basestring type cannot be instantiated");
/* nb_remainder slot for str (see string_as_number): implements `v % w`.
 * If the left operand is not a string, a new reference to
 * Py_NotImplemented is returned; otherwise the work is done by
 * PyString_Format(v, w).
 */
4110 string_mod(PyObject
*v
, PyObject
*w
)
4112 if (!PyString_Check(v
)) {
4113 Py_INCREF(Py_NotImplemented
);
4114 return Py_NotImplemented
;
4116 return PyString_Format(v
, w
);
/* Docstring installed in the tp_doc slot of PyBaseString_Type. */
4119 PyDoc_STRVAR(basestring_doc
,
4120 "Type basestring cannot be instantiated; it is the base for str and unicode.");
/* Number-protocol table for str, referenced from the tp_as_number slot of
 * PyString_Type.  The only entry visible in this listing is nb_remainder,
 * which is set to string_mod.
 */
4122 static PyNumberMethods string_as_number
= {
4127 string_mod
, /*nb_remainder*/
/* Type object for basestring.  Visible slot values: flags
 * Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, tp_doc = basestring_doc,
 * tp_base = &PyBaseObject_Type and tp_new = basestring_new; the other
 * visible slots are 0.
 * NOTE(review): a number of slot lines (including the type name and sizes)
 * are not visible in this listing.
 */
4131 PyTypeObject PyBaseString_Type
= {
4132 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
4142 0, /* tp_as_number */
4143 0, /* tp_as_sequence */
4144 0, /* tp_as_mapping */
4148 0, /* tp_getattro */
4149 0, /* tp_setattro */
4150 0, /* tp_as_buffer */
4151 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
4152 basestring_doc
, /* tp_doc */
4153 0, /* tp_traverse */
4155 0, /* tp_richcompare */
4156 0, /* tp_weaklistoffset */
4158 0, /* tp_iternext */
4162 &PyBaseObject_Type
, /* tp_base */
4164 0, /* tp_descr_get */
4165 0, /* tp_descr_set */
4166 0, /* tp_dictoffset */
4169 basestring_new
, /* tp_new */
/* Docstring and type object for str.
 *
 * Visible slot wiring: string_dealloc (tp_dealloc), string_print
 * (tp_print), string_repr (tp_repr), the number/sequence/mapping/buffer
 * protocol tables, string_hash (tp_hash), string_str (tp_str),
 * PyObject_GenericGetAttr (tp_getattro), string_richcompare
 * (tp_richcompare), string_methods (tp_methods), &PyBaseString_Type
 * (tp_base), string_new (tp_new) and PyObject_Del (tp_free).  The flags
 * combine Py_TPFLAGS_DEFAULT, CHECKTYPES, BASETYPE, STRING_SUBCLASS and
 * HAVE_NEWBUFFER.
 * NOTE(review): some slot lines (including the type name and item size)
 * are not visible in this listing.
 */
4173 PyDoc_STRVAR(string_doc
,
4174 "str(object) -> string\n\
4176 Return a nice string representation of the object.\n\
4177 If the argument is a string, the return value is the same object.");
4179 PyTypeObject PyString_Type
= {
4180 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
4182 sizeof(PyStringObject
),
4184 string_dealloc
, /* tp_dealloc */
4185 (printfunc
)string_print
, /* tp_print */
4189 string_repr
, /* tp_repr */
4190 &string_as_number
, /* tp_as_number */
4191 &string_as_sequence
, /* tp_as_sequence */
4192 &string_as_mapping
, /* tp_as_mapping */
4193 (hashfunc
)string_hash
, /* tp_hash */
4195 string_str
, /* tp_str */
4196 PyObject_GenericGetAttr
, /* tp_getattro */
4197 0, /* tp_setattro */
4198 &string_as_buffer
, /* tp_as_buffer */
4199 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_CHECKTYPES
|
4200 Py_TPFLAGS_BASETYPE
| Py_TPFLAGS_STRING_SUBCLASS
|
4201 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
4202 string_doc
, /* tp_doc */
4203 0, /* tp_traverse */
4205 (richcmpfunc
)string_richcompare
, /* tp_richcompare */
4206 0, /* tp_weaklistoffset */
4208 0, /* tp_iternext */
4209 string_methods
, /* tp_methods */
4212 &PyBaseString_Type
, /* tp_base */
4214 0, /* tp_descr_get */
4215 0, /* tp_descr_set */
4216 0, /* tp_dictoffset */
4219 string_new
, /* tp_new */
4220 PyObject_Del
, /* tp_free */
/* Concatenates `w` onto the string referenced by *pv.
 * A NULL `w`, or a *pv that is not a string, takes a separate branch;
 * otherwise the new value is computed with string_concat(*pv, w).
 * NOTE(review): the body of the special-case branch and the statements
 * that store the result back into *pv are on lines not visible in this
 * listing.
 */
4224 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
4226 register PyObject
*v
;
4229 if (w
== NULL
|| !PyString_Check(*pv
)) {
4234 v
= string_concat((PyStringObject
*) *pv
, w
);
/* Variant of PyString_Concat: forwards (pv, w) to PyString_Concat().
 * NOTE(review): going by the name, `w` is presumably released afterwards,
 * but that statement is not visible in this listing -- confirm.
 */
4240 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
4242 PyString_Concat(pv
, w
);
4247 /* The following function breaks the notion that strings are immutable:
4248 it changes the size of a string. We get away with this only if there
4249 is only one module referencing the object. You can also think of it
4250 as creating a new string object and destroying the old one, only
4251 more efficiently. In any case, don't use this if the string may
4252 already be known to some other part of the code...
4253 Note that if there's not enough memory to resize the string, the original
4254 string object at *pv is deallocated, *pv is set to NULL, an "out of
4255 memory" exception is set, and -1 is returned. Else (on success) 0 is
4256 returned, and the value in *pv may or may not be the same as on input.
4257 As always, an extra byte is allocated for a trailing \0 byte (newsize
4258 does *not* include that), and a trailing \0 byte is stored.
/* Implementation notes for _PyString_Resize:
 *  - the request is rejected with PyErr_BadInternalCall() unless the object
 *    is a string, has a reference count of exactly 1, is not interned, and
 *    newsize is non-negative;
 *  - the object is passed to _Py_ForgetReference() before the block is
 *    reallocated to sizeof(PyStringObject) + newsize bytes, and
 *    _Py_NewReference() is applied to *pv afterwards;
 *  - finally the size field is updated, the terminating NUL is written at
 *    ob_sval[newsize], and the cached hash is reset to -1.
 * NOTE(review): the assignment of `v`, the storing of the REALLOC result
 * into *pv, the out-of-memory branch and the return statements are on lines
 * not visible in this listing.
 */
4262 _PyString_Resize(PyObject
**pv
, Py_ssize_t newsize
)
4264 register PyObject
*v
;
4265 register PyStringObject
*sv
;
4267 if (!PyString_Check(v
) || Py_REFCNT(v
) != 1 || newsize
< 0 ||
4268 PyString_CHECK_INTERNED(v
)) {
4271 PyErr_BadInternalCall();
4274 /* XXX UNREF/NEWREF interface should be more symmetrical */
4276 _Py_ForgetReference(v
);
4278 PyObject_REALLOC((char *)v
, sizeof(PyStringObject
) + newsize
);
4284 _Py_NewReference(*pv
);
4285 sv
= (PyStringObject
*) *pv
;
4286 Py_SIZE(sv
) = newsize
;
4287 sv
->ob_sval
[newsize
] = '\0';
4288 sv
->ob_shash
= -1; /* invalidate cached hash value */
4292 /* Helpers for formatstring */
/* Fetches the next argument for a format conversion.
 * The current index is read from *p_argidx.  While it is below `arglen`
 * the item is taken from the `args` tuple with PyTuple_GetItem();
 * otherwise a TypeError ("not enough arguments for format string") is set.
 * NOTE(review): the update of *p_argidx, any handling of a non-tuple
 * `args`, and the error return are on lines not visible in this listing.
 */
4294 Py_LOCAL_INLINE(PyObject
*)
4295 getnextarg(PyObject
*args
, Py_ssize_t arglen
, Py_ssize_t
*p_argidx
)
4297 Py_ssize_t argidx
= *p_argidx
;
4298 if (argidx
< arglen
) {
4303 return PyTuple_GetItem(args
, argidx
);
4305 PyErr_SetString(PyExc_TypeError
,
4306 "not enough arguments for format string");
/* Bit flags carried in the `flags` argument of the format helpers below.
 * Each occupies its own bit so they can be OR-ed together; F_ALT is the
 * one tested by formatfloat() and formatint() to emit the '#' form.
 */
4317 #define F_LJUST (1<<0)
4318 #define F_SIGN (1<<1)
4319 #define F_BLANK (1<<2)
4320 #define F_ALT (1<<3)
4321 #define F_ZERO (1<<4)
/* Formats a float conversion into `buf` (capacity `buflen`) and returns
 * the number of characters written, obtained with strlen(buf).
 *
 * `v` is converted with PyFloat_AsDouble(); a failure is re-reported as a
 * TypeError naming the argument's type.  For type 'f' the magnitude test
 * fabs(x)/1e25 >= 1e25 (i.e. |x| >= 1e50) selects a separate branch.  The
 * buffer is then checked against the worst-case lengths described in the
 * in-line comment (10 + prec for 'g'/'G', 53 + prec for 'f'); a buffer
 * that is too small raises OverflowError.  The C format string is built
 * with PyOS_snprintf() (with '#' when F_ALT is set) and applied by
 * PyOS_ascii_formatd().
 *
 * NOTE(review): the declarations of `fmt` and `x`, the body of the large
 * 'f' branch, the handling of negative precision and the error returns are
 * on lines not visible in this listing.
 */
4323 Py_LOCAL_INLINE(int)
4324 formatfloat(char *buf
, size_t buflen
, int flags
,
4325 int prec
, int type
, PyObject
*v
)
4327 /* fmt = '%#.' + `prec` + `type`
4328 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4331 x
= PyFloat_AsDouble(v
);
4332 if (x
== -1.0 && PyErr_Occurred()) {
4333 PyErr_Format(PyExc_TypeError
, "float argument required, "
4334 "not %.200s", Py_TYPE(v
)->tp_name
);
4339 if (type
== 'f' && fabs(x
)/1e25
>= 1e25
)
4341 /* Worst case length calc to ensure no buffer overrun:
4345 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4346 for any double rep.)
4347 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4350 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4351 len = 1 + 50 + 1 + prec = 52 + prec
4353 If prec=0 the effective precision is 1 (the leading digit is
4354 always given), therefore increase the length by one.
4357 if (((type
== 'g' || type
== 'G') &&
4358 buflen
<= (size_t)10 + (size_t)prec
) ||
4359 (type
== 'f' && buflen
<= (size_t)53 + (size_t)prec
)) {
4360 PyErr_SetString(PyExc_OverflowError
,
4361 "formatted float is too long (precision too large?)");
4364 PyOS_snprintf(fmt
, sizeof(fmt
), "%%%s.%d%c",
4365 (flags
&F_ALT
) ? "#" : "",
4367 PyOS_ascii_formatd(buf
, buflen
, fmt
, x
);
4368 return (int)strlen(buf
);
4371 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4372 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4373 * Python's regular ints.
4374 * Return value: a new PyString*, or NULL if error.
4375 * . *pbuf is set to point into it,
4376 * *plen set to the # of chars following that.
4377 * Caller must decref it when done using pbuf.
4378 * The string starting at *pbuf is of the form
4379 * "-"? ("0x" | "0X")? digit+
4380 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4381 * set in flags. The case of hex digits will be correct,
4382 * There will be at least prec digits, zero-filled on the left if
4383 * necessary to get that many.
4384 * val object to be converted
4385 * flags bitmask of format flags; only F_ALT is looked at
4386 * prec minimum number of digits; 0-fill on left if needed
4387 * type a character in [duoxX]; u acts the same as d
4389 * CAUTION: o, x and X conversions on regular ints can never
4390 * produce a '-' sign, but can for Python's unbounded ints.
/* Implementation notes for _PyString_FormatLong:
 *  - the digits come from one of three calls on val's type: tp_str, nb_oct
 *    or nb_hex;
 *  - because the text is edited in place, a result whose reference count
 *    is not exactly 1 is rejected with PyErr_BadInternalCall(), and a
 *    length above INT_MAX raises ValueError;
 *  - a trailing 'L' in the text is detected (buf[len-1] == 'L'); `sign` is
 *    1 when the text starts with '-', and numdigits is len - numnondigits;
 *  - without F_ALT the base marker ("0", "0x" or "0X") is skipped, leaving
 *    a lone "0" digit untouched;
 *  - when prec > numdigits a new string r1 of numnondigits + prec bytes is
 *    built from the non-digit prefix, prec - numdigits fill characters and
 *    the digits;
 *  - the last loop visits every byte in 'a'..'x' as part of the upper-case
 *    hex fix-up.
 * NOTE(review): the switch on `type`, the loop bodies, the stores through
 * pbuf/plen and the error returns are on lines not visible in this listing.
 */
4393 _PyString_FormatLong(PyObject
*val
, int flags
, int prec
, int type
,
4394 char **pbuf
, int *plen
)
4396 PyObject
*result
= NULL
;
4399 int sign
; /* 1 if '-', else 0 */
4400 int len
; /* number of characters */
4402 int numdigits
; /* len == numnondigits + numdigits */
4403 int numnondigits
= 0;
4408 result
= Py_TYPE(val
)->tp_str(val
);
4411 result
= Py_TYPE(val
)->tp_as_number
->nb_oct(val
);
4416 result
= Py_TYPE(val
)->tp_as_number
->nb_hex(val
);
4419 assert(!"'type' not in [duoxX]");
4424 buf
= PyString_AsString(result
);
4430 /* To modify the string in-place, there can only be one reference. */
4431 if (Py_REFCNT(result
) != 1) {
4432 PyErr_BadInternalCall();
4435 llen
= PyString_Size(result
);
4436 if (llen
> INT_MAX
) {
4437 PyErr_SetString(PyExc_ValueError
, "string too large in _PyString_FormatLong");
4441 if (buf
[len
-1] == 'L') {
4445 sign
= buf
[0] == '-';
4446 numnondigits
+= sign
;
4447 numdigits
= len
- numnondigits
;
4448 assert(numdigits
> 0);
4450 /* Get rid of base marker unless F_ALT */
4451 if ((flags
& F_ALT
) == 0) {
4452 /* Need to skip 0x, 0X or 0. */
4456 assert(buf
[sign
] == '0');
4457 /* If 0 is only digit, leave it alone. */
4458 if (numdigits
> 1) {
4465 assert(buf
[sign
] == '0');
4466 assert(buf
[sign
+ 1] == 'x');
4477 assert(len
== numnondigits
+ numdigits
);
4478 assert(numdigits
> 0);
4481 /* Fill with leading zeroes to meet minimum width. */
4482 if (prec
> numdigits
) {
4483 PyObject
*r1
= PyString_FromStringAndSize(NULL
,
4484 numnondigits
+ prec
);
4490 b1
= PyString_AS_STRING(r1
);
4491 for (i
= 0; i
< numnondigits
; ++i
)
4493 for (i
= 0; i
< prec
- numdigits
; i
++)
4495 for (i
= 0; i
< numdigits
; i
++)
4500 buf
= PyString_AS_STRING(result
);
4501 len
= numnondigits
+ prec
;
4504 /* Fix up case for hex conversions. */
4506 /* Need to convert all lower case letters to upper case.
4507 and need to convert 0x to 0X (and -0x to -0X). */
4508 for (i
= 0; i
< len
; i
++)
4509 if (buf
[i
] >= 'a' && buf
[i
] <= 'x')
/* Formats a C-long-sized integer conversion into `buf` (capacity `buflen`)
 * and returns the number of characters written, obtained with strlen(buf).
 *
 * `v` is converted with PyInt_AsLong(); a failure is re-reported as a
 * TypeError naming the argument's type.  Negative values get special
 * treatment for 'u' and for 'x'/'X'/'o'.  The C format string is assembled
 * in the 64-byte `fmt` buffer: with F_ALT and a hex type the function
 * writes its own "0x"/"0X" prefix and uses plain %x/%X, for the platform
 * consistency reasons given in the in-line comment; otherwise '#' is
 * passed through when F_ALT is set.  A buffer of 14 bytes or fewer, or one
 * not larger than 3 + prec, raises OverflowError.  The value is written
 * with PyOS_snprintf(), as -x on one path and x on the other.
 *
 * NOTE(review): the declarations of `sign` and `x`, the bodies of the
 * negative-value branches, the handling of negative precision and the
 * error returns are on lines not visible in this listing.
 */
4517 Py_LOCAL_INLINE(int)
4518 formatint(char *buf
, size_t buflen
, int flags
,
4519 int prec
, int type
, PyObject
*v
)
4521 /* fmt = '%#.' + `prec` + 'l' + `type`
4522 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4524 char fmt
[64]; /* plenty big enough! */
4528 x
= PyInt_AsLong(v
);
4529 if (x
== -1 && PyErr_Occurred()) {
4530 PyErr_Format(PyExc_TypeError
, "int argument required, not %.200s",
4531 Py_TYPE(v
)->tp_name
);
4534 if (x
< 0 && type
== 'u') {
4537 if (x
< 0 && (type
== 'x' || type
== 'X' || type
== 'o'))
4544 if ((flags
& F_ALT
) &&
4545 (type
== 'x' || type
== 'X')) {
4546 /* When converting under %#x or %#X, there are a number
4547 * of issues that cause pain:
4548 * - when 0 is being converted, the C standard leaves off
4549 * the '0x' or '0X', which is inconsistent with other
4550 * %#x/%#X conversions and inconsistent with Python's
4552 * - there are platforms that violate the standard and
4553 * convert 0 with the '0x' or '0X'
4554 * (Metrowerks, Compaq Tru64)
4555 * - there are platforms that give '0x' when converting
4556 * under %#X, but convert 0 in accordance with the
4557 * standard (OS/2 EMX)
4559 * We can achieve the desired consistency by inserting our
4560 * own '0x' or '0X' prefix, and substituting %x/%X in place
4563 * Note that this is the same approach as used in
4564 * formatint() in unicodeobject.c
4566 PyOS_snprintf(fmt
, sizeof(fmt
), "%s0%c%%.%dl%c",
4567 sign
, type
, prec
, type
);
4570 PyOS_snprintf(fmt
, sizeof(fmt
), "%s%%%s.%dl%c",
4571 sign
, (flags
&F_ALT
) ? "#" : "",
4575 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4576 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4578 if (buflen
<= 14 || buflen
<= (size_t)3 + (size_t)prec
) {
4579 PyErr_SetString(PyExc_OverflowError
,
4580 "formatted integer is too long (precision too large?)");
4584 PyOS_snprintf(buf
, buflen
, fmt
, -x
);
4586 PyOS_snprintf(buf
, buflen
, fmt
, x
);
4587 return (int)strlen(buf
);
4590 Py_LOCAL_INLINE(int)
4591 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
4593 /* presume that the buffer is at least 2 characters long */
4594 if (PyString_Check(v
)) {
4595 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
4599 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
4606 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4608 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4609 chars are formatted. XXX This is a magic number. Each formatting
4610 routine does bounds checking to ensure no overflow, but a better
4611 solution may be to malloc a buffer of appropriate size for each
4612 format. For now, the current solution is sufficient.
4614 #define FORMATBUFLEN (size_t)120
4617 PyString_Format(PyObject
*format
, PyObject
*args
)
4620 Py_ssize_t arglen
, argidx
;
4621 Py_ssize_t reslen
, rescnt
, fmtcnt
;
4623 PyObject
*result
, *orig_args
;
4624 #ifdef Py_USING_UNICODE
4627 PyObject
*dict
= NULL
;
4628 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
4629 PyErr_BadInternalCall();
4633 fmt
= PyString_AS_STRING(format
);
4634 fmtcnt
= PyString_GET_SIZE(format
);
4635 reslen
= rescnt
= fmtcnt
+ 100;
4636 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
4639 res
= PyString_AsString(result
);
4640 if (PyTuple_Check(args
)) {
4641 arglen
= PyTuple_GET_SIZE(args
);
4648 if (Py_TYPE(args
)->tp_as_mapping
&& !PyTuple_Check(args
) &&
4649 !PyObject_TypeCheck(args
, &PyBaseString_Type
))
4651 while (--fmtcnt
>= 0) {
4654 rescnt
= fmtcnt
+ 100;
4656 if (_PyString_Resize(&result
, reslen
) < 0)
4658 res
= PyString_AS_STRING(result
)
4665 /* Got a format specifier */
4667 Py_ssize_t width
= -1;
4673 PyObject
*temp
= NULL
;
4677 char formatbuf
[FORMATBUFLEN
];
4678 /* For format{float,int,char}() */
4679 #ifdef Py_USING_UNICODE
4680 char *fmt_start
= fmt
;
4681 Py_ssize_t argidx_start
= argidx
;
4692 PyErr_SetString(PyExc_TypeError
,
4693 "format requires a mapping");
4699 /* Skip over balanced parentheses */
4700 while (pcount
> 0 && --fmtcnt
>= 0) {
4703 else if (*fmt
== '(')
4707 keylen
= fmt
- keystart
- 1;
4708 if (fmtcnt
< 0 || pcount
> 0) {
4709 PyErr_SetString(PyExc_ValueError
,
4710 "incomplete format key");
4713 key
= PyString_FromStringAndSize(keystart
,
4721 args
= PyObject_GetItem(dict
, key
);
4730 while (--fmtcnt
>= 0) {
4731 switch (c
= *fmt
++) {
4732 case '-': flags
|= F_LJUST
; continue;
4733 case '+': flags
|= F_SIGN
; continue;
4734 case ' ': flags
|= F_BLANK
; continue;
4735 case '#': flags
|= F_ALT
; continue;
4736 case '0': flags
|= F_ZERO
; continue;
4741 v
= getnextarg(args
, arglen
, &argidx
);
4744 if (!PyInt_Check(v
)) {
4745 PyErr_SetString(PyExc_TypeError
,
4749 width
= PyInt_AsLong(v
);
4757 else if (c
>= 0 && isdigit(c
)) {
4759 while (--fmtcnt
>= 0) {
4760 c
= Py_CHARMASK(*fmt
++);
4763 if ((width
*10) / 10 != width
) {
4769 width
= width
*10 + (c
- '0');
4777 v
= getnextarg(args
, arglen
, &argidx
);
4780 if (!PyInt_Check(v
)) {
4786 prec
= PyInt_AsLong(v
);
4792 else if (c
>= 0 && isdigit(c
)) {
4794 while (--fmtcnt
>= 0) {
4795 c
= Py_CHARMASK(*fmt
++);
4798 if ((prec
*10) / 10 != prec
) {
4804 prec
= prec
*10 + (c
- '0');
4809 if (c
== 'h' || c
== 'l' || c
== 'L') {
4815 PyErr_SetString(PyExc_ValueError
,
4816 "incomplete format");
4820 v
= getnextarg(args
, arglen
, &argidx
);
4832 #ifdef Py_USING_UNICODE
4833 if (PyUnicode_Check(v
)) {
4835 argidx
= argidx_start
;
4839 temp
= _PyObject_Str(v
);
4840 #ifdef Py_USING_UNICODE
4841 if (temp
!= NULL
&& PyUnicode_Check(temp
)) {
4844 argidx
= argidx_start
;
4851 temp
= PyObject_Repr(v
);
4854 if (!PyString_Check(temp
)) {
4855 PyErr_SetString(PyExc_TypeError
,
4856 "%s argument has non-string str()");
4860 pbuf
= PyString_AS_STRING(temp
);
4861 len
= PyString_GET_SIZE(temp
);
4862 if (prec
>= 0 && len
> prec
)
4874 if (PyNumber_Check(v
)) {
4875 PyObject
*iobj
=NULL
;
4877 if (PyInt_Check(v
) || (PyLong_Check(v
))) {
4882 iobj
= PyNumber_Int(v
);
4883 if (iobj
==NULL
) iobj
= PyNumber_Long(v
);
4886 if (PyInt_Check(iobj
)) {
4889 len
= formatint(pbuf
,
4891 flags
, prec
, c
, iobj
);
4897 else if (PyLong_Check(iobj
)) {
4901 temp
= _PyString_FormatLong(iobj
, flags
,
4902 prec
, c
, &pbuf
, &ilen
);
4915 PyErr_Format(PyExc_TypeError
,
4916 "%%%c format: a number is required, "
4917 "not %.200s", c
, Py_TYPE(v
)->tp_name
);
4932 len
= formatfloat(pbuf
, sizeof(formatbuf
),
4941 #ifdef Py_USING_UNICODE
4942 if (PyUnicode_Check(v
)) {
4944 argidx
= argidx_start
;
4949 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
4954 PyErr_Format(PyExc_ValueError
,
4955 "unsupported format character '%c' (0x%x) "
4958 (Py_ssize_t
)(fmt
- 1 -
4959 PyString_AsString(format
)));
4963 if (*pbuf
== '-' || *pbuf
== '+') {
4967 else if (flags
& F_SIGN
)
4969 else if (flags
& F_BLANK
)
4976 if (rescnt
- (sign
!= 0) < width
) {
4978 rescnt
= width
+ fmtcnt
+ 100;
4983 return PyErr_NoMemory();
4985 if (_PyString_Resize(&result
, reslen
) < 0) {
4989 res
= PyString_AS_STRING(result
)
4999 if ((flags
& F_ALT
) && (c
== 'x' || c
== 'X')) {
5000 assert(pbuf
[0] == '0');
5001 assert(pbuf
[1] == c
);
5012 if (width
> len
&& !(flags
& F_LJUST
)) {
5016 } while (--width
> len
);
5021 if ((flags
& F_ALT
) &&
5022 (c
== 'x' || c
== 'X')) {
5023 assert(pbuf
[0] == '0');
5024 assert(pbuf
[1] == c
);
5029 Py_MEMCPY(res
, pbuf
, len
);
5032 while (--width
>= len
) {
5036 if (dict
&& (argidx
< arglen
) && c
!= '%') {
5037 PyErr_SetString(PyExc_TypeError
,
5038 "not all arguments converted during string formatting");
5045 if (argidx
< arglen
&& !dict
) {
5046 PyErr_SetString(PyExc_TypeError
,
5047 "not all arguments converted during string formatting");
5053 _PyString_Resize(&result
, reslen
- rescnt
);
5056 #ifdef Py_USING_UNICODE
5062 /* Fiddle args right (remove the first argidx arguments) */
5063 if (PyTuple_Check(orig_args
) && argidx
> 0) {
5065 Py_ssize_t n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
5070 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
5072 PyTuple_SET_ITEM(v
, n
, w
);
5076 Py_INCREF(orig_args
);
5080 /* Take what we have of the result and let the Unicode formatting
5081 function format the rest of the input. */
5082 rescnt
= res
- PyString_AS_STRING(result
);
5083 if (_PyString_Resize(&result
, rescnt
))
5085 fmtcnt
= PyString_GET_SIZE(format
) - \
5086 (fmt
- PyString_AS_STRING(format
));
5087 format
= PyUnicode_Decode(fmt
, fmtcnt
, NULL
, NULL
);
5090 v
= PyUnicode_Format(format
, args
);
5094 /* Paste what we have (result) to what the Unicode formatting
5095 function returned (v) and return the result (or error) */
5096 w
= PyUnicode_Concat(result
, v
);
5101 #endif /* Py_USING_UNICODE */
5112 PyString_InternInPlace(PyObject
**p
)
5114 register PyStringObject
*s
= (PyStringObject
*)(*p
);
5116 if (s
== NULL
|| !PyString_Check(s
))
5117 Py_FatalError("PyString_InternInPlace: strings only please!");
5118 /* If it's a string subclass, we don't really know what putting
5119 it in the interned dict might do. */
5120 if (!PyString_CheckExact(s
))
5122 if (PyString_CHECK_INTERNED(s
))
5124 if (interned
== NULL
) {
5125 interned
= PyDict_New();
5126 if (interned
== NULL
) {
5127 PyErr_Clear(); /* Don't leave an exception */
5131 t
= PyDict_GetItem(interned
, (PyObject
*)s
);
5139 if (PyDict_SetItem(interned
, (PyObject
*)s
, (PyObject
*)s
) < 0) {
5143 /* The two references in interned are not counted by refcnt.
5144 The string deallocator will take care of this */
5146 PyString_CHECK_INTERNED(s
) = SSTATE_INTERNED_MORTAL
;
5150 PyString_InternImmortal(PyObject
**p
)
5152 PyString_InternInPlace(p
);
5153 if (PyString_CHECK_INTERNED(*p
) != SSTATE_INTERNED_IMMORTAL
) {
5154 PyString_CHECK_INTERNED(*p
) = SSTATE_INTERNED_IMMORTAL
;
5161 PyString_InternFromString(const char *cp
)
5163 PyObject
*s
= PyString_FromString(cp
);
5166 PyString_InternInPlace(&s
);
5174 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
5175 Py_XDECREF(characters
[i
]);
5176 characters
[i
] = NULL
;
5178 Py_XDECREF(nullstring
);
5182 void _Py_ReleaseInternedStrings(void)
5187 Py_ssize_t immortal_size
= 0, mortal_size
= 0;
5189 if (interned
== NULL
|| !PyDict_Check(interned
))
5191 keys
= PyDict_Keys(interned
);
5192 if (keys
== NULL
|| !PyList_Check(keys
)) {
5197 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5198 detector, interned strings are not forcibly deallocated; rather, we
5199 give them their stolen references back, and then clear and DECREF
5200 the interned dict. */
5202 n
= PyList_GET_SIZE(keys
);
5203 fprintf(stderr
, "releasing %" PY_FORMAT_SIZE_T
"d interned strings\n",
5205 for (i
= 0; i
< n
; i
++) {
5206 s
= (PyStringObject
*) PyList_GET_ITEM(keys
, i
);
5207 switch (s
->ob_sstate
) {
5208 case SSTATE_NOT_INTERNED
:
5209 /* XXX Shouldn't happen */
5211 case SSTATE_INTERNED_IMMORTAL
:
5213 immortal_size
+= Py_SIZE(s
);
5215 case SSTATE_INTERNED_MORTAL
:
5217 mortal_size
+= Py_SIZE(s
);
5220 Py_FatalError("Inconsistent interned string state.");
5222 s
->ob_sstate
= SSTATE_NOT_INTERNED
;
5224 fprintf(stderr
, "total size of all interned strings: "
5225 "%" PY_FORMAT_SIZE_T
"d/%" PY_FORMAT_SIZE_T
"d "
5226 "mortal/immortal\n", mortal_size
, immortal_size
);
5228 PyDict_Clear(interned
);
5229 Py_DECREF(interned
);