Change to flush and close logic to fix #1760556.
[python.git] / Objects / stringobject.c
blob22b50d5f76d0aaa8c33c947debdbdc475b098464
1 /* String object implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "Python.h"
7 #include <ctype.h>
9 #ifdef COUNT_ALLOCS
10 int null_strings, one_strings;
11 #endif
13 static PyStringObject *characters[UCHAR_MAX + 1];
14 static PyStringObject *nullstring;
16 /* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
21 Another way to look at this is to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
24 static PyObject *interned;
27 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
29 null terminating character.
31 For PyString_FromString(), the parameter `str' points to a null-terminated
32 string containing exactly `size' bytes.
34 For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
45 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
52 PyObject *
53 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
55 register PyStringObject *op;
56 assert(size >= 0);
57 if (size == 0 && (op = nullstring) != NULL) {
58 #ifdef COUNT_ALLOCS
59 null_strings++;
60 #endif
61 Py_INCREF(op);
62 return (PyObject *)op;
64 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
67 #ifdef COUNT_ALLOCS
68 one_strings++;
69 #endif
70 Py_INCREF(op);
71 return (PyObject *)op;
74 /* Inline PyObject_NewVar */
75 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
76 if (op == NULL)
77 return PyErr_NoMemory();
78 PyObject_INIT_VAR(op, &PyString_Type, size);
79 op->ob_shash = -1;
80 op->ob_sstate = SSTATE_NOT_INTERNED;
81 if (str != NULL)
82 Py_MEMCPY(op->ob_sval, str, size);
83 op->ob_sval[size] = '\0';
84 /* share short strings */
85 if (size == 0) {
86 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
88 op = (PyStringObject *)t;
89 nullstring = op;
90 Py_INCREF(op);
91 } else if (size == 1 && str != NULL) {
92 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
94 op = (PyStringObject *)t;
95 characters[*str & UCHAR_MAX] = op;
96 Py_INCREF(op);
98 return (PyObject *) op;
101 PyObject *
102 PyString_FromString(const char *str)
104 register size_t size;
105 register PyStringObject *op;
107 assert(str != NULL);
108 size = strlen(str);
109 if (size > PY_SSIZE_T_MAX) {
110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
114 if (size == 0 && (op = nullstring) != NULL) {
115 #ifdef COUNT_ALLOCS
116 null_strings++;
117 #endif
118 Py_INCREF(op);
119 return (PyObject *)op;
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122 #ifdef COUNT_ALLOCS
123 one_strings++;
124 #endif
125 Py_INCREF(op);
126 return (PyObject *)op;
129 /* Inline PyObject_NewVar */
130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
131 if (op == NULL)
132 return PyErr_NoMemory();
133 PyObject_INIT_VAR(op, &PyString_Type, size);
134 op->ob_shash = -1;
135 op->ob_sstate = SSTATE_NOT_INTERNED;
136 Py_MEMCPY(op->ob_sval, str, size+1);
137 /* share short strings */
138 if (size == 0) {
139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
141 op = (PyStringObject *)t;
142 nullstring = op;
143 Py_INCREF(op);
144 } else if (size == 1) {
145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
147 op = (PyStringObject *)t;
148 characters[*str & UCHAR_MAX] = op;
149 Py_INCREF(op);
151 return (PyObject *) op;
154 PyObject *
155 PyString_FromFormatV(const char *format, va_list vargs)
157 va_list count;
158 Py_ssize_t n = 0;
159 const char* f;
160 char *s;
161 PyObject* string;
163 #ifdef VA_LIST_IS_ARRAY
164 Py_MEMCPY(count, vargs, sizeof(va_list));
165 #else
166 #ifdef __va_copy
167 __va_copy(count, vargs);
168 #else
169 count = vargs;
170 #endif
171 #endif
172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
184 ++f;
186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
193 case 'd': case 'u': case 'i': case 'x':
194 (void) va_arg(count, int);
195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
209 * XXX I count 18 -- what's the extra for?
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
219 what's in the argument list) */
220 n += strlen(p);
221 goto expand;
223 } else
224 n++;
226 expand:
227 /* step 2: fill the buffer */
228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
234 s = PyString_AsString(string);
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
239 Py_ssize_t i;
240 int longflag = 0;
241 int size_tflag = 0;
242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
258 longflag = 1;
259 ++f;
261 /* handle the size_t flag. */
262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
263 size_tflag = 1;
264 ++f;
267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
274 else if (size_tflag)
275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
306 Py_MEMCPY(s, p, i);
307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
329 } else
330 *s++ = *f;
333 end:
334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
335 return string;
338 PyObject *
339 PyString_FromFormat(const char *format, ...)
341 PyObject* ret;
342 va_list vargs;
344 #ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346 #else
347 va_start(vargs);
348 #endif
349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
355 PyObject *PyString_Decode(const char *s,
356 Py_ssize_t size,
357 const char *encoding,
358 const char *errors)
360 PyObject *v, *str;
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
370 PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
374 PyObject *v;
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
381 if (encoding == NULL) {
382 #ifdef Py_USING_UNICODE
383 encoding = PyUnicode_GetDefaultEncoding();
384 #else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387 #endif
390 /* Decode via the codec registry */
391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
393 goto onError;
395 return v;
397 onError:
398 return NULL;
401 PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
405 PyObject *v;
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
411 #ifdef Py_USING_UNICODE
412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
420 #endif
421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 Py_Type(v)->tp_name);
425 Py_DECREF(v);
426 goto onError;
429 return v;
431 onError:
432 return NULL;
435 PyObject *PyString_Encode(const char *s,
436 Py_ssize_t size,
437 const char *encoding,
438 const char *errors)
440 PyObject *v, *str;
442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
450 PyObject *PyString_AsEncodedObject(PyObject *str,
451 const char *encoding,
452 const char *errors)
454 PyObject *v;
456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
461 if (encoding == NULL) {
462 #ifdef Py_USING_UNICODE
463 encoding = PyUnicode_GetDefaultEncoding();
464 #else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467 #endif
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
475 return v;
477 onError:
478 return NULL;
481 PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
485 PyObject *v;
487 v = PyString_AsEncodedObject(str, encoding, errors);
488 if (v == NULL)
489 goto onError;
491 #ifdef Py_USING_UNICODE
492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
500 #endif
501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 Py_Type(v)->tp_name);
505 Py_DECREF(v);
506 goto onError;
509 return v;
511 onError:
512 return NULL;
515 static void
516 string_dealloc(PyObject *op)
518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 Py_Refcnt(op) = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
533 default:
534 Py_FatalError("Inconsistent interned string state.");
536 Py_Type(op)->tp_free(op);
539 /* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
544 PyObject *PyString_DecodeEscape(const char *s,
545 Py_ssize_t len,
546 const char *errors,
547 Py_ssize_t unicode,
548 const char *recode_encoding)
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
555 v = PyString_FromStringAndSize((char *)NULL, newlen);
556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
562 non_esc:
563 #ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
568 Py_ssize_t rn;
569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
581 /* Append bytes to output buffer. */
582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
585 Py_MEMCPY(p, r, rn);
586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
592 #else
593 *p++ = *s++;
594 #endif
595 continue;
597 s++;
598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
624 *p++ = c;
625 break;
626 case 'x':
627 if (isxdigit(Py_CHARMASK(s[0]))
628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
650 if (!errors || strcmp(errors, "strict") == 0) {
651 PyErr_SetString(PyExc_ValueError,
652 "invalid \\x escape");
653 goto failed;
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
664 goto failed;
666 #ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
671 PyErr_SetString(PyExc_ValueError,
672 "Unicode escapes not legal "
673 "when Unicode disabled");
674 goto failed;
676 #endif
677 default:
678 *p++ = '\\';
679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
684 if (p-buf < newlen)
685 _PyString_Resize(&v, p - buf);
686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
692 /* -------------------------------------------------------------------- */
693 /* object api */
695 static Py_ssize_t
696 string_getsize(register PyObject *op)
698 char *s;
699 Py_ssize_t len;
700 if (PyString_AsStringAndSize(op, &s, &len))
701 return -1;
702 return len;
705 static /*const*/ char *
706 string_getbuffer(register PyObject *op)
708 char *s;
709 Py_ssize_t len;
710 if (PyString_AsStringAndSize(op, &s, &len))
711 return NULL;
712 return s;
715 Py_ssize_t
716 PyString_Size(register PyObject *op)
718 if (!PyString_Check(op))
719 return string_getsize(op);
720 return Py_Size(op);
723 /*const*/ char *
724 PyString_AsString(register PyObject *op)
726 if (!PyString_Check(op))
727 return string_getbuffer(op);
728 return ((PyStringObject *)op) -> ob_sval;
732 PyString_AsStringAndSize(register PyObject *obj,
733 register char **s,
734 register Py_ssize_t *len)
736 if (s == NULL) {
737 PyErr_BadInternalCall();
738 return -1;
741 if (!PyString_Check(obj)) {
742 #ifdef Py_USING_UNICODE
743 if (PyUnicode_Check(obj)) {
744 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745 if (obj == NULL)
746 return -1;
748 else
749 #endif
751 PyErr_Format(PyExc_TypeError,
752 "expected string or Unicode object, "
753 "%.200s found", Py_Type(obj)->tp_name);
754 return -1;
758 *s = PyString_AS_STRING(obj);
759 if (len != NULL)
760 *len = PyString_GET_SIZE(obj);
761 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
762 PyErr_SetString(PyExc_TypeError,
763 "expected string without null bytes");
764 return -1;
766 return 0;
769 /* -------------------------------------------------------------------- */
770 /* Methods */
772 #define STRINGLIB_CHAR char
774 #define STRINGLIB_CMP memcmp
775 #define STRINGLIB_LEN PyString_GET_SIZE
776 #define STRINGLIB_NEW PyString_FromStringAndSize
777 #define STRINGLIB_STR PyString_AS_STRING
779 #define STRINGLIB_EMPTY nullstring
781 #include "stringlib/fastsearch.h"
783 #include "stringlib/count.h"
784 #include "stringlib/find.h"
785 #include "stringlib/partition.h"
788 static int
789 string_print(PyStringObject *op, FILE *fp, int flags)
791 Py_ssize_t i, str_len;
792 char c;
793 int quote;
795 /* XXX Ought to check for interrupts when writing long strings */
796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
806 if (flags & Py_PRINT_RAW) {
807 char *data = op->ob_sval;
808 Py_ssize_t size = Py_Size(op);
809 Py_BEGIN_ALLOW_THREADS
810 while (size > INT_MAX) {
811 /* Very long strings cannot be written atomically.
812 * But don't write exactly INT_MAX bytes at a time
813 * to avoid memory aligment issues.
815 const int chunk_size = INT_MAX & ~0x3FFF;
816 fwrite(data, 1, chunk_size, fp);
817 data += chunk_size;
818 size -= chunk_size;
820 #ifdef __VMS
821 if (size) fwrite(data, (int)size, 1, fp);
822 #else
823 fwrite(data, 1, (int)size, fp);
824 #endif
825 Py_END_ALLOW_THREADS
826 return 0;
829 /* figure out which quote to use; single is preferred */
830 quote = '\'';
831 if (memchr(op->ob_sval, '\'', Py_Size(op)) &&
832 !memchr(op->ob_sval, '"', Py_Size(op)))
833 quote = '"';
835 str_len = Py_Size(op);
836 Py_BEGIN_ALLOW_THREADS
837 fputc(quote, fp);
838 for (i = 0; i < str_len; i++) {
839 /* Since strings are immutable and the caller should have a
840 reference, accessing the interal buffer should not be an issue
841 with the GIL released. */
842 c = op->ob_sval[i];
843 if (c == quote || c == '\\')
844 fprintf(fp, "\\%c", c);
845 else if (c == '\t')
846 fprintf(fp, "\\t");
847 else if (c == '\n')
848 fprintf(fp, "\\n");
849 else if (c == '\r')
850 fprintf(fp, "\\r");
851 else if (c < ' ' || c >= 0x7f)
852 fprintf(fp, "\\x%02x", c & 0xff);
853 else
854 fputc(c, fp);
856 fputc(quote, fp);
857 Py_END_ALLOW_THREADS
858 return 0;
861 PyObject *
862 PyString_Repr(PyObject *obj, int smartquotes)
864 register PyStringObject* op = (PyStringObject*) obj;
865 size_t newsize = 2 + 4 * Py_Size(op);
866 PyObject *v;
867 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
868 PyErr_SetString(PyExc_OverflowError,
869 "string is too large to make repr");
871 v = PyString_FromStringAndSize((char *)NULL, newsize);
872 if (v == NULL) {
873 return NULL;
875 else {
876 register Py_ssize_t i;
877 register char c;
878 register char *p;
879 int quote;
881 /* figure out which quote to use; single is preferred */
882 quote = '\'';
883 if (smartquotes &&
884 memchr(op->ob_sval, '\'', Py_Size(op)) &&
885 !memchr(op->ob_sval, '"', Py_Size(op)))
886 quote = '"';
888 p = PyString_AS_STRING(v);
889 *p++ = quote;
890 for (i = 0; i < Py_Size(op); i++) {
891 /* There's at least enough room for a hex escape
892 and a closing quote. */
893 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
894 c = op->ob_sval[i];
895 if (c == quote || c == '\\')
896 *p++ = '\\', *p++ = c;
897 else if (c == '\t')
898 *p++ = '\\', *p++ = 't';
899 else if (c == '\n')
900 *p++ = '\\', *p++ = 'n';
901 else if (c == '\r')
902 *p++ = '\\', *p++ = 'r';
903 else if (c < ' ' || c >= 0x7f) {
904 /* For performance, we don't want to call
905 PyOS_snprintf here (extra layers of
906 function call). */
907 sprintf(p, "\\x%02x", c & 0xff);
908 p += 4;
910 else
911 *p++ = c;
913 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
914 *p++ = quote;
915 *p = '\0';
916 _PyString_Resize(
917 &v, (p - PyString_AS_STRING(v)));
918 return v;
922 static PyObject *
923 string_repr(PyObject *op)
925 return PyString_Repr(op, 1);
928 static PyObject *
929 string_str(PyObject *s)
931 assert(PyString_Check(s));
932 if (PyString_CheckExact(s)) {
933 Py_INCREF(s);
934 return s;
936 else {
937 /* Subtype -- return genuine string with the same value. */
938 PyStringObject *t = (PyStringObject *) s;
939 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
943 static Py_ssize_t
944 string_length(PyStringObject *a)
946 return Py_Size(a);
949 static PyObject *
950 string_concat(register PyStringObject *a, register PyObject *bb)
952 register Py_ssize_t size;
953 register PyStringObject *op;
954 if (!PyString_Check(bb)) {
955 #ifdef Py_USING_UNICODE
956 if (PyUnicode_Check(bb))
957 return PyUnicode_Concat((PyObject *)a, bb);
958 #endif
959 PyErr_Format(PyExc_TypeError,
960 "cannot concatenate 'str' and '%.200s' objects",
961 Py_Type(bb)->tp_name);
962 return NULL;
964 #define b ((PyStringObject *)bb)
965 /* Optimize cases with empty left or right operand */
966 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
967 PyString_CheckExact(a) && PyString_CheckExact(b)) {
968 if (Py_Size(a) == 0) {
969 Py_INCREF(bb);
970 return bb;
972 Py_INCREF(a);
973 return (PyObject *)a;
975 size = Py_Size(a) + Py_Size(b);
976 if (size < 0) {
977 PyErr_SetString(PyExc_OverflowError,
978 "strings are too large to concat");
979 return NULL;
982 /* Inline PyObject_NewVar */
983 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
984 if (op == NULL)
985 return PyErr_NoMemory();
986 PyObject_INIT_VAR(op, &PyString_Type, size);
987 op->ob_shash = -1;
988 op->ob_sstate = SSTATE_NOT_INTERNED;
989 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
990 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
991 op->ob_sval[size] = '\0';
992 return (PyObject *) op;
993 #undef b
996 static PyObject *
997 string_repeat(register PyStringObject *a, register Py_ssize_t n)
999 register Py_ssize_t i;
1000 register Py_ssize_t j;
1001 register Py_ssize_t size;
1002 register PyStringObject *op;
1003 size_t nbytes;
1004 if (n < 0)
1005 n = 0;
1006 /* watch out for overflows: the size can overflow int,
1007 * and the # of bytes needed can overflow size_t
1009 size = Py_Size(a) * n;
1010 if (n && size / n != Py_Size(a)) {
1011 PyErr_SetString(PyExc_OverflowError,
1012 "repeated string is too long");
1013 return NULL;
1015 if (size == Py_Size(a) && PyString_CheckExact(a)) {
1016 Py_INCREF(a);
1017 return (PyObject *)a;
1019 nbytes = (size_t)size;
1020 if (nbytes + sizeof(PyStringObject) <= nbytes) {
1021 PyErr_SetString(PyExc_OverflowError,
1022 "repeated string is too long");
1023 return NULL;
1025 op = (PyStringObject *)
1026 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1027 if (op == NULL)
1028 return PyErr_NoMemory();
1029 PyObject_INIT_VAR(op, &PyString_Type, size);
1030 op->ob_shash = -1;
1031 op->ob_sstate = SSTATE_NOT_INTERNED;
1032 op->ob_sval[size] = '\0';
1033 if (Py_Size(a) == 1 && n > 0) {
1034 memset(op->ob_sval, a->ob_sval[0] , n);
1035 return (PyObject *) op;
1037 i = 0;
1038 if (i < size) {
1039 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
1040 i = Py_Size(a);
1042 while (i < size) {
1043 j = (i <= size-i) ? i : size-i;
1044 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1045 i += j;
1047 return (PyObject *) op;
1050 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1052 static PyObject *
1053 string_slice(register PyStringObject *a, register Py_ssize_t i,
1054 register Py_ssize_t j)
1055 /* j -- may be negative! */
1057 if (i < 0)
1058 i = 0;
1059 if (j < 0)
1060 j = 0; /* Avoid signed/unsigned bug in next line */
1061 if (j > Py_Size(a))
1062 j = Py_Size(a);
1063 if (i == 0 && j == Py_Size(a) && PyString_CheckExact(a)) {
1064 /* It's the same as a */
1065 Py_INCREF(a);
1066 return (PyObject *)a;
1068 if (j < i)
1069 j = i;
1070 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1073 static int
1074 string_contains(PyObject *str_obj, PyObject *sub_obj)
1076 if (!PyString_CheckExact(sub_obj)) {
1077 #ifdef Py_USING_UNICODE
1078 if (PyUnicode_Check(sub_obj))
1079 return PyUnicode_Contains(str_obj, sub_obj);
1080 #endif
1081 if (!PyString_Check(sub_obj)) {
1082 PyErr_Format(PyExc_TypeError,
1083 "'in <string>' requires string as left operand, "
1084 "not %.200s", Py_Type(sub_obj)->tp_name);
1085 return -1;
1089 return stringlib_contains_obj(str_obj, sub_obj);
1092 static PyObject *
1093 string_item(PyStringObject *a, register Py_ssize_t i)
1095 char pchar;
1096 PyObject *v;
1097 if (i < 0 || i >= Py_Size(a)) {
1098 PyErr_SetString(PyExc_IndexError, "string index out of range");
1099 return NULL;
1101 pchar = a->ob_sval[i];
1102 v = (PyObject *)characters[pchar & UCHAR_MAX];
1103 if (v == NULL)
1104 v = PyString_FromStringAndSize(&pchar, 1);
1105 else {
1106 #ifdef COUNT_ALLOCS
1107 one_strings++;
1108 #endif
1109 Py_INCREF(v);
1111 return v;
1114 static PyObject*
1115 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1117 int c;
1118 Py_ssize_t len_a, len_b;
1119 Py_ssize_t min_len;
1120 PyObject *result;
1122 /* Make sure both arguments are strings. */
1123 if (!(PyString_Check(a) && PyString_Check(b))) {
1124 result = Py_NotImplemented;
1125 goto out;
1127 if (a == b) {
1128 switch (op) {
1129 case Py_EQ:case Py_LE:case Py_GE:
1130 result = Py_True;
1131 goto out;
1132 case Py_NE:case Py_LT:case Py_GT:
1133 result = Py_False;
1134 goto out;
1137 if (op == Py_EQ) {
1138 /* Supporting Py_NE here as well does not save
1139 much time, since Py_NE is rarely used. */
1140 if (Py_Size(a) == Py_Size(b)
1141 && (a->ob_sval[0] == b->ob_sval[0]
1142 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
1143 result = Py_True;
1144 } else {
1145 result = Py_False;
1147 goto out;
1149 len_a = Py_Size(a); len_b = Py_Size(b);
1150 min_len = (len_a < len_b) ? len_a : len_b;
1151 if (min_len > 0) {
1152 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1153 if (c==0)
1154 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1155 } else
1156 c = 0;
1157 if (c == 0)
1158 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1159 switch (op) {
1160 case Py_LT: c = c < 0; break;
1161 case Py_LE: c = c <= 0; break;
1162 case Py_EQ: assert(0); break; /* unreachable */
1163 case Py_NE: c = c != 0; break;
1164 case Py_GT: c = c > 0; break;
1165 case Py_GE: c = c >= 0; break;
1166 default:
1167 result = Py_NotImplemented;
1168 goto out;
1170 result = c ? Py_True : Py_False;
1171 out:
1172 Py_INCREF(result);
1173 return result;
1177 _PyString_Eq(PyObject *o1, PyObject *o2)
1179 PyStringObject *a = (PyStringObject*) o1;
1180 PyStringObject *b = (PyStringObject*) o2;
1181 return Py_Size(a) == Py_Size(b)
1182 && *a->ob_sval == *b->ob_sval
1183 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
1186 static long
1187 string_hash(PyStringObject *a)
1189 register Py_ssize_t len;
1190 register unsigned char *p;
1191 register long x;
1193 if (a->ob_shash != -1)
1194 return a->ob_shash;
1195 len = Py_Size(a);
1196 p = (unsigned char *) a->ob_sval;
1197 x = *p << 7;
1198 while (--len >= 0)
1199 x = (1000003*x) ^ *p++;
1200 x ^= Py_Size(a);
1201 if (x == -1)
1202 x = -2;
1203 a->ob_shash = x;
1204 return x;
1207 static PyObject*
1208 string_subscript(PyStringObject* self, PyObject* item)
1210 if (PyIndex_Check(item)) {
1211 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1212 if (i == -1 && PyErr_Occurred())
1213 return NULL;
1214 if (i < 0)
1215 i += PyString_GET_SIZE(self);
1216 return string_item(self, i);
1218 else if (PySlice_Check(item)) {
1219 Py_ssize_t start, stop, step, slicelength, cur, i;
1220 char* source_buf;
1221 char* result_buf;
1222 PyObject* result;
1224 if (PySlice_GetIndicesEx((PySliceObject*)item,
1225 PyString_GET_SIZE(self),
1226 &start, &stop, &step, &slicelength) < 0) {
1227 return NULL;
1230 if (slicelength <= 0) {
1231 return PyString_FromStringAndSize("", 0);
1233 else if (start == 0 && step == 1 &&
1234 slicelength == PyString_GET_SIZE(self) &&
1235 PyString_CheckExact(self)) {
1236 Py_INCREF(self);
1237 return (PyObject *)self;
1239 else if (step == 1) {
1240 return PyString_FromStringAndSize(
1241 PyString_AS_STRING(self) + start,
1242 slicelength);
1244 else {
1245 source_buf = PyString_AsString((PyObject*)self);
1246 result_buf = (char *)PyMem_Malloc(slicelength);
1247 if (result_buf == NULL)
1248 return PyErr_NoMemory();
1250 for (cur = start, i = 0; i < slicelength;
1251 cur += step, i++) {
1252 result_buf[i] = source_buf[cur];
1255 result = PyString_FromStringAndSize(result_buf,
1256 slicelength);
1257 PyMem_Free(result_buf);
1258 return result;
1261 else {
1262 PyErr_Format(PyExc_TypeError,
1263 "string indices must be integers, not %.200s",
1264 Py_Type(item)->tp_name);
1265 return NULL;
1269 static Py_ssize_t
1270 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1272 if ( index != 0 ) {
1273 PyErr_SetString(PyExc_SystemError,
1274 "accessing non-existent string segment");
1275 return -1;
1277 *ptr = (void *)self->ob_sval;
1278 return Py_Size(self);
1281 static Py_ssize_t
1282 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1284 PyErr_SetString(PyExc_TypeError,
1285 "Cannot use string as modifiable buffer");
1286 return -1;
1289 static Py_ssize_t
1290 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1292 if ( lenp )
1293 *lenp = Py_Size(self);
1294 return 1;
1297 static Py_ssize_t
1298 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1300 if ( index != 0 ) {
1301 PyErr_SetString(PyExc_SystemError,
1302 "accessing non-existent string segment");
1303 return -1;
1305 *ptr = self->ob_sval;
1306 return Py_Size(self);
/* Sequence-protocol slot table for the string type.  The two assignment
   slots are 0: item and slice assignment are not provided. */
1309 static PySequenceMethods string_as_sequence = {
1310 (lenfunc)string_length, /*sq_length*/
1311 (binaryfunc)string_concat, /*sq_concat*/
1312 (ssizeargfunc)string_repeat, /*sq_repeat*/
1313 (ssizeargfunc)string_item, /*sq_item*/
1314 (ssizessizeargfunc)string_slice, /*sq_slice*/
1315 0, /*sq_ass_item*/
1316 0, /*sq_ass_slice*/
1317 (objobjproc)string_contains /*sq_contains*/
/* Mapping-protocol slot table for the string type: length and subscript
   (string_subscript handles the index/slice lookup). */
1320 static PyMappingMethods string_as_mapping = {
1321 (lenfunc)string_length,
1322 (binaryfunc)string_subscript,
/* Buffer-protocol slot table for the string type, wiring in the four
   string_buffer_* functions (read, write, segment count, char buffer). */
1326 static PyBufferProcs string_as_buffer = {
1327 (readbufferproc)string_buffer_getreadbuf,
1328 (writebufferproc)string_buffer_getwritebuf,
1329 (segcountproc)string_buffer_getsegcount,
1330 (charbufferproc)string_buffer_getcharbuf,
/* Selector values passed as `striptype` to the strip helpers; they also
   index stripformat[] below. */
1335 #define LEFTSTRIP 0
1336 #define RIGHTSTRIP 1
1337 #define BOTHSTRIP 2
1339 /* Arrays indexed by above */
1340 static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
/* Skips the three leading format characters ("|O:") of the selected
   entry, leaving just the method name for use in error messages. */
1342 #define STRIPNAME(i) (stripformat[i]+3)
/* True when the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are compared
   directly before memcmp() checks the bytes in between.

   Don't call if length < 2: the memcmp() size is length-2.

   Every parameter is parenthesized so that expression arguments (e.g. a
   conditional for `length`) expand correctly.  Arguments are still
   evaluated more than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1352 /* Overallocate the initial list to reduce the number of reallocs for small
1353 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1354 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1355 text (roughly 11 words per line) and field delimited data (usually 1-10
1356 fields). For large strings the split algorithms are bandwidth limited
1357 so increasing the preallocation likely will not improve things.*/
1359 #define MAX_PREALLOC 12
/* Initial list length for a split limited to `maxsplit` splits:
   maxsplit+1 elements, capped at MAX_PREALLOC. */
1361 /* 5 splits gives 6 elements */
1362 #define PREALLOC_SIZE(maxsplit) \
1363 (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
/* Append data[left:right] as a new string to `list`.  Relies on the
   enclosing function providing the locals `str` and `list` and an
   `onError` label, which is jumped to when creating or appending the
   string fails.  The temporary reference in `str` is always released. */
1365 #define SPLIT_APPEND(data, left, right) \
1366 str = PyString_FromStringAndSize((data) + (left), \
1367 (right) - (left)); \
1368 if (str == NULL) \
1369 goto onError; \
1370 if (PyList_Append(list, str)) { \
1371 Py_DECREF(str); \
1372 goto onError; \
1374 else \
1375 Py_DECREF(str);
/* Like SPLIT_APPEND, but for a list created with PREALLOC_SIZE(): while
   `count` is below MAX_PREALLOC the new string is stored directly into
   slot `count` (the list takes over the reference); afterwards it is
   appended.  Additionally relies on a local `count`, which is incremented
   on success. */
1377 #define SPLIT_ADD(data, left, right) { \
1378 str = PyString_FromStringAndSize((data) + (left), \
1379 (right) - (left)); \
1380 if (str == NULL) \
1381 goto onError; \
1382 if (count < MAX_PREALLOC) { \
1383 PyList_SET_ITEM(list, count, str); \
1384 } else { \
1385 if (PyList_Append(list, str)) { \
1386 Py_DECREF(str); \
1387 goto onError; \
1389 else \
1390 Py_DECREF(str); \
1392 count++; }
1394 /* Always force the list to the expected size. */
1395 #define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
/* Cursor helpers for whitespace splitting.  Each advances (SKIP_*) or
   retreats (RSKIP_*) the index variable `i` in place while the byte at
   s[i] is / is not whitespace according to isspace().  The forward forms
   stop at `len`; the reverse forms stop once `i` drops below 0. */
1397 #define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
1398 #define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
1399 #define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
1400 #define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
1402 Py_LOCAL_INLINE(PyObject *)
1403 split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1405 Py_ssize_t i, j, count=0;
1406 PyObject *str;
1407 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1409 if (list == NULL)
1410 return NULL;
1412 i = j = 0;
1414 while (maxsplit-- > 0) {
1415 SKIP_SPACE(s, i, len);
1416 if (i==len) break;
1417 j = i; i++;
1418 SKIP_NONSPACE(s, i, len);
1419 SPLIT_ADD(s, j, i);
1422 if (i < len) {
1423 /* Only occurs when maxsplit was reached */
1424 /* Skip any remaining whitespace and copy to end of string */
1425 SKIP_SPACE(s, i, len);
1426 if (i != len)
1427 SPLIT_ADD(s, i, len);
1429 FIX_PREALLOC_SIZE(list);
1430 return list;
1431 onError:
1432 Py_DECREF(list);
1433 return NULL;
1436 Py_LOCAL_INLINE(PyObject *)
1437 split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1439 register Py_ssize_t i, j, count=0;
1440 PyObject *str;
1441 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1443 if (list == NULL)
1444 return NULL;
1446 i = j = 0;
1447 while ((j < len) && (maxcount-- > 0)) {
1448 for(; j<len; j++) {
1449 /* I found that using memchr makes no difference */
1450 if (s[j] == ch) {
1451 SPLIT_ADD(s, i, j);
1452 i = j = j + 1;
1453 break;
1457 if (i <= len) {
1458 SPLIT_ADD(s, i, len);
1460 FIX_PREALLOC_SIZE(list);
1461 return list;
1463 onError:
1464 Py_DECREF(list);
1465 return NULL;
1468 PyDoc_STRVAR(split__doc__,
1469 "S.split([sep [,maxsplit]]) -> list of strings\n\
1471 Return a list of the words in the string S, using sep as the\n\
1472 delimiter string. If maxsplit is given, at most maxsplit\n\
1473 splits are done. If sep is not specified or is None, any\n\
1474 whitespace string is a separator.");
1476 static PyObject *
1477 string_split(PyStringObject *self, PyObject *args)
1479 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1480 Py_ssize_t maxsplit = -1, count=0;
1481 const char *s = PyString_AS_STRING(self), *sub;
1482 PyObject *list, *str, *subobj = Py_None;
1483 #ifdef USE_FAST
1484 Py_ssize_t pos;
1485 #endif
1487 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1488 return NULL;
1489 if (maxsplit < 0)
1490 maxsplit = PY_SSIZE_T_MAX;
1491 if (subobj == Py_None)
1492 return split_whitespace(s, len, maxsplit);
1493 if (PyString_Check(subobj)) {
1494 sub = PyString_AS_STRING(subobj);
1495 n = PyString_GET_SIZE(subobj);
1497 #ifdef Py_USING_UNICODE
1498 else if (PyUnicode_Check(subobj))
1499 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1500 #endif
1501 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1502 return NULL;
1504 if (n == 0) {
1505 PyErr_SetString(PyExc_ValueError, "empty separator");
1506 return NULL;
1508 else if (n == 1)
1509 return split_char(s, len, sub[0], maxsplit);
1511 list = PyList_New(PREALLOC_SIZE(maxsplit));
1512 if (list == NULL)
1513 return NULL;
1515 #ifdef USE_FAST
1516 i = j = 0;
1517 while (maxsplit-- > 0) {
1518 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1519 if (pos < 0)
1520 break;
1521 j = i+pos;
1522 SPLIT_ADD(s, i, j);
1523 i = j + n;
1525 #else
1526 i = j = 0;
1527 while ((j+n <= len) && (maxsplit-- > 0)) {
1528 for (; j+n <= len; j++) {
1529 if (Py_STRING_MATCH(s, j, sub, n)) {
1530 SPLIT_ADD(s, i, j);
1531 i = j = j + n;
1532 break;
1536 #endif
1537 SPLIT_ADD(s, i, len);
1538 FIX_PREALLOC_SIZE(list);
1539 return list;
1541 onError:
1542 Py_DECREF(list);
1543 return NULL;
1546 PyDoc_STRVAR(partition__doc__,
1547 "S.partition(sep) -> (head, sep, tail)\n\
1549 Searches for the separator sep in S, and returns the part before it,\n\
1550 the separator itself, and the part after it. If the separator is not\n\
1551 found, returns S and two empty strings.");
1553 static PyObject *
1554 string_partition(PyStringObject *self, PyObject *sep_obj)
1556 const char *sep;
1557 Py_ssize_t sep_len;
1559 if (PyString_Check(sep_obj)) {
1560 sep = PyString_AS_STRING(sep_obj);
1561 sep_len = PyString_GET_SIZE(sep_obj);
1563 #ifdef Py_USING_UNICODE
1564 else if (PyUnicode_Check(sep_obj))
1565 return PyUnicode_Partition((PyObject *) self, sep_obj);
1566 #endif
1567 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1568 return NULL;
1570 return stringlib_partition(
1571 (PyObject*) self,
1572 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1573 sep_obj, sep, sep_len
1577 PyDoc_STRVAR(rpartition__doc__,
1578 "S.rpartition(sep) -> (tail, sep, head)\n\
1580 Searches for the separator sep in S, starting at the end of S, and returns\n\
1581 the part before it, the separator itself, and the part after it. If the\n\
1582 separator is not found, returns two empty strings and S.");
1584 static PyObject *
1585 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1587 const char *sep;
1588 Py_ssize_t sep_len;
1590 if (PyString_Check(sep_obj)) {
1591 sep = PyString_AS_STRING(sep_obj);
1592 sep_len = PyString_GET_SIZE(sep_obj);
1594 #ifdef Py_USING_UNICODE
1595 else if (PyUnicode_Check(sep_obj))
1596 return PyUnicode_Partition((PyObject *) self, sep_obj);
1597 #endif
1598 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1599 return NULL;
1601 return stringlib_rpartition(
1602 (PyObject*) self,
1603 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1604 sep_obj, sep, sep_len
1608 Py_LOCAL_INLINE(PyObject *)
1609 rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1611 Py_ssize_t i, j, count=0;
1612 PyObject *str;
1613 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1615 if (list == NULL)
1616 return NULL;
1618 i = j = len-1;
1620 while (maxsplit-- > 0) {
1621 RSKIP_SPACE(s, i);
1622 if (i<0) break;
1623 j = i; i--;
1624 RSKIP_NONSPACE(s, i);
1625 SPLIT_ADD(s, i + 1, j + 1);
1627 if (i >= 0) {
1628 /* Only occurs when maxsplit was reached */
1629 /* Skip any remaining whitespace and copy to beginning of string */
1630 RSKIP_SPACE(s, i);
1631 if (i >= 0)
1632 SPLIT_ADD(s, 0, i + 1);
1635 FIX_PREALLOC_SIZE(list);
1636 if (PyList_Reverse(list) < 0)
1637 goto onError;
1638 return list;
1639 onError:
1640 Py_DECREF(list);
1641 return NULL;
1644 Py_LOCAL_INLINE(PyObject *)
1645 rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1647 register Py_ssize_t i, j, count=0;
1648 PyObject *str;
1649 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1651 if (list == NULL)
1652 return NULL;
1654 i = j = len - 1;
1655 while ((i >= 0) && (maxcount-- > 0)) {
1656 for (; i >= 0; i--) {
1657 if (s[i] == ch) {
1658 SPLIT_ADD(s, i + 1, j + 1);
1659 j = i = i - 1;
1660 break;
1664 if (j >= -1) {
1665 SPLIT_ADD(s, 0, j + 1);
1667 FIX_PREALLOC_SIZE(list);
1668 if (PyList_Reverse(list) < 0)
1669 goto onError;
1670 return list;
1672 onError:
1673 Py_DECREF(list);
1674 return NULL;
1677 PyDoc_STRVAR(rsplit__doc__,
1678 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1680 Return a list of the words in the string S, using sep as the\n\
1681 delimiter string, starting at the end of the string and working\n\
1682 to the front. If maxsplit is given, at most maxsplit splits are\n\
1683 done. If sep is not specified or is None, any whitespace string\n\
1684 is a separator.");
1686 static PyObject *
1687 string_rsplit(PyStringObject *self, PyObject *args)
1689 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1690 Py_ssize_t maxsplit = -1, count=0;
1691 const char *s = PyString_AS_STRING(self), *sub;
1692 PyObject *list, *str, *subobj = Py_None;
1694 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1695 return NULL;
1696 if (maxsplit < 0)
1697 maxsplit = PY_SSIZE_T_MAX;
1698 if (subobj == Py_None)
1699 return rsplit_whitespace(s, len, maxsplit);
1700 if (PyString_Check(subobj)) {
1701 sub = PyString_AS_STRING(subobj);
1702 n = PyString_GET_SIZE(subobj);
1704 #ifdef Py_USING_UNICODE
1705 else if (PyUnicode_Check(subobj))
1706 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1707 #endif
1708 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1709 return NULL;
1711 if (n == 0) {
1712 PyErr_SetString(PyExc_ValueError, "empty separator");
1713 return NULL;
1715 else if (n == 1)
1716 return rsplit_char(s, len, sub[0], maxsplit);
1718 list = PyList_New(PREALLOC_SIZE(maxsplit));
1719 if (list == NULL)
1720 return NULL;
1722 j = len;
1723 i = j - n;
1725 while ( (i >= 0) && (maxsplit-- > 0) ) {
1726 for (; i>=0; i--) {
1727 if (Py_STRING_MATCH(s, i, sub, n)) {
1728 SPLIT_ADD(s, i + n, j);
1729 j = i;
1730 i -= n;
1731 break;
1735 SPLIT_ADD(s, 0, j);
1736 FIX_PREALLOC_SIZE(list);
1737 if (PyList_Reverse(list) < 0)
1738 goto onError;
1739 return list;
1741 onError:
1742 Py_DECREF(list);
1743 return NULL;
1747 PyDoc_STRVAR(join__doc__,
1748 "S.join(sequence) -> string\n\
1750 Return a string which is the concatenation of the strings in the\n\
1751 sequence. The separator between elements is S.");
1753 static PyObject *
1754 string_join(PyStringObject *self, PyObject *orig)
1756 char *sep = PyString_AS_STRING(self);
1757 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1758 PyObject *res = NULL;
1759 char *p;
1760 Py_ssize_t seqlen = 0;
1761 size_t sz = 0;
1762 Py_ssize_t i;
1763 PyObject *seq, *item;
1765 seq = PySequence_Fast(orig, "");
1766 if (seq == NULL) {
1767 return NULL;
1770 seqlen = PySequence_Size(seq);
1771 if (seqlen == 0) {
1772 Py_DECREF(seq);
1773 return PyString_FromString("");
1775 if (seqlen == 1) {
1776 item = PySequence_Fast_GET_ITEM(seq, 0);
1777 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1778 Py_INCREF(item);
1779 Py_DECREF(seq);
1780 return item;
1784 /* There are at least two things to join, or else we have a subclass
1785 * of the builtin types in the sequence.
1786 * Do a pre-pass to figure out the total amount of space we'll
1787 * need (sz), see whether any argument is absurd, and defer to
1788 * the Unicode join if appropriate.
1790 for (i = 0; i < seqlen; i++) {
1791 const size_t old_sz = sz;
1792 item = PySequence_Fast_GET_ITEM(seq, i);
1793 if (!PyString_Check(item)){
1794 #ifdef Py_USING_UNICODE
1795 if (PyUnicode_Check(item)) {
1796 /* Defer to Unicode join.
1797 * CAUTION: There's no gurantee that the
1798 * original sequence can be iterated over
1799 * again, so we must pass seq here.
1801 PyObject *result;
1802 result = PyUnicode_Join((PyObject *)self, seq);
1803 Py_DECREF(seq);
1804 return result;
1806 #endif
1807 PyErr_Format(PyExc_TypeError,
1808 "sequence item %zd: expected string,"
1809 " %.80s found",
1810 i, Py_Type(item)->tp_name);
1811 Py_DECREF(seq);
1812 return NULL;
1814 sz += PyString_GET_SIZE(item);
1815 if (i != 0)
1816 sz += seplen;
1817 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1818 PyErr_SetString(PyExc_OverflowError,
1819 "join() result is too long for a Python string");
1820 Py_DECREF(seq);
1821 return NULL;
1825 /* Allocate result space. */
1826 res = PyString_FromStringAndSize((char*)NULL, sz);
1827 if (res == NULL) {
1828 Py_DECREF(seq);
1829 return NULL;
1832 /* Catenate everything. */
1833 p = PyString_AS_STRING(res);
1834 for (i = 0; i < seqlen; ++i) {
1835 size_t n;
1836 item = PySequence_Fast_GET_ITEM(seq, i);
1837 n = PyString_GET_SIZE(item);
1838 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1839 p += n;
1840 if (i < seqlen - 1) {
1841 Py_MEMCPY(p, sep, seplen);
1842 p += seplen;
1846 Py_DECREF(seq);
1847 return res;
1850 PyObject *
1851 _PyString_Join(PyObject *sep, PyObject *x)
1853 assert(sep != NULL && PyString_Check(sep));
1854 assert(x != NULL);
1855 return string_join((PyStringObject *)sep, x);
1858 Py_LOCAL_INLINE(void)
1859 string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1861 if (*end > len)
1862 *end = len;
1863 else if (*end < 0)
1864 *end += len;
1865 if (*end < 0)
1866 *end = 0;
1867 if (*start < 0)
1868 *start += len;
1869 if (*start < 0)
1870 *start = 0;
1873 Py_LOCAL_INLINE(Py_ssize_t)
1874 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1876 PyObject *subobj;
1877 const char *sub;
1878 Py_ssize_t sub_len;
1879 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1881 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1882 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1883 return -2;
1884 if (PyString_Check(subobj)) {
1885 sub = PyString_AS_STRING(subobj);
1886 sub_len = PyString_GET_SIZE(subobj);
1888 #ifdef Py_USING_UNICODE
1889 else if (PyUnicode_Check(subobj))
1890 return PyUnicode_Find(
1891 (PyObject *)self, subobj, start, end, dir);
1892 #endif
1893 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1894 /* XXX - the "expected a character buffer object" is pretty
1895 confusing for a non-expert. remap to something else ? */
1896 return -2;
1898 if (dir > 0)
1899 return stringlib_find_slice(
1900 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1901 sub, sub_len, start, end);
1902 else
1903 return stringlib_rfind_slice(
1904 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1905 sub, sub_len, start, end);
1909 PyDoc_STRVAR(find__doc__,
1910 "S.find(sub [,start [,end]]) -> int\n\
1912 Return the lowest index in S where substring sub is found,\n\
1913 such that sub is contained within s[start:end]. Optional\n\
1914 arguments start and end are interpreted as in slice notation.\n\
1916 Return -1 on failure.");
1918 static PyObject *
1919 string_find(PyStringObject *self, PyObject *args)
1921 Py_ssize_t result = string_find_internal(self, args, +1);
1922 if (result == -2)
1923 return NULL;
1924 return PyInt_FromSsize_t(result);
1928 PyDoc_STRVAR(index__doc__,
1929 "S.index(sub [,start [,end]]) -> int\n\
1931 Like S.find() but raise ValueError when the substring is not found.");
1933 static PyObject *
1934 string_index(PyStringObject *self, PyObject *args)
1936 Py_ssize_t result = string_find_internal(self, args, +1);
1937 if (result == -2)
1938 return NULL;
1939 if (result == -1) {
1940 PyErr_SetString(PyExc_ValueError,
1941 "substring not found");
1942 return NULL;
1944 return PyInt_FromSsize_t(result);
1948 PyDoc_STRVAR(rfind__doc__,
1949 "S.rfind(sub [,start [,end]]) -> int\n\
1951 Return the highest index in S where substring sub is found,\n\
1952 such that sub is contained within s[start:end]. Optional\n\
1953 arguments start and end are interpreted as in slice notation.\n\
1955 Return -1 on failure.");
1957 static PyObject *
1958 string_rfind(PyStringObject *self, PyObject *args)
1960 Py_ssize_t result = string_find_internal(self, args, -1);
1961 if (result == -2)
1962 return NULL;
1963 return PyInt_FromSsize_t(result);
1967 PyDoc_STRVAR(rindex__doc__,
1968 "S.rindex(sub [,start [,end]]) -> int\n\
1970 Like S.rfind() but raise ValueError when the substring is not found.");
1972 static PyObject *
1973 string_rindex(PyStringObject *self, PyObject *args)
1975 Py_ssize_t result = string_find_internal(self, args, -1);
1976 if (result == -2)
1977 return NULL;
1978 if (result == -1) {
1979 PyErr_SetString(PyExc_ValueError,
1980 "substring not found");
1981 return NULL;
1983 return PyInt_FromSsize_t(result);
1987 Py_LOCAL_INLINE(PyObject *)
1988 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1990 char *s = PyString_AS_STRING(self);
1991 Py_ssize_t len = PyString_GET_SIZE(self);
1992 char *sep = PyString_AS_STRING(sepobj);
1993 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1994 Py_ssize_t i, j;
1996 i = 0;
1997 if (striptype != RIGHTSTRIP) {
1998 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1999 i++;
2003 j = len;
2004 if (striptype != LEFTSTRIP) {
2005 do {
2006 j--;
2007 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2008 j++;
2011 if (i == 0 && j == len && PyString_CheckExact(self)) {
2012 Py_INCREF(self);
2013 return (PyObject*)self;
2015 else
2016 return PyString_FromStringAndSize(s+i, j-i);
2020 Py_LOCAL_INLINE(PyObject *)
2021 do_strip(PyStringObject *self, int striptype)
2023 char *s = PyString_AS_STRING(self);
2024 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
2026 i = 0;
2027 if (striptype != RIGHTSTRIP) {
2028 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2029 i++;
2033 j = len;
2034 if (striptype != LEFTSTRIP) {
2035 do {
2036 j--;
2037 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2038 j++;
2041 if (i == 0 && j == len && PyString_CheckExact(self)) {
2042 Py_INCREF(self);
2043 return (PyObject*)self;
2045 else
2046 return PyString_FromStringAndSize(s+i, j-i);
2050 Py_LOCAL_INLINE(PyObject *)
2051 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2053 PyObject *sep = NULL;
2055 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2056 return NULL;
2058 if (sep != NULL && sep != Py_None) {
2059 if (PyString_Check(sep))
2060 return do_xstrip(self, striptype, sep);
2061 #ifdef Py_USING_UNICODE
2062 else if (PyUnicode_Check(sep)) {
2063 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2064 PyObject *res;
2065 if (uniself==NULL)
2066 return NULL;
2067 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2068 striptype, sep);
2069 Py_DECREF(uniself);
2070 return res;
2072 #endif
2073 PyErr_Format(PyExc_TypeError,
2074 #ifdef Py_USING_UNICODE
2075 "%s arg must be None, str or unicode",
2076 #else
2077 "%s arg must be None or str",
2078 #endif
2079 STRIPNAME(striptype));
2080 return NULL;
2083 return do_strip(self, striptype);
2087 PyDoc_STRVAR(strip__doc__,
2088 "S.strip([chars]) -> string or unicode\n\
2090 Return a copy of the string S with leading and trailing\n\
2091 whitespace removed.\n\
2092 If chars is given and not None, remove characters in chars instead.\n\
2093 If chars is unicode, S will be converted to unicode before stripping");
2095 static PyObject *
2096 string_strip(PyStringObject *self, PyObject *args)
2098 if (PyTuple_GET_SIZE(args) == 0)
2099 return do_strip(self, BOTHSTRIP); /* Common case */
2100 else
2101 return do_argstrip(self, BOTHSTRIP, args);
2105 PyDoc_STRVAR(lstrip__doc__,
2106 "S.lstrip([chars]) -> string or unicode\n\
2108 Return a copy of the string S with leading whitespace removed.\n\
2109 If chars is given and not None, remove characters in chars instead.\n\
2110 If chars is unicode, S will be converted to unicode before stripping");
2112 static PyObject *
2113 string_lstrip(PyStringObject *self, PyObject *args)
2115 if (PyTuple_GET_SIZE(args) == 0)
2116 return do_strip(self, LEFTSTRIP); /* Common case */
2117 else
2118 return do_argstrip(self, LEFTSTRIP, args);
2122 PyDoc_STRVAR(rstrip__doc__,
2123 "S.rstrip([chars]) -> string or unicode\n\
2125 Return a copy of the string S with trailing whitespace removed.\n\
2126 If chars is given and not None, remove characters in chars instead.\n\
2127 If chars is unicode, S will be converted to unicode before stripping");
2129 static PyObject *
2130 string_rstrip(PyStringObject *self, PyObject *args)
2132 if (PyTuple_GET_SIZE(args) == 0)
2133 return do_strip(self, RIGHTSTRIP); /* Common case */
2134 else
2135 return do_argstrip(self, RIGHTSTRIP, args);
2139 PyDoc_STRVAR(lower__doc__,
2140 "S.lower() -> string\n\
2142 Return a copy of the string S converted to lowercase.");
2144 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2145 #ifndef _tolower
2146 #define _tolower tolower
2147 #endif
2149 static PyObject *
2150 string_lower(PyStringObject *self)
2152 char *s;
2153 Py_ssize_t i, n = PyString_GET_SIZE(self);
2154 PyObject *newobj;
2156 newobj = PyString_FromStringAndSize(NULL, n);
2157 if (!newobj)
2158 return NULL;
2160 s = PyString_AS_STRING(newobj);
2162 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2164 for (i = 0; i < n; i++) {
2165 int c = Py_CHARMASK(s[i]);
2166 if (isupper(c))
2167 s[i] = _tolower(c);
2170 return newobj;
2173 PyDoc_STRVAR(upper__doc__,
2174 "S.upper() -> string\n\
2176 Return a copy of the string S converted to uppercase.");
2178 #ifndef _toupper
2179 #define _toupper toupper
2180 #endif
2182 static PyObject *
2183 string_upper(PyStringObject *self)
2185 char *s;
2186 Py_ssize_t i, n = PyString_GET_SIZE(self);
2187 PyObject *newobj;
2189 newobj = PyString_FromStringAndSize(NULL, n);
2190 if (!newobj)
2191 return NULL;
2193 s = PyString_AS_STRING(newobj);
2195 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2197 for (i = 0; i < n; i++) {
2198 int c = Py_CHARMASK(s[i]);
2199 if (islower(c))
2200 s[i] = _toupper(c);
2203 return newobj;
2206 PyDoc_STRVAR(title__doc__,
2207 "S.title() -> string\n\
2209 Return a titlecased version of S, i.e. words start with uppercase\n\
2210 characters, all remaining cased characters have lowercase.");
2212 static PyObject*
2213 string_title(PyStringObject *self)
2215 char *s = PyString_AS_STRING(self), *s_new;
2216 Py_ssize_t i, n = PyString_GET_SIZE(self);
2217 int previous_is_cased = 0;
2218 PyObject *newobj;
2220 newobj = PyString_FromStringAndSize(NULL, n);
2221 if (newobj == NULL)
2222 return NULL;
2223 s_new = PyString_AsString(newobj);
2224 for (i = 0; i < n; i++) {
2225 int c = Py_CHARMASK(*s++);
2226 if (islower(c)) {
2227 if (!previous_is_cased)
2228 c = toupper(c);
2229 previous_is_cased = 1;
2230 } else if (isupper(c)) {
2231 if (previous_is_cased)
2232 c = tolower(c);
2233 previous_is_cased = 1;
2234 } else
2235 previous_is_cased = 0;
2236 *s_new++ = c;
2238 return newobj;
2241 PyDoc_STRVAR(capitalize__doc__,
2242 "S.capitalize() -> string\n\
2244 Return a copy of the string S with only its first character\n\
2245 capitalized.");
2247 static PyObject *
2248 string_capitalize(PyStringObject *self)
2250 char *s = PyString_AS_STRING(self), *s_new;
2251 Py_ssize_t i, n = PyString_GET_SIZE(self);
2252 PyObject *newobj;
2254 newobj = PyString_FromStringAndSize(NULL, n);
2255 if (newobj == NULL)
2256 return NULL;
2257 s_new = PyString_AsString(newobj);
2258 if (0 < n) {
2259 int c = Py_CHARMASK(*s++);
2260 if (islower(c))
2261 *s_new = toupper(c);
2262 else
2263 *s_new = c;
2264 s_new++;
2266 for (i = 1; i < n; i++) {
2267 int c = Py_CHARMASK(*s++);
2268 if (isupper(c))
2269 *s_new = tolower(c);
2270 else
2271 *s_new = c;
2272 s_new++;
2274 return newobj;
2278 PyDoc_STRVAR(count__doc__,
2279 "S.count(sub[, start[, end]]) -> int\n\
2281 Return the number of non-overlapping occurrences of substring sub in\n\
2282 string S[start:end]. Optional arguments start and end are interpreted\n\
2283 as in slice notation.");
2285 static PyObject *
2286 string_count(PyStringObject *self, PyObject *args)
2288 PyObject *sub_obj;
2289 const char *str = PyString_AS_STRING(self), *sub;
2290 Py_ssize_t sub_len;
2291 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2293 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2294 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2295 return NULL;
2297 if (PyString_Check(sub_obj)) {
2298 sub = PyString_AS_STRING(sub_obj);
2299 sub_len = PyString_GET_SIZE(sub_obj);
2301 #ifdef Py_USING_UNICODE
2302 else if (PyUnicode_Check(sub_obj)) {
2303 Py_ssize_t count;
2304 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2305 if (count == -1)
2306 return NULL;
2307 else
2308 return PyInt_FromSsize_t(count);
2310 #endif
2311 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2312 return NULL;
2314 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
2316 return PyInt_FromSsize_t(
2317 stringlib_count(str + start, end - start, sub, sub_len)
2321 PyDoc_STRVAR(swapcase__doc__,
2322 "S.swapcase() -> string\n\
2324 Return a copy of the string S with uppercase characters\n\
2325 converted to lowercase and vice versa.");
2327 static PyObject *
2328 string_swapcase(PyStringObject *self)
2330 char *s = PyString_AS_STRING(self), *s_new;
2331 Py_ssize_t i, n = PyString_GET_SIZE(self);
2332 PyObject *newobj;
2334 newobj = PyString_FromStringAndSize(NULL, n);
2335 if (newobj == NULL)
2336 return NULL;
2337 s_new = PyString_AsString(newobj);
2338 for (i = 0; i < n; i++) {
2339 int c = Py_CHARMASK(*s++);
2340 if (islower(c)) {
2341 *s_new = toupper(c);
2343 else if (isupper(c)) {
2344 *s_new = tolower(c);
2346 else
2347 *s_new = c;
2348 s_new++;
2350 return newobj;
2354 PyDoc_STRVAR(translate__doc__,
2355 "S.translate(table [,deletechars]) -> string\n\
2357 Return a copy of the string S, where all characters occurring\n\
2358 in the optional argument deletechars are removed, and the\n\
2359 remaining characters have been mapped through the given\n\
2360 translation table, which must be a string of length 256.");
2362 static PyObject *
2363 string_translate(PyStringObject *self, PyObject *args)
2365 register char *input, *output;
2366 const char *table;
2367 register Py_ssize_t i, c, changed = 0;
2368 PyObject *input_obj = (PyObject*)self;
2369 const char *output_start, *del_table=NULL;
2370 Py_ssize_t inlen, tablen, dellen = 0;
2371 PyObject *result;
2372 int trans_table[256];
2373 PyObject *tableobj, *delobj = NULL;
2375 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2376 &tableobj, &delobj))
2377 return NULL;
2379 if (PyString_Check(tableobj)) {
2380 table = PyString_AS_STRING(tableobj);
2381 tablen = PyString_GET_SIZE(tableobj);
2383 else if (tableobj == Py_None) {
2384 table = NULL;
2385 tablen = 256;
2387 #ifdef Py_USING_UNICODE
2388 else if (PyUnicode_Check(tableobj)) {
2389 /* Unicode .translate() does not support the deletechars
2390 parameter; instead a mapping to None will cause characters
2391 to be deleted. */
2392 if (delobj != NULL) {
2393 PyErr_SetString(PyExc_TypeError,
2394 "deletions are implemented differently for unicode");
2395 return NULL;
2397 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2399 #endif
2400 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2401 return NULL;
2403 if (tablen != 256) {
2404 PyErr_SetString(PyExc_ValueError,
2405 "translation table must be 256 characters long");
2406 return NULL;
2409 if (delobj != NULL) {
2410 if (PyString_Check(delobj)) {
2411 del_table = PyString_AS_STRING(delobj);
2412 dellen = PyString_GET_SIZE(delobj);
2414 #ifdef Py_USING_UNICODE
2415 else if (PyUnicode_Check(delobj)) {
2416 PyErr_SetString(PyExc_TypeError,
2417 "deletions are implemented differently for unicode");
2418 return NULL;
2420 #endif
2421 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2422 return NULL;
2424 else {
2425 del_table = NULL;
2426 dellen = 0;
2429 inlen = PyString_GET_SIZE(input_obj);
2430 result = PyString_FromStringAndSize((char *)NULL, inlen);
2431 if (result == NULL)
2432 return NULL;
2433 output_start = output = PyString_AsString(result);
2434 input = PyString_AS_STRING(input_obj);
2436 if (dellen == 0 && table != NULL) {
2437 /* If no deletions are required, use faster code */
2438 for (i = inlen; --i >= 0; ) {
2439 c = Py_CHARMASK(*input++);
2440 if (Py_CHARMASK((*output++ = table[c])) != c)
2441 changed = 1;
2443 if (changed || !PyString_CheckExact(input_obj))
2444 return result;
2445 Py_DECREF(result);
2446 Py_INCREF(input_obj);
2447 return input_obj;
2450 if (table == NULL) {
2451 for (i = 0; i < 256; i++)
2452 trans_table[i] = Py_CHARMASK(i);
2453 } else {
2454 for (i = 0; i < 256; i++)
2455 trans_table[i] = Py_CHARMASK(table[i]);
2458 for (i = 0; i < dellen; i++)
2459 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2461 for (i = inlen; --i >= 0; ) {
2462 c = Py_CHARMASK(*input++);
2463 if (trans_table[c] != -1)
2464 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2465 continue;
2466 changed = 1;
2468 if (!changed && PyString_CheckExact(input_obj)) {
2469 Py_DECREF(result);
2470 Py_INCREF(input_obj);
2471 return input_obj;
2473 /* Fix the size of the resulting string */
2474 if (inlen > 0)
2475 _PyString_Resize(&result, output - output_start);
2476 return result;
/* Direction selectors for the search helpers. */
2480 #define FORWARD 1
2481 #define REVERSE -1
2483 /* find and count characters and substrings */
/* memchr() wrapper: pointer to the first occurrence of byte `c` within the
   first `target_len` bytes of `target`, as a char *, or NULL if absent. */
2485 #define findchar(target, target_len, c) \
2486 ((char *)memchr((const void *)(target), c, target_len))
2488 /* String ops must return a string. */
2489 /* If the object is subclass of string, create a copy */
2490 Py_LOCAL(PyStringObject *)
2491 return_self(PyStringObject *self)
2493 if (PyString_CheckExact(self)) {
2494 Py_INCREF(self);
2495 return self;
2497 return (PyStringObject *)PyString_FromStringAndSize(
2498 PyString_AS_STRING(self),
2499 PyString_GET_SIZE(self));
2502 Py_LOCAL_INLINE(Py_ssize_t)
2503 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2505 Py_ssize_t count=0;
2506 const char *start=target;
2507 const char *end=target+target_len;
2509 while ( (start=findchar(start, end-start, c)) != NULL ) {
2510 count++;
2511 if (count >= maxcount)
2512 break;
2513 start += 1;
2515 return count;
2518 Py_LOCAL(Py_ssize_t)
2519 findstring(const char *target, Py_ssize_t target_len,
2520 const char *pattern, Py_ssize_t pattern_len,
2521 Py_ssize_t start,
2522 Py_ssize_t end,
2523 int direction)
2525 if (start < 0) {
2526 start += target_len;
2527 if (start < 0)
2528 start = 0;
2530 if (end > target_len) {
2531 end = target_len;
2532 } else if (end < 0) {
2533 end += target_len;
2534 if (end < 0)
2535 end = 0;
2538 /* zero-length substrings always match at the first attempt */
2539 if (pattern_len == 0)
2540 return (direction > 0) ? start : end;
2542 end -= pattern_len;
2544 if (direction < 0) {
2545 for (; end >= start; end--)
2546 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2547 return end;
2548 } else {
2549 for (; start <= end; start++)
2550 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2551 return start;
2553 return -1;
2556 Py_LOCAL_INLINE(Py_ssize_t)
2557 countstring(const char *target, Py_ssize_t target_len,
2558 const char *pattern, Py_ssize_t pattern_len,
2559 Py_ssize_t start,
2560 Py_ssize_t end,
2561 int direction, Py_ssize_t maxcount)
2563 Py_ssize_t count=0;
2565 if (start < 0) {
2566 start += target_len;
2567 if (start < 0)
2568 start = 0;
2570 if (end > target_len) {
2571 end = target_len;
2572 } else if (end < 0) {
2573 end += target_len;
2574 if (end < 0)
2575 end = 0;
2578 /* zero-length substrings match everywhere */
2579 if (pattern_len == 0 || maxcount == 0) {
2580 if (target_len+1 < maxcount)
2581 return target_len+1;
2582 return maxcount;
2585 end -= pattern_len;
2586 if (direction < 0) {
2587 for (; (end >= start); end--)
2588 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2589 count++;
2590 if (--maxcount <= 0) break;
2591 end -= pattern_len-1;
2593 } else {
2594 for (; (start <= end); start++)
2595 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2596 count++;
2597 if (--maxcount <= 0)
2598 break;
2599 start += pattern_len-1;
2602 return count;
2606 /* Algorithms for different cases of string replacement */
2608 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2609 Py_LOCAL(PyStringObject *)
2610 replace_interleave(PyStringObject *self,
2611 const char *to_s, Py_ssize_t to_len,
2612 Py_ssize_t maxcount)
2614 char *self_s, *result_s;
2615 Py_ssize_t self_len, result_len;
2616 Py_ssize_t count, i, product;
2617 PyStringObject *result;
2619 self_len = PyString_GET_SIZE(self);
2621 /* 1 at the end plus 1 after every character */
2622 count = self_len+1;
2623 if (maxcount < count)
2624 count = maxcount;
2626 /* Check for overflow */
2627 /* result_len = count * to_len + self_len; */
2628 product = count * to_len;
2629 if (product / to_len != count) {
2630 PyErr_SetString(PyExc_OverflowError,
2631 "replace string is too long");
2632 return NULL;
2634 result_len = product + self_len;
2635 if (result_len < 0) {
2636 PyErr_SetString(PyExc_OverflowError,
2637 "replace string is too long");
2638 return NULL;
2641 if (! (result = (PyStringObject *)
2642 PyString_FromStringAndSize(NULL, result_len)) )
2643 return NULL;
2645 self_s = PyString_AS_STRING(self);
2646 result_s = PyString_AS_STRING(result);
2648 /* TODO: special case single character, which doesn't need memcpy */
2650 /* Lay the first one down (guaranteed this will occur) */
2651 Py_MEMCPY(result_s, to_s, to_len);
2652 result_s += to_len;
2653 count -= 1;
2655 for (i=0; i<count; i++) {
2656 *result_s++ = *self_s++;
2657 Py_MEMCPY(result_s, to_s, to_len);
2658 result_s += to_len;
2661 /* Copy the rest of the original string */
2662 Py_MEMCPY(result_s, self_s, self_len-i);
2664 return result;
2667 /* Special case for deleting a single character */
2668 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2669 Py_LOCAL(PyStringObject *)
2670 replace_delete_single_character(PyStringObject *self,
2671 char from_c, Py_ssize_t maxcount)
2673 char *self_s, *result_s;
2674 char *start, *next, *end;
2675 Py_ssize_t self_len, result_len;
2676 Py_ssize_t count;
2677 PyStringObject *result;
2679 self_len = PyString_GET_SIZE(self);
2680 self_s = PyString_AS_STRING(self);
2682 count = countchar(self_s, self_len, from_c, maxcount);
2683 if (count == 0) {
2684 return return_self(self);
2687 result_len = self_len - count; /* from_len == 1 */
2688 assert(result_len>=0);
2690 if ( (result = (PyStringObject *)
2691 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2692 return NULL;
2693 result_s = PyString_AS_STRING(result);
2695 start = self_s;
2696 end = self_s + self_len;
2697 while (count-- > 0) {
2698 next = findchar(start, end-start, from_c);
2699 if (next == NULL)
2700 break;
2701 Py_MEMCPY(result_s, start, next-start);
2702 result_s += (next-start);
2703 start = next+1;
2705 Py_MEMCPY(result_s, start, end-start);
2707 return result;
2710 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2712 Py_LOCAL(PyStringObject *)
2713 replace_delete_substring(PyStringObject *self,
2714 const char *from_s, Py_ssize_t from_len,
2715 Py_ssize_t maxcount) {
2716 char *self_s, *result_s;
2717 char *start, *next, *end;
2718 Py_ssize_t self_len, result_len;
2719 Py_ssize_t count, offset;
2720 PyStringObject *result;
2722 self_len = PyString_GET_SIZE(self);
2723 self_s = PyString_AS_STRING(self);
2725 count = countstring(self_s, self_len,
2726 from_s, from_len,
2727 0, self_len, 1,
2728 maxcount);
2730 if (count == 0) {
2731 /* no matches */
2732 return return_self(self);
2735 result_len = self_len - (count * from_len);
2736 assert (result_len>=0);
2738 if ( (result = (PyStringObject *)
2739 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2740 return NULL;
2742 result_s = PyString_AS_STRING(result);
2744 start = self_s;
2745 end = self_s + self_len;
2746 while (count-- > 0) {
2747 offset = findstring(start, end-start,
2748 from_s, from_len,
2749 0, end-start, FORWARD);
2750 if (offset == -1)
2751 break;
2752 next = start + offset;
2754 Py_MEMCPY(result_s, start, next-start);
2756 result_s += (next-start);
2757 start = next+from_len;
2759 Py_MEMCPY(result_s, start, end-start);
2760 return result;
2763 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2764 Py_LOCAL(PyStringObject *)
2765 replace_single_character_in_place(PyStringObject *self,
2766 char from_c, char to_c,
2767 Py_ssize_t maxcount)
2769 char *self_s, *result_s, *start, *end, *next;
2770 Py_ssize_t self_len;
2771 PyStringObject *result;
2773 /* The result string will be the same size */
2774 self_s = PyString_AS_STRING(self);
2775 self_len = PyString_GET_SIZE(self);
2777 next = findchar(self_s, self_len, from_c);
2779 if (next == NULL) {
2780 /* No matches; return the original string */
2781 return return_self(self);
2784 /* Need to make a new string */
2785 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2786 if (result == NULL)
2787 return NULL;
2788 result_s = PyString_AS_STRING(result);
2789 Py_MEMCPY(result_s, self_s, self_len);
2791 /* change everything in-place, starting with this one */
2792 start = result_s + (next-self_s);
2793 *start = to_c;
2794 start++;
2795 end = result_s + self_len;
2797 while (--maxcount > 0) {
2798 next = findchar(start, end-start, from_c);
2799 if (next == NULL)
2800 break;
2801 *next = to_c;
2802 start = next+1;
2805 return result;
2808 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2809 Py_LOCAL(PyStringObject *)
2810 replace_substring_in_place(PyStringObject *self,
2811 const char *from_s, Py_ssize_t from_len,
2812 const char *to_s, Py_ssize_t to_len,
2813 Py_ssize_t maxcount)
2815 char *result_s, *start, *end;
2816 char *self_s;
2817 Py_ssize_t self_len, offset;
2818 PyStringObject *result;
2820 /* The result string will be the same size */
2822 self_s = PyString_AS_STRING(self);
2823 self_len = PyString_GET_SIZE(self);
2825 offset = findstring(self_s, self_len,
2826 from_s, from_len,
2827 0, self_len, FORWARD);
2828 if (offset == -1) {
2829 /* No matches; return the original string */
2830 return return_self(self);
2833 /* Need to make a new string */
2834 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2835 if (result == NULL)
2836 return NULL;
2837 result_s = PyString_AS_STRING(result);
2838 Py_MEMCPY(result_s, self_s, self_len);
2840 /* change everything in-place, starting with this one */
2841 start = result_s + offset;
2842 Py_MEMCPY(start, to_s, from_len);
2843 start += from_len;
2844 end = result_s + self_len;
2846 while ( --maxcount > 0) {
2847 offset = findstring(start, end-start,
2848 from_s, from_len,
2849 0, end-start, FORWARD);
2850 if (offset==-1)
2851 break;
2852 Py_MEMCPY(start+offset, to_s, from_len);
2853 start += offset+from_len;
2856 return result;
2859 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2860 Py_LOCAL(PyStringObject *)
2861 replace_single_character(PyStringObject *self,
2862 char from_c,
2863 const char *to_s, Py_ssize_t to_len,
2864 Py_ssize_t maxcount)
2866 char *self_s, *result_s;
2867 char *start, *next, *end;
2868 Py_ssize_t self_len, result_len;
2869 Py_ssize_t count, product;
2870 PyStringObject *result;
2872 self_s = PyString_AS_STRING(self);
2873 self_len = PyString_GET_SIZE(self);
2875 count = countchar(self_s, self_len, from_c, maxcount);
2876 if (count == 0) {
2877 /* no matches, return unchanged */
2878 return return_self(self);
2881 /* use the difference between current and new, hence the "-1" */
2882 /* result_len = self_len + count * (to_len-1) */
2883 product = count * (to_len-1);
2884 if (product / (to_len-1) != count) {
2885 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2886 return NULL;
2888 result_len = self_len + product;
2889 if (result_len < 0) {
2890 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2891 return NULL;
2894 if ( (result = (PyStringObject *)
2895 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2896 return NULL;
2897 result_s = PyString_AS_STRING(result);
2899 start = self_s;
2900 end = self_s + self_len;
2901 while (count-- > 0) {
2902 next = findchar(start, end-start, from_c);
2903 if (next == NULL)
2904 break;
2906 if (next == start) {
2907 /* replace with the 'to' */
2908 Py_MEMCPY(result_s, to_s, to_len);
2909 result_s += to_len;
2910 start += 1;
2911 } else {
2912 /* copy the unchanged old then the 'to' */
2913 Py_MEMCPY(result_s, start, next-start);
2914 result_s += (next-start);
2915 Py_MEMCPY(result_s, to_s, to_len);
2916 result_s += to_len;
2917 start = next+1;
2920 /* Copy the remainder of the remaining string */
2921 Py_MEMCPY(result_s, start, end-start);
2923 return result;
2926 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2927 Py_LOCAL(PyStringObject *)
2928 replace_substring(PyStringObject *self,
2929 const char *from_s, Py_ssize_t from_len,
2930 const char *to_s, Py_ssize_t to_len,
2931 Py_ssize_t maxcount) {
2932 char *self_s, *result_s;
2933 char *start, *next, *end;
2934 Py_ssize_t self_len, result_len;
2935 Py_ssize_t count, offset, product;
2936 PyStringObject *result;
2938 self_s = PyString_AS_STRING(self);
2939 self_len = PyString_GET_SIZE(self);
2941 count = countstring(self_s, self_len,
2942 from_s, from_len,
2943 0, self_len, FORWARD, maxcount);
2944 if (count == 0) {
2945 /* no matches, return unchanged */
2946 return return_self(self);
2949 /* Check for overflow */
2950 /* result_len = self_len + count * (to_len-from_len) */
2951 product = count * (to_len-from_len);
2952 if (product / (to_len-from_len) != count) {
2953 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2954 return NULL;
2956 result_len = self_len + product;
2957 if (result_len < 0) {
2958 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2959 return NULL;
2962 if ( (result = (PyStringObject *)
2963 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2964 return NULL;
2965 result_s = PyString_AS_STRING(result);
2967 start = self_s;
2968 end = self_s + self_len;
2969 while (count-- > 0) {
2970 offset = findstring(start, end-start,
2971 from_s, from_len,
2972 0, end-start, FORWARD);
2973 if (offset == -1)
2974 break;
2975 next = start+offset;
2976 if (next == start) {
2977 /* replace with the 'to' */
2978 Py_MEMCPY(result_s, to_s, to_len);
2979 result_s += to_len;
2980 start += from_len;
2981 } else {
2982 /* copy the unchanged old then the 'to' */
2983 Py_MEMCPY(result_s, start, next-start);
2984 result_s += (next-start);
2985 Py_MEMCPY(result_s, to_s, to_len);
2986 result_s += to_len;
2987 start = next+from_len;
2990 /* Copy the remainder of the remaining string */
2991 Py_MEMCPY(result_s, start, end-start);
2993 return result;
2997 Py_LOCAL(PyStringObject *)
2998 replace(PyStringObject *self,
2999 const char *from_s, Py_ssize_t from_len,
3000 const char *to_s, Py_ssize_t to_len,
3001 Py_ssize_t maxcount)
3003 if (maxcount < 0) {
3004 maxcount = PY_SSIZE_T_MAX;
3005 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3006 /* nothing to do; return the original string */
3007 return return_self(self);
3010 if (maxcount == 0 ||
3011 (from_len == 0 && to_len == 0)) {
3012 /* nothing to do; return the original string */
3013 return return_self(self);
3016 /* Handle zero-length special cases */
3018 if (from_len == 0) {
3019 /* insert the 'to' string everywhere. */
3020 /* >>> "Python".replace("", ".") */
3021 /* '.P.y.t.h.o.n.' */
3022 return replace_interleave(self, to_s, to_len, maxcount);
3025 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3026 /* point for an empty self string to generate a non-empty string */
3027 /* Special case so the remaining code always gets a non-empty string */
3028 if (PyString_GET_SIZE(self) == 0) {
3029 return return_self(self);
3032 if (to_len == 0) {
3033 /* delete all occurances of 'from' string */
3034 if (from_len == 1) {
3035 return replace_delete_single_character(
3036 self, from_s[0], maxcount);
3037 } else {
3038 return replace_delete_substring(self, from_s, from_len, maxcount);
3042 /* Handle special case where both strings have the same length */
3044 if (from_len == to_len) {
3045 if (from_len == 1) {
3046 return replace_single_character_in_place(
3047 self,
3048 from_s[0],
3049 to_s[0],
3050 maxcount);
3051 } else {
3052 return replace_substring_in_place(
3053 self, from_s, from_len, to_s, to_len, maxcount);
3057 /* Otherwise use the more generic algorithms */
3058 if (from_len == 1) {
3059 return replace_single_character(self, from_s[0],
3060 to_s, to_len, maxcount);
3061 } else {
3062 /* len('from')>=2, len('to')>=1 */
3063 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3067 PyDoc_STRVAR(replace__doc__,
3068 "S.replace (old, new[, count]) -> string\n\
3070 Return a copy of string S with all occurrences of substring\n\
3071 old replaced by new. If the optional argument count is\n\
3072 given, only the first count occurrences are replaced.");
3074 static PyObject *
3075 string_replace(PyStringObject *self, PyObject *args)
3077 Py_ssize_t count = -1;
3078 PyObject *from, *to;
3079 const char *from_s, *to_s;
3080 Py_ssize_t from_len, to_len;
3082 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3083 return NULL;
3085 if (PyString_Check(from)) {
3086 from_s = PyString_AS_STRING(from);
3087 from_len = PyString_GET_SIZE(from);
3089 #ifdef Py_USING_UNICODE
3090 if (PyUnicode_Check(from))
3091 return PyUnicode_Replace((PyObject *)self,
3092 from, to, count);
3093 #endif
3094 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3095 return NULL;
3097 if (PyString_Check(to)) {
3098 to_s = PyString_AS_STRING(to);
3099 to_len = PyString_GET_SIZE(to);
3101 #ifdef Py_USING_UNICODE
3102 else if (PyUnicode_Check(to))
3103 return PyUnicode_Replace((PyObject *)self,
3104 from, to, count);
3105 #endif
3106 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3107 return NULL;
3109 return (PyObject *)replace((PyStringObject *) self,
3110 from_s, from_len,
3111 to_s, to_len, count);
3114 /** End DALKE **/
3116 /* Matches the end (direction >= 0) or start (direction < 0) of self
3117 * against substr, using the start and end arguments. Returns
3118 * -1 on error, 0 if not found and 1 if found.
3120 Py_LOCAL(int)
3121 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3122 Py_ssize_t end, int direction)
3124 Py_ssize_t len = PyString_GET_SIZE(self);
3125 Py_ssize_t slen;
3126 const char* sub;
3127 const char* str;
3129 if (PyString_Check(substr)) {
3130 sub = PyString_AS_STRING(substr);
3131 slen = PyString_GET_SIZE(substr);
3133 #ifdef Py_USING_UNICODE
3134 else if (PyUnicode_Check(substr))
3135 return PyUnicode_Tailmatch((PyObject *)self,
3136 substr, start, end, direction);
3137 #endif
3138 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3139 return -1;
3140 str = PyString_AS_STRING(self);
3142 string_adjust_indices(&start, &end, len);
3144 if (direction < 0) {
3145 /* startswith */
3146 if (start+slen > len)
3147 return 0;
3148 } else {
3149 /* endswith */
3150 if (end-start < slen || start > len)
3151 return 0;
3153 if (end-slen > start)
3154 start = end - slen;
3156 if (end-start >= slen)
3157 return ! memcmp(str+start, sub, slen);
3158 return 0;
3162 PyDoc_STRVAR(startswith__doc__,
3163 "S.startswith(prefix[, start[, end]]) -> bool\n\
3165 Return True if S starts with the specified prefix, False otherwise.\n\
3166 With optional start, test S beginning at that position.\n\
3167 With optional end, stop comparing S at that position.\n\
3168 prefix can also be a tuple of strings to try.");
3170 static PyObject *
3171 string_startswith(PyStringObject *self, PyObject *args)
3173 Py_ssize_t start = 0;
3174 Py_ssize_t end = PY_SSIZE_T_MAX;
3175 PyObject *subobj;
3176 int result;
3178 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3179 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3180 return NULL;
3181 if (PyTuple_Check(subobj)) {
3182 Py_ssize_t i;
3183 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3184 result = _string_tailmatch(self,
3185 PyTuple_GET_ITEM(subobj, i),
3186 start, end, -1);
3187 if (result == -1)
3188 return NULL;
3189 else if (result) {
3190 Py_RETURN_TRUE;
3193 Py_RETURN_FALSE;
3195 result = _string_tailmatch(self, subobj, start, end, -1);
3196 if (result == -1)
3197 return NULL;
3198 else
3199 return PyBool_FromLong(result);
3203 PyDoc_STRVAR(endswith__doc__,
3204 "S.endswith(suffix[, start[, end]]) -> bool\n\
3206 Return True if S ends with the specified suffix, False otherwise.\n\
3207 With optional start, test S beginning at that position.\n\
3208 With optional end, stop comparing S at that position.\n\
3209 suffix can also be a tuple of strings to try.");
3211 static PyObject *
3212 string_endswith(PyStringObject *self, PyObject *args)
3214 Py_ssize_t start = 0;
3215 Py_ssize_t end = PY_SSIZE_T_MAX;
3216 PyObject *subobj;
3217 int result;
3219 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3220 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3221 return NULL;
3222 if (PyTuple_Check(subobj)) {
3223 Py_ssize_t i;
3224 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3225 result = _string_tailmatch(self,
3226 PyTuple_GET_ITEM(subobj, i),
3227 start, end, +1);
3228 if (result == -1)
3229 return NULL;
3230 else if (result) {
3231 Py_RETURN_TRUE;
3234 Py_RETURN_FALSE;
3236 result = _string_tailmatch(self, subobj, start, end, +1);
3237 if (result == -1)
3238 return NULL;
3239 else
3240 return PyBool_FromLong(result);
3244 PyDoc_STRVAR(encode__doc__,
3245 "S.encode([encoding[,errors]]) -> object\n\
3247 Encodes S using the codec registered for encoding. encoding defaults\n\
3248 to the default encoding. errors may be given to set a different error\n\
3249 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3250 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3251 'xmlcharrefreplace' as well as any other name registered with\n\
3252 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3254 static PyObject *
3255 string_encode(PyStringObject *self, PyObject *args)
3257 char *encoding = NULL;
3258 char *errors = NULL;
3259 PyObject *v;
3261 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3262 return NULL;
3263 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3264 if (v == NULL)
3265 goto onError;
3266 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3267 PyErr_Format(PyExc_TypeError,
3268 "encoder did not return a string/unicode object "
3269 "(type=%.400s)",
3270 Py_Type(v)->tp_name);
3271 Py_DECREF(v);
3272 return NULL;
3274 return v;
3276 onError:
3277 return NULL;
3281 PyDoc_STRVAR(decode__doc__,
3282 "S.decode([encoding[,errors]]) -> object\n\
3284 Decodes S using the codec registered for encoding. encoding defaults\n\
3285 to the default encoding. errors may be given to set a different error\n\
3286 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3287 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3288 as well as any other name registerd with codecs.register_error that is\n\
3289 able to handle UnicodeDecodeErrors.");
3291 static PyObject *
3292 string_decode(PyStringObject *self, PyObject *args)
3294 char *encoding = NULL;
3295 char *errors = NULL;
3296 PyObject *v;
3298 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3299 return NULL;
3300 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3301 if (v == NULL)
3302 goto onError;
3303 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3304 PyErr_Format(PyExc_TypeError,
3305 "decoder did not return a string/unicode object "
3306 "(type=%.400s)",
3307 Py_Type(v)->tp_name);
3308 Py_DECREF(v);
3309 return NULL;
3311 return v;
3313 onError:
3314 return NULL;
3318 PyDoc_STRVAR(expandtabs__doc__,
3319 "S.expandtabs([tabsize]) -> string\n\
3321 Return a copy of S where all tab characters are expanded using spaces.\n\
3322 If tabsize is not given, a tab size of 8 characters is assumed.");
3324 static PyObject*
3325 string_expandtabs(PyStringObject *self, PyObject *args)
3327 const char *e, *p;
3328 char *q;
3329 Py_ssize_t i, j, old_j;
3330 PyObject *u;
3331 int tabsize = 8;
3333 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3334 return NULL;
3336 /* First pass: determine size of output string */
3337 i = j = old_j = 0;
3338 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3339 for (p = PyString_AS_STRING(self); p < e; p++)
3340 if (*p == '\t') {
3341 if (tabsize > 0) {
3342 j += tabsize - (j % tabsize);
3343 if (old_j > j) {
3344 PyErr_SetString(PyExc_OverflowError,
3345 "new string is too long");
3346 return NULL;
3348 old_j = j;
3351 else {
3352 j++;
3353 if (*p == '\n' || *p == '\r') {
3354 i += j;
3355 old_j = j = 0;
3356 if (i < 0) {
3357 PyErr_SetString(PyExc_OverflowError,
3358 "new string is too long");
3359 return NULL;
3364 if ((i + j) < 0) {
3365 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3366 return NULL;
3369 /* Second pass: create output string and fill it */
3370 u = PyString_FromStringAndSize(NULL, i + j);
3371 if (!u)
3372 return NULL;
3374 j = 0;
3375 q = PyString_AS_STRING(u);
3377 for (p = PyString_AS_STRING(self); p < e; p++)
3378 if (*p == '\t') {
3379 if (tabsize > 0) {
3380 i = tabsize - (j % tabsize);
3381 j += i;
3382 while (i--)
3383 *q++ = ' ';
3386 else {
3387 j++;
3388 *q++ = *p;
3389 if (*p == '\n' || *p == '\r')
3390 j = 0;
3393 return u;
3396 Py_LOCAL_INLINE(PyObject *)
3397 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3399 PyObject *u;
3401 if (left < 0)
3402 left = 0;
3403 if (right < 0)
3404 right = 0;
3406 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3407 Py_INCREF(self);
3408 return (PyObject *)self;
3411 u = PyString_FromStringAndSize(NULL,
3412 left + PyString_GET_SIZE(self) + right);
3413 if (u) {
3414 if (left)
3415 memset(PyString_AS_STRING(u), fill, left);
3416 Py_MEMCPY(PyString_AS_STRING(u) + left,
3417 PyString_AS_STRING(self),
3418 PyString_GET_SIZE(self));
3419 if (right)
3420 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3421 fill, right);
3424 return u;
3427 PyDoc_STRVAR(ljust__doc__,
3428 "S.ljust(width[, fillchar]) -> string\n"
3429 "\n"
3430 "Return S left justified in a string of length width. Padding is\n"
3431 "done using the specified fill character (default is a space).");
3433 static PyObject *
3434 string_ljust(PyStringObject *self, PyObject *args)
3436 Py_ssize_t width;
3437 char fillchar = ' ';
3439 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3440 return NULL;
3442 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3443 Py_INCREF(self);
3444 return (PyObject*) self;
3447 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3451 PyDoc_STRVAR(rjust__doc__,
3452 "S.rjust(width[, fillchar]) -> string\n"
3453 "\n"
3454 "Return S right justified in a string of length width. Padding is\n"
3455 "done using the specified fill character (default is a space)");
3457 static PyObject *
3458 string_rjust(PyStringObject *self, PyObject *args)
3460 Py_ssize_t width;
3461 char fillchar = ' ';
3463 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3464 return NULL;
3466 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3467 Py_INCREF(self);
3468 return (PyObject*) self;
3471 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3475 PyDoc_STRVAR(center__doc__,
3476 "S.center(width[, fillchar]) -> string\n"
3477 "\n"
3478 "Return S centered in a string of length width. Padding is\n"
3479 "done using the specified fill character (default is a space)");
3481 static PyObject *
3482 string_center(PyStringObject *self, PyObject *args)
3484 Py_ssize_t marg, left;
3485 Py_ssize_t width;
3486 char fillchar = ' ';
3488 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3489 return NULL;
3491 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3492 Py_INCREF(self);
3493 return (PyObject*) self;
3496 marg = width - PyString_GET_SIZE(self);
3497 left = marg / 2 + (marg & width & 1);
3499 return pad(self, left, marg - left, fillchar);
3502 PyDoc_STRVAR(zfill__doc__,
3503 "S.zfill(width) -> string\n"
3504 "\n"
3505 "Pad a numeric string S with zeros on the left, to fill a field\n"
3506 "of the specified width. The string S is never truncated.");
3508 static PyObject *
3509 string_zfill(PyStringObject *self, PyObject *args)
3511 Py_ssize_t fill;
3512 PyObject *s;
3513 char *p;
3514 Py_ssize_t width;
3516 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3517 return NULL;
3519 if (PyString_GET_SIZE(self) >= width) {
3520 if (PyString_CheckExact(self)) {
3521 Py_INCREF(self);
3522 return (PyObject*) self;
3524 else
3525 return PyString_FromStringAndSize(
3526 PyString_AS_STRING(self),
3527 PyString_GET_SIZE(self)
3531 fill = width - PyString_GET_SIZE(self);
3533 s = pad(self, fill, 0, '0');
3535 if (s == NULL)
3536 return NULL;
3538 p = PyString_AS_STRING(s);
3539 if (p[fill] == '+' || p[fill] == '-') {
3540 /* move sign to beginning of string */
3541 p[0] = p[fill];
3542 p[fill] = '0';
3545 return (PyObject*) s;
3548 PyDoc_STRVAR(isspace__doc__,
3549 "S.isspace() -> bool\n\
3551 Return True if all characters in S are whitespace\n\
3552 and there is at least one character in S, False otherwise.");
3554 static PyObject*
3555 string_isspace(PyStringObject *self)
3557 register const unsigned char *p
3558 = (unsigned char *) PyString_AS_STRING(self);
3559 register const unsigned char *e;
3561 /* Shortcut for single character strings */
3562 if (PyString_GET_SIZE(self) == 1 &&
3563 isspace(*p))
3564 return PyBool_FromLong(1);
3566 /* Special case for empty strings */
3567 if (PyString_GET_SIZE(self) == 0)
3568 return PyBool_FromLong(0);
3570 e = p + PyString_GET_SIZE(self);
3571 for (; p < e; p++) {
3572 if (!isspace(*p))
3573 return PyBool_FromLong(0);
3575 return PyBool_FromLong(1);
3579 PyDoc_STRVAR(isalpha__doc__,
3580 "S.isalpha() -> bool\n\
3582 Return True if all characters in S are alphabetic\n\
3583 and there is at least one character in S, False otherwise.");
3585 static PyObject*
3586 string_isalpha(PyStringObject *self)
3588 register const unsigned char *p
3589 = (unsigned char *) PyString_AS_STRING(self);
3590 register const unsigned char *e;
3592 /* Shortcut for single character strings */
3593 if (PyString_GET_SIZE(self) == 1 &&
3594 isalpha(*p))
3595 return PyBool_FromLong(1);
3597 /* Special case for empty strings */
3598 if (PyString_GET_SIZE(self) == 0)
3599 return PyBool_FromLong(0);
3601 e = p + PyString_GET_SIZE(self);
3602 for (; p < e; p++) {
3603 if (!isalpha(*p))
3604 return PyBool_FromLong(0);
3606 return PyBool_FromLong(1);
3610 PyDoc_STRVAR(isalnum__doc__,
3611 "S.isalnum() -> bool\n\
3613 Return True if all characters in S are alphanumeric\n\
3614 and there is at least one character in S, False otherwise.");
3616 static PyObject*
3617 string_isalnum(PyStringObject *self)
3619 register const unsigned char *p
3620 = (unsigned char *) PyString_AS_STRING(self);
3621 register const unsigned char *e;
3623 /* Shortcut for single character strings */
3624 if (PyString_GET_SIZE(self) == 1 &&
3625 isalnum(*p))
3626 return PyBool_FromLong(1);
3628 /* Special case for empty strings */
3629 if (PyString_GET_SIZE(self) == 0)
3630 return PyBool_FromLong(0);
3632 e = p + PyString_GET_SIZE(self);
3633 for (; p < e; p++) {
3634 if (!isalnum(*p))
3635 return PyBool_FromLong(0);
3637 return PyBool_FromLong(1);
3641 PyDoc_STRVAR(isdigit__doc__,
3642 "S.isdigit() -> bool\n\
3644 Return True if all characters in S are digits\n\
3645 and there is at least one character in S, False otherwise.");
3647 static PyObject*
3648 string_isdigit(PyStringObject *self)
3650 register const unsigned char *p
3651 = (unsigned char *) PyString_AS_STRING(self);
3652 register const unsigned char *e;
3654 /* Shortcut for single character strings */
3655 if (PyString_GET_SIZE(self) == 1 &&
3656 isdigit(*p))
3657 return PyBool_FromLong(1);
3659 /* Special case for empty strings */
3660 if (PyString_GET_SIZE(self) == 0)
3661 return PyBool_FromLong(0);
3663 e = p + PyString_GET_SIZE(self);
3664 for (; p < e; p++) {
3665 if (!isdigit(*p))
3666 return PyBool_FromLong(0);
3668 return PyBool_FromLong(1);
3672 PyDoc_STRVAR(islower__doc__,
3673 "S.islower() -> bool\n\
3675 Return True if all cased characters in S are lowercase and there is\n\
3676 at least one cased character in S, False otherwise.");
3678 static PyObject*
3679 string_islower(PyStringObject *self)
3681 register const unsigned char *p
3682 = (unsigned char *) PyString_AS_STRING(self);
3683 register const unsigned char *e;
3684 int cased;
3686 /* Shortcut for single character strings */
3687 if (PyString_GET_SIZE(self) == 1)
3688 return PyBool_FromLong(islower(*p) != 0);
3690 /* Special case for empty strings */
3691 if (PyString_GET_SIZE(self) == 0)
3692 return PyBool_FromLong(0);
3694 e = p + PyString_GET_SIZE(self);
3695 cased = 0;
3696 for (; p < e; p++) {
3697 if (isupper(*p))
3698 return PyBool_FromLong(0);
3699 else if (!cased && islower(*p))
3700 cased = 1;
3702 return PyBool_FromLong(cased);
3706 PyDoc_STRVAR(isupper__doc__,
3707 "S.isupper() -> bool\n\
3709 Return True if all cased characters in S are uppercase and there is\n\
3710 at least one cased character in S, False otherwise.");
3712 static PyObject*
3713 string_isupper(PyStringObject *self)
3715 register const unsigned char *p
3716 = (unsigned char *) PyString_AS_STRING(self);
3717 register const unsigned char *e;
3718 int cased;
3720 /* Shortcut for single character strings */
3721 if (PyString_GET_SIZE(self) == 1)
3722 return PyBool_FromLong(isupper(*p) != 0);
3724 /* Special case for empty strings */
3725 if (PyString_GET_SIZE(self) == 0)
3726 return PyBool_FromLong(0);
3728 e = p + PyString_GET_SIZE(self);
3729 cased = 0;
3730 for (; p < e; p++) {
3731 if (islower(*p))
3732 return PyBool_FromLong(0);
3733 else if (!cased && isupper(*p))
3734 cased = 1;
3736 return PyBool_FromLong(cased);
3740 PyDoc_STRVAR(istitle__doc__,
3741 "S.istitle() -> bool\n\
3743 Return True if S is a titlecased string and there is at least one\n\
3744 character in S, i.e. uppercase characters may only follow uncased\n\
3745 characters and lowercase characters only cased ones. Return False\n\
3746 otherwise.");
3748 static PyObject*
3749 string_istitle(PyStringObject *self, PyObject *uncased)
3751 register const unsigned char *p
3752 = (unsigned char *) PyString_AS_STRING(self);
3753 register const unsigned char *e;
3754 int cased, previous_is_cased;
3756 /* Shortcut for single character strings */
3757 if (PyString_GET_SIZE(self) == 1)
3758 return PyBool_FromLong(isupper(*p) != 0);
3760 /* Special case for empty strings */
3761 if (PyString_GET_SIZE(self) == 0)
3762 return PyBool_FromLong(0);
3764 e = p + PyString_GET_SIZE(self);
3765 cased = 0;
3766 previous_is_cased = 0;
3767 for (; p < e; p++) {
3768 register const unsigned char ch = *p;
3770 if (isupper(ch)) {
3771 if (previous_is_cased)
3772 return PyBool_FromLong(0);
3773 previous_is_cased = 1;
3774 cased = 1;
3776 else if (islower(ch)) {
3777 if (!previous_is_cased)
3778 return PyBool_FromLong(0);
3779 previous_is_cased = 1;
3780 cased = 1;
3782 else
3783 previous_is_cased = 0;
3785 return PyBool_FromLong(cased);
3789 PyDoc_STRVAR(splitlines__doc__,
3790 "S.splitlines([keepends]) -> list of strings\n\
3792 Return a list of the lines in S, breaking at line boundaries.\n\
3793 Line breaks are not included in the resulting list unless keepends\n\
3794 is given and true.");
3796 static PyObject*
3797 string_splitlines(PyStringObject *self, PyObject *args)
3799 register Py_ssize_t i;
3800 register Py_ssize_t j;
3801 Py_ssize_t len;
3802 int keepends = 0;
3803 PyObject *list;
3804 PyObject *str;
3805 char *data;
3807 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3808 return NULL;
3810 data = PyString_AS_STRING(self);
3811 len = PyString_GET_SIZE(self);
3813 /* This does not use the preallocated list because splitlines is
3814 usually run with hundreds of newlines. The overhead of
3815 switching between PyList_SET_ITEM and append causes about a
3816 2-3% slowdown for that common case. A smarter implementation
3817 could move the if check out, so the SET_ITEMs are done first
3818 and the appends only done when the prealloc buffer is full.
3819 That's too much work for little gain.*/
3821 list = PyList_New(0);
3822 if (!list)
3823 goto onError;
3825 for (i = j = 0; i < len; ) {
3826 Py_ssize_t eol;
3828 /* Find a line and append it */
3829 while (i < len && data[i] != '\n' && data[i] != '\r')
3830 i++;
3832 /* Skip the line break reading CRLF as one line break */
3833 eol = i;
3834 if (i < len) {
3835 if (data[i] == '\r' && i + 1 < len &&
3836 data[i+1] == '\n')
3837 i += 2;
3838 else
3839 i++;
3840 if (keepends)
3841 eol = i;
3843 SPLIT_APPEND(data, j, eol);
3844 j = i;
3846 if (j < len) {
3847 SPLIT_APPEND(data, j, len);
3850 return list;
3852 onError:
3853 Py_XDECREF(list);
3854 return NULL;
3857 #undef SPLIT_APPEND
3858 #undef SPLIT_ADD
3859 #undef MAX_PREALLOC
3860 #undef PREALLOC_SIZE
3862 static PyObject *
3863 string_getnewargs(PyStringObject *v)
3865 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
3869 static PyMethodDef
3870 string_methods[] = {
3871 /* Counterparts of the obsolete stropmodule functions; except
3872 string.maketrans(). */
3873 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3874 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3875 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3876 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3877 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3878 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3879 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3880 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3881 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3882 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3883 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3884 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3885 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3886 capitalize__doc__},
3887 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3888 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3889 endswith__doc__},
3890 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3891 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3892 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3893 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3894 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3895 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3896 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3897 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3898 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3899 rpartition__doc__},
3900 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3901 startswith__doc__},
3902 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3903 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3904 swapcase__doc__},
3905 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3906 translate__doc__},
3907 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3908 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3909 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3910 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3911 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3912 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3913 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3914 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3915 expandtabs__doc__},
3916 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3917 splitlines__doc__},
3918 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3919 {NULL, NULL} /* sentinel */
3922 static PyObject *
3923 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3925 static PyObject *
3926 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3928 PyObject *x = NULL;
3929 static char *kwlist[] = {"object", 0};
3931 if (type != &PyString_Type)
3932 return str_subtype_new(type, args, kwds);
3933 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3934 return NULL;
3935 if (x == NULL)
3936 return PyString_FromString("");
3937 return PyObject_Str(x);
3940 static PyObject *
3941 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3943 PyObject *tmp, *pnew;
3944 Py_ssize_t n;
3946 assert(PyType_IsSubtype(type, &PyString_Type));
3947 tmp = string_new(&PyString_Type, args, kwds);
3948 if (tmp == NULL)
3949 return NULL;
3950 assert(PyString_CheckExact(tmp));
3951 n = PyString_GET_SIZE(tmp);
3952 pnew = type->tp_alloc(type, n);
3953 if (pnew != NULL) {
3954 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3955 ((PyStringObject *)pnew)->ob_shash =
3956 ((PyStringObject *)tmp)->ob_shash;
3957 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3959 Py_DECREF(tmp);
3960 return pnew;
3963 static PyObject *
3964 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3966 PyErr_SetString(PyExc_TypeError,
3967 "The basestring type cannot be instantiated");
3968 return NULL;
3971 static PyObject *
3972 string_mod(PyObject *v, PyObject *w)
3974 if (!PyString_Check(v)) {
3975 Py_INCREF(Py_NotImplemented);
3976 return Py_NotImplemented;
3978 return PyString_Format(v, w);
3981 PyDoc_STRVAR(basestring_doc,
3982 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3984 static PyNumberMethods string_as_number = {
3985 0, /*nb_add*/
3986 0, /*nb_subtract*/
3987 0, /*nb_multiply*/
3988 0, /*nb_divide*/
3989 string_mod, /*nb_remainder*/
3993 PyTypeObject PyBaseString_Type = {
3994 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3995 "basestring",
3998 0, /* tp_dealloc */
3999 0, /* tp_print */
4000 0, /* tp_getattr */
4001 0, /* tp_setattr */
4002 0, /* tp_compare */
4003 0, /* tp_repr */
4004 0, /* tp_as_number */
4005 0, /* tp_as_sequence */
4006 0, /* tp_as_mapping */
4007 0, /* tp_hash */
4008 0, /* tp_call */
4009 0, /* tp_str */
4010 0, /* tp_getattro */
4011 0, /* tp_setattro */
4012 0, /* tp_as_buffer */
4013 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4014 basestring_doc, /* tp_doc */
4015 0, /* tp_traverse */
4016 0, /* tp_clear */
4017 0, /* tp_richcompare */
4018 0, /* tp_weaklistoffset */
4019 0, /* tp_iter */
4020 0, /* tp_iternext */
4021 0, /* tp_methods */
4022 0, /* tp_members */
4023 0, /* tp_getset */
4024 &PyBaseObject_Type, /* tp_base */
4025 0, /* tp_dict */
4026 0, /* tp_descr_get */
4027 0, /* tp_descr_set */
4028 0, /* tp_dictoffset */
4029 0, /* tp_init */
4030 0, /* tp_alloc */
4031 basestring_new, /* tp_new */
4032 0, /* tp_free */
4035 PyDoc_STRVAR(string_doc,
4036 "str(object) -> string\n\
4038 Return a nice string representation of the object.\n\
4039 If the argument is a string, the return value is the same object.");
4041 PyTypeObject PyString_Type = {
4042 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4043 "str",
4044 sizeof(PyStringObject),
4045 sizeof(char),
4046 string_dealloc, /* tp_dealloc */
4047 (printfunc)string_print, /* tp_print */
4048 0, /* tp_getattr */
4049 0, /* tp_setattr */
4050 0, /* tp_compare */
4051 string_repr, /* tp_repr */
4052 &string_as_number, /* tp_as_number */
4053 &string_as_sequence, /* tp_as_sequence */
4054 &string_as_mapping, /* tp_as_mapping */
4055 (hashfunc)string_hash, /* tp_hash */
4056 0, /* tp_call */
4057 string_str, /* tp_str */
4058 PyObject_GenericGetAttr, /* tp_getattro */
4059 0, /* tp_setattro */
4060 &string_as_buffer, /* tp_as_buffer */
4061 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4062 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
4063 string_doc, /* tp_doc */
4064 0, /* tp_traverse */
4065 0, /* tp_clear */
4066 (richcmpfunc)string_richcompare, /* tp_richcompare */
4067 0, /* tp_weaklistoffset */
4068 0, /* tp_iter */
4069 0, /* tp_iternext */
4070 string_methods, /* tp_methods */
4071 0, /* tp_members */
4072 0, /* tp_getset */
4073 &PyBaseString_Type, /* tp_base */
4074 0, /* tp_dict */
4075 0, /* tp_descr_get */
4076 0, /* tp_descr_set */
4077 0, /* tp_dictoffset */
4078 0, /* tp_init */
4079 0, /* tp_alloc */
4080 string_new, /* tp_new */
4081 PyObject_Del, /* tp_free */
4084 void
4085 PyString_Concat(register PyObject **pv, register PyObject *w)
4087 register PyObject *v;
4088 if (*pv == NULL)
4089 return;
4090 if (w == NULL || !PyString_Check(*pv)) {
4091 Py_DECREF(*pv);
4092 *pv = NULL;
4093 return;
4095 v = string_concat((PyStringObject *) *pv, w);
4096 Py_DECREF(*pv);
4097 *pv = v;
4100 void
4101 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
4103 PyString_Concat(pv, w);
4104 Py_XDECREF(w);
4108 /* The following function breaks the notion that strings are immutable:
4109 it changes the size of a string. We get away with this only if there
4110 is only one module referencing the object. You can also think of it
4111 as creating a new string object and destroying the old one, only
4112 more efficiently. In any case, don't use this if the string may
4113 already be known to some other part of the code...
4114 Note that if there's not enough memory to resize the string, the original
4115 string object at *pv is deallocated, *pv is set to NULL, an "out of
4116 memory" exception is set, and -1 is returned. Else (on success) 0 is
4117 returned, and the value in *pv may or may not be the same as on input.
4118 As always, an extra byte is allocated for a trailing \0 byte (newsize
4119 does *not* include that), and a trailing \0 byte is stored.
4123 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
4125 register PyObject *v;
4126 register PyStringObject *sv;
4127 v = *pv;
4128 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
4129 PyString_CHECK_INTERNED(v)) {
4130 *pv = 0;
4131 Py_DECREF(v);
4132 PyErr_BadInternalCall();
4133 return -1;
4135 /* XXX UNREF/NEWREF interface should be more symmetrical */
4136 _Py_DEC_REFTOTAL;
4137 _Py_ForgetReference(v);
4138 *pv = (PyObject *)
4139 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
4140 if (*pv == NULL) {
4141 PyObject_Del(v);
4142 PyErr_NoMemory();
4143 return -1;
4145 _Py_NewReference(*pv);
4146 sv = (PyStringObject *) *pv;
4147 Py_Size(sv) = newsize;
4148 sv->ob_sval[newsize] = '\0';
4149 sv->ob_shash = -1; /* invalidate cached hash value */
4150 return 0;
4153 /* Helpers for formatstring */
4155 Py_LOCAL_INLINE(PyObject *)
4156 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4158 Py_ssize_t argidx = *p_argidx;
4159 if (argidx < arglen) {
4160 (*p_argidx)++;
4161 if (arglen < 0)
4162 return args;
4163 else
4164 return PyTuple_GetItem(args, argidx);
4166 PyErr_SetString(PyExc_TypeError,
4167 "not enough arguments for format string");
4168 return NULL;
/* Format flag bits, one per flag character of a format specifier:
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
4184 Py_LOCAL_INLINE(int)
4185 formatfloat(char *buf, size_t buflen, int flags,
4186 int prec, int type, PyObject *v)
4188 /* fmt = '%#.' + `prec` + `type`
4189 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4190 char fmt[20];
4191 double x;
4192 x = PyFloat_AsDouble(v);
4193 if (x == -1.0 && PyErr_Occurred()) {
4194 PyErr_Format(PyExc_TypeError, "float argument required, "
4195 "not %.200s", Py_Type(v)->tp_name);
4196 return -1;
4198 if (prec < 0)
4199 prec = 6;
4200 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4201 type = 'g';
4202 /* Worst case length calc to ensure no buffer overrun:
4204 'g' formats:
4205 fmt = %#.<prec>g
4206 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4207 for any double rep.)
4208 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4210 'f' formats:
4211 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4212 len = 1 + 50 + 1 + prec = 52 + prec
4214 If prec=0 the effective precision is 1 (the leading digit is
4215 always given), therefore increase the length by one.
4218 if (((type == 'g' || type == 'G') &&
4219 buflen <= (size_t)10 + (size_t)prec) ||
4220 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4221 PyErr_SetString(PyExc_OverflowError,
4222 "formatted float is too long (precision too large?)");
4223 return -1;
4225 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4226 (flags&F_ALT) ? "#" : "",
4227 prec, type);
4228 PyOS_ascii_formatd(buf, buflen, fmt, x);
4229 return (int)strlen(buf);
4232 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4233 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4234 * Python's regular ints.
4235 * Return value: a new PyString*, or NULL if error.
4236 * . *pbuf is set to point into it,
4237 * *plen set to the # of chars following that.
4238 * Caller must decref it when done using pbuf.
4239 * The string starting at *pbuf is of the form
4240 * "-"? ("0x" | "0X")? digit+
4241 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4242 * set in flags. The case of hex digits will be correct,
4243 * There will be at least prec digits, zero-filled on the left if
4244 * necessary to get that many.
4245 * val object to be converted
4246 * flags bitmask of format flags; only F_ALT is looked at
4247 * prec minimum number of digits; 0-fill on left if needed
4248 * type a character in [duoxX]; u acts the same as d
4250 * CAUTION: o, x and X conversions on regular ints can never
4251 * produce a '-' sign, but can for Python's unbounded ints.
4253 PyObject*
4254 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4255 char **pbuf, int *plen)
4257 PyObject *result = NULL;
4258 char *buf;
4259 Py_ssize_t i;
4260 int sign; /* 1 if '-', else 0 */
4261 int len; /* number of characters */
4262 Py_ssize_t llen;
4263 int numdigits; /* len == numnondigits + numdigits */
4264 int numnondigits = 0;
4266 switch (type) {
4267 case 'd':
4268 case 'u':
4269 result = Py_Type(val)->tp_str(val);
4270 break;
4271 case 'o':
4272 result = Py_Type(val)->tp_as_number->nb_oct(val);
4273 break;
4274 case 'x':
4275 case 'X':
4276 numnondigits = 2;
4277 result = Py_Type(val)->tp_as_number->nb_hex(val);
4278 break;
4279 default:
4280 assert(!"'type' not in [duoxX]");
4282 if (!result)
4283 return NULL;
4285 buf = PyString_AsString(result);
4286 if (!buf) {
4287 Py_DECREF(result);
4288 return NULL;
4291 /* To modify the string in-place, there can only be one reference. */
4292 if (Py_Refcnt(result) != 1) {
4293 PyErr_BadInternalCall();
4294 return NULL;
4296 llen = PyString_Size(result);
4297 if (llen > INT_MAX) {
4298 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4299 return NULL;
4301 len = (int)llen;
4302 if (buf[len-1] == 'L') {
4303 --len;
4304 buf[len] = '\0';
4306 sign = buf[0] == '-';
4307 numnondigits += sign;
4308 numdigits = len - numnondigits;
4309 assert(numdigits > 0);
4311 /* Get rid of base marker unless F_ALT */
4312 if ((flags & F_ALT) == 0) {
4313 /* Need to skip 0x, 0X or 0. */
4314 int skipped = 0;
4315 switch (type) {
4316 case 'o':
4317 assert(buf[sign] == '0');
4318 /* If 0 is only digit, leave it alone. */
4319 if (numdigits > 1) {
4320 skipped = 1;
4321 --numdigits;
4323 break;
4324 case 'x':
4325 case 'X':
4326 assert(buf[sign] == '0');
4327 assert(buf[sign + 1] == 'x');
4328 skipped = 2;
4329 numnondigits -= 2;
4330 break;
4332 if (skipped) {
4333 buf += skipped;
4334 len -= skipped;
4335 if (sign)
4336 buf[0] = '-';
4338 assert(len == numnondigits + numdigits);
4339 assert(numdigits > 0);
4342 /* Fill with leading zeroes to meet minimum width. */
4343 if (prec > numdigits) {
4344 PyObject *r1 = PyString_FromStringAndSize(NULL,
4345 numnondigits + prec);
4346 char *b1;
4347 if (!r1) {
4348 Py_DECREF(result);
4349 return NULL;
4351 b1 = PyString_AS_STRING(r1);
4352 for (i = 0; i < numnondigits; ++i)
4353 *b1++ = *buf++;
4354 for (i = 0; i < prec - numdigits; i++)
4355 *b1++ = '0';
4356 for (i = 0; i < numdigits; i++)
4357 *b1++ = *buf++;
4358 *b1 = '\0';
4359 Py_DECREF(result);
4360 result = r1;
4361 buf = PyString_AS_STRING(result);
4362 len = numnondigits + prec;
4365 /* Fix up case for hex conversions. */
4366 if (type == 'X') {
4367 /* Need to convert all lower case letters to upper case.
4368 and need to convert 0x to 0X (and -0x to -0X). */
4369 for (i = 0; i < len; i++)
4370 if (buf[i] >= 'a' && buf[i] <= 'x')
4371 buf[i] -= 'a'-'A';
4373 *pbuf = buf;
4374 *plen = len;
4375 return result;
4378 Py_LOCAL_INLINE(int)
4379 formatint(char *buf, size_t buflen, int flags,
4380 int prec, int type, PyObject *v)
4382 /* fmt = '%#.' + `prec` + 'l' + `type`
4383 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4384 + 1 + 1 = 24 */
4385 char fmt[64]; /* plenty big enough! */
4386 char *sign;
4387 long x;
4389 x = PyInt_AsLong(v);
4390 if (x == -1 && PyErr_Occurred()) {
4391 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4392 Py_Type(v)->tp_name);
4393 return -1;
4395 if (x < 0 && type == 'u') {
4396 type = 'd';
4398 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4399 sign = "-";
4400 else
4401 sign = "";
4402 if (prec < 0)
4403 prec = 1;
4405 if ((flags & F_ALT) &&
4406 (type == 'x' || type == 'X')) {
4407 /* When converting under %#x or %#X, there are a number
4408 * of issues that cause pain:
4409 * - when 0 is being converted, the C standard leaves off
4410 * the '0x' or '0X', which is inconsistent with other
4411 * %#x/%#X conversions and inconsistent with Python's
4412 * hex() function
4413 * - there are platforms that violate the standard and
4414 * convert 0 with the '0x' or '0X'
4415 * (Metrowerks, Compaq Tru64)
4416 * - there are platforms that give '0x' when converting
4417 * under %#X, but convert 0 in accordance with the
4418 * standard (OS/2 EMX)
4420 * We can achieve the desired consistency by inserting our
4421 * own '0x' or '0X' prefix, and substituting %x/%X in place
4422 * of %#x/%#X.
4424 * Note that this is the same approach as used in
4425 * formatint() in unicodeobject.c
4427 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4428 sign, type, prec, type);
4430 else {
4431 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4432 sign, (flags&F_ALT) ? "#" : "",
4433 prec, type);
4436 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4437 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4439 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4440 PyErr_SetString(PyExc_OverflowError,
4441 "formatted integer is too long (precision too large?)");
4442 return -1;
4444 if (sign[0])
4445 PyOS_snprintf(buf, buflen, fmt, -x);
4446 else
4447 PyOS_snprintf(buf, buflen, fmt, x);
4448 return (int)strlen(buf);
4451 Py_LOCAL_INLINE(int)
4452 formatchar(char *buf, size_t buflen, PyObject *v)
4454 /* presume that the buffer is at least 2 characters long */
4455 if (PyString_Check(v)) {
4456 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4457 return -1;
4459 else {
4460 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4461 return -1;
4463 buf[1] = '\0';
4464 return 1;
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer in which floats, ints
   and chars are formatted.  XXX This is a magic number.  Every formatting
   routine bounds-checks against it so nothing overflows, but allocating
   a buffer of the right size per format might be the better solution.
   For now, the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120
4477 PyObject *
4478 PyString_Format(PyObject *format, PyObject *args)
4480 char *fmt, *res;
4481 Py_ssize_t arglen, argidx;
4482 Py_ssize_t reslen, rescnt, fmtcnt;
4483 int args_owned = 0;
4484 PyObject *result, *orig_args;
4485 #ifdef Py_USING_UNICODE
4486 PyObject *v, *w;
4487 #endif
4488 PyObject *dict = NULL;
4489 if (format == NULL || !PyString_Check(format) || args == NULL) {
4490 PyErr_BadInternalCall();
4491 return NULL;
4493 orig_args = args;
4494 fmt = PyString_AS_STRING(format);
4495 fmtcnt = PyString_GET_SIZE(format);
4496 reslen = rescnt = fmtcnt + 100;
4497 result = PyString_FromStringAndSize((char *)NULL, reslen);
4498 if (result == NULL)
4499 return NULL;
4500 res = PyString_AsString(result);
4501 if (PyTuple_Check(args)) {
4502 arglen = PyTuple_GET_SIZE(args);
4503 argidx = 0;
4505 else {
4506 arglen = -1;
4507 argidx = -2;
4509 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
4510 !PyObject_TypeCheck(args, &PyBaseString_Type))
4511 dict = args;
4512 while (--fmtcnt >= 0) {
4513 if (*fmt != '%') {
4514 if (--rescnt < 0) {
4515 rescnt = fmtcnt + 100;
4516 reslen += rescnt;
4517 if (_PyString_Resize(&result, reslen) < 0)
4518 return NULL;
4519 res = PyString_AS_STRING(result)
4520 + reslen - rescnt;
4521 --rescnt;
4523 *res++ = *fmt++;
4525 else {
4526 /* Got a format specifier */
4527 int flags = 0;
4528 Py_ssize_t width = -1;
4529 int prec = -1;
4530 int c = '\0';
4531 int fill;
4532 PyObject *v = NULL;
4533 PyObject *temp = NULL;
4534 char *pbuf;
4535 int sign;
4536 Py_ssize_t len;
4537 char formatbuf[FORMATBUFLEN];
4538 /* For format{float,int,char}() */
4539 #ifdef Py_USING_UNICODE
4540 char *fmt_start = fmt;
4541 Py_ssize_t argidx_start = argidx;
4542 #endif
4544 fmt++;
4545 if (*fmt == '(') {
4546 char *keystart;
4547 Py_ssize_t keylen;
4548 PyObject *key;
4549 int pcount = 1;
4551 if (dict == NULL) {
4552 PyErr_SetString(PyExc_TypeError,
4553 "format requires a mapping");
4554 goto error;
4556 ++fmt;
4557 --fmtcnt;
4558 keystart = fmt;
4559 /* Skip over balanced parentheses */
4560 while (pcount > 0 && --fmtcnt >= 0) {
4561 if (*fmt == ')')
4562 --pcount;
4563 else if (*fmt == '(')
4564 ++pcount;
4565 fmt++;
4567 keylen = fmt - keystart - 1;
4568 if (fmtcnt < 0 || pcount > 0) {
4569 PyErr_SetString(PyExc_ValueError,
4570 "incomplete format key");
4571 goto error;
4573 key = PyString_FromStringAndSize(keystart,
4574 keylen);
4575 if (key == NULL)
4576 goto error;
4577 if (args_owned) {
4578 Py_DECREF(args);
4579 args_owned = 0;
4581 args = PyObject_GetItem(dict, key);
4582 Py_DECREF(key);
4583 if (args == NULL) {
4584 goto error;
4586 args_owned = 1;
4587 arglen = -1;
4588 argidx = -2;
4590 while (--fmtcnt >= 0) {
4591 switch (c = *fmt++) {
4592 case '-': flags |= F_LJUST; continue;
4593 case '+': flags |= F_SIGN; continue;
4594 case ' ': flags |= F_BLANK; continue;
4595 case '#': flags |= F_ALT; continue;
4596 case '0': flags |= F_ZERO; continue;
4598 break;
4600 if (c == '*') {
4601 v = getnextarg(args, arglen, &argidx);
4602 if (v == NULL)
4603 goto error;
4604 if (!PyInt_Check(v)) {
4605 PyErr_SetString(PyExc_TypeError,
4606 "* wants int");
4607 goto error;
4609 width = PyInt_AsLong(v);
4610 if (width < 0) {
4611 flags |= F_LJUST;
4612 width = -width;
4614 if (--fmtcnt >= 0)
4615 c = *fmt++;
4617 else if (c >= 0 && isdigit(c)) {
4618 width = c - '0';
4619 while (--fmtcnt >= 0) {
4620 c = Py_CHARMASK(*fmt++);
4621 if (!isdigit(c))
4622 break;
4623 if ((width*10) / 10 != width) {
4624 PyErr_SetString(
4625 PyExc_ValueError,
4626 "width too big");
4627 goto error;
4629 width = width*10 + (c - '0');
4632 if (c == '.') {
4633 prec = 0;
4634 if (--fmtcnt >= 0)
4635 c = *fmt++;
4636 if (c == '*') {
4637 v = getnextarg(args, arglen, &argidx);
4638 if (v == NULL)
4639 goto error;
4640 if (!PyInt_Check(v)) {
4641 PyErr_SetString(
4642 PyExc_TypeError,
4643 "* wants int");
4644 goto error;
4646 prec = PyInt_AsLong(v);
4647 if (prec < 0)
4648 prec = 0;
4649 if (--fmtcnt >= 0)
4650 c = *fmt++;
4652 else if (c >= 0 && isdigit(c)) {
4653 prec = c - '0';
4654 while (--fmtcnt >= 0) {
4655 c = Py_CHARMASK(*fmt++);
4656 if (!isdigit(c))
4657 break;
4658 if ((prec*10) / 10 != prec) {
4659 PyErr_SetString(
4660 PyExc_ValueError,
4661 "prec too big");
4662 goto error;
4664 prec = prec*10 + (c - '0');
4667 } /* prec */
4668 if (fmtcnt >= 0) {
4669 if (c == 'h' || c == 'l' || c == 'L') {
4670 if (--fmtcnt >= 0)
4671 c = *fmt++;
4674 if (fmtcnt < 0) {
4675 PyErr_SetString(PyExc_ValueError,
4676 "incomplete format");
4677 goto error;
4679 if (c != '%') {
4680 v = getnextarg(args, arglen, &argidx);
4681 if (v == NULL)
4682 goto error;
4684 sign = 0;
4685 fill = ' ';
4686 switch (c) {
4687 case '%':
4688 pbuf = "%";
4689 len = 1;
4690 break;
4691 case 's':
4692 #ifdef Py_USING_UNICODE
4693 if (PyUnicode_Check(v)) {
4694 fmt = fmt_start;
4695 argidx = argidx_start;
4696 goto unicode;
4698 #endif
4699 temp = _PyObject_Str(v);
4700 #ifdef Py_USING_UNICODE
4701 if (temp != NULL && PyUnicode_Check(temp)) {
4702 Py_DECREF(temp);
4703 fmt = fmt_start;
4704 argidx = argidx_start;
4705 goto unicode;
4707 #endif
4708 /* Fall through */
4709 case 'r':
4710 if (c == 'r')
4711 temp = PyObject_Repr(v);
4712 if (temp == NULL)
4713 goto error;
4714 if (!PyString_Check(temp)) {
4715 PyErr_SetString(PyExc_TypeError,
4716 "%s argument has non-string str()");
4717 Py_DECREF(temp);
4718 goto error;
4720 pbuf = PyString_AS_STRING(temp);
4721 len = PyString_GET_SIZE(temp);
4722 if (prec >= 0 && len > prec)
4723 len = prec;
4724 break;
4725 case 'i':
4726 case 'd':
4727 case 'u':
4728 case 'o':
4729 case 'x':
4730 case 'X':
4731 if (c == 'i')
4732 c = 'd';
4733 if (PyLong_Check(v)) {
4734 int ilen;
4735 temp = _PyString_FormatLong(v, flags,
4736 prec, c, &pbuf, &ilen);
4737 len = ilen;
4738 if (!temp)
4739 goto error;
4740 sign = 1;
4742 else {
4743 pbuf = formatbuf;
4744 len = formatint(pbuf,
4745 sizeof(formatbuf),
4746 flags, prec, c, v);
4747 if (len < 0)
4748 goto error;
4749 sign = 1;
4751 if (flags & F_ZERO)
4752 fill = '0';
4753 break;
4754 case 'e':
4755 case 'E':
4756 case 'f':
4757 case 'F':
4758 case 'g':
4759 case 'G':
4760 if (c == 'F')
4761 c = 'f';
4762 pbuf = formatbuf;
4763 len = formatfloat(pbuf, sizeof(formatbuf),
4764 flags, prec, c, v);
4765 if (len < 0)
4766 goto error;
4767 sign = 1;
4768 if (flags & F_ZERO)
4769 fill = '0';
4770 break;
4771 case 'c':
4772 #ifdef Py_USING_UNICODE
4773 if (PyUnicode_Check(v)) {
4774 fmt = fmt_start;
4775 argidx = argidx_start;
4776 goto unicode;
4778 #endif
4779 pbuf = formatbuf;
4780 len = formatchar(pbuf, sizeof(formatbuf), v);
4781 if (len < 0)
4782 goto error;
4783 break;
4784 default:
4785 PyErr_Format(PyExc_ValueError,
4786 "unsupported format character '%c' (0x%x) "
4787 "at index %zd",
4788 c, c,
4789 (Py_ssize_t)(fmt - 1 -
4790 PyString_AsString(format)));
4791 goto error;
4793 if (sign) {
4794 if (*pbuf == '-' || *pbuf == '+') {
4795 sign = *pbuf++;
4796 len--;
4798 else if (flags & F_SIGN)
4799 sign = '+';
4800 else if (flags & F_BLANK)
4801 sign = ' ';
4802 else
4803 sign = 0;
4805 if (width < len)
4806 width = len;
4807 if (rescnt - (sign != 0) < width) {
4808 reslen -= rescnt;
4809 rescnt = width + fmtcnt + 100;
4810 reslen += rescnt;
4811 if (reslen < 0) {
4812 Py_DECREF(result);
4813 Py_XDECREF(temp);
4814 return PyErr_NoMemory();
4816 if (_PyString_Resize(&result, reslen) < 0) {
4817 Py_XDECREF(temp);
4818 return NULL;
4820 res = PyString_AS_STRING(result)
4821 + reslen - rescnt;
4823 if (sign) {
4824 if (fill != ' ')
4825 *res++ = sign;
4826 rescnt--;
4827 if (width > len)
4828 width--;
4830 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4831 assert(pbuf[0] == '0');
4832 assert(pbuf[1] == c);
4833 if (fill != ' ') {
4834 *res++ = *pbuf++;
4835 *res++ = *pbuf++;
4837 rescnt -= 2;
4838 width -= 2;
4839 if (width < 0)
4840 width = 0;
4841 len -= 2;
4843 if (width > len && !(flags & F_LJUST)) {
4844 do {
4845 --rescnt;
4846 *res++ = fill;
4847 } while (--width > len);
4849 if (fill == ' ') {
4850 if (sign)
4851 *res++ = sign;
4852 if ((flags & F_ALT) &&
4853 (c == 'x' || c == 'X')) {
4854 assert(pbuf[0] == '0');
4855 assert(pbuf[1] == c);
4856 *res++ = *pbuf++;
4857 *res++ = *pbuf++;
4860 Py_MEMCPY(res, pbuf, len);
4861 res += len;
4862 rescnt -= len;
4863 while (--width >= len) {
4864 --rescnt;
4865 *res++ = ' ';
4867 if (dict && (argidx < arglen) && c != '%') {
4868 PyErr_SetString(PyExc_TypeError,
4869 "not all arguments converted during string formatting");
4870 Py_XDECREF(temp);
4871 goto error;
4873 Py_XDECREF(temp);
4874 } /* '%' */
4875 } /* until end */
4876 if (argidx < arglen && !dict) {
4877 PyErr_SetString(PyExc_TypeError,
4878 "not all arguments converted during string formatting");
4879 goto error;
4881 if (args_owned) {
4882 Py_DECREF(args);
4884 _PyString_Resize(&result, reslen - rescnt);
4885 return result;
4887 #ifdef Py_USING_UNICODE
4888 unicode:
4889 if (args_owned) {
4890 Py_DECREF(args);
4891 args_owned = 0;
4893 /* Fiddle args right (remove the first argidx arguments) */
4894 if (PyTuple_Check(orig_args) && argidx > 0) {
4895 PyObject *v;
4896 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4897 v = PyTuple_New(n);
4898 if (v == NULL)
4899 goto error;
4900 while (--n >= 0) {
4901 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4902 Py_INCREF(w);
4903 PyTuple_SET_ITEM(v, n, w);
4905 args = v;
4906 } else {
4907 Py_INCREF(orig_args);
4908 args = orig_args;
4910 args_owned = 1;
4911 /* Take what we have of the result and let the Unicode formatting
4912 function format the rest of the input. */
4913 rescnt = res - PyString_AS_STRING(result);
4914 if (_PyString_Resize(&result, rescnt))
4915 goto error;
4916 fmtcnt = PyString_GET_SIZE(format) - \
4917 (fmt - PyString_AS_STRING(format));
4918 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4919 if (format == NULL)
4920 goto error;
4921 v = PyUnicode_Format(format, args);
4922 Py_DECREF(format);
4923 if (v == NULL)
4924 goto error;
4925 /* Paste what we have (result) to what the Unicode formatting
4926 function returned (v) and return the result (or error) */
4927 w = PyUnicode_Concat(result, v);
4928 Py_DECREF(result);
4929 Py_DECREF(v);
4930 Py_DECREF(args);
4931 return w;
4932 #endif /* Py_USING_UNICODE */
4934 error:
4935 Py_DECREF(result);
4936 if (args_owned) {
4937 Py_DECREF(args);
4939 return NULL;
4942 void
4943 PyString_InternInPlace(PyObject **p)
4945 register PyStringObject *s = (PyStringObject *)(*p);
4946 PyObject *t;
4947 if (s == NULL || !PyString_Check(s))
4948 Py_FatalError("PyString_InternInPlace: strings only please!");
4949 /* If it's a string subclass, we don't really know what putting
4950 it in the interned dict might do. */
4951 if (!PyString_CheckExact(s))
4952 return;
4953 if (PyString_CHECK_INTERNED(s))
4954 return;
4955 if (interned == NULL) {
4956 interned = PyDict_New();
4957 if (interned == NULL) {
4958 PyErr_Clear(); /* Don't leave an exception */
4959 return;
4962 t = PyDict_GetItem(interned, (PyObject *)s);
4963 if (t) {
4964 Py_INCREF(t);
4965 Py_DECREF(*p);
4966 *p = t;
4967 return;
4970 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4971 PyErr_Clear();
4972 return;
4974 /* The two references in interned are not counted by refcnt.
4975 The string deallocator will take care of this */
4976 Py_Refcnt(s) -= 2;
4977 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4980 void
4981 PyString_InternImmortal(PyObject **p)
4983 PyString_InternInPlace(p);
4984 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4985 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4986 Py_INCREF(*p);
4991 PyObject *
4992 PyString_InternFromString(const char *cp)
4994 PyObject *s = PyString_FromString(cp);
4995 if (s == NULL)
4996 return NULL;
4997 PyString_InternInPlace(&s);
4998 return s;
5001 void
5002 PyString_Fini(void)
5004 int i;
5005 for (i = 0; i < UCHAR_MAX + 1; i++) {
5006 Py_XDECREF(characters[i]);
5007 characters[i] = NULL;
5009 Py_XDECREF(nullstring);
5010 nullstring = NULL;
5013 void _Py_ReleaseInternedStrings(void)
5015 PyObject *keys;
5016 PyStringObject *s;
5017 Py_ssize_t i, n;
5018 Py_ssize_t immortal_size = 0, mortal_size = 0;
5020 if (interned == NULL || !PyDict_Check(interned))
5021 return;
5022 keys = PyDict_Keys(interned);
5023 if (keys == NULL || !PyList_Check(keys)) {
5024 PyErr_Clear();
5025 return;
5028 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5029 detector, interned strings are not forcibly deallocated; rather, we
5030 give them their stolen references back, and then clear and DECREF
5031 the interned dict. */
5033 n = PyList_GET_SIZE(keys);
5034 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5036 for (i = 0; i < n; i++) {
5037 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5038 switch (s->ob_sstate) {
5039 case SSTATE_NOT_INTERNED:
5040 /* XXX Shouldn't happen */
5041 break;
5042 case SSTATE_INTERNED_IMMORTAL:
5043 Py_Refcnt(s) += 1;
5044 immortal_size += Py_Size(s);
5045 break;
5046 case SSTATE_INTERNED_MORTAL:
5047 Py_Refcnt(s) += 2;
5048 mortal_size += Py_Size(s);
5049 break;
5050 default:
5051 Py_FatalError("Inconsistent interned string state.");
5053 s->ob_sstate = SSTATE_NOT_INTERNED;
5055 fprintf(stderr, "total size of all interned strings: "
5056 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5057 "mortal/immortal\n", mortal_size, immortal_size);
5058 Py_DECREF(keys);
5059 PyDict_Clear(interned);
5060 Py_DECREF(interned);
5061 interned = NULL;