Typo fix
[pytest.git] / Objects / stringobject.c
blob92477eea0c71e8151cfad83d203297788f05e534
1 /* String object implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "Python.h"
7 #include <ctype.h>
9 #ifdef COUNT_ALLOCS
10 int null_strings, one_strings;
11 #endif
13 static PyStringObject *characters[UCHAR_MAX + 1];
14 static PyStringObject *nullstring;
16 /* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
24 static PyObject *interned;
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
52 PyObject *
53 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
55 register PyStringObject *op;
56 assert(size >= 0);
57 if (size == 0 && (op = nullstring) != NULL) {
58 #ifdef COUNT_ALLOCS
59 null_strings++;
60 #endif
61 Py_INCREF(op);
62 return (PyObject *)op;
64 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
67 #ifdef COUNT_ALLOCS
68 one_strings++;
69 #endif
70 Py_INCREF(op);
71 return (PyObject *)op;
74 /* Inline PyObject_NewVar */
75 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
76 if (op == NULL)
77 return PyErr_NoMemory();
78 PyObject_INIT_VAR(op, &PyString_Type, size);
79 op->ob_shash = -1;
80 op->ob_sstate = SSTATE_NOT_INTERNED;
81 if (str != NULL)
82 Py_MEMCPY(op->ob_sval, str, size);
83 op->ob_sval[size] = '\0';
84 /* share short strings */
85 if (size == 0) {
86 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
88 op = (PyStringObject *)t;
89 nullstring = op;
90 Py_INCREF(op);
91 } else if (size == 1 && str != NULL) {
92 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
94 op = (PyStringObject *)t;
95 characters[*str & UCHAR_MAX] = op;
96 Py_INCREF(op);
98 return (PyObject *) op;
101 PyObject *
102 PyString_FromString(const char *str)
104 register size_t size;
105 register PyStringObject *op;
107 assert(str != NULL);
108 size = strlen(str);
109 if (size > PY_SSIZE_T_MAX) {
110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
114 if (size == 0 && (op = nullstring) != NULL) {
115 #ifdef COUNT_ALLOCS
116 null_strings++;
117 #endif
118 Py_INCREF(op);
119 return (PyObject *)op;
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122 #ifdef COUNT_ALLOCS
123 one_strings++;
124 #endif
125 Py_INCREF(op);
126 return (PyObject *)op;
129 /* Inline PyObject_NewVar */
130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
131 if (op == NULL)
132 return PyErr_NoMemory();
133 PyObject_INIT_VAR(op, &PyString_Type, size);
134 op->ob_shash = -1;
135 op->ob_sstate = SSTATE_NOT_INTERNED;
136 Py_MEMCPY(op->ob_sval, str, size+1);
137 /* share short strings */
138 if (size == 0) {
139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
141 op = (PyStringObject *)t;
142 nullstring = op;
143 Py_INCREF(op);
144 } else if (size == 1) {
145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
147 op = (PyStringObject *)t;
148 characters[*str & UCHAR_MAX] = op;
149 Py_INCREF(op);
151 return (PyObject *) op;
154 PyObject *
155 PyString_FromFormatV(const char *format, va_list vargs)
157 va_list count;
158 Py_ssize_t n = 0;
159 const char* f;
160 char *s;
161 PyObject* string;
163 #ifdef VA_LIST_IS_ARRAY
164 Py_MEMCPY(count, vargs, sizeof(va_list));
165 #else
166 #ifdef __va_copy
167 __va_copy(count, vargs);
168 #else
169 count = vargs;
170 #endif
171 #endif
172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
184 ++f;
186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
193 case 'd': case 'u': case 'i': case 'x':
194 (void) va_arg(count, int);
195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
209 * XXX I count 18 -- what's the extra for?
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
219 what's in the argument list) */
220 n += strlen(p);
221 goto expand;
223 } else
224 n++;
226 expand:
227 /* step 2: fill the buffer */
228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
234 s = PyString_AsString(string);
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
239 Py_ssize_t i;
240 int longflag = 0;
241 int size_tflag = 0;
242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
258 longflag = 1;
259 ++f;
261 /* handle the size_t flag. */
262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
263 size_tflag = 1;
264 ++f;
267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
274 else if (size_tflag)
275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
306 Py_MEMCPY(s, p, i);
307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
329 } else
330 *s++ = *f;
333 end:
334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
335 return string;
338 PyObject *
339 PyString_FromFormat(const char *format, ...)
341 PyObject* ret;
342 va_list vargs;
344 #ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346 #else
347 va_start(vargs);
348 #endif
349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
355 PyObject *PyString_Decode(const char *s,
356 Py_ssize_t size,
357 const char *encoding,
358 const char *errors)
360 PyObject *v, *str;
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
370 PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
374 PyObject *v;
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
381 if (encoding == NULL) {
382 #ifdef Py_USING_UNICODE
383 encoding = PyUnicode_GetDefaultEncoding();
384 #else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387 #endif
390 /* Decode via the codec registry */
391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
393 goto onError;
395 return v;
397 onError:
398 return NULL;
401 PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
405 PyObject *v;
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
411 #ifdef Py_USING_UNICODE
412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
420 #endif
421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
429 return v;
431 onError:
432 return NULL;
435 PyObject *PyString_Encode(const char *s,
436 Py_ssize_t size,
437 const char *encoding,
438 const char *errors)
440 PyObject *v, *str;
442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
450 PyObject *PyString_AsEncodedObject(PyObject *str,
451 const char *encoding,
452 const char *errors)
454 PyObject *v;
456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
461 if (encoding == NULL) {
462 #ifdef Py_USING_UNICODE
463 encoding = PyUnicode_GetDefaultEncoding();
464 #else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467 #endif
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
475 return v;
477 onError:
478 return NULL;
481 PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
485 PyObject *v;
487 v = PyString_AsEncodedObject(str, encoding, errors);
488 if (v == NULL)
489 goto onError;
491 #ifdef Py_USING_UNICODE
492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
500 #endif
501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
509 return v;
511 onError:
512 return NULL;
515 static void
516 string_dealloc(PyObject *op)
518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
533 default:
534 Py_FatalError("Inconsistent interned string state.");
536 op->ob_type->tp_free(op);
539 /* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
544 PyObject *PyString_DecodeEscape(const char *s,
545 Py_ssize_t len,
546 const char *errors,
547 Py_ssize_t unicode,
548 const char *recode_encoding)
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
555 v = PyString_FromStringAndSize((char *)NULL, newlen);
556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
562 non_esc:
563 #ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
568 Py_ssize_t rn;
569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
581 /* Append bytes to output buffer. */
582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
585 Py_MEMCPY(p, r, rn);
586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
592 #else
593 *p++ = *s++;
594 #endif
595 continue;
597 s++;
598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
624 *p++ = c;
625 break;
626 case 'x':
627 if (isxdigit(Py_CHARMASK(s[0]))
628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
650 if (!errors || strcmp(errors, "strict") == 0) {
651 PyErr_SetString(PyExc_ValueError,
652 "invalid \\x escape");
653 goto failed;
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
664 goto failed;
666 #ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
671 PyErr_SetString(PyExc_ValueError,
672 "Unicode escapes not legal "
673 "when Unicode disabled");
674 goto failed;
676 #endif
677 default:
678 *p++ = '\\';
679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
684 if (p-buf < newlen)
685 _PyString_Resize(&v, p - buf);
686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
692 /* -------------------------------------------------------------------- */
693 /* object api */
695 static Py_ssize_t
696 string_getsize(register PyObject *op)
698 char *s;
699 Py_ssize_t len;
700 if (PyString_AsStringAndSize(op, &s, &len))
701 return -1;
702 return len;
705 static /*const*/ char *
706 string_getbuffer(register PyObject *op)
708 char *s;
709 Py_ssize_t len;
710 if (PyString_AsStringAndSize(op, &s, &len))
711 return NULL;
712 return s;
715 Py_ssize_t
716 PyString_Size(register PyObject *op)
718 if (!PyString_Check(op))
719 return string_getsize(op);
720 return ((PyStringObject *)op) -> ob_size;
723 /*const*/ char *
724 PyString_AsString(register PyObject *op)
726 if (!PyString_Check(op))
727 return string_getbuffer(op);
728 return ((PyStringObject *)op) -> ob_sval;
732 PyString_AsStringAndSize(register PyObject *obj,
733 register char **s,
734 register Py_ssize_t *len)
736 if (s == NULL) {
737 PyErr_BadInternalCall();
738 return -1;
741 if (!PyString_Check(obj)) {
742 #ifdef Py_USING_UNICODE
743 if (PyUnicode_Check(obj)) {
744 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745 if (obj == NULL)
746 return -1;
748 else
749 #endif
751 PyErr_Format(PyExc_TypeError,
752 "expected string or Unicode object, "
753 "%.200s found", obj->ob_type->tp_name);
754 return -1;
758 *s = PyString_AS_STRING(obj);
759 if (len != NULL)
760 *len = PyString_GET_SIZE(obj);
761 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
762 PyErr_SetString(PyExc_TypeError,
763 "expected string without null bytes");
764 return -1;
766 return 0;
769 /* -------------------------------------------------------------------- */
770 /* Methods */
772 #define STRINGLIB_CHAR char
774 #define STRINGLIB_CMP memcmp
775 #define STRINGLIB_LEN PyString_GET_SIZE
776 #define STRINGLIB_NEW PyString_FromStringAndSize
777 #define STRINGLIB_STR PyString_AS_STRING
779 #define STRINGLIB_EMPTY nullstring
781 #include "stringlib/fastsearch.h"
783 #include "stringlib/count.h"
784 #include "stringlib/find.h"
785 #include "stringlib/partition.h"
788 static int
789 string_print(PyStringObject *op, FILE *fp, int flags)
791 Py_ssize_t i;
792 char c;
793 int quote;
795 /* XXX Ought to check for interrupts when writing long strings */
796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
806 if (flags & Py_PRINT_RAW) {
807 #ifdef __VMS
808 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
809 #else
810 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
811 #endif
812 return 0;
815 /* figure out which quote to use; single is preferred */
816 quote = '\'';
817 if (memchr(op->ob_sval, '\'', op->ob_size) &&
818 !memchr(op->ob_sval, '"', op->ob_size))
819 quote = '"';
821 fputc(quote, fp);
822 for (i = 0; i < op->ob_size; i++) {
823 c = op->ob_sval[i];
824 if (c == quote || c == '\\')
825 fprintf(fp, "\\%c", c);
826 else if (c == '\t')
827 fprintf(fp, "\\t");
828 else if (c == '\n')
829 fprintf(fp, "\\n");
830 else if (c == '\r')
831 fprintf(fp, "\\r");
832 else if (c < ' ' || c >= 0x7f)
833 fprintf(fp, "\\x%02x", c & 0xff);
834 else
835 fputc(c, fp);
837 fputc(quote, fp);
838 return 0;
841 PyObject *
842 PyString_Repr(PyObject *obj, int smartquotes)
844 register PyStringObject* op = (PyStringObject*) obj;
845 size_t newsize = 2 + 4 * op->ob_size;
846 PyObject *v;
847 if (newsize > PY_SSIZE_T_MAX) {
848 PyErr_SetString(PyExc_OverflowError,
849 "string is too large to make repr");
851 v = PyString_FromStringAndSize((char *)NULL, newsize);
852 if (v == NULL) {
853 return NULL;
855 else {
856 register Py_ssize_t i;
857 register char c;
858 register char *p;
859 int quote;
861 /* figure out which quote to use; single is preferred */
862 quote = '\'';
863 if (smartquotes &&
864 memchr(op->ob_sval, '\'', op->ob_size) &&
865 !memchr(op->ob_sval, '"', op->ob_size))
866 quote = '"';
868 p = PyString_AS_STRING(v);
869 *p++ = quote;
870 for (i = 0; i < op->ob_size; i++) {
871 /* There's at least enough room for a hex escape
872 and a closing quote. */
873 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
874 c = op->ob_sval[i];
875 if (c == quote || c == '\\')
876 *p++ = '\\', *p++ = c;
877 else if (c == '\t')
878 *p++ = '\\', *p++ = 't';
879 else if (c == '\n')
880 *p++ = '\\', *p++ = 'n';
881 else if (c == '\r')
882 *p++ = '\\', *p++ = 'r';
883 else if (c < ' ' || c >= 0x7f) {
884 /* For performance, we don't want to call
885 PyOS_snprintf here (extra layers of
886 function call). */
887 sprintf(p, "\\x%02x", c & 0xff);
888 p += 4;
890 else
891 *p++ = c;
893 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
894 *p++ = quote;
895 *p = '\0';
896 _PyString_Resize(
897 &v, (p - PyString_AS_STRING(v)));
898 return v;
902 static PyObject *
903 string_repr(PyObject *op)
905 return PyString_Repr(op, 1);
908 static PyObject *
909 string_str(PyObject *s)
911 assert(PyString_Check(s));
912 if (PyString_CheckExact(s)) {
913 Py_INCREF(s);
914 return s;
916 else {
917 /* Subtype -- return genuine string with the same value. */
918 PyStringObject *t = (PyStringObject *) s;
919 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
923 static Py_ssize_t
924 string_length(PyStringObject *a)
926 return a->ob_size;
929 static PyObject *
930 string_concat(register PyStringObject *a, register PyObject *bb)
932 register Py_ssize_t size;
933 register PyStringObject *op;
934 if (!PyString_Check(bb)) {
935 #ifdef Py_USING_UNICODE
936 if (PyUnicode_Check(bb))
937 return PyUnicode_Concat((PyObject *)a, bb);
938 #endif
939 PyErr_Format(PyExc_TypeError,
940 "cannot concatenate 'str' and '%.200s' objects",
941 bb->ob_type->tp_name);
942 return NULL;
944 #define b ((PyStringObject *)bb)
945 /* Optimize cases with empty left or right operand */
946 if ((a->ob_size == 0 || b->ob_size == 0) &&
947 PyString_CheckExact(a) && PyString_CheckExact(b)) {
948 if (a->ob_size == 0) {
949 Py_INCREF(bb);
950 return bb;
952 Py_INCREF(a);
953 return (PyObject *)a;
955 size = a->ob_size + b->ob_size;
956 if (size < 0) {
957 PyErr_SetString(PyExc_OverflowError,
958 "strings are too large to concat");
959 return NULL;
962 /* Inline PyObject_NewVar */
963 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
964 if (op == NULL)
965 return PyErr_NoMemory();
966 PyObject_INIT_VAR(op, &PyString_Type, size);
967 op->ob_shash = -1;
968 op->ob_sstate = SSTATE_NOT_INTERNED;
969 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
970 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
971 op->ob_sval[size] = '\0';
972 return (PyObject *) op;
973 #undef b
976 static PyObject *
977 string_repeat(register PyStringObject *a, register Py_ssize_t n)
979 register Py_ssize_t i;
980 register Py_ssize_t j;
981 register Py_ssize_t size;
982 register PyStringObject *op;
983 size_t nbytes;
984 if (n < 0)
985 n = 0;
986 /* watch out for overflows: the size can overflow int,
987 * and the # of bytes needed can overflow size_t
989 size = a->ob_size * n;
990 if (n && size / n != a->ob_size) {
991 PyErr_SetString(PyExc_OverflowError,
992 "repeated string is too long");
993 return NULL;
995 if (size == a->ob_size && PyString_CheckExact(a)) {
996 Py_INCREF(a);
997 return (PyObject *)a;
999 nbytes = (size_t)size;
1000 if (nbytes + sizeof(PyStringObject) <= nbytes) {
1001 PyErr_SetString(PyExc_OverflowError,
1002 "repeated string is too long");
1003 return NULL;
1005 op = (PyStringObject *)
1006 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1007 if (op == NULL)
1008 return PyErr_NoMemory();
1009 PyObject_INIT_VAR(op, &PyString_Type, size);
1010 op->ob_shash = -1;
1011 op->ob_sstate = SSTATE_NOT_INTERNED;
1012 op->ob_sval[size] = '\0';
1013 if (a->ob_size == 1 && n > 0) {
1014 memset(op->ob_sval, a->ob_sval[0] , n);
1015 return (PyObject *) op;
1017 i = 0;
1018 if (i < size) {
1019 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
1020 i = a->ob_size;
1022 while (i < size) {
1023 j = (i <= size-i) ? i : size-i;
1024 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1025 i += j;
1027 return (PyObject *) op;
1030 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1032 static PyObject *
1033 string_slice(register PyStringObject *a, register Py_ssize_t i,
1034 register Py_ssize_t j)
1035 /* j -- may be negative! */
1037 if (i < 0)
1038 i = 0;
1039 if (j < 0)
1040 j = 0; /* Avoid signed/unsigned bug in next line */
1041 if (j > a->ob_size)
1042 j = a->ob_size;
1043 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1044 /* It's the same as a */
1045 Py_INCREF(a);
1046 return (PyObject *)a;
1048 if (j < i)
1049 j = i;
1050 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1053 static int
1054 string_contains(PyObject *str_obj, PyObject *sub_obj)
1056 if (!PyString_CheckExact(sub_obj)) {
1057 #ifdef Py_USING_UNICODE
1058 if (PyUnicode_Check(sub_obj))
1059 return PyUnicode_Contains(str_obj, sub_obj);
1060 #endif
1061 if (!PyString_Check(sub_obj)) {
1062 PyErr_SetString(PyExc_TypeError,
1063 "'in <string>' requires string as left operand");
1064 return -1;
1068 return stringlib_contains_obj(str_obj, sub_obj);
1071 static PyObject *
1072 string_item(PyStringObject *a, register Py_ssize_t i)
1074 char pchar;
1075 PyObject *v;
1076 if (i < 0 || i >= a->ob_size) {
1077 PyErr_SetString(PyExc_IndexError, "string index out of range");
1078 return NULL;
1080 pchar = a->ob_sval[i];
1081 v = (PyObject *)characters[pchar & UCHAR_MAX];
1082 if (v == NULL)
1083 v = PyString_FromStringAndSize(&pchar, 1);
1084 else {
1085 #ifdef COUNT_ALLOCS
1086 one_strings++;
1087 #endif
1088 Py_INCREF(v);
1090 return v;
1093 static PyObject*
1094 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1096 int c;
1097 Py_ssize_t len_a, len_b;
1098 Py_ssize_t min_len;
1099 PyObject *result;
1101 /* Make sure both arguments are strings. */
1102 if (!(PyString_Check(a) && PyString_Check(b))) {
1103 result = Py_NotImplemented;
1104 goto out;
1106 if (a == b) {
1107 switch (op) {
1108 case Py_EQ:case Py_LE:case Py_GE:
1109 result = Py_True;
1110 goto out;
1111 case Py_NE:case Py_LT:case Py_GT:
1112 result = Py_False;
1113 goto out;
1116 if (op == Py_EQ) {
1117 /* Supporting Py_NE here as well does not save
1118 much time, since Py_NE is rarely used. */
1119 if (a->ob_size == b->ob_size
1120 && (a->ob_sval[0] == b->ob_sval[0]
1121 && memcmp(a->ob_sval, b->ob_sval,
1122 a->ob_size) == 0)) {
1123 result = Py_True;
1124 } else {
1125 result = Py_False;
1127 goto out;
1129 len_a = a->ob_size; len_b = b->ob_size;
1130 min_len = (len_a < len_b) ? len_a : len_b;
1131 if (min_len > 0) {
1132 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1133 if (c==0)
1134 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1135 }else
1136 c = 0;
1137 if (c == 0)
1138 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1139 switch (op) {
1140 case Py_LT: c = c < 0; break;
1141 case Py_LE: c = c <= 0; break;
1142 case Py_EQ: assert(0); break; /* unreachable */
1143 case Py_NE: c = c != 0; break;
1144 case Py_GT: c = c > 0; break;
1145 case Py_GE: c = c >= 0; break;
1146 default:
1147 result = Py_NotImplemented;
1148 goto out;
1150 result = c ? Py_True : Py_False;
1151 out:
1152 Py_INCREF(result);
1153 return result;
1157 _PyString_Eq(PyObject *o1, PyObject *o2)
1159 PyStringObject *a = (PyStringObject*) o1;
1160 PyStringObject *b = (PyStringObject*) o2;
1161 return a->ob_size == b->ob_size
1162 && *a->ob_sval == *b->ob_sval
1163 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
1166 static long
1167 string_hash(PyStringObject *a)
1169 register Py_ssize_t len;
1170 register unsigned char *p;
1171 register long x;
1173 if (a->ob_shash != -1)
1174 return a->ob_shash;
1175 len = a->ob_size;
1176 p = (unsigned char *) a->ob_sval;
1177 x = *p << 7;
1178 while (--len >= 0)
1179 x = (1000003*x) ^ *p++;
1180 x ^= a->ob_size;
1181 if (x == -1)
1182 x = -2;
1183 a->ob_shash = x;
1184 return x;
1187 #define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1189 static PyObject*
1190 string_subscript(PyStringObject* self, PyObject* item)
1192 PyNumberMethods *nb = item->ob_type->tp_as_number;
1193 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1194 Py_ssize_t i = nb->nb_index(item);
1195 if (i == -1 && PyErr_Occurred())
1196 return NULL;
1197 if (i < 0)
1198 i += PyString_GET_SIZE(self);
1199 return string_item(self, i);
1201 else if (PySlice_Check(item)) {
1202 Py_ssize_t start, stop, step, slicelength, cur, i;
1203 char* source_buf;
1204 char* result_buf;
1205 PyObject* result;
1207 if (PySlice_GetIndicesEx((PySliceObject*)item,
1208 PyString_GET_SIZE(self),
1209 &start, &stop, &step, &slicelength) < 0) {
1210 return NULL;
1213 if (slicelength <= 0) {
1214 return PyString_FromStringAndSize("", 0);
1216 else {
1217 source_buf = PyString_AsString((PyObject*)self);
1218 result_buf = (char *)PyMem_Malloc(slicelength);
1219 if (result_buf == NULL)
1220 return PyErr_NoMemory();
1222 for (cur = start, i = 0; i < slicelength;
1223 cur += step, i++) {
1224 result_buf[i] = source_buf[cur];
1227 result = PyString_FromStringAndSize(result_buf,
1228 slicelength);
1229 PyMem_Free(result_buf);
1230 return result;
1233 else {
1234 PyErr_SetString(PyExc_TypeError,
1235 "string indices must be integers");
1236 return NULL;
1240 static Py_ssize_t
1241 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1243 if ( index != 0 ) {
1244 PyErr_SetString(PyExc_SystemError,
1245 "accessing non-existent string segment");
1246 return -1;
1248 *ptr = (void *)self->ob_sval;
1249 return self->ob_size;
1252 static Py_ssize_t
1253 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1255 PyErr_SetString(PyExc_TypeError,
1256 "Cannot use string as modifiable buffer");
1257 return -1;
1260 static Py_ssize_t
1261 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1263 if ( lenp )
1264 *lenp = self->ob_size;
1265 return 1;
1268 static Py_ssize_t
1269 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1271 if ( index != 0 ) {
1272 PyErr_SetString(PyExc_SystemError,
1273 "accessing non-existent string segment");
1274 return -1;
1276 *ptr = self->ob_sval;
1277 return self->ob_size;
1280 static PySequenceMethods string_as_sequence = {
1281 (lenfunc)string_length, /*sq_length*/
1282 (binaryfunc)string_concat, /*sq_concat*/
1283 (ssizeargfunc)string_repeat, /*sq_repeat*/
1284 (ssizeargfunc)string_item, /*sq_item*/
1285 (ssizessizeargfunc)string_slice, /*sq_slice*/
1286 0, /*sq_ass_item*/
1287 0, /*sq_ass_slice*/
1288 (objobjproc)string_contains /*sq_contains*/
1291 static PyMappingMethods string_as_mapping = {
1292 (lenfunc)string_length,
1293 (binaryfunc)string_subscript,
1297 static PyBufferProcs string_as_buffer = {
1298 (readbufferproc)string_buffer_getreadbuf,
1299 (writebufferproc)string_buffer_getwritebuf,
1300 (segcountproc)string_buffer_getsegcount,
1301 (charbufferproc)string_buffer_getcharbuf,
1306 #define LEFTSTRIP 0
1307 #define RIGHTSTRIP 1
1308 #define BOTHSTRIP 2
1310 /* Arrays indexed by above */
1311 static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1313 #define STRIPNAME(i) (stripformat[i]+3)
/* True when the `length' bytes of `pattern' occur in `target' starting at
   `offset'.  The first and last bytes are compared before memcmp() checks
   the bytes after the first.
   Don't call if length < 2: the memcmp size is length-2, which would be
   negative.
   Every argument is parenthesized so that expression arguments (e.g. a
   `buf + 1' target) expand correctly. */
#define Py_STRING_MATCH(target, offset, pattern, length)                 \
    ((target)[(offset)] == (pattern)[0] &&                               \
     (target)[(offset) + (length) - 1] == (pattern)[(length) - 1] &&     \
     !memcmp((target) + (offset) + 1, (pattern) + 1, (length) - 2))
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field-delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited,
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit' splits: maxsplit+1
   elements (5 splits gives 6 elements), capped at MAX_PREALLOC.  The
   argument is parenthesized so expression arguments expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
/* Append data[left:right] to `list' as a new string.
   Relies on the expanding function providing locals `str' and `list' and
   an `onError' label; jumps there if the string cannot be created or the
   append fails.  The temporary reference held in `str' is always dropped.
   NOTE: expands to several statements and is not wrapped in a block. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str) != 0) {                        \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    Py_DECREF(str);
/* Add data[left:right] to `list' as its count-th element and bump `count'.
   While count < MAX_PREALLOC the new string is stored directly into the
   list slot with PyList_SET_ITEM (which takes over the reference);
   afterwards PyList_Append is used and the temporary reference dropped.
   Relies on locals `str', `list', `count' and an `onError' label in the
   expanding function. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count >= MAX_PREALLOC) {                                \
        if (PyList_Append(list, str) != 0) {                    \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    }                                                           \
    else {                                                      \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    count++; }
/* Always force the list to the expected size: overwrite the list's
   ob_size with the local `count' of elements actually stored.  The
   argument is parenthesized so the cast applies to the whole expression. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count
/* Cursor helpers over the byte string s.  The forward forms advance i
   while i < len; the reverse forms step i backwards while i >= 0 (so i may
   end at -1).  SPACE variants skip isspace() bytes, NONSPACE variants skip
   everything else.  `i' must be a modifiable lvalue.  Arguments are
   parenthesized so that expression arguments expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i) < (len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i) < (len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i) >= 0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i) >= 0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1373 Py_LOCAL_INLINE(PyObject *)
1374 split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1376 Py_ssize_t i, j, count=0;
1377 PyObject *str;
1378 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1380 if (list == NULL)
1381 return NULL;
1383 i = j = 0;
1385 while (maxsplit-- > 0) {
1386 SKIP_SPACE(s, i, len);
1387 if (i==len) break;
1388 j = i; i++;
1389 SKIP_NONSPACE(s, i, len);
1390 SPLIT_ADD(s, j, i);
1393 if (i < len) {
1394 /* Only occurs when maxsplit was reached */
1395 /* Skip any remaining whitespace and copy to end of string */
1396 SKIP_SPACE(s, i, len);
1397 if (i != len)
1398 SPLIT_ADD(s, i, len);
1400 FIX_PREALLOC_SIZE(list);
1401 return list;
1402 onError:
1403 Py_DECREF(list);
1404 return NULL;
1407 Py_LOCAL_INLINE(PyObject *)
1408 split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1410 register Py_ssize_t i, j, count=0;
1411 PyObject *str;
1412 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1414 if (list == NULL)
1415 return NULL;
1417 i = j = 0;
1418 while ((j < len) && (maxcount-- > 0)) {
1419 for(; j<len; j++) {
1420 /* I found that using memchr makes no difference */
1421 if (s[j] == ch) {
1422 SPLIT_ADD(s, i, j);
1423 i = j = j + 1;
1424 break;
1428 if (i <= len) {
1429 SPLIT_ADD(s, i, len);
1431 FIX_PREALLOC_SIZE(list);
1432 return list;
1434 onError:
1435 Py_DECREF(list);
1436 return NULL;
1439 PyDoc_STRVAR(split__doc__,
1440 "S.split([sep [,maxsplit]]) -> list of strings\n\
1442 Return a list of the words in the string S, using sep as the\n\
1443 delimiter string. If maxsplit is given, at most maxsplit\n\
1444 splits are done. If sep is not specified or is None, any\n\
1445 whitespace string is a separator.");
1447 static PyObject *
1448 string_split(PyStringObject *self, PyObject *args)
1450 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1451 Py_ssize_t maxsplit = -1, count=0;
1452 const char *s = PyString_AS_STRING(self), *sub;
1453 PyObject *list, *str, *subobj = Py_None;
1454 #ifdef USE_FAST
1455 Py_ssize_t pos;
1456 #endif
1458 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1459 return NULL;
1460 if (maxsplit < 0)
1461 maxsplit = PY_SSIZE_T_MAX;
1462 if (subobj == Py_None)
1463 return split_whitespace(s, len, maxsplit);
1464 if (PyString_Check(subobj)) {
1465 sub = PyString_AS_STRING(subobj);
1466 n = PyString_GET_SIZE(subobj);
1468 #ifdef Py_USING_UNICODE
1469 else if (PyUnicode_Check(subobj))
1470 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1471 #endif
1472 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1473 return NULL;
1475 if (n == 0) {
1476 PyErr_SetString(PyExc_ValueError, "empty separator");
1477 return NULL;
1479 else if (n == 1)
1480 return split_char(s, len, sub[0], maxsplit);
1482 list = PyList_New(PREALLOC_SIZE(maxsplit));
1483 if (list == NULL)
1484 return NULL;
1486 #ifdef USE_FAST
1487 i = j = 0;
1488 while (maxsplit-- > 0) {
1489 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1490 if (pos < 0)
1491 break;
1492 j = i+pos;
1493 SPLIT_ADD(s, i, j);
1494 i = j + n;
1497 #else
1498 i = j = 0;
1499 while ((j+n <= len) && (maxsplit-- > 0)) {
1500 for (; j+n <= len; j++) {
1501 if (Py_STRING_MATCH(s, j, sub, n)) {
1502 SPLIT_ADD(s, i, j);
1503 i = j = j + n;
1504 break;
1508 #endif
1509 SPLIT_ADD(s, i, len);
1510 FIX_PREALLOC_SIZE(list);
1511 return list;
1513 onError:
1514 Py_DECREF(list);
1515 return NULL;
1518 PyDoc_STRVAR(partition__doc__,
1519 "S.partition(sep) -> (head, sep, tail)\n\
1521 Searches for the separator sep in S, and returns the part before it,\n\
1522 the separator itself, and the part after it. If the separator is not\n\
1523 found, returns S and two empty strings.");
1525 static PyObject *
1526 string_partition(PyStringObject *self, PyObject *sep_obj)
1528 const char *sep;
1529 Py_ssize_t sep_len;
1531 if (PyString_Check(sep_obj)) {
1532 sep = PyString_AS_STRING(sep_obj);
1533 sep_len = PyString_GET_SIZE(sep_obj);
1535 #ifdef Py_USING_UNICODE
1536 else if (PyUnicode_Check(sep_obj))
1537 return PyUnicode_Partition((PyObject *) self, sep_obj);
1538 #endif
1539 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1540 return NULL;
1542 return stringlib_partition(
1543 (PyObject*) self,
1544 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1545 sep_obj, sep, sep_len
1549 PyDoc_STRVAR(rpartition__doc__,
1550 "S.rpartition(sep) -> (head, sep, tail)\n\
1552 Searches for the separator sep in S, starting at the end of S, and returns\n\
1553 the part before it, the separator itself, and the part after it. If the\n\
1554 separator is not found, returns S and two empty strings.");
1556 static PyObject *
1557 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1559 const char *sep;
1560 Py_ssize_t sep_len;
1562 if (PyString_Check(sep_obj)) {
1563 sep = PyString_AS_STRING(sep_obj);
1564 sep_len = PyString_GET_SIZE(sep_obj);
1566 #ifdef Py_USING_UNICODE
1567 else if (PyUnicode_Check(sep_obj))
1568 return PyUnicode_Partition((PyObject *) self, sep_obj);
1569 #endif
1570 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1571 return NULL;
1573 return stringlib_rpartition(
1574 (PyObject*) self,
1575 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1576 sep_obj, sep, sep_len
1580 Py_LOCAL_INLINE(PyObject *)
1581 rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1583 Py_ssize_t i, j, count=0;
1584 PyObject *str;
1585 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1587 if (list == NULL)
1588 return NULL;
1590 i = j = len-1;
1592 while (maxsplit-- > 0) {
1593 RSKIP_SPACE(s, i);
1594 if (i<0) break;
1595 j = i; i--;
1596 RSKIP_NONSPACE(s, i);
1597 SPLIT_ADD(s, i + 1, j + 1);
1599 if (i >= 0) {
1600 /* Only occurs when maxsplit was reached */
1601 /* Skip any remaining whitespace and copy to beginning of string */
1602 RSKIP_SPACE(s, i);
1603 if (i >= 0)
1604 SPLIT_ADD(s, 0, i + 1);
1607 FIX_PREALLOC_SIZE(list);
1608 if (PyList_Reverse(list) < 0)
1609 goto onError;
1610 return list;
1611 onError:
1612 Py_DECREF(list);
1613 return NULL;
1616 Py_LOCAL_INLINE(PyObject *)
1617 rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1619 register Py_ssize_t i, j, count=0;
1620 PyObject *str;
1621 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1623 if (list == NULL)
1624 return NULL;
1626 i = j = len - 1;
1627 while ((i >= 0) && (maxcount-- > 0)) {
1628 for (; i >= 0; i--) {
1629 if (s[i] == ch) {
1630 SPLIT_ADD(s, i + 1, j + 1);
1631 j = i = i - 1;
1632 break;
1636 if (j >= -1) {
1637 SPLIT_ADD(s, 0, j + 1);
1639 FIX_PREALLOC_SIZE(list);
1640 if (PyList_Reverse(list) < 0)
1641 goto onError;
1642 return list;
1644 onError:
1645 Py_DECREF(list);
1646 return NULL;
1649 PyDoc_STRVAR(rsplit__doc__,
1650 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1652 Return a list of the words in the string S, using sep as the\n\
1653 delimiter string, starting at the end of the string and working\n\
1654 to the front. If maxsplit is given, at most maxsplit splits are\n\
1655 done. If sep is not specified or is None, any whitespace string\n\
1656 is a separator.");
1658 static PyObject *
1659 string_rsplit(PyStringObject *self, PyObject *args)
1661 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1662 Py_ssize_t maxsplit = -1, count=0;
1663 const char *s = PyString_AS_STRING(self), *sub;
1664 PyObject *list, *str, *subobj = Py_None;
1666 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1667 return NULL;
1668 if (maxsplit < 0)
1669 maxsplit = PY_SSIZE_T_MAX;
1670 if (subobj == Py_None)
1671 return rsplit_whitespace(s, len, maxsplit);
1672 if (PyString_Check(subobj)) {
1673 sub = PyString_AS_STRING(subobj);
1674 n = PyString_GET_SIZE(subobj);
1676 #ifdef Py_USING_UNICODE
1677 else if (PyUnicode_Check(subobj))
1678 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1679 #endif
1680 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1681 return NULL;
1683 if (n == 0) {
1684 PyErr_SetString(PyExc_ValueError, "empty separator");
1685 return NULL;
1687 else if (n == 1)
1688 return rsplit_char(s, len, sub[0], maxsplit);
1690 list = PyList_New(PREALLOC_SIZE(maxsplit));
1691 if (list == NULL)
1692 return NULL;
1694 j = len;
1695 i = j - n;
1697 while ( (i >= 0) && (maxsplit-- > 0) ) {
1698 for (; i>=0; i--) {
1699 if (Py_STRING_MATCH(s, i, sub, n)) {
1700 SPLIT_ADD(s, i + n, j);
1701 j = i;
1702 i -= n;
1703 break;
1707 SPLIT_ADD(s, 0, j);
1708 FIX_PREALLOC_SIZE(list);
1709 if (PyList_Reverse(list) < 0)
1710 goto onError;
1711 return list;
1713 onError:
1714 Py_DECREF(list);
1715 return NULL;
1719 PyDoc_STRVAR(join__doc__,
1720 "S.join(sequence) -> string\n\
1722 Return a string which is the concatenation of the strings in the\n\
1723 sequence. The separator between elements is S.");
1725 static PyObject *
1726 string_join(PyStringObject *self, PyObject *orig)
1728 char *sep = PyString_AS_STRING(self);
1729 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1730 PyObject *res = NULL;
1731 char *p;
1732 Py_ssize_t seqlen = 0;
1733 size_t sz = 0;
1734 Py_ssize_t i;
1735 PyObject *seq, *item;
1737 seq = PySequence_Fast(orig, "");
1738 if (seq == NULL) {
1739 return NULL;
1742 seqlen = PySequence_Size(seq);
1743 if (seqlen == 0) {
1744 Py_DECREF(seq);
1745 return PyString_FromString("");
1747 if (seqlen == 1) {
1748 item = PySequence_Fast_GET_ITEM(seq, 0);
1749 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1750 Py_INCREF(item);
1751 Py_DECREF(seq);
1752 return item;
1756 /* There are at least two things to join, or else we have a subclass
1757 * of the builtin types in the sequence.
1758 * Do a pre-pass to figure out the total amount of space we'll
1759 * need (sz), see whether any argument is absurd, and defer to
1760 * the Unicode join if appropriate.
1762 for (i = 0; i < seqlen; i++) {
1763 const size_t old_sz = sz;
1764 item = PySequence_Fast_GET_ITEM(seq, i);
1765 if (!PyString_Check(item)){
1766 #ifdef Py_USING_UNICODE
1767 if (PyUnicode_Check(item)) {
1768 /* Defer to Unicode join.
1769 * CAUTION: There's no gurantee that the
1770 * original sequence can be iterated over
1771 * again, so we must pass seq here.
1773 PyObject *result;
1774 result = PyUnicode_Join((PyObject *)self, seq);
1775 Py_DECREF(seq);
1776 return result;
1778 #endif
1779 PyErr_Format(PyExc_TypeError,
1780 "sequence item %zd: expected string,"
1781 " %.80s found",
1782 i, item->ob_type->tp_name);
1783 Py_DECREF(seq);
1784 return NULL;
1786 sz += PyString_GET_SIZE(item);
1787 if (i != 0)
1788 sz += seplen;
1789 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1790 PyErr_SetString(PyExc_OverflowError,
1791 "join() result is too long for a Python string");
1792 Py_DECREF(seq);
1793 return NULL;
1797 /* Allocate result space. */
1798 res = PyString_FromStringAndSize((char*)NULL, sz);
1799 if (res == NULL) {
1800 Py_DECREF(seq);
1801 return NULL;
1804 /* Catenate everything. */
1805 p = PyString_AS_STRING(res);
1806 for (i = 0; i < seqlen; ++i) {
1807 size_t n;
1808 item = PySequence_Fast_GET_ITEM(seq, i);
1809 n = PyString_GET_SIZE(item);
1810 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1811 p += n;
1812 if (i < seqlen - 1) {
1813 Py_MEMCPY(p, sep, seplen);
1814 p += seplen;
1818 Py_DECREF(seq);
1819 return res;
1822 PyObject *
1823 _PyString_Join(PyObject *sep, PyObject *x)
1825 assert(sep != NULL && PyString_Check(sep));
1826 assert(x != NULL);
1827 return string_join((PyStringObject *)sep, x);
1830 Py_LOCAL_INLINE(void)
1831 string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1833 if (*end > len)
1834 *end = len;
1835 else if (*end < 0)
1836 *end += len;
1837 if (*end < 0)
1838 *end = 0;
1839 if (*start < 0)
1840 *start += len;
1841 if (*start < 0)
1842 *start = 0;
1845 Py_LOCAL_INLINE(Py_ssize_t)
1846 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1848 PyObject *subobj;
1849 const char *sub;
1850 Py_ssize_t sub_len;
1851 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1853 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1854 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1855 return -2;
1856 if (PyString_Check(subobj)) {
1857 sub = PyString_AS_STRING(subobj);
1858 sub_len = PyString_GET_SIZE(subobj);
1860 #ifdef Py_USING_UNICODE
1861 else if (PyUnicode_Check(subobj))
1862 return PyUnicode_Find(
1863 (PyObject *)self, subobj, start, end, dir);
1864 #endif
1865 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1866 /* XXX - the "expected a character buffer object" is pretty
1867 confusing for a non-expert. remap to something else ? */
1868 return -2;
1870 if (dir > 0)
1871 return stringlib_find_slice(
1872 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1873 sub, sub_len, start, end);
1874 else
1875 return stringlib_rfind_slice(
1876 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1877 sub, sub_len, start, end);
1881 PyDoc_STRVAR(find__doc__,
1882 "S.find(sub [,start [,end]]) -> int\n\
1884 Return the lowest index in S where substring sub is found,\n\
1885 such that sub is contained within s[start,end]. Optional\n\
1886 arguments start and end are interpreted as in slice notation.\n\
1888 Return -1 on failure.");
1890 static PyObject *
1891 string_find(PyStringObject *self, PyObject *args)
1893 Py_ssize_t result = string_find_internal(self, args, +1);
1894 if (result == -2)
1895 return NULL;
1896 return PyInt_FromSsize_t(result);
1900 PyDoc_STRVAR(index__doc__,
1901 "S.index(sub [,start [,end]]) -> int\n\
1903 Like S.find() but raise ValueError when the substring is not found.");
1905 static PyObject *
1906 string_index(PyStringObject *self, PyObject *args)
1908 Py_ssize_t result = string_find_internal(self, args, +1);
1909 if (result == -2)
1910 return NULL;
1911 if (result == -1) {
1912 PyErr_SetString(PyExc_ValueError,
1913 "substring not found");
1914 return NULL;
1916 return PyInt_FromSsize_t(result);
1920 PyDoc_STRVAR(rfind__doc__,
1921 "S.rfind(sub [,start [,end]]) -> int\n\
1923 Return the highest index in S where substring sub is found,\n\
1924 such that sub is contained within s[start,end]. Optional\n\
1925 arguments start and end are interpreted as in slice notation.\n\
1927 Return -1 on failure.");
1929 static PyObject *
1930 string_rfind(PyStringObject *self, PyObject *args)
1932 Py_ssize_t result = string_find_internal(self, args, -1);
1933 if (result == -2)
1934 return NULL;
1935 return PyInt_FromSsize_t(result);
1939 PyDoc_STRVAR(rindex__doc__,
1940 "S.rindex(sub [,start [,end]]) -> int\n\
1942 Like S.rfind() but raise ValueError when the substring is not found.");
1944 static PyObject *
1945 string_rindex(PyStringObject *self, PyObject *args)
1947 Py_ssize_t result = string_find_internal(self, args, -1);
1948 if (result == -2)
1949 return NULL;
1950 if (result == -1) {
1951 PyErr_SetString(PyExc_ValueError,
1952 "substring not found");
1953 return NULL;
1955 return PyInt_FromSsize_t(result);
1959 Py_LOCAL_INLINE(PyObject *)
1960 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1962 char *s = PyString_AS_STRING(self);
1963 Py_ssize_t len = PyString_GET_SIZE(self);
1964 char *sep = PyString_AS_STRING(sepobj);
1965 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1966 Py_ssize_t i, j;
1968 i = 0;
1969 if (striptype != RIGHTSTRIP) {
1970 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1971 i++;
1975 j = len;
1976 if (striptype != LEFTSTRIP) {
1977 do {
1978 j--;
1979 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1980 j++;
1983 if (i == 0 && j == len && PyString_CheckExact(self)) {
1984 Py_INCREF(self);
1985 return (PyObject*)self;
1987 else
1988 return PyString_FromStringAndSize(s+i, j-i);
1992 Py_LOCAL_INLINE(PyObject *)
1993 do_strip(PyStringObject *self, int striptype)
1995 char *s = PyString_AS_STRING(self);
1996 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1998 i = 0;
1999 if (striptype != RIGHTSTRIP) {
2000 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2001 i++;
2005 j = len;
2006 if (striptype != LEFTSTRIP) {
2007 do {
2008 j--;
2009 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2010 j++;
2013 if (i == 0 && j == len && PyString_CheckExact(self)) {
2014 Py_INCREF(self);
2015 return (PyObject*)self;
2017 else
2018 return PyString_FromStringAndSize(s+i, j-i);
2022 Py_LOCAL_INLINE(PyObject *)
2023 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2025 PyObject *sep = NULL;
2027 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2028 return NULL;
2030 if (sep != NULL && sep != Py_None) {
2031 if (PyString_Check(sep))
2032 return do_xstrip(self, striptype, sep);
2033 #ifdef Py_USING_UNICODE
2034 else if (PyUnicode_Check(sep)) {
2035 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2036 PyObject *res;
2037 if (uniself==NULL)
2038 return NULL;
2039 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2040 striptype, sep);
2041 Py_DECREF(uniself);
2042 return res;
2044 #endif
2045 PyErr_Format(PyExc_TypeError,
2046 #ifdef Py_USING_UNICODE
2047 "%s arg must be None, str or unicode",
2048 #else
2049 "%s arg must be None or str",
2050 #endif
2051 STRIPNAME(striptype));
2052 return NULL;
2055 return do_strip(self, striptype);
2059 PyDoc_STRVAR(strip__doc__,
2060 "S.strip([chars]) -> string or unicode\n\
2062 Return a copy of the string S with leading and trailing\n\
2063 whitespace removed.\n\
2064 If chars is given and not None, remove characters in chars instead.\n\
2065 If chars is unicode, S will be converted to unicode before stripping");
2067 static PyObject *
2068 string_strip(PyStringObject *self, PyObject *args)
2070 if (PyTuple_GET_SIZE(args) == 0)
2071 return do_strip(self, BOTHSTRIP); /* Common case */
2072 else
2073 return do_argstrip(self, BOTHSTRIP, args);
2077 PyDoc_STRVAR(lstrip__doc__,
2078 "S.lstrip([chars]) -> string or unicode\n\
2080 Return a copy of the string S with leading whitespace removed.\n\
2081 If chars is given and not None, remove characters in chars instead.\n\
2082 If chars is unicode, S will be converted to unicode before stripping");
2084 static PyObject *
2085 string_lstrip(PyStringObject *self, PyObject *args)
2087 if (PyTuple_GET_SIZE(args) == 0)
2088 return do_strip(self, LEFTSTRIP); /* Common case */
2089 else
2090 return do_argstrip(self, LEFTSTRIP, args);
2094 PyDoc_STRVAR(rstrip__doc__,
2095 "S.rstrip([chars]) -> string or unicode\n\
2097 Return a copy of the string S with trailing whitespace removed.\n\
2098 If chars is given and not None, remove characters in chars instead.\n\
2099 If chars is unicode, S will be converted to unicode before stripping");
2101 static PyObject *
2102 string_rstrip(PyStringObject *self, PyObject *args)
2104 if (PyTuple_GET_SIZE(args) == 0)
2105 return do_strip(self, RIGHTSTRIP); /* Common case */
2106 else
2107 return do_argstrip(self, RIGHTSTRIP, args);
2111 PyDoc_STRVAR(lower__doc__,
2112 "S.lower() -> string\n\
2114 Return a copy of the string S converted to lowercase.");
2116 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2117 #ifndef _tolower
2118 #define _tolower tolower
2119 #endif
2121 static PyObject *
2122 string_lower(PyStringObject *self)
2124 char *s;
2125 Py_ssize_t i, n = PyString_GET_SIZE(self);
2126 PyObject *newobj;
2128 newobj = PyString_FromStringAndSize(NULL, n);
2129 if (!newobj)
2130 return NULL;
2132 s = PyString_AS_STRING(newobj);
2134 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2136 for (i = 0; i < n; i++) {
2137 int c = Py_CHARMASK(s[i]);
2138 if (isupper(c))
2139 s[i] = _tolower(c);
2142 return newobj;
2145 PyDoc_STRVAR(upper__doc__,
2146 "S.upper() -> string\n\
2148 Return a copy of the string S converted to uppercase.");
2150 #ifndef _toupper
2151 #define _toupper toupper
2152 #endif
2154 static PyObject *
2155 string_upper(PyStringObject *self)
2157 char *s;
2158 Py_ssize_t i, n = PyString_GET_SIZE(self);
2159 PyObject *newobj;
2161 newobj = PyString_FromStringAndSize(NULL, n);
2162 if (!newobj)
2163 return NULL;
2165 s = PyString_AS_STRING(newobj);
2167 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2169 for (i = 0; i < n; i++) {
2170 int c = Py_CHARMASK(s[i]);
2171 if (islower(c))
2172 s[i] = _toupper(c);
2175 return newobj;
2178 PyDoc_STRVAR(title__doc__,
2179 "S.title() -> string\n\
2181 Return a titlecased version of S, i.e. words start with uppercase\n\
2182 characters, all remaining cased characters have lowercase.");
2184 static PyObject*
2185 string_title(PyStringObject *self)
2187 char *s = PyString_AS_STRING(self), *s_new;
2188 Py_ssize_t i, n = PyString_GET_SIZE(self);
2189 int previous_is_cased = 0;
2190 PyObject *newobj;
2192 newobj = PyString_FromStringAndSize(NULL, n);
2193 if (newobj == NULL)
2194 return NULL;
2195 s_new = PyString_AsString(newobj);
2196 for (i = 0; i < n; i++) {
2197 int c = Py_CHARMASK(*s++);
2198 if (islower(c)) {
2199 if (!previous_is_cased)
2200 c = toupper(c);
2201 previous_is_cased = 1;
2202 } else if (isupper(c)) {
2203 if (previous_is_cased)
2204 c = tolower(c);
2205 previous_is_cased = 1;
2206 } else
2207 previous_is_cased = 0;
2208 *s_new++ = c;
2210 return newobj;
2213 PyDoc_STRVAR(capitalize__doc__,
2214 "S.capitalize() -> string\n\
2216 Return a copy of the string S with only its first character\n\
2217 capitalized.");
2219 static PyObject *
2220 string_capitalize(PyStringObject *self)
2222 char *s = PyString_AS_STRING(self), *s_new;
2223 Py_ssize_t i, n = PyString_GET_SIZE(self);
2224 PyObject *newobj;
2226 newobj = PyString_FromStringAndSize(NULL, n);
2227 if (newobj == NULL)
2228 return NULL;
2229 s_new = PyString_AsString(newobj);
2230 if (0 < n) {
2231 int c = Py_CHARMASK(*s++);
2232 if (islower(c))
2233 *s_new = toupper(c);
2234 else
2235 *s_new = c;
2236 s_new++;
2238 for (i = 1; i < n; i++) {
2239 int c = Py_CHARMASK(*s++);
2240 if (isupper(c))
2241 *s_new = tolower(c);
2242 else
2243 *s_new = c;
2244 s_new++;
2246 return newobj;
2250 PyDoc_STRVAR(count__doc__,
2251 "S.count(sub[, start[, end]]) -> int\n\
2253 Return the number of non-overlapping occurrences of substring sub in\n\
2254 string S[start:end]. Optional arguments start and end are interpreted\n\
2255 as in slice notation.");
2257 static PyObject *
2258 string_count(PyStringObject *self, PyObject *args)
2260 PyObject *sub_obj;
2261 const char *str = PyString_AS_STRING(self), *sub;
2262 Py_ssize_t sub_len;
2263 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2265 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2266 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2267 return NULL;
2269 if (PyString_Check(sub_obj)) {
2270 sub = PyString_AS_STRING(sub_obj);
2271 sub_len = PyString_GET_SIZE(sub_obj);
2273 #ifdef Py_USING_UNICODE
2274 else if (PyUnicode_Check(sub_obj)) {
2275 Py_ssize_t count;
2276 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2277 if (count == -1)
2278 return NULL;
2279 else
2280 return PyInt_FromSsize_t(count);
2282 #endif
2283 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2284 return NULL;
2286 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
2288 return PyInt_FromSsize_t(
2289 stringlib_count(str + start, end - start, sub, sub_len)
2293 PyDoc_STRVAR(swapcase__doc__,
2294 "S.swapcase() -> string\n\
2296 Return a copy of the string S with uppercase characters\n\
2297 converted to lowercase and vice versa.");
2299 static PyObject *
2300 string_swapcase(PyStringObject *self)
2302 char *s = PyString_AS_STRING(self), *s_new;
2303 Py_ssize_t i, n = PyString_GET_SIZE(self);
2304 PyObject *newobj;
2306 newobj = PyString_FromStringAndSize(NULL, n);
2307 if (newobj == NULL)
2308 return NULL;
2309 s_new = PyString_AsString(newobj);
2310 for (i = 0; i < n; i++) {
2311 int c = Py_CHARMASK(*s++);
2312 if (islower(c)) {
2313 *s_new = toupper(c);
2315 else if (isupper(c)) {
2316 *s_new = tolower(c);
2318 else
2319 *s_new = c;
2320 s_new++;
2322 return newobj;
2326 PyDoc_STRVAR(translate__doc__,
2327 "S.translate(table [,deletechars]) -> string\n\
2329 Return a copy of the string S, where all characters occurring\n\
2330 in the optional argument deletechars are removed, and the\n\
2331 remaining characters have been mapped through the given\n\
2332 translation table, which must be a string of length 256.");
2334 static PyObject *
2335 string_translate(PyStringObject *self, PyObject *args)
2337 register char *input, *output;
2338 register const char *table;
2339 register Py_ssize_t i, c, changed = 0;
2340 PyObject *input_obj = (PyObject*)self;
2341 const char *table1, *output_start, *del_table=NULL;
2342 Py_ssize_t inlen, tablen, dellen = 0;
2343 PyObject *result;
2344 int trans_table[256];
2345 PyObject *tableobj, *delobj = NULL;
2347 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2348 &tableobj, &delobj))
2349 return NULL;
2351 if (PyString_Check(tableobj)) {
2352 table1 = PyString_AS_STRING(tableobj);
2353 tablen = PyString_GET_SIZE(tableobj);
2355 #ifdef Py_USING_UNICODE
2356 else if (PyUnicode_Check(tableobj)) {
2357 /* Unicode .translate() does not support the deletechars
2358 parameter; instead a mapping to None will cause characters
2359 to be deleted. */
2360 if (delobj != NULL) {
2361 PyErr_SetString(PyExc_TypeError,
2362 "deletions are implemented differently for unicode");
2363 return NULL;
2365 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2367 #endif
2368 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
2369 return NULL;
2371 if (tablen != 256) {
2372 PyErr_SetString(PyExc_ValueError,
2373 "translation table must be 256 characters long");
2374 return NULL;
2377 if (delobj != NULL) {
2378 if (PyString_Check(delobj)) {
2379 del_table = PyString_AS_STRING(delobj);
2380 dellen = PyString_GET_SIZE(delobj);
2382 #ifdef Py_USING_UNICODE
2383 else if (PyUnicode_Check(delobj)) {
2384 PyErr_SetString(PyExc_TypeError,
2385 "deletions are implemented differently for unicode");
2386 return NULL;
2388 #endif
2389 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2390 return NULL;
2392 else {
2393 del_table = NULL;
2394 dellen = 0;
2397 table = table1;
2398 inlen = PyString_GET_SIZE(input_obj);
2399 result = PyString_FromStringAndSize((char *)NULL, inlen);
2400 if (result == NULL)
2401 return NULL;
2402 output_start = output = PyString_AsString(result);
2403 input = PyString_AS_STRING(input_obj);
2405 if (dellen == 0) {
2406 /* If no deletions are required, use faster code */
2407 for (i = inlen; --i >= 0; ) {
2408 c = Py_CHARMASK(*input++);
2409 if (Py_CHARMASK((*output++ = table[c])) != c)
2410 changed = 1;
2412 if (changed || !PyString_CheckExact(input_obj))
2413 return result;
2414 Py_DECREF(result);
2415 Py_INCREF(input_obj);
2416 return input_obj;
2419 for (i = 0; i < 256; i++)
2420 trans_table[i] = Py_CHARMASK(table[i]);
2422 for (i = 0; i < dellen; i++)
2423 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2425 for (i = inlen; --i >= 0; ) {
2426 c = Py_CHARMASK(*input++);
2427 if (trans_table[c] != -1)
2428 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2429 continue;
2430 changed = 1;
2432 if (!changed && PyString_CheckExact(input_obj)) {
2433 Py_DECREF(result);
2434 Py_INCREF(input_obj);
2435 return input_obj;
2437 /* Fix the size of the resulting string */
2438 if (inlen > 0)
2439 _PyString_Resize(&result, output - output_start);
2440 return result;
/* Search direction selectors. */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* memchr() over the first target_len bytes of target, with the result
   cast to char *; NULL when the byte c does not occur. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), c, target_len))
2452 /* String ops must return a string. */
2453 /* If the object is subclass of string, create a copy */
2454 Py_LOCAL(PyStringObject *)
2455 return_self(PyStringObject *self)
2457 if (PyString_CheckExact(self)) {
2458 Py_INCREF(self);
2459 return self;
2461 return (PyStringObject *)PyString_FromStringAndSize(
2462 PyString_AS_STRING(self),
2463 PyString_GET_SIZE(self));
2466 Py_LOCAL_INLINE(Py_ssize_t)
2467 countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
2469 Py_ssize_t count=0;
2470 char *start=target;
2471 char *end=target+target_len;
2473 while ( (start=findchar(start, end-start, c)) != NULL ) {
2474 count++;
2475 if (count >= maxcount)
2476 break;
2477 start += 1;
2479 return count;
2482 Py_LOCAL(Py_ssize_t)
2483 findstring(char *target, Py_ssize_t target_len,
2484 char *pattern, Py_ssize_t pattern_len,
2485 Py_ssize_t start,
2486 Py_ssize_t end,
2487 int direction)
2489 if (start < 0) {
2490 start += target_len;
2491 if (start < 0)
2492 start = 0;
2494 if (end > target_len) {
2495 end = target_len;
2496 } else if (end < 0) {
2497 end += target_len;
2498 if (end < 0)
2499 end = 0;
2502 /* zero-length substrings always match at the first attempt */
2503 if (pattern_len == 0)
2504 return (direction > 0) ? start : end;
2506 end -= pattern_len;
2508 if (direction < 0) {
2509 for (; end >= start; end--)
2510 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2511 return end;
2512 } else {
2513 for (; start <= end; start++)
2514 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2515 return start;
2517 return -1;
2520 Py_LOCAL_INLINE(Py_ssize_t)
2521 countstring(char *target, Py_ssize_t target_len,
2522 char *pattern, Py_ssize_t pattern_len,
2523 Py_ssize_t start,
2524 Py_ssize_t end,
2525 int direction, Py_ssize_t maxcount)
2527 Py_ssize_t count=0;
2529 if (start < 0) {
2530 start += target_len;
2531 if (start < 0)
2532 start = 0;
2534 if (end > target_len) {
2535 end = target_len;
2536 } else if (end < 0) {
2537 end += target_len;
2538 if (end < 0)
2539 end = 0;
2542 /* zero-length substrings match everywhere */
2543 if (pattern_len == 0 || maxcount == 0) {
2544 if (target_len+1 < maxcount)
2545 return target_len+1;
2546 return maxcount;
2549 end -= pattern_len;
2550 if (direction < 0) {
2551 for (; (end >= start); end--)
2552 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2553 count++;
2554 if (--maxcount <= 0) break;
2555 end -= pattern_len-1;
2557 } else {
2558 for (; (start <= end); start++)
2559 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2560 count++;
2561 if (--maxcount <= 0)
2562 break;
2563 start += pattern_len-1;
2566 return count;
2570 /* Algorithms for different cases of string replacement */
2572 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2573 Py_LOCAL(PyStringObject *)
2574 replace_interleave(PyStringObject *self,
2575 PyStringObject *to,
2576 Py_ssize_t maxcount)
2578 char *self_s, *to_s, *result_s;
2579 Py_ssize_t self_len, to_len, result_len;
2580 Py_ssize_t count, i, product;
2581 PyStringObject *result;
2583 self_len = PyString_GET_SIZE(self);
2584 to_len = PyString_GET_SIZE(to);
2586 /* 1 at the end plus 1 after every character */
2587 count = self_len+1;
2588 if (maxcount < count)
2589 count = maxcount;
2591 /* Check for overflow */
2592 /* result_len = count * to_len + self_len; */
2593 product = count * to_len;
2594 if (product / to_len != count) {
2595 PyErr_SetString(PyExc_OverflowError,
2596 "replace string is too long");
2597 return NULL;
2599 result_len = product + self_len;
2600 if (result_len < 0) {
2601 PyErr_SetString(PyExc_OverflowError,
2602 "replace string is too long");
2603 return NULL;
2606 if (! (result = (PyStringObject *)
2607 PyString_FromStringAndSize(NULL, result_len)) )
2608 return NULL;
2610 self_s = PyString_AS_STRING(self);
2611 to_s = PyString_AS_STRING(to);
2612 to_len = PyString_GET_SIZE(to);
2613 result_s = PyString_AS_STRING(result);
2615 /* TODO: special case single character, which doesn't need memcpy */
2617 /* Lay the first one down (guaranteed this will occur) */
2618 Py_MEMCPY(result_s, to_s, to_len);
2619 result_s += to_len;
2620 count -= 1;
2622 for (i=0; i<count; i++) {
2623 *result_s++ = *self_s++;
2624 Py_MEMCPY(result_s, to_s, to_len);
2625 result_s += to_len;
2628 /* Copy the rest of the original string */
2629 Py_MEMCPY(result_s, self_s, self_len-i);
2631 return result;
2634 /* Special case for deleting a single character */
2635 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2636 Py_LOCAL(PyStringObject *)
2637 replace_delete_single_character(PyStringObject *self,
2638 char from_c, Py_ssize_t maxcount)
2640 char *self_s, *result_s;
2641 char *start, *next, *end;
2642 Py_ssize_t self_len, result_len;
2643 Py_ssize_t count;
2644 PyStringObject *result;
2646 self_len = PyString_GET_SIZE(self);
2647 self_s = PyString_AS_STRING(self);
2649 count = countchar(self_s, self_len, from_c, maxcount);
2650 if (count == 0) {
2651 return return_self(self);
2654 result_len = self_len - count; /* from_len == 1 */
2655 assert(result_len>=0);
2657 if ( (result = (PyStringObject *)
2658 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2659 return NULL;
2660 result_s = PyString_AS_STRING(result);
2662 start = self_s;
2663 end = self_s + self_len;
2664 while (count-- > 0) {
2665 next = findchar(start, end-start, from_c);
2666 if (next == NULL)
2667 break;
2668 Py_MEMCPY(result_s, start, next-start);
2669 result_s += (next-start);
2670 start = next+1;
2672 Py_MEMCPY(result_s, start, end-start);
2674 return result;
2677 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2679 Py_LOCAL(PyStringObject *)
2680 replace_delete_substring(PyStringObject *self, PyStringObject *from,
2681 Py_ssize_t maxcount) {
2682 char *self_s, *from_s, *result_s;
2683 char *start, *next, *end;
2684 Py_ssize_t self_len, from_len, result_len;
2685 Py_ssize_t count, offset;
2686 PyStringObject *result;
2688 self_len = PyString_GET_SIZE(self);
2689 self_s = PyString_AS_STRING(self);
2690 from_len = PyString_GET_SIZE(from);
2691 from_s = PyString_AS_STRING(from);
2693 count = countstring(self_s, self_len,
2694 from_s, from_len,
2695 0, self_len, 1,
2696 maxcount);
2698 if (count == 0) {
2699 /* no matches */
2700 return return_self(self);
2703 result_len = self_len - (count * from_len);
2704 assert (result_len>=0);
2706 if ( (result = (PyStringObject *)
2707 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2708 return NULL;
2710 result_s = PyString_AS_STRING(result);
2712 start = self_s;
2713 end = self_s + self_len;
2714 while (count-- > 0) {
2715 offset = findstring(start, end-start,
2716 from_s, from_len,
2717 0, end-start, FORWARD);
2718 if (offset == -1)
2719 break;
2720 next = start + offset;
2722 Py_MEMCPY(result_s, start, next-start);
2724 result_s += (next-start);
2725 start = next+from_len;
2727 Py_MEMCPY(result_s, start, end-start);
2728 return result;
2731 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2732 Py_LOCAL(PyStringObject *)
2733 replace_single_character_in_place(PyStringObject *self,
2734 char from_c, char to_c,
2735 Py_ssize_t maxcount)
2737 char *self_s, *result_s, *start, *end, *next;
2738 Py_ssize_t self_len;
2739 PyStringObject *result;
2741 /* The result string will be the same size */
2742 self_s = PyString_AS_STRING(self);
2743 self_len = PyString_GET_SIZE(self);
2745 next = findchar(self_s, self_len, from_c);
2747 if (next == NULL) {
2748 /* No matches; return the original string */
2749 return return_self(self);
2752 /* Need to make a new string */
2753 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2754 if (result == NULL)
2755 return NULL;
2756 result_s = PyString_AS_STRING(result);
2757 Py_MEMCPY(result_s, self_s, self_len);
2759 /* change everything in-place, starting with this one */
2760 start = result_s + (next-self_s);
2761 *start = to_c;
2762 start++;
2763 end = result_s + self_len;
2765 while (--maxcount > 0) {
2766 next = findchar(start, end-start, from_c);
2767 if (next == NULL)
2768 break;
2769 *next = to_c;
2770 start = next+1;
2773 return result;
2776 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2777 Py_LOCAL(PyStringObject *)
2778 replace_substring_in_place(PyStringObject *self,
2779 PyStringObject *from,
2780 PyStringObject *to,
2781 Py_ssize_t maxcount)
2783 char *result_s, *start, *end;
2784 char *self_s, *from_s, *to_s;
2785 Py_ssize_t self_len, from_len, offset;
2786 PyStringObject *result;
2788 /* The result string will be the same size */
2790 self_s = PyString_AS_STRING(self);
2791 self_len = PyString_GET_SIZE(self);
2793 from_s = PyString_AS_STRING(from);
2794 from_len = PyString_GET_SIZE(from);
2795 to_s = PyString_AS_STRING(to);
2797 offset = findstring(self_s, self_len,
2798 from_s, from_len,
2799 0, self_len, FORWARD);
2801 if (offset == -1) {
2802 /* No matches; return the original string */
2803 return return_self(self);
2806 /* Need to make a new string */
2807 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2808 if (result == NULL)
2809 return NULL;
2810 result_s = PyString_AS_STRING(result);
2811 Py_MEMCPY(result_s, self_s, self_len);
2814 /* change everything in-place, starting with this one */
2815 start = result_s + offset;
2816 Py_MEMCPY(start, to_s, from_len);
2817 start += from_len;
2818 end = result_s + self_len;
2820 while ( --maxcount > 0) {
2821 offset = findstring(start, end-start,
2822 from_s, from_len,
2823 0, end-start, FORWARD);
2824 if (offset==-1)
2825 break;
2826 Py_MEMCPY(start+offset, to_s, from_len);
2827 start += offset+from_len;
2830 return result;
2833 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2834 Py_LOCAL(PyStringObject *)
2835 replace_single_character(PyStringObject *self,
2836 char from_c,
2837 PyStringObject *to,
2838 Py_ssize_t maxcount)
2840 char *self_s, *to_s, *result_s;
2841 char *start, *next, *end;
2842 Py_ssize_t self_len, to_len, result_len;
2843 Py_ssize_t count, product;
2844 PyStringObject *result;
2846 self_s = PyString_AS_STRING(self);
2847 self_len = PyString_GET_SIZE(self);
2849 count = countchar(self_s, self_len, from_c, maxcount);
2851 if (count == 0) {
2852 /* no matches, return unchanged */
2853 return return_self(self);
2856 to_s = PyString_AS_STRING(to);
2857 to_len = PyString_GET_SIZE(to);
2859 /* use the difference between current and new, hence the "-1" */
2860 /* result_len = self_len + count * (to_len-1) */
2861 product = count * (to_len-1);
2862 if (product / (to_len-1) != count) {
2863 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2864 return NULL;
2866 result_len = self_len + product;
2867 if (result_len < 0) {
2868 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2869 return NULL;
2872 if ( (result = (PyStringObject *)
2873 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2874 return NULL;
2875 result_s = PyString_AS_STRING(result);
2877 start = self_s;
2878 end = self_s + self_len;
2879 while (count-- > 0) {
2880 next = findchar(start, end-start, from_c);
2881 if (next == NULL)
2882 break;
2884 if (next == start) {
2885 /* replace with the 'to' */
2886 Py_MEMCPY(result_s, to_s, to_len);
2887 result_s += to_len;
2888 start += 1;
2889 } else {
2890 /* copy the unchanged old then the 'to' */
2891 Py_MEMCPY(result_s, start, next-start);
2892 result_s += (next-start);
2893 Py_MEMCPY(result_s, to_s, to_len);
2894 result_s += to_len;
2895 start = next+1;
2898 /* Copy the remainder of the remaining string */
2899 Py_MEMCPY(result_s, start, end-start);
2901 return result;
2904 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2905 Py_LOCAL(PyStringObject *)
2906 replace_substring(PyStringObject *self,
2907 PyStringObject *from,
2908 PyStringObject *to,
2909 Py_ssize_t maxcount) {
2910 char *self_s, *from_s, *to_s, *result_s;
2911 char *start, *next, *end;
2912 Py_ssize_t self_len, from_len, to_len, result_len;
2913 Py_ssize_t count, offset, product;
2914 PyStringObject *result;
2916 self_s = PyString_AS_STRING(self);
2917 self_len = PyString_GET_SIZE(self);
2918 from_s = PyString_AS_STRING(from);
2919 from_len = PyString_GET_SIZE(from);
2921 count = countstring(self_s, self_len,
2922 from_s, from_len,
2923 0, self_len, FORWARD, maxcount);
2924 if (count == 0) {
2925 /* no matches, return unchanged */
2926 return return_self(self);
2929 to_s = PyString_AS_STRING(to);
2930 to_len = PyString_GET_SIZE(to);
2932 /* Check for overflow */
2933 /* result_len = self_len + count * (to_len-from_len) */
2934 product = count * (to_len-from_len);
2935 if (product / (to_len-from_len) != count) {
2936 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2937 return NULL;
2939 result_len = self_len + product;
2940 if (result_len < 0) {
2941 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2942 return NULL;
2945 if ( (result = (PyStringObject *)
2946 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2947 return NULL;
2948 result_s = PyString_AS_STRING(result);
2950 start = self_s;
2951 end = self_s + self_len;
2952 while (count-- > 0) {
2953 offset = findstring(start, end-start,
2954 from_s, from_len,
2955 0, end-start, FORWARD);
2956 if (offset == -1)
2957 break;
2958 next = start+offset;
2959 if (next == start) {
2960 /* replace with the 'to' */
2961 Py_MEMCPY(result_s, to_s, to_len);
2962 result_s += to_len;
2963 start += from_len;
2964 } else {
2965 /* copy the unchanged old then the 'to' */
2966 Py_MEMCPY(result_s, start, next-start);
2967 result_s += (next-start);
2968 Py_MEMCPY(result_s, to_s, to_len);
2969 result_s += to_len;
2970 start = next+from_len;
2973 /* Copy the remainder of the remaining string */
2974 Py_MEMCPY(result_s, start, end-start);
2976 return result;
2980 Py_LOCAL(PyStringObject *)
2981 replace(PyStringObject *self,
2982 PyStringObject *from,
2983 PyStringObject *to,
2984 Py_ssize_t maxcount)
2986 Py_ssize_t from_len, to_len;
2988 if (maxcount < 0) {
2989 maxcount = PY_SSIZE_T_MAX;
2990 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2991 /* nothing to do; return the original string */
2992 return return_self(self);
2995 from_len = PyString_GET_SIZE(from);
2996 to_len = PyString_GET_SIZE(to);
2998 if (maxcount == 0 ||
2999 (from_len == 0 && to_len == 0)) {
3000 /* nothing to do; return the original string */
3001 return return_self(self);
3004 /* Handle zero-length special cases */
3006 if (from_len == 0) {
3007 /* insert the 'to' string everywhere. */
3008 /* >>> "Python".replace("", ".") */
3009 /* '.P.y.t.h.o.n.' */
3010 return replace_interleave(self, to, maxcount);
3013 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3014 /* point for an empty self string to generate a non-empty string */
3015 /* Special case so the remaining code always gets a non-empty string */
3016 if (PyString_GET_SIZE(self) == 0) {
3017 return return_self(self);
3020 if (to_len == 0) {
3021 /* delete all occurances of 'from' string */
3022 if (from_len == 1) {
3023 return replace_delete_single_character(
3024 self, PyString_AS_STRING(from)[0], maxcount);
3025 } else {
3026 return replace_delete_substring(self, from, maxcount);
3030 /* Handle special case where both strings have the same length */
3032 if (from_len == to_len) {
3033 if (from_len == 1) {
3034 return replace_single_character_in_place(
3035 self,
3036 PyString_AS_STRING(from)[0],
3037 PyString_AS_STRING(to)[0],
3038 maxcount);
3039 } else {
3040 return replace_substring_in_place(
3041 self, from, to, maxcount);
3045 /* Otherwise use the more generic algorithms */
3046 if (from_len == 1) {
3047 return replace_single_character(self, PyString_AS_STRING(from)[0],
3048 to, maxcount);
3049 } else {
3050 /* len('from')>=2, len('to')>=1 */
3051 return replace_substring(self, from, to, maxcount);
3055 PyDoc_STRVAR(replace__doc__,
3056 "S.replace (old, new[, count]) -> string\n\
3058 Return a copy of string S with all occurrences of substring\n\
3059 old replaced by new. If the optional argument count is\n\
3060 given, only the first count occurrences are replaced.");
3062 static PyObject *
3063 string_replace(PyStringObject *self, PyObject *args)
3065 Py_ssize_t count = -1;
3066 PyObject *from, *to;
3067 const char *tmp_s;
3068 Py_ssize_t tmp_len;
3070 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3071 return NULL;
3073 if (PyString_Check(from)) {
3074 /* Can this be made a '!check' after the Unicode check? */
3076 #ifdef Py_USING_UNICODE
3077 if (PyUnicode_Check(from))
3078 return PyUnicode_Replace((PyObject *)self,
3079 from, to, count);
3080 #endif
3081 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
3082 return NULL;
3084 if (PyString_Check(to)) {
3085 /* Can this be made a '!check' after the Unicode check? */
3087 #ifdef Py_USING_UNICODE
3088 else if (PyUnicode_Check(to))
3089 return PyUnicode_Replace((PyObject *)self,
3090 from, to, count);
3091 #endif
3092 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
3093 return NULL;
3095 return (PyObject *)replace((PyStringObject *) self,
3096 (PyStringObject *) from,
3097 (PyStringObject *) to, count);
3100 /** End DALKE **/
3102 /* Matches the end (direction >= 0) or start (direction < 0) of self
3103 * against substr, using the start and end arguments. Returns
3104 * -1 on error, 0 if not found and 1 if found.
3106 Py_LOCAL(int)
3107 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3108 Py_ssize_t end, int direction)
3110 Py_ssize_t len = PyString_GET_SIZE(self);
3111 Py_ssize_t slen;
3112 const char* sub;
3113 const char* str;
3115 if (PyString_Check(substr)) {
3116 sub = PyString_AS_STRING(substr);
3117 slen = PyString_GET_SIZE(substr);
3119 #ifdef Py_USING_UNICODE
3120 else if (PyUnicode_Check(substr))
3121 return PyUnicode_Tailmatch((PyObject *)self,
3122 substr, start, end, direction);
3123 #endif
3124 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3125 return -1;
3126 str = PyString_AS_STRING(self);
3128 string_adjust_indices(&start, &end, len);
3130 if (direction < 0) {
3131 /* startswith */
3132 if (start+slen > len)
3133 return 0;
3134 } else {
3135 /* endswith */
3136 if (end-start < slen || start > len)
3137 return 0;
3139 if (end-slen > start)
3140 start = end - slen;
3142 if (end-start >= slen)
3143 return ! memcmp(str+start, sub, slen);
3144 return 0;
3148 PyDoc_STRVAR(startswith__doc__,
3149 "S.startswith(prefix[, start[, end]]) -> bool\n\
3151 Return True if S starts with the specified prefix, False otherwise.\n\
3152 With optional start, test S beginning at that position.\n\
3153 With optional end, stop comparing S at that position.\n\
3154 prefix can also be a tuple of strings to try.");
3156 static PyObject *
3157 string_startswith(PyStringObject *self, PyObject *args)
3159 Py_ssize_t start = 0;
3160 Py_ssize_t end = PY_SSIZE_T_MAX;
3161 PyObject *subobj;
3162 int result;
3164 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3165 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3166 return NULL;
3167 if (PyTuple_Check(subobj)) {
3168 Py_ssize_t i;
3169 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3170 result = _string_tailmatch(self,
3171 PyTuple_GET_ITEM(subobj, i),
3172 start, end, -1);
3173 if (result == -1)
3174 return NULL;
3175 else if (result) {
3176 Py_RETURN_TRUE;
3179 Py_RETURN_FALSE;
3181 result = _string_tailmatch(self, subobj, start, end, -1);
3182 if (result == -1)
3183 return NULL;
3184 else
3185 return PyBool_FromLong(result);
3189 PyDoc_STRVAR(endswith__doc__,
3190 "S.endswith(suffix[, start[, end]]) -> bool\n\
3192 Return True if S ends with the specified suffix, False otherwise.\n\
3193 With optional start, test S beginning at that position.\n\
3194 With optional end, stop comparing S at that position.\n\
3195 suffix can also be a tuple of strings to try.");
3197 static PyObject *
3198 string_endswith(PyStringObject *self, PyObject *args)
3200 Py_ssize_t start = 0;
3201 Py_ssize_t end = PY_SSIZE_T_MAX;
3202 PyObject *subobj;
3203 int result;
3205 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3206 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3207 return NULL;
3208 if (PyTuple_Check(subobj)) {
3209 Py_ssize_t i;
3210 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3211 result = _string_tailmatch(self,
3212 PyTuple_GET_ITEM(subobj, i),
3213 start, end, +1);
3214 if (result == -1)
3215 return NULL;
3216 else if (result) {
3217 Py_RETURN_TRUE;
3220 Py_RETURN_FALSE;
3222 result = _string_tailmatch(self, subobj, start, end, +1);
3223 if (result == -1)
3224 return NULL;
3225 else
3226 return PyBool_FromLong(result);
3230 PyDoc_STRVAR(encode__doc__,
3231 "S.encode([encoding[,errors]]) -> object\n\
3233 Encodes S using the codec registered for encoding. encoding defaults\n\
3234 to the default encoding. errors may be given to set a different error\n\
3235 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3236 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3237 'xmlcharrefreplace' as well as any other name registered with\n\
3238 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3240 static PyObject *
3241 string_encode(PyStringObject *self, PyObject *args)
3243 char *encoding = NULL;
3244 char *errors = NULL;
3245 PyObject *v;
3247 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3248 return NULL;
3249 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3250 if (v == NULL)
3251 goto onError;
3252 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3253 PyErr_Format(PyExc_TypeError,
3254 "encoder did not return a string/unicode object "
3255 "(type=%.400s)",
3256 v->ob_type->tp_name);
3257 Py_DECREF(v);
3258 return NULL;
3260 return v;
3262 onError:
3263 return NULL;
3267 PyDoc_STRVAR(decode__doc__,
3268 "S.decode([encoding[,errors]]) -> object\n\
3270 Decodes S using the codec registered for encoding. encoding defaults\n\
3271 to the default encoding. errors may be given to set a different error\n\
3272 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3273 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3274 as well as any other name registerd with codecs.register_error that is\n\
3275 able to handle UnicodeDecodeErrors.");
3277 static PyObject *
3278 string_decode(PyStringObject *self, PyObject *args)
3280 char *encoding = NULL;
3281 char *errors = NULL;
3282 PyObject *v;
3284 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3285 return NULL;
3286 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3287 if (v == NULL)
3288 goto onError;
3289 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3290 PyErr_Format(PyExc_TypeError,
3291 "decoder did not return a string/unicode object "
3292 "(type=%.400s)",
3293 v->ob_type->tp_name);
3294 Py_DECREF(v);
3295 return NULL;
3297 return v;
3299 onError:
3300 return NULL;
3304 PyDoc_STRVAR(expandtabs__doc__,
3305 "S.expandtabs([tabsize]) -> string\n\
3307 Return a copy of S where all tab characters are expanded using spaces.\n\
3308 If tabsize is not given, a tab size of 8 characters is assumed.");
3310 static PyObject*
3311 string_expandtabs(PyStringObject *self, PyObject *args)
3313 const char *e, *p;
3314 char *q;
3315 Py_ssize_t i, j;
3316 PyObject *u;
3317 int tabsize = 8;
3319 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3320 return NULL;
3322 /* First pass: determine size of output string */
3323 i = j = 0;
3324 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3325 for (p = PyString_AS_STRING(self); p < e; p++)
3326 if (*p == '\t') {
3327 if (tabsize > 0)
3328 j += tabsize - (j % tabsize);
3330 else {
3331 j++;
3332 if (*p == '\n' || *p == '\r') {
3333 i += j;
3334 j = 0;
3338 /* Second pass: create output string and fill it */
3339 u = PyString_FromStringAndSize(NULL, i + j);
3340 if (!u)
3341 return NULL;
3343 j = 0;
3344 q = PyString_AS_STRING(u);
3346 for (p = PyString_AS_STRING(self); p < e; p++)
3347 if (*p == '\t') {
3348 if (tabsize > 0) {
3349 i = tabsize - (j % tabsize);
3350 j += i;
3351 while (i--)
3352 *q++ = ' ';
3355 else {
3356 j++;
3357 *q++ = *p;
3358 if (*p == '\n' || *p == '\r')
3359 j = 0;
3362 return u;
3365 Py_LOCAL_INLINE(PyObject *)
3366 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3368 PyObject *u;
3370 if (left < 0)
3371 left = 0;
3372 if (right < 0)
3373 right = 0;
3375 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3376 Py_INCREF(self);
3377 return (PyObject *)self;
3380 u = PyString_FromStringAndSize(NULL,
3381 left + PyString_GET_SIZE(self) + right);
3382 if (u) {
3383 if (left)
3384 memset(PyString_AS_STRING(u), fill, left);
3385 Py_MEMCPY(PyString_AS_STRING(u) + left,
3386 PyString_AS_STRING(self),
3387 PyString_GET_SIZE(self));
3388 if (right)
3389 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3390 fill, right);
3393 return u;
3396 PyDoc_STRVAR(ljust__doc__,
3397 "S.ljust(width[, fillchar]) -> string\n"
3398 "\n"
3399 "Return S left justified in a string of length width. Padding is\n"
3400 "done using the specified fill character (default is a space).");
3402 static PyObject *
3403 string_ljust(PyStringObject *self, PyObject *args)
3405 Py_ssize_t width;
3406 char fillchar = ' ';
3408 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3409 return NULL;
3411 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3412 Py_INCREF(self);
3413 return (PyObject*) self;
3416 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3420 PyDoc_STRVAR(rjust__doc__,
3421 "S.rjust(width[, fillchar]) -> string\n"
3422 "\n"
3423 "Return S right justified in a string of length width. Padding is\n"
3424 "done using the specified fill character (default is a space)");
3426 static PyObject *
3427 string_rjust(PyStringObject *self, PyObject *args)
3429 Py_ssize_t width;
3430 char fillchar = ' ';
3432 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3433 return NULL;
3435 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3436 Py_INCREF(self);
3437 return (PyObject*) self;
3440 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3444 PyDoc_STRVAR(center__doc__,
3445 "S.center(width[, fillchar]) -> string\n"
3446 "\n"
3447 "Return S centered in a string of length width. Padding is\n"
3448 "done using the specified fill character (default is a space)");
3450 static PyObject *
3451 string_center(PyStringObject *self, PyObject *args)
3453 Py_ssize_t marg, left;
3454 Py_ssize_t width;
3455 char fillchar = ' ';
3457 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3458 return NULL;
3460 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3461 Py_INCREF(self);
3462 return (PyObject*) self;
3465 marg = width - PyString_GET_SIZE(self);
3466 left = marg / 2 + (marg & width & 1);
3468 return pad(self, left, marg - left, fillchar);
3471 PyDoc_STRVAR(zfill__doc__,
3472 "S.zfill(width) -> string\n"
3473 "\n"
3474 "Pad a numeric string S with zeros on the left, to fill a field\n"
3475 "of the specified width. The string S is never truncated.");
3477 static PyObject *
3478 string_zfill(PyStringObject *self, PyObject *args)
3480 Py_ssize_t fill;
3481 PyObject *s;
3482 char *p;
3483 Py_ssize_t width;
3485 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3486 return NULL;
3488 if (PyString_GET_SIZE(self) >= width) {
3489 if (PyString_CheckExact(self)) {
3490 Py_INCREF(self);
3491 return (PyObject*) self;
3493 else
3494 return PyString_FromStringAndSize(
3495 PyString_AS_STRING(self),
3496 PyString_GET_SIZE(self)
3500 fill = width - PyString_GET_SIZE(self);
3502 s = pad(self, fill, 0, '0');
3504 if (s == NULL)
3505 return NULL;
3507 p = PyString_AS_STRING(s);
3508 if (p[fill] == '+' || p[fill] == '-') {
3509 /* move sign to beginning of string */
3510 p[0] = p[fill];
3511 p[fill] = '0';
3514 return (PyObject*) s;
3517 PyDoc_STRVAR(isspace__doc__,
3518 "S.isspace() -> bool\n\
3520 Return True if all characters in S are whitespace\n\
3521 and there is at least one character in S, False otherwise.");
3523 static PyObject*
3524 string_isspace(PyStringObject *self)
3526 register const unsigned char *p
3527 = (unsigned char *) PyString_AS_STRING(self);
3528 register const unsigned char *e;
3530 /* Shortcut for single character strings */
3531 if (PyString_GET_SIZE(self) == 1 &&
3532 isspace(*p))
3533 return PyBool_FromLong(1);
3535 /* Special case for empty strings */
3536 if (PyString_GET_SIZE(self) == 0)
3537 return PyBool_FromLong(0);
3539 e = p + PyString_GET_SIZE(self);
3540 for (; p < e; p++) {
3541 if (!isspace(*p))
3542 return PyBool_FromLong(0);
3544 return PyBool_FromLong(1);
3548 PyDoc_STRVAR(isalpha__doc__,
3549 "S.isalpha() -> bool\n\
3551 Return True if all characters in S are alphabetic\n\
3552 and there is at least one character in S, False otherwise.");
3554 static PyObject*
3555 string_isalpha(PyStringObject *self)
3557 register const unsigned char *p
3558 = (unsigned char *) PyString_AS_STRING(self);
3559 register const unsigned char *e;
3561 /* Shortcut for single character strings */
3562 if (PyString_GET_SIZE(self) == 1 &&
3563 isalpha(*p))
3564 return PyBool_FromLong(1);
3566 /* Special case for empty strings */
3567 if (PyString_GET_SIZE(self) == 0)
3568 return PyBool_FromLong(0);
3570 e = p + PyString_GET_SIZE(self);
3571 for (; p < e; p++) {
3572 if (!isalpha(*p))
3573 return PyBool_FromLong(0);
3575 return PyBool_FromLong(1);
3579 PyDoc_STRVAR(isalnum__doc__,
3580 "S.isalnum() -> bool\n\
3582 Return True if all characters in S are alphanumeric\n\
3583 and there is at least one character in S, False otherwise.");
3585 static PyObject*
3586 string_isalnum(PyStringObject *self)
3588 register const unsigned char *p
3589 = (unsigned char *) PyString_AS_STRING(self);
3590 register const unsigned char *e;
3592 /* Shortcut for single character strings */
3593 if (PyString_GET_SIZE(self) == 1 &&
3594 isalnum(*p))
3595 return PyBool_FromLong(1);
3597 /* Special case for empty strings */
3598 if (PyString_GET_SIZE(self) == 0)
3599 return PyBool_FromLong(0);
3601 e = p + PyString_GET_SIZE(self);
3602 for (; p < e; p++) {
3603 if (!isalnum(*p))
3604 return PyBool_FromLong(0);
3606 return PyBool_FromLong(1);
3610 PyDoc_STRVAR(isdigit__doc__,
3611 "S.isdigit() -> bool\n\
3613 Return True if all characters in S are digits\n\
3614 and there is at least one character in S, False otherwise.");
3616 static PyObject*
3617 string_isdigit(PyStringObject *self)
3619 register const unsigned char *p
3620 = (unsigned char *) PyString_AS_STRING(self);
3621 register const unsigned char *e;
3623 /* Shortcut for single character strings */
3624 if (PyString_GET_SIZE(self) == 1 &&
3625 isdigit(*p))
3626 return PyBool_FromLong(1);
3628 /* Special case for empty strings */
3629 if (PyString_GET_SIZE(self) == 0)
3630 return PyBool_FromLong(0);
3632 e = p + PyString_GET_SIZE(self);
3633 for (; p < e; p++) {
3634 if (!isdigit(*p))
3635 return PyBool_FromLong(0);
3637 return PyBool_FromLong(1);
3641 PyDoc_STRVAR(islower__doc__,
3642 "S.islower() -> bool\n\
3644 Return True if all cased characters in S are lowercase and there is\n\
3645 at least one cased character in S, False otherwise.");
3647 static PyObject*
3648 string_islower(PyStringObject *self)
3650 register const unsigned char *p
3651 = (unsigned char *) PyString_AS_STRING(self);
3652 register const unsigned char *e;
3653 int cased;
3655 /* Shortcut for single character strings */
3656 if (PyString_GET_SIZE(self) == 1)
3657 return PyBool_FromLong(islower(*p) != 0);
3659 /* Special case for empty strings */
3660 if (PyString_GET_SIZE(self) == 0)
3661 return PyBool_FromLong(0);
3663 e = p + PyString_GET_SIZE(self);
3664 cased = 0;
3665 for (; p < e; p++) {
3666 if (isupper(*p))
3667 return PyBool_FromLong(0);
3668 else if (!cased && islower(*p))
3669 cased = 1;
3671 return PyBool_FromLong(cased);
3675 PyDoc_STRVAR(isupper__doc__,
3676 "S.isupper() -> bool\n\
3678 Return True if all cased characters in S are uppercase and there is\n\
3679 at least one cased character in S, False otherwise.");
3681 static PyObject*
3682 string_isupper(PyStringObject *self)
3684 register const unsigned char *p
3685 = (unsigned char *) PyString_AS_STRING(self);
3686 register const unsigned char *e;
3687 int cased;
3689 /* Shortcut for single character strings */
3690 if (PyString_GET_SIZE(self) == 1)
3691 return PyBool_FromLong(isupper(*p) != 0);
3693 /* Special case for empty strings */
3694 if (PyString_GET_SIZE(self) == 0)
3695 return PyBool_FromLong(0);
3697 e = p + PyString_GET_SIZE(self);
3698 cased = 0;
3699 for (; p < e; p++) {
3700 if (islower(*p))
3701 return PyBool_FromLong(0);
3702 else if (!cased && isupper(*p))
3703 cased = 1;
3705 return PyBool_FromLong(cased);
3709 PyDoc_STRVAR(istitle__doc__,
3710 "S.istitle() -> bool\n\
3712 Return True if S is a titlecased string and there is at least one\n\
3713 character in S, i.e. uppercase characters may only follow uncased\n\
3714 characters and lowercase characters only cased ones. Return False\n\
3715 otherwise.");
3717 static PyObject*
3718 string_istitle(PyStringObject *self, PyObject *uncased)
3720 register const unsigned char *p
3721 = (unsigned char *) PyString_AS_STRING(self);
3722 register const unsigned char *e;
3723 int cased, previous_is_cased;
3725 /* Shortcut for single character strings */
3726 if (PyString_GET_SIZE(self) == 1)
3727 return PyBool_FromLong(isupper(*p) != 0);
3729 /* Special case for empty strings */
3730 if (PyString_GET_SIZE(self) == 0)
3731 return PyBool_FromLong(0);
3733 e = p + PyString_GET_SIZE(self);
3734 cased = 0;
3735 previous_is_cased = 0;
3736 for (; p < e; p++) {
3737 register const unsigned char ch = *p;
3739 if (isupper(ch)) {
3740 if (previous_is_cased)
3741 return PyBool_FromLong(0);
3742 previous_is_cased = 1;
3743 cased = 1;
3745 else if (islower(ch)) {
3746 if (!previous_is_cased)
3747 return PyBool_FromLong(0);
3748 previous_is_cased = 1;
3749 cased = 1;
3751 else
3752 previous_is_cased = 0;
3754 return PyBool_FromLong(cased);
3758 PyDoc_STRVAR(splitlines__doc__,
3759 "S.splitlines([keepends]) -> list of strings\n\
3761 Return a list of the lines in S, breaking at line boundaries.\n\
3762 Line breaks are not included in the resulting list unless keepends\n\
3763 is given and true.");
3765 static PyObject*
3766 string_splitlines(PyStringObject *self, PyObject *args)
3768 register Py_ssize_t i;
3769 register Py_ssize_t j;
3770 Py_ssize_t len;
3771 int keepends = 0;
3772 PyObject *list;
3773 PyObject *str;
3774 char *data;
3776 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3777 return NULL;
3779 data = PyString_AS_STRING(self);
3780 len = PyString_GET_SIZE(self);
3782 /* This does not use the preallocated list because splitlines is
3783 usually run with hundreds of newlines. The overhead of
3784 switching between PyList_SET_ITEM and append causes about a
3785 2-3% slowdown for that common case. A smarter implementation
3786 could move the if check out, so the SET_ITEMs are done first
3787 and the appends only done when the prealloc buffer is full.
3788 That's too much work for little gain.*/
3790 list = PyList_New(0);
3791 if (!list)
3792 goto onError;
3794 for (i = j = 0; i < len; ) {
3795 Py_ssize_t eol;
3797 /* Find a line and append it */
3798 while (i < len && data[i] != '\n' && data[i] != '\r')
3799 i++;
3801 /* Skip the line break reading CRLF as one line break */
3802 eol = i;
3803 if (i < len) {
3804 if (data[i] == '\r' && i + 1 < len &&
3805 data[i+1] == '\n')
3806 i += 2;
3807 else
3808 i++;
3809 if (keepends)
3810 eol = i;
3812 SPLIT_APPEND(data, j, eol);
3813 j = i;
3815 if (j < len) {
3816 SPLIT_APPEND(data, j, len);
3819 return list;
3821 onError:
3822 Py_XDECREF(list);
3823 return NULL;
3826 #undef SPLIT_APPEND
3827 #undef SPLIT_ADD
3828 #undef MAX_PREALLOC
3829 #undef PREALLOC_SIZE
3831 static PyObject *
3832 string_getnewargs(PyStringObject *v)
3834 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3838 static PyMethodDef
3839 string_methods[] = {
3840 /* Counterparts of the obsolete stropmodule functions; except
3841 string.maketrans(). */
3842 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3843 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3844 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3845 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3846 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3847 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3848 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3849 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3850 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3851 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3852 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3853 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3854 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3855 capitalize__doc__},
3856 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3857 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3858 endswith__doc__},
3859 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3860 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3861 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3862 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3863 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3864 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3865 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3866 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3867 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3868 rpartition__doc__},
3869 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3870 startswith__doc__},
3871 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3872 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3873 swapcase__doc__},
3874 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3875 translate__doc__},
3876 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3877 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3878 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3879 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3880 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3881 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3882 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3883 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3884 expandtabs__doc__},
3885 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3886 splitlines__doc__},
3887 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3888 {NULL, NULL} /* sentinel */
3891 static PyObject *
3892 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3894 static PyObject *
3895 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3897 PyObject *x = NULL;
3898 static char *kwlist[] = {"object", 0};
3900 if (type != &PyString_Type)
3901 return str_subtype_new(type, args, kwds);
3902 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3903 return NULL;
3904 if (x == NULL)
3905 return PyString_FromString("");
3906 return PyObject_Str(x);
3909 static PyObject *
3910 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3912 PyObject *tmp, *pnew;
3913 Py_ssize_t n;
3915 assert(PyType_IsSubtype(type, &PyString_Type));
3916 tmp = string_new(&PyString_Type, args, kwds);
3917 if (tmp == NULL)
3918 return NULL;
3919 assert(PyString_CheckExact(tmp));
3920 n = PyString_GET_SIZE(tmp);
3921 pnew = type->tp_alloc(type, n);
3922 if (pnew != NULL) {
3923 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3924 ((PyStringObject *)pnew)->ob_shash =
3925 ((PyStringObject *)tmp)->ob_shash;
3926 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3928 Py_DECREF(tmp);
3929 return pnew;
3932 static PyObject *
3933 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3935 PyErr_SetString(PyExc_TypeError,
3936 "The basestring type cannot be instantiated");
3937 return NULL;
3940 static PyObject *
3941 string_mod(PyObject *v, PyObject *w)
3943 if (!PyString_Check(v)) {
3944 Py_INCREF(Py_NotImplemented);
3945 return Py_NotImplemented;
3947 return PyString_Format(v, w);
/* Docstring used as PyBaseString_Type.tp_doc. */
3950 PyDoc_STRVAR(basestring_doc,
3951 "Type basestring cannot be instantiated; it is the base for str and unicode.");
/* Number protocol table for str (installed as PyString_Type.tp_as_number).
   The only slot filled in the entries shown here is nb_remainder, which
   routes the `%` operator to string_mod(). */
3953 static PyNumberMethods string_as_number = {
3954 0, /*nb_add*/
3955 0, /*nb_subtract*/
3956 0, /*nb_multiply*/
3957 0, /*nb_divide*/
3958 string_mod, /*nb_remainder*/
/* Type object for "basestring".  Of the slots shown, only tp_flags,
   tp_doc, tp_base (object) and tp_new are populated; tp_new is
   basestring_new, which always raises TypeError, so the type can be
   subclassed (Py_TPFLAGS_BASETYPE) but never instantiated directly. */
3962 PyTypeObject PyBaseString_Type = {
3963 PyObject_HEAD_INIT(&PyType_Type)
3965 "basestring",
3968 0, /* tp_dealloc */
3969 0, /* tp_print */
3970 0, /* tp_getattr */
3971 0, /* tp_setattr */
3972 0, /* tp_compare */
3973 0, /* tp_repr */
3974 0, /* tp_as_number */
3975 0, /* tp_as_sequence */
3976 0, /* tp_as_mapping */
3977 0, /* tp_hash */
3978 0, /* tp_call */
3979 0, /* tp_str */
3980 0, /* tp_getattro */
3981 0, /* tp_setattro */
3982 0, /* tp_as_buffer */
3983 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3984 basestring_doc, /* tp_doc */
3985 0, /* tp_traverse */
3986 0, /* tp_clear */
3987 0, /* tp_richcompare */
3988 0, /* tp_weaklistoffset */
3989 0, /* tp_iter */
3990 0, /* tp_iternext */
3991 0, /* tp_methods */
3992 0, /* tp_members */
3993 0, /* tp_getset */
3994 &PyBaseObject_Type, /* tp_base */
3995 0, /* tp_dict */
3996 0, /* tp_descr_get */
3997 0, /* tp_descr_set */
3998 0, /* tp_dictoffset */
3999 0, /* tp_init */
4000 0, /* tp_alloc */
4001 basestring_new, /* tp_new */
4002 0, /* tp_free */
/* Docstring used as PyString_Type.tp_doc. */
4005 PyDoc_STRVAR(string_doc,
4006 "str(object) -> string\n\
4008 Return a nice string representation of the object.\n\
4009 If the argument is a string, the return value is the same object.");
/* Type object for "str".  It derives from basestring (tp_base), has a
   per-item size of sizeof(char), and wires in the number, sequence,
   mapping and buffer protocol tables plus the string_methods table.
   Instances are created through string_new and released with
   PyObject_Del.  Py_TPFLAGS_BASETYPE allows subclassing. */
4011 PyTypeObject PyString_Type = {
4012 PyObject_HEAD_INIT(&PyType_Type)
4014 "str",
4015 sizeof(PyStringObject),
4016 sizeof(char),
4017 string_dealloc, /* tp_dealloc */
4018 (printfunc)string_print, /* tp_print */
4019 0, /* tp_getattr */
4020 0, /* tp_setattr */
4021 0, /* tp_compare */
4022 string_repr, /* tp_repr */
4023 &string_as_number, /* tp_as_number */
4024 &string_as_sequence, /* tp_as_sequence */
4025 &string_as_mapping, /* tp_as_mapping */
4026 (hashfunc)string_hash, /* tp_hash */
4027 0, /* tp_call */
4028 string_str, /* tp_str */
4029 PyObject_GenericGetAttr, /* tp_getattro */
4030 0, /* tp_setattro */
4031 &string_as_buffer, /* tp_as_buffer */
4032 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4033 Py_TPFLAGS_BASETYPE, /* tp_flags */
4034 string_doc, /* tp_doc */
4035 0, /* tp_traverse */
4036 0, /* tp_clear */
4037 (richcmpfunc)string_richcompare, /* tp_richcompare */
4038 0, /* tp_weaklistoffset */
4039 0, /* tp_iter */
4040 0, /* tp_iternext */
4041 string_methods, /* tp_methods */
4042 0, /* tp_members */
4043 0, /* tp_getset */
4044 &PyBaseString_Type, /* tp_base */
4045 0, /* tp_dict */
4046 0, /* tp_descr_get */
4047 0, /* tp_descr_set */
4048 0, /* tp_dictoffset */
4049 0, /* tp_init */
4050 0, /* tp_alloc */
4051 string_new, /* tp_new */
4052 PyObject_Del, /* tp_free */
4055 void
4056 PyString_Concat(register PyObject **pv, register PyObject *w)
4058 register PyObject *v;
4059 if (*pv == NULL)
4060 return;
4061 if (w == NULL || !PyString_Check(*pv)) {
4062 Py_DECREF(*pv);
4063 *pv = NULL;
4064 return;
4066 v = string_concat((PyStringObject *) *pv, w);
4067 Py_DECREF(*pv);
4068 *pv = v;
4071 void
4072 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
4074 PyString_Concat(pv, w);
4075 Py_XDECREF(w);
4079 /* The following function breaks the notion that strings are immutable:
4080 it changes the size of a string. We get away with this only if there
4081 is only one module referencing the object. You can also think of it
4082 as creating a new string object and destroying the old one, only
4083 more efficiently. In any case, don't use this if the string may
4084 already be known to some other part of the code...
4085 Note that if there's not enough memory to resize the string, the original
4086 string object at *pv is deallocated, *pv is set to NULL, an "out of
4087 memory" exception is set, and -1 is returned. Else (on success) 0 is
4088 returned, and the value in *pv may or may not be the same as on input.
4089 As always, an extra byte is allocated for a trailing \0 byte (newsize
4090 does *not* include that), and a trailing \0 byte is stored.
4094 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
4096 register PyObject *v;
4097 register PyStringObject *sv;
4098 v = *pv;
4099 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4100 PyString_CHECK_INTERNED(v)) {
4101 *pv = 0;
4102 Py_DECREF(v);
4103 PyErr_BadInternalCall();
4104 return -1;
4106 /* XXX UNREF/NEWREF interface should be more symmetrical */
4107 _Py_DEC_REFTOTAL;
4108 _Py_ForgetReference(v);
4109 *pv = (PyObject *)
4110 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
4111 if (*pv == NULL) {
4112 PyObject_Del(v);
4113 PyErr_NoMemory();
4114 return -1;
4116 _Py_NewReference(*pv);
4117 sv = (PyStringObject *) *pv;
4118 sv->ob_size = newsize;
4119 sv->ob_sval[newsize] = '\0';
4120 sv->ob_shash = -1; /* invalidate cached hash value */
4121 return 0;
4124 /* Helpers for formatstring */
4126 Py_LOCAL_INLINE(PyObject *)
4127 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4129 Py_ssize_t argidx = *p_argidx;
4130 if (argidx < arglen) {
4131 (*p_argidx)++;
4132 if (arglen < 0)
4133 return args;
4134 else
4135 return PyTuple_GetItem(args, argidx);
4137 PyErr_SetString(PyExc_TypeError,
4138 "not enough arguments for format string");
4139 return NULL;
/* Format codes: bit flags recording which flag characters were seen in a
 * '%' conversion specifier.
 * F_LJUST '-'
 * F_SIGN '+'
 * F_BLANK ' '
 * F_ALT '#'
 * F_ZERO '0'
 */
4149 #define F_LJUST (1<<0)
4150 #define F_SIGN (1<<1)
4151 #define F_BLANK (1<<2)
4152 #define F_ALT (1<<3)
4153 #define F_ZERO (1<<4)
4155 Py_LOCAL_INLINE(int)
4156 formatfloat(char *buf, size_t buflen, int flags,
4157 int prec, int type, PyObject *v)
4159 /* fmt = '%#.' + `prec` + `type`
4160 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4161 char fmt[20];
4162 double x;
4163 x = PyFloat_AsDouble(v);
4164 if (x == -1.0 && PyErr_Occurred()) {
4165 PyErr_SetString(PyExc_TypeError, "float argument required");
4166 return -1;
4168 if (prec < 0)
4169 prec = 6;
4170 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4171 type = 'g';
4172 /* Worst case length calc to ensure no buffer overrun:
4174 'g' formats:
4175 fmt = %#.<prec>g
4176 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4177 for any double rep.)
4178 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4180 'f' formats:
4181 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4182 len = 1 + 50 + 1 + prec = 52 + prec
4184 If prec=0 the effective precision is 1 (the leading digit is
4185 always given), therefore increase the length by one.
4188 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4189 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4190 PyErr_SetString(PyExc_OverflowError,
4191 "formatted float is too long (precision too large?)");
4192 return -1;
4194 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4195 (flags&F_ALT) ? "#" : "",
4196 prec, type);
4197 PyOS_ascii_formatd(buf, buflen, fmt, x);
4198 return (int)strlen(buf);
4201 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4202 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4203 * Python's regular ints.
4204 * Return value: a new PyString*, or NULL if error.
4205 * . *pbuf is set to point into it,
4206 * *plen set to the # of chars following that.
4207 * Caller must decref it when done using pbuf.
4208 * The string starting at *pbuf is of the form
4209 * "-"? ("0x" | "0X")? digit+
4210 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4211 * set in flags. The case of hex digits will be correct,
4212 * There will be at least prec digits, zero-filled on the left if
4213 * necessary to get that many.
4214 * val object to be converted
4215 * flags bitmask of format flags; only F_ALT is looked at
4216 * prec minimum number of digits; 0-fill on left if needed
4217 * type a character in [duoxX]; u acts the same as d
4219 * CAUTION: o, x and X conversions on regular ints can never
4220 * produce a '-' sign, but can for Python's unbounded ints.
4222 PyObject*
4223 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4224 char **pbuf, int *plen)
4226 PyObject *result = NULL;
4227 char *buf;
4228 Py_ssize_t i;
4229 int sign; /* 1 if '-', else 0 */
4230 int len; /* number of characters */
4231 Py_ssize_t llen;
4232 int numdigits; /* len == numnondigits + numdigits */
4233 int numnondigits = 0;
4235 switch (type) {
4236 case 'd':
4237 case 'u':
4238 result = val->ob_type->tp_str(val);
4239 break;
4240 case 'o':
4241 result = val->ob_type->tp_as_number->nb_oct(val);
4242 break;
4243 case 'x':
4244 case 'X':
4245 numnondigits = 2;
4246 result = val->ob_type->tp_as_number->nb_hex(val);
4247 break;
4248 default:
4249 assert(!"'type' not in [duoxX]");
4251 if (!result)
4252 return NULL;
4254 /* To modify the string in-place, there can only be one reference. */
4255 if (result->ob_refcnt != 1) {
4256 PyErr_BadInternalCall();
4257 return NULL;
4259 buf = PyString_AsString(result);
4260 llen = PyString_Size(result);
4261 if (llen > PY_SSIZE_T_MAX) {
4262 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4263 return NULL;
4265 len = (int)llen;
4266 if (buf[len-1] == 'L') {
4267 --len;
4268 buf[len] = '\0';
4270 sign = buf[0] == '-';
4271 numnondigits += sign;
4272 numdigits = len - numnondigits;
4273 assert(numdigits > 0);
4275 /* Get rid of base marker unless F_ALT */
4276 if ((flags & F_ALT) == 0) {
4277 /* Need to skip 0x, 0X or 0. */
4278 int skipped = 0;
4279 switch (type) {
4280 case 'o':
4281 assert(buf[sign] == '0');
4282 /* If 0 is only digit, leave it alone. */
4283 if (numdigits > 1) {
4284 skipped = 1;
4285 --numdigits;
4287 break;
4288 case 'x':
4289 case 'X':
4290 assert(buf[sign] == '0');
4291 assert(buf[sign + 1] == 'x');
4292 skipped = 2;
4293 numnondigits -= 2;
4294 break;
4296 if (skipped) {
4297 buf += skipped;
4298 len -= skipped;
4299 if (sign)
4300 buf[0] = '-';
4302 assert(len == numnondigits + numdigits);
4303 assert(numdigits > 0);
4306 /* Fill with leading zeroes to meet minimum width. */
4307 if (prec > numdigits) {
4308 PyObject *r1 = PyString_FromStringAndSize(NULL,
4309 numnondigits + prec);
4310 char *b1;
4311 if (!r1) {
4312 Py_DECREF(result);
4313 return NULL;
4315 b1 = PyString_AS_STRING(r1);
4316 for (i = 0; i < numnondigits; ++i)
4317 *b1++ = *buf++;
4318 for (i = 0; i < prec - numdigits; i++)
4319 *b1++ = '0';
4320 for (i = 0; i < numdigits; i++)
4321 *b1++ = *buf++;
4322 *b1 = '\0';
4323 Py_DECREF(result);
4324 result = r1;
4325 buf = PyString_AS_STRING(result);
4326 len = numnondigits + prec;
4329 /* Fix up case for hex conversions. */
4330 if (type == 'X') {
4331 /* Need to convert all lower case letters to upper case.
4332 and need to convert 0x to 0X (and -0x to -0X). */
4333 for (i = 0; i < len; i++)
4334 if (buf[i] >= 'a' && buf[i] <= 'x')
4335 buf[i] -= 'a'-'A';
4337 *pbuf = buf;
4338 *plen = len;
4339 return result;
4342 Py_LOCAL_INLINE(int)
4343 formatint(char *buf, size_t buflen, int flags,
4344 int prec, int type, PyObject *v)
4346 /* fmt = '%#.' + `prec` + 'l' + `type`
4347 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4348 + 1 + 1 = 24 */
4349 char fmt[64]; /* plenty big enough! */
4350 char *sign;
4351 long x;
4353 x = PyInt_AsLong(v);
4354 if (x == -1 && PyErr_Occurred()) {
4355 PyErr_SetString(PyExc_TypeError, "int argument required");
4356 return -1;
4358 if (x < 0 && type == 'u') {
4359 type = 'd';
4361 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4362 sign = "-";
4363 else
4364 sign = "";
4365 if (prec < 0)
4366 prec = 1;
4368 if ((flags & F_ALT) &&
4369 (type == 'x' || type == 'X')) {
4370 /* When converting under %#x or %#X, there are a number
4371 * of issues that cause pain:
4372 * - when 0 is being converted, the C standard leaves off
4373 * the '0x' or '0X', which is inconsistent with other
4374 * %#x/%#X conversions and inconsistent with Python's
4375 * hex() function
4376 * - there are platforms that violate the standard and
4377 * convert 0 with the '0x' or '0X'
4378 * (Metrowerks, Compaq Tru64)
4379 * - there are platforms that give '0x' when converting
4380 * under %#X, but convert 0 in accordance with the
4381 * standard (OS/2 EMX)
4383 * We can achieve the desired consistency by inserting our
4384 * own '0x' or '0X' prefix, and substituting %x/%X in place
4385 * of %#x/%#X.
4387 * Note that this is the same approach as used in
4388 * formatint() in unicodeobject.c
4390 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4391 sign, type, prec, type);
4393 else {
4394 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4395 sign, (flags&F_ALT) ? "#" : "",
4396 prec, type);
4399 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4400 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4402 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4403 PyErr_SetString(PyExc_OverflowError,
4404 "formatted integer is too long (precision too large?)");
4405 return -1;
4407 if (sign[0])
4408 PyOS_snprintf(buf, buflen, fmt, -x);
4409 else
4410 PyOS_snprintf(buf, buflen, fmt, x);
4411 return (int)strlen(buf);
4414 Py_LOCAL_INLINE(int)
4415 formatchar(char *buf, size_t buflen, PyObject *v)
4417 /* presume that the buffer is at least 2 characters long */
4418 if (PyString_Check(v)) {
4419 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4420 return -1;
4422 else {
4423 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4424 return -1;
4426 buf[1] = '\0';
4427 return 1;
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.
*/
4438 #define FORMATBUFLEN (size_t)120
4440 PyObject *
4441 PyString_Format(PyObject *format, PyObject *args)
4443 char *fmt, *res;
4444 Py_ssize_t arglen, argidx;
4445 Py_ssize_t reslen, rescnt, fmtcnt;
4446 int args_owned = 0;
4447 PyObject *result, *orig_args;
4448 #ifdef Py_USING_UNICODE
4449 PyObject *v, *w;
4450 #endif
4451 PyObject *dict = NULL;
4452 if (format == NULL || !PyString_Check(format) || args == NULL) {
4453 PyErr_BadInternalCall();
4454 return NULL;
4456 orig_args = args;
4457 fmt = PyString_AS_STRING(format);
4458 fmtcnt = PyString_GET_SIZE(format);
4459 reslen = rescnt = fmtcnt + 100;
4460 result = PyString_FromStringAndSize((char *)NULL, reslen);
4461 if (result == NULL)
4462 return NULL;
4463 res = PyString_AsString(result);
4464 if (PyTuple_Check(args)) {
4465 arglen = PyTuple_GET_SIZE(args);
4466 argidx = 0;
4468 else {
4469 arglen = -1;
4470 argidx = -2;
4472 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4473 !PyObject_TypeCheck(args, &PyBaseString_Type))
4474 dict = args;
4475 while (--fmtcnt >= 0) {
4476 if (*fmt != '%') {
4477 if (--rescnt < 0) {
4478 rescnt = fmtcnt + 100;
4479 reslen += rescnt;
4480 if (_PyString_Resize(&result, reslen) < 0)
4481 return NULL;
4482 res = PyString_AS_STRING(result)
4483 + reslen - rescnt;
4484 --rescnt;
4486 *res++ = *fmt++;
4488 else {
4489 /* Got a format specifier */
4490 int flags = 0;
4491 Py_ssize_t width = -1;
4492 int prec = -1;
4493 int c = '\0';
4494 int fill;
4495 PyObject *v = NULL;
4496 PyObject *temp = NULL;
4497 char *pbuf;
4498 int sign;
4499 Py_ssize_t len;
4500 char formatbuf[FORMATBUFLEN];
4501 /* For format{float,int,char}() */
4502 #ifdef Py_USING_UNICODE
4503 char *fmt_start = fmt;
4504 Py_ssize_t argidx_start = argidx;
4505 #endif
4507 fmt++;
4508 if (*fmt == '(') {
4509 char *keystart;
4510 Py_ssize_t keylen;
4511 PyObject *key;
4512 int pcount = 1;
4514 if (dict == NULL) {
4515 PyErr_SetString(PyExc_TypeError,
4516 "format requires a mapping");
4517 goto error;
4519 ++fmt;
4520 --fmtcnt;
4521 keystart = fmt;
4522 /* Skip over balanced parentheses */
4523 while (pcount > 0 && --fmtcnt >= 0) {
4524 if (*fmt == ')')
4525 --pcount;
4526 else if (*fmt == '(')
4527 ++pcount;
4528 fmt++;
4530 keylen = fmt - keystart - 1;
4531 if (fmtcnt < 0 || pcount > 0) {
4532 PyErr_SetString(PyExc_ValueError,
4533 "incomplete format key");
4534 goto error;
4536 key = PyString_FromStringAndSize(keystart,
4537 keylen);
4538 if (key == NULL)
4539 goto error;
4540 if (args_owned) {
4541 Py_DECREF(args);
4542 args_owned = 0;
4544 args = PyObject_GetItem(dict, key);
4545 Py_DECREF(key);
4546 if (args == NULL) {
4547 goto error;
4549 args_owned = 1;
4550 arglen = -1;
4551 argidx = -2;
4553 while (--fmtcnt >= 0) {
4554 switch (c = *fmt++) {
4555 case '-': flags |= F_LJUST; continue;
4556 case '+': flags |= F_SIGN; continue;
4557 case ' ': flags |= F_BLANK; continue;
4558 case '#': flags |= F_ALT; continue;
4559 case '0': flags |= F_ZERO; continue;
4561 break;
4563 if (c == '*') {
4564 v = getnextarg(args, arglen, &argidx);
4565 if (v == NULL)
4566 goto error;
4567 if (!PyInt_Check(v)) {
4568 PyErr_SetString(PyExc_TypeError,
4569 "* wants int");
4570 goto error;
4572 width = PyInt_AsLong(v);
4573 if (width < 0) {
4574 flags |= F_LJUST;
4575 width = -width;
4577 if (--fmtcnt >= 0)
4578 c = *fmt++;
4580 else if (c >= 0 && isdigit(c)) {
4581 width = c - '0';
4582 while (--fmtcnt >= 0) {
4583 c = Py_CHARMASK(*fmt++);
4584 if (!isdigit(c))
4585 break;
4586 if ((width*10) / 10 != width) {
4587 PyErr_SetString(
4588 PyExc_ValueError,
4589 "width too big");
4590 goto error;
4592 width = width*10 + (c - '0');
4595 if (c == '.') {
4596 prec = 0;
4597 if (--fmtcnt >= 0)
4598 c = *fmt++;
4599 if (c == '*') {
4600 v = getnextarg(args, arglen, &argidx);
4601 if (v == NULL)
4602 goto error;
4603 if (!PyInt_Check(v)) {
4604 PyErr_SetString(
4605 PyExc_TypeError,
4606 "* wants int");
4607 goto error;
4609 prec = PyInt_AsLong(v);
4610 if (prec < 0)
4611 prec = 0;
4612 if (--fmtcnt >= 0)
4613 c = *fmt++;
4615 else if (c >= 0 && isdigit(c)) {
4616 prec = c - '0';
4617 while (--fmtcnt >= 0) {
4618 c = Py_CHARMASK(*fmt++);
4619 if (!isdigit(c))
4620 break;
4621 if ((prec*10) / 10 != prec) {
4622 PyErr_SetString(
4623 PyExc_ValueError,
4624 "prec too big");
4625 goto error;
4627 prec = prec*10 + (c - '0');
4630 } /* prec */
4631 if (fmtcnt >= 0) {
4632 if (c == 'h' || c == 'l' || c == 'L') {
4633 if (--fmtcnt >= 0)
4634 c = *fmt++;
4637 if (fmtcnt < 0) {
4638 PyErr_SetString(PyExc_ValueError,
4639 "incomplete format");
4640 goto error;
4642 if (c != '%') {
4643 v = getnextarg(args, arglen, &argidx);
4644 if (v == NULL)
4645 goto error;
4647 sign = 0;
4648 fill = ' ';
4649 switch (c) {
4650 case '%':
4651 pbuf = "%";
4652 len = 1;
4653 break;
4654 case 's':
4655 #ifdef Py_USING_UNICODE
4656 if (PyUnicode_Check(v)) {
4657 fmt = fmt_start;
4658 argidx = argidx_start;
4659 goto unicode;
4661 #endif
4662 temp = _PyObject_Str(v);
4663 #ifdef Py_USING_UNICODE
4664 if (temp != NULL && PyUnicode_Check(temp)) {
4665 Py_DECREF(temp);
4666 fmt = fmt_start;
4667 argidx = argidx_start;
4668 goto unicode;
4670 #endif
4671 /* Fall through */
4672 case 'r':
4673 if (c == 'r')
4674 temp = PyObject_Repr(v);
4675 if (temp == NULL)
4676 goto error;
4677 if (!PyString_Check(temp)) {
4678 PyErr_SetString(PyExc_TypeError,
4679 "%s argument has non-string str()");
4680 Py_DECREF(temp);
4681 goto error;
4683 pbuf = PyString_AS_STRING(temp);
4684 len = PyString_GET_SIZE(temp);
4685 if (prec >= 0 && len > prec)
4686 len = prec;
4687 break;
4688 case 'i':
4689 case 'd':
4690 case 'u':
4691 case 'o':
4692 case 'x':
4693 case 'X':
4694 if (c == 'i')
4695 c = 'd';
4696 if (PyLong_Check(v)) {
4697 int ilen;
4698 temp = _PyString_FormatLong(v, flags,
4699 prec, c, &pbuf, &ilen);
4700 len = ilen;
4701 if (!temp)
4702 goto error;
4703 sign = 1;
4705 else {
4706 pbuf = formatbuf;
4707 len = formatint(pbuf,
4708 sizeof(formatbuf),
4709 flags, prec, c, v);
4710 if (len < 0)
4711 goto error;
4712 sign = 1;
4714 if (flags & F_ZERO)
4715 fill = '0';
4716 break;
4717 case 'e':
4718 case 'E':
4719 case 'f':
4720 case 'F':
4721 case 'g':
4722 case 'G':
4723 if (c == 'F')
4724 c = 'f';
4725 pbuf = formatbuf;
4726 len = formatfloat(pbuf, sizeof(formatbuf),
4727 flags, prec, c, v);
4728 if (len < 0)
4729 goto error;
4730 sign = 1;
4731 if (flags & F_ZERO)
4732 fill = '0';
4733 break;
4734 case 'c':
4735 #ifdef Py_USING_UNICODE
4736 if (PyUnicode_Check(v)) {
4737 fmt = fmt_start;
4738 argidx = argidx_start;
4739 goto unicode;
4741 #endif
4742 pbuf = formatbuf;
4743 len = formatchar(pbuf, sizeof(formatbuf), v);
4744 if (len < 0)
4745 goto error;
4746 break;
4747 default:
4748 PyErr_Format(PyExc_ValueError,
4749 "unsupported format character '%c' (0x%x) "
4750 "at index %i",
4751 c, c,
4752 (int)(fmt - 1 - PyString_AsString(format)));
4753 goto error;
4755 if (sign) {
4756 if (*pbuf == '-' || *pbuf == '+') {
4757 sign = *pbuf++;
4758 len--;
4760 else if (flags & F_SIGN)
4761 sign = '+';
4762 else if (flags & F_BLANK)
4763 sign = ' ';
4764 else
4765 sign = 0;
4767 if (width < len)
4768 width = len;
4769 if (rescnt - (sign != 0) < width) {
4770 reslen -= rescnt;
4771 rescnt = width + fmtcnt + 100;
4772 reslen += rescnt;
4773 if (reslen < 0) {
4774 Py_DECREF(result);
4775 return PyErr_NoMemory();
4777 if (_PyString_Resize(&result, reslen) < 0)
4778 return NULL;
4779 res = PyString_AS_STRING(result)
4780 + reslen - rescnt;
4782 if (sign) {
4783 if (fill != ' ')
4784 *res++ = sign;
4785 rescnt--;
4786 if (width > len)
4787 width--;
4789 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4790 assert(pbuf[0] == '0');
4791 assert(pbuf[1] == c);
4792 if (fill != ' ') {
4793 *res++ = *pbuf++;
4794 *res++ = *pbuf++;
4796 rescnt -= 2;
4797 width -= 2;
4798 if (width < 0)
4799 width = 0;
4800 len -= 2;
4802 if (width > len && !(flags & F_LJUST)) {
4803 do {
4804 --rescnt;
4805 *res++ = fill;
4806 } while (--width > len);
4808 if (fill == ' ') {
4809 if (sign)
4810 *res++ = sign;
4811 if ((flags & F_ALT) &&
4812 (c == 'x' || c == 'X')) {
4813 assert(pbuf[0] == '0');
4814 assert(pbuf[1] == c);
4815 *res++ = *pbuf++;
4816 *res++ = *pbuf++;
4819 Py_MEMCPY(res, pbuf, len);
4820 res += len;
4821 rescnt -= len;
4822 while (--width >= len) {
4823 --rescnt;
4824 *res++ = ' ';
4826 if (dict && (argidx < arglen) && c != '%') {
4827 PyErr_SetString(PyExc_TypeError,
4828 "not all arguments converted during string formatting");
4829 goto error;
4831 Py_XDECREF(temp);
4832 } /* '%' */
4833 } /* until end */
4834 if (argidx < arglen && !dict) {
4835 PyErr_SetString(PyExc_TypeError,
4836 "not all arguments converted during string formatting");
4837 goto error;
4839 if (args_owned) {
4840 Py_DECREF(args);
4842 _PyString_Resize(&result, reslen - rescnt);
4843 return result;
4845 #ifdef Py_USING_UNICODE
4846 unicode:
4847 if (args_owned) {
4848 Py_DECREF(args);
4849 args_owned = 0;
4851 /* Fiddle args right (remove the first argidx arguments) */
4852 if (PyTuple_Check(orig_args) && argidx > 0) {
4853 PyObject *v;
4854 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4855 v = PyTuple_New(n);
4856 if (v == NULL)
4857 goto error;
4858 while (--n >= 0) {
4859 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4860 Py_INCREF(w);
4861 PyTuple_SET_ITEM(v, n, w);
4863 args = v;
4864 } else {
4865 Py_INCREF(orig_args);
4866 args = orig_args;
4868 args_owned = 1;
4869 /* Take what we have of the result and let the Unicode formatting
4870 function format the rest of the input. */
4871 rescnt = res - PyString_AS_STRING(result);
4872 if (_PyString_Resize(&result, rescnt))
4873 goto error;
4874 fmtcnt = PyString_GET_SIZE(format) - \
4875 (fmt - PyString_AS_STRING(format));
4876 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4877 if (format == NULL)
4878 goto error;
4879 v = PyUnicode_Format(format, args);
4880 Py_DECREF(format);
4881 if (v == NULL)
4882 goto error;
4883 /* Paste what we have (result) to what the Unicode formatting
4884 function returned (v) and return the result (or error) */
4885 w = PyUnicode_Concat(result, v);
4886 Py_DECREF(result);
4887 Py_DECREF(v);
4888 Py_DECREF(args);
4889 return w;
4890 #endif /* Py_USING_UNICODE */
4892 error:
4893 Py_DECREF(result);
4894 if (args_owned) {
4895 Py_DECREF(args);
4897 return NULL;
4900 void
4901 PyString_InternInPlace(PyObject **p)
4903 register PyStringObject *s = (PyStringObject *)(*p);
4904 PyObject *t;
4905 if (s == NULL || !PyString_Check(s))
4906 Py_FatalError("PyString_InternInPlace: strings only please!");
4907 /* If it's a string subclass, we don't really know what putting
4908 it in the interned dict might do. */
4909 if (!PyString_CheckExact(s))
4910 return;
4911 if (PyString_CHECK_INTERNED(s))
4912 return;
4913 if (interned == NULL) {
4914 interned = PyDict_New();
4915 if (interned == NULL) {
4916 PyErr_Clear(); /* Don't leave an exception */
4917 return;
4920 t = PyDict_GetItem(interned, (PyObject *)s);
4921 if (t) {
4922 Py_INCREF(t);
4923 Py_DECREF(*p);
4924 *p = t;
4925 return;
4928 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4929 PyErr_Clear();
4930 return;
4932 /* The two references in interned are not counted by refcnt.
4933 The string deallocator will take care of this */
4934 s->ob_refcnt -= 2;
4935 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4938 void
4939 PyString_InternImmortal(PyObject **p)
4941 PyString_InternInPlace(p);
4942 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4943 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4944 Py_INCREF(*p);
4949 PyObject *
4950 PyString_InternFromString(const char *cp)
4952 PyObject *s = PyString_FromString(cp);
4953 if (s == NULL)
4954 return NULL;
4955 PyString_InternInPlace(&s);
4956 return s;
4959 void
4960 PyString_Fini(void)
4962 int i;
4963 for (i = 0; i < UCHAR_MAX + 1; i++) {
4964 Py_XDECREF(characters[i]);
4965 characters[i] = NULL;
4967 Py_XDECREF(nullstring);
4968 nullstring = NULL;
4971 void _Py_ReleaseInternedStrings(void)
4973 PyObject *keys;
4974 PyStringObject *s;
4975 Py_ssize_t i, n;
4977 if (interned == NULL || !PyDict_Check(interned))
4978 return;
4979 keys = PyDict_Keys(interned);
4980 if (keys == NULL || !PyList_Check(keys)) {
4981 PyErr_Clear();
4982 return;
4985 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4986 detector, interned strings are not forcibly deallocated; rather, we
4987 give them their stolen references back, and then clear and DECREF
4988 the interned dict. */
4990 fprintf(stderr, "releasing interned strings\n");
4991 n = PyList_GET_SIZE(keys);
4992 for (i = 0; i < n; i++) {
4993 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4994 switch (s->ob_sstate) {
4995 case SSTATE_NOT_INTERNED:
4996 /* XXX Shouldn't happen */
4997 break;
4998 case SSTATE_INTERNED_IMMORTAL:
4999 s->ob_refcnt += 1;
5000 break;
5001 case SSTATE_INTERNED_MORTAL:
5002 s->ob_refcnt += 2;
5003 break;
5004 default:
5005 Py_FatalError("Inconsistent interned string state.");
5007 s->ob_sstate = SSTATE_NOT_INTERNED;
5009 Py_DECREF(keys);
5010 PyDict_Clear(interned);
5011 Py_DECREF(interned);
5012 interned = NULL;