Bug 1277: make Maildir use the user-provided factory instead of hard-wiring MaildirMe...
[pytest.git] / Objects / stringobject.c
blob6673f670a0ab549cc2da69dcc9f700dd73180fa3
1 /* String object implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "Python.h"
7 #include <ctype.h>
9 #ifdef COUNT_ALLOCS
10 int null_strings, one_strings;
11 #endif
13 static PyStringObject *characters[UCHAR_MAX + 1];
14 static PyStringObject *nullstring;
16 /* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
24 static PyObject *interned;
27 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
29 null terminating character.
31 For PyString_FromString(), the parameter `str' points to a null-terminated
32 string containing exactly `size' bytes.
34 For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
45 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
52 PyObject *
53 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
55 register PyStringObject *op;
56 assert(size >= 0);
57 if (size == 0 && (op = nullstring) != NULL) {
58 #ifdef COUNT_ALLOCS
59 null_strings++;
60 #endif
61 Py_INCREF(op);
62 return (PyObject *)op;
64 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
67 #ifdef COUNT_ALLOCS
68 one_strings++;
69 #endif
70 Py_INCREF(op);
71 return (PyObject *)op;
74 /* Inline PyObject_NewVar */
75 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
76 if (op == NULL)
77 return PyErr_NoMemory();
78 PyObject_INIT_VAR(op, &PyString_Type, size);
79 op->ob_shash = -1;
80 op->ob_sstate = SSTATE_NOT_INTERNED;
81 if (str != NULL)
82 Py_MEMCPY(op->ob_sval, str, size);
83 op->ob_sval[size] = '\0';
84 /* share short strings */
85 if (size == 0) {
86 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
88 op = (PyStringObject *)t;
89 nullstring = op;
90 Py_INCREF(op);
91 } else if (size == 1 && str != NULL) {
92 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
94 op = (PyStringObject *)t;
95 characters[*str & UCHAR_MAX] = op;
96 Py_INCREF(op);
98 return (PyObject *) op;
101 PyObject *
102 PyString_FromString(const char *str)
104 register size_t size;
105 register PyStringObject *op;
107 assert(str != NULL);
108 size = strlen(str);
109 if (size > PY_SSIZE_T_MAX) {
110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
114 if (size == 0 && (op = nullstring) != NULL) {
115 #ifdef COUNT_ALLOCS
116 null_strings++;
117 #endif
118 Py_INCREF(op);
119 return (PyObject *)op;
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122 #ifdef COUNT_ALLOCS
123 one_strings++;
124 #endif
125 Py_INCREF(op);
126 return (PyObject *)op;
129 /* Inline PyObject_NewVar */
130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
131 if (op == NULL)
132 return PyErr_NoMemory();
133 PyObject_INIT_VAR(op, &PyString_Type, size);
134 op->ob_shash = -1;
135 op->ob_sstate = SSTATE_NOT_INTERNED;
136 Py_MEMCPY(op->ob_sval, str, size+1);
137 /* share short strings */
138 if (size == 0) {
139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
141 op = (PyStringObject *)t;
142 nullstring = op;
143 Py_INCREF(op);
144 } else if (size == 1) {
145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
147 op = (PyStringObject *)t;
148 characters[*str & UCHAR_MAX] = op;
149 Py_INCREF(op);
151 return (PyObject *) op;
154 PyObject *
155 PyString_FromFormatV(const char *format, va_list vargs)
157 va_list count;
158 Py_ssize_t n = 0;
159 const char* f;
160 char *s;
161 PyObject* string;
163 #ifdef VA_LIST_IS_ARRAY
164 Py_MEMCPY(count, vargs, sizeof(va_list));
165 #else
166 #ifdef __va_copy
167 __va_copy(count, vargs);
168 #else
169 count = vargs;
170 #endif
171 #endif
172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
184 ++f;
186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
193 case 'd': case 'u': case 'i': case 'x':
194 (void) va_arg(count, int);
195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
209 * XXX I count 18 -- what's the extra for?
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
219 what's in the argument list) */
220 n += strlen(p);
221 goto expand;
223 } else
224 n++;
226 expand:
227 /* step 2: fill the buffer */
228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
234 s = PyString_AsString(string);
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
239 Py_ssize_t i;
240 int longflag = 0;
241 int size_tflag = 0;
242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
258 longflag = 1;
259 ++f;
261 /* handle the size_t flag. */
262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
263 size_tflag = 1;
264 ++f;
267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
274 else if (size_tflag)
275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
306 Py_MEMCPY(s, p, i);
307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
329 } else
330 *s++ = *f;
333 end:
334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
335 return string;
338 PyObject *
339 PyString_FromFormat(const char *format, ...)
341 PyObject* ret;
342 va_list vargs;
344 #ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346 #else
347 va_start(vargs);
348 #endif
349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
355 PyObject *PyString_Decode(const char *s,
356 Py_ssize_t size,
357 const char *encoding,
358 const char *errors)
360 PyObject *v, *str;
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
370 PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
374 PyObject *v;
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
381 if (encoding == NULL) {
382 #ifdef Py_USING_UNICODE
383 encoding = PyUnicode_GetDefaultEncoding();
384 #else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387 #endif
390 /* Decode via the codec registry */
391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
393 goto onError;
395 return v;
397 onError:
398 return NULL;
401 PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
405 PyObject *v;
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
411 #ifdef Py_USING_UNICODE
412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
420 #endif
421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
429 return v;
431 onError:
432 return NULL;
435 PyObject *PyString_Encode(const char *s,
436 Py_ssize_t size,
437 const char *encoding,
438 const char *errors)
440 PyObject *v, *str;
442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
450 PyObject *PyString_AsEncodedObject(PyObject *str,
451 const char *encoding,
452 const char *errors)
454 PyObject *v;
456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
461 if (encoding == NULL) {
462 #ifdef Py_USING_UNICODE
463 encoding = PyUnicode_GetDefaultEncoding();
464 #else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467 #endif
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
475 return v;
477 onError:
478 return NULL;
481 PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
485 PyObject *v;
487 v = PyString_AsEncodedObject(str, encoding, errors);
488 if (v == NULL)
489 goto onError;
491 #ifdef Py_USING_UNICODE
492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
500 #endif
501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
509 return v;
511 onError:
512 return NULL;
515 static void
516 string_dealloc(PyObject *op)
518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
533 default:
534 Py_FatalError("Inconsistent interned string state.");
536 op->ob_type->tp_free(op);
539 /* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
544 PyObject *PyString_DecodeEscape(const char *s,
545 Py_ssize_t len,
546 const char *errors,
547 Py_ssize_t unicode,
548 const char *recode_encoding)
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
555 v = PyString_FromStringAndSize((char *)NULL, newlen);
556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
562 non_esc:
563 #ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
568 Py_ssize_t rn;
569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
581 /* Append bytes to output buffer. */
582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
585 Py_MEMCPY(p, r, rn);
586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
592 #else
593 *p++ = *s++;
594 #endif
595 continue;
597 s++;
598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if (s < end && '0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if (s < end && '0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
624 *p++ = c;
625 break;
626 case 'x':
627 if (s+1 < end &&
628 isxdigit(Py_CHARMASK(s[0])) &&
629 isxdigit(Py_CHARMASK(s[1])))
631 unsigned int x = 0;
632 c = Py_CHARMASK(*s);
633 s++;
634 if (isdigit(c))
635 x = c - '0';
636 else if (islower(c))
637 x = 10 + c - 'a';
638 else
639 x = 10 + c - 'A';
640 x = x << 4;
641 c = Py_CHARMASK(*s);
642 s++;
643 if (isdigit(c))
644 x += c - '0';
645 else if (islower(c))
646 x += 10 + c - 'a';
647 else
648 x += 10 + c - 'A';
649 *p++ = x;
650 break;
652 if (!errors || strcmp(errors, "strict") == 0) {
653 PyErr_SetString(PyExc_ValueError,
654 "invalid \\x escape");
655 goto failed;
657 if (strcmp(errors, "replace") == 0) {
658 *p++ = '?';
659 } else if (strcmp(errors, "ignore") == 0)
660 /* do nothing */;
661 else {
662 PyErr_Format(PyExc_ValueError,
663 "decoding error; "
664 "unknown error handling code: %.400s",
665 errors);
666 goto failed;
668 #ifndef Py_USING_UNICODE
669 case 'u':
670 case 'U':
671 case 'N':
672 if (unicode) {
673 PyErr_SetString(PyExc_ValueError,
674 "Unicode escapes not legal "
675 "when Unicode disabled");
676 goto failed;
678 #endif
679 default:
680 *p++ = '\\';
681 s--;
682 goto non_esc; /* an arbitry number of unescaped
683 UTF-8 bytes may follow. */
686 if (p-buf < newlen)
687 _PyString_Resize(&v, p - buf);
688 return v;
689 failed:
690 Py_DECREF(v);
691 return NULL;
694 /* -------------------------------------------------------------------- */
695 /* object api */
697 static Py_ssize_t
698 string_getsize(register PyObject *op)
700 char *s;
701 Py_ssize_t len;
702 if (PyString_AsStringAndSize(op, &s, &len))
703 return -1;
704 return len;
707 static /*const*/ char *
708 string_getbuffer(register PyObject *op)
710 char *s;
711 Py_ssize_t len;
712 if (PyString_AsStringAndSize(op, &s, &len))
713 return NULL;
714 return s;
717 Py_ssize_t
718 PyString_Size(register PyObject *op)
720 if (!PyString_Check(op))
721 return string_getsize(op);
722 return ((PyStringObject *)op) -> ob_size;
725 /*const*/ char *
726 PyString_AsString(register PyObject *op)
728 if (!PyString_Check(op))
729 return string_getbuffer(op);
730 return ((PyStringObject *)op) -> ob_sval;
734 PyString_AsStringAndSize(register PyObject *obj,
735 register char **s,
736 register Py_ssize_t *len)
738 if (s == NULL) {
739 PyErr_BadInternalCall();
740 return -1;
743 if (!PyString_Check(obj)) {
744 #ifdef Py_USING_UNICODE
745 if (PyUnicode_Check(obj)) {
746 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
747 if (obj == NULL)
748 return -1;
750 else
751 #endif
753 PyErr_Format(PyExc_TypeError,
754 "expected string or Unicode object, "
755 "%.200s found", obj->ob_type->tp_name);
756 return -1;
760 *s = PyString_AS_STRING(obj);
761 if (len != NULL)
762 *len = PyString_GET_SIZE(obj);
763 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
764 PyErr_SetString(PyExc_TypeError,
765 "expected string without null bytes");
766 return -1;
768 return 0;
771 /* -------------------------------------------------------------------- */
772 /* Methods */
774 #define STRINGLIB_CHAR char
776 #define STRINGLIB_CMP memcmp
777 #define STRINGLIB_LEN PyString_GET_SIZE
778 #define STRINGLIB_NEW PyString_FromStringAndSize
779 #define STRINGLIB_STR PyString_AS_STRING
781 #define STRINGLIB_EMPTY nullstring
783 #include "stringlib/fastsearch.h"
785 #include "stringlib/count.h"
786 #include "stringlib/find.h"
787 #include "stringlib/partition.h"
790 static int
791 string_print(PyStringObject *op, FILE *fp, int flags)
793 Py_ssize_t i;
794 char c;
795 int quote;
797 /* XXX Ought to check for interrupts when writing long strings */
798 if (! PyString_CheckExact(op)) {
799 int ret;
800 /* A str subclass may have its own __str__ method. */
801 op = (PyStringObject *) PyObject_Str((PyObject *)op);
802 if (op == NULL)
803 return -1;
804 ret = string_print(op, fp, flags);
805 Py_DECREF(op);
806 return ret;
808 if (flags & Py_PRINT_RAW) {
809 char *data = op->ob_sval;
810 Py_ssize_t size = op->ob_size;
811 while (size > INT_MAX) {
812 /* Very long strings cannot be written atomically.
813 * But don't write exactly INT_MAX bytes at a time
814 * to avoid memory aligment issues.
816 const int chunk_size = INT_MAX & ~0x3FFF;
817 fwrite(data, 1, chunk_size, fp);
818 data += chunk_size;
819 size -= chunk_size;
821 #ifdef __VMS
822 if (size) fwrite(data, (int)size, 1, fp);
823 #else
824 fwrite(data, 1, (int)size, fp);
825 #endif
826 return 0;
829 /* figure out which quote to use; single is preferred */
830 quote = '\'';
831 if (memchr(op->ob_sval, '\'', op->ob_size) &&
832 !memchr(op->ob_sval, '"', op->ob_size))
833 quote = '"';
835 fputc(quote, fp);
836 for (i = 0; i < op->ob_size; i++) {
837 c = op->ob_sval[i];
838 if (c == quote || c == '\\')
839 fprintf(fp, "\\%c", c);
840 else if (c == '\t')
841 fprintf(fp, "\\t");
842 else if (c == '\n')
843 fprintf(fp, "\\n");
844 else if (c == '\r')
845 fprintf(fp, "\\r");
846 else if (c < ' ' || c >= 0x7f)
847 fprintf(fp, "\\x%02x", c & 0xff);
848 else
849 fputc(c, fp);
851 fputc(quote, fp);
852 return 0;
855 PyObject *
856 PyString_Repr(PyObject *obj, int smartquotes)
858 register PyStringObject* op = (PyStringObject*) obj;
859 size_t newsize = 2 + 4 * op->ob_size;
860 PyObject *v;
861 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
862 PyErr_SetString(PyExc_OverflowError,
863 "string is too large to make repr");
864 return NULL;
866 v = PyString_FromStringAndSize((char *)NULL, newsize);
867 if (v == NULL) {
868 return NULL;
870 else {
871 register Py_ssize_t i;
872 register char c;
873 register char *p;
874 int quote;
876 /* figure out which quote to use; single is preferred */
877 quote = '\'';
878 if (smartquotes &&
879 memchr(op->ob_sval, '\'', op->ob_size) &&
880 !memchr(op->ob_sval, '"', op->ob_size))
881 quote = '"';
883 p = PyString_AS_STRING(v);
884 *p++ = quote;
885 for (i = 0; i < op->ob_size; i++) {
886 /* There's at least enough room for a hex escape
887 and a closing quote. */
888 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
889 c = op->ob_sval[i];
890 if (c == quote || c == '\\')
891 *p++ = '\\', *p++ = c;
892 else if (c == '\t')
893 *p++ = '\\', *p++ = 't';
894 else if (c == '\n')
895 *p++ = '\\', *p++ = 'n';
896 else if (c == '\r')
897 *p++ = '\\', *p++ = 'r';
898 else if (c < ' ' || c >= 0x7f) {
899 /* For performance, we don't want to call
900 PyOS_snprintf here (extra layers of
901 function call). */
902 sprintf(p, "\\x%02x", c & 0xff);
903 p += 4;
905 else
906 *p++ = c;
908 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
909 *p++ = quote;
910 *p = '\0';
911 _PyString_Resize(
912 &v, (p - PyString_AS_STRING(v)));
913 return v;
917 static PyObject *
918 string_repr(PyObject *op)
920 return PyString_Repr(op, 1);
923 static PyObject *
924 string_str(PyObject *s)
926 assert(PyString_Check(s));
927 if (PyString_CheckExact(s)) {
928 Py_INCREF(s);
929 return s;
931 else {
932 /* Subtype -- return genuine string with the same value. */
933 PyStringObject *t = (PyStringObject *) s;
934 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
938 static Py_ssize_t
939 string_length(PyStringObject *a)
941 return a->ob_size;
944 static PyObject *
945 string_concat(register PyStringObject *a, register PyObject *bb)
947 register Py_ssize_t size;
948 register PyStringObject *op;
949 if (!PyString_Check(bb)) {
950 #ifdef Py_USING_UNICODE
951 if (PyUnicode_Check(bb))
952 return PyUnicode_Concat((PyObject *)a, bb);
953 #endif
954 PyErr_Format(PyExc_TypeError,
955 "cannot concatenate 'str' and '%.200s' objects",
956 bb->ob_type->tp_name);
957 return NULL;
959 #define b ((PyStringObject *)bb)
960 /* Optimize cases with empty left or right operand */
961 if ((a->ob_size == 0 || b->ob_size == 0) &&
962 PyString_CheckExact(a) && PyString_CheckExact(b)) {
963 if (a->ob_size == 0) {
964 Py_INCREF(bb);
965 return bb;
967 Py_INCREF(a);
968 return (PyObject *)a;
970 size = a->ob_size + b->ob_size;
971 if (size < 0) {
972 PyErr_SetString(PyExc_OverflowError,
973 "strings are too large to concat");
974 return NULL;
977 /* Inline PyObject_NewVar */
978 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
979 if (op == NULL)
980 return PyErr_NoMemory();
981 PyObject_INIT_VAR(op, &PyString_Type, size);
982 op->ob_shash = -1;
983 op->ob_sstate = SSTATE_NOT_INTERNED;
984 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
985 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
986 op->ob_sval[size] = '\0';
987 return (PyObject *) op;
988 #undef b
991 static PyObject *
992 string_repeat(register PyStringObject *a, register Py_ssize_t n)
994 register Py_ssize_t i;
995 register Py_ssize_t j;
996 register Py_ssize_t size;
997 register PyStringObject *op;
998 size_t nbytes;
999 if (n < 0)
1000 n = 0;
1001 /* watch out for overflows: the size can overflow int,
1002 * and the # of bytes needed can overflow size_t
1004 size = a->ob_size * n;
1005 if (n && size / n != a->ob_size) {
1006 PyErr_SetString(PyExc_OverflowError,
1007 "repeated string is too long");
1008 return NULL;
1010 if (size == a->ob_size && PyString_CheckExact(a)) {
1011 Py_INCREF(a);
1012 return (PyObject *)a;
1014 nbytes = (size_t)size;
1015 if (nbytes + sizeof(PyStringObject) <= nbytes) {
1016 PyErr_SetString(PyExc_OverflowError,
1017 "repeated string is too long");
1018 return NULL;
1020 op = (PyStringObject *)
1021 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1022 if (op == NULL)
1023 return PyErr_NoMemory();
1024 PyObject_INIT_VAR(op, &PyString_Type, size);
1025 op->ob_shash = -1;
1026 op->ob_sstate = SSTATE_NOT_INTERNED;
1027 op->ob_sval[size] = '\0';
1028 if (a->ob_size == 1 && n > 0) {
1029 memset(op->ob_sval, a->ob_sval[0] , n);
1030 return (PyObject *) op;
1032 i = 0;
1033 if (i < size) {
1034 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
1035 i = a->ob_size;
1037 while (i < size) {
1038 j = (i <= size-i) ? i : size-i;
1039 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1040 i += j;
1042 return (PyObject *) op;
1045 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1047 static PyObject *
1048 string_slice(register PyStringObject *a, register Py_ssize_t i,
1049 register Py_ssize_t j)
1050 /* j -- may be negative! */
1052 if (i < 0)
1053 i = 0;
1054 if (j < 0)
1055 j = 0; /* Avoid signed/unsigned bug in next line */
1056 if (j > a->ob_size)
1057 j = a->ob_size;
1058 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1059 /* It's the same as a */
1060 Py_INCREF(a);
1061 return (PyObject *)a;
1063 if (j < i)
1064 j = i;
1065 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1068 static int
1069 string_contains(PyObject *str_obj, PyObject *sub_obj)
1071 if (!PyString_CheckExact(sub_obj)) {
1072 #ifdef Py_USING_UNICODE
1073 if (PyUnicode_Check(sub_obj))
1074 return PyUnicode_Contains(str_obj, sub_obj);
1075 #endif
1076 if (!PyString_Check(sub_obj)) {
1077 PyErr_SetString(PyExc_TypeError,
1078 "'in <string>' requires string as left operand");
1079 return -1;
1083 return stringlib_contains_obj(str_obj, sub_obj);
1086 static PyObject *
1087 string_item(PyStringObject *a, register Py_ssize_t i)
1089 char pchar;
1090 PyObject *v;
1091 if (i < 0 || i >= a->ob_size) {
1092 PyErr_SetString(PyExc_IndexError, "string index out of range");
1093 return NULL;
1095 pchar = a->ob_sval[i];
1096 v = (PyObject *)characters[pchar & UCHAR_MAX];
1097 if (v == NULL)
1098 v = PyString_FromStringAndSize(&pchar, 1);
1099 else {
1100 #ifdef COUNT_ALLOCS
1101 one_strings++;
1102 #endif
1103 Py_INCREF(v);
1105 return v;
1108 static PyObject*
1109 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1111 int c;
1112 Py_ssize_t len_a, len_b;
1113 Py_ssize_t min_len;
1114 PyObject *result;
1116 /* Make sure both arguments are strings. */
1117 if (!(PyString_Check(a) && PyString_Check(b))) {
1118 result = Py_NotImplemented;
1119 goto out;
1121 if (a == b) {
1122 switch (op) {
1123 case Py_EQ:case Py_LE:case Py_GE:
1124 result = Py_True;
1125 goto out;
1126 case Py_NE:case Py_LT:case Py_GT:
1127 result = Py_False;
1128 goto out;
1131 if (op == Py_EQ) {
1132 /* Supporting Py_NE here as well does not save
1133 much time, since Py_NE is rarely used. */
1134 if (a->ob_size == b->ob_size
1135 && (a->ob_sval[0] == b->ob_sval[0]
1136 && memcmp(a->ob_sval, b->ob_sval,
1137 a->ob_size) == 0)) {
1138 result = Py_True;
1139 } else {
1140 result = Py_False;
1142 goto out;
1144 len_a = a->ob_size; len_b = b->ob_size;
1145 min_len = (len_a < len_b) ? len_a : len_b;
1146 if (min_len > 0) {
1147 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1148 if (c==0)
1149 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1150 }else
1151 c = 0;
1152 if (c == 0)
1153 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1154 switch (op) {
1155 case Py_LT: c = c < 0; break;
1156 case Py_LE: c = c <= 0; break;
1157 case Py_EQ: assert(0); break; /* unreachable */
1158 case Py_NE: c = c != 0; break;
1159 case Py_GT: c = c > 0; break;
1160 case Py_GE: c = c >= 0; break;
1161 default:
1162 result = Py_NotImplemented;
1163 goto out;
1165 result = c ? Py_True : Py_False;
1166 out:
1167 Py_INCREF(result);
1168 return result;
1172 _PyString_Eq(PyObject *o1, PyObject *o2)
1174 PyStringObject *a = (PyStringObject*) o1;
1175 PyStringObject *b = (PyStringObject*) o2;
1176 return a->ob_size == b->ob_size
1177 && *a->ob_sval == *b->ob_sval
1178 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
1181 static long
1182 string_hash(PyStringObject *a)
1184 register Py_ssize_t len;
1185 register unsigned char *p;
1186 register long x;
1188 if (a->ob_shash != -1)
1189 return a->ob_shash;
1190 len = a->ob_size;
1191 p = (unsigned char *) a->ob_sval;
1192 x = *p << 7;
1193 while (--len >= 0)
1194 x = (1000003*x) ^ *p++;
1195 x ^= a->ob_size;
1196 if (x == -1)
1197 x = -2;
1198 a->ob_shash = x;
1199 return x;
1202 static PyObject*
1203 string_subscript(PyStringObject* self, PyObject* item)
1205 if (PyIndex_Check(item)) {
1206 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1207 if (i == -1 && PyErr_Occurred())
1208 return NULL;
1209 if (i < 0)
1210 i += PyString_GET_SIZE(self);
1211 return string_item(self, i);
1213 else if (PySlice_Check(item)) {
1214 Py_ssize_t start, stop, step, slicelength, cur, i;
1215 char* source_buf;
1216 char* result_buf;
1217 PyObject* result;
1219 if (PySlice_GetIndicesEx((PySliceObject*)item,
1220 PyString_GET_SIZE(self),
1221 &start, &stop, &step, &slicelength) < 0) {
1222 return NULL;
1225 if (slicelength <= 0) {
1226 return PyString_FromStringAndSize("", 0);
1228 else {
1229 source_buf = PyString_AsString((PyObject*)self);
1230 result_buf = (char *)PyMem_Malloc(slicelength);
1231 if (result_buf == NULL)
1232 return PyErr_NoMemory();
1234 for (cur = start, i = 0; i < slicelength;
1235 cur += step, i++) {
1236 result_buf[i] = source_buf[cur];
1239 result = PyString_FromStringAndSize(result_buf,
1240 slicelength);
1241 PyMem_Free(result_buf);
1242 return result;
1245 else {
1246 PyErr_SetString(PyExc_TypeError,
1247 "string indices must be integers");
1248 return NULL;
1252 static Py_ssize_t
1253 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1255 if ( index != 0 ) {
1256 PyErr_SetString(PyExc_SystemError,
1257 "accessing non-existent string segment");
1258 return -1;
1260 *ptr = (void *)self->ob_sval;
1261 return self->ob_size;
1264 static Py_ssize_t
1265 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1267 PyErr_SetString(PyExc_TypeError,
1268 "Cannot use string as modifiable buffer");
1269 return -1;
1272 static Py_ssize_t
1273 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1275 if ( lenp )
1276 *lenp = self->ob_size;
1277 return 1;
1280 static Py_ssize_t
1281 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1283 if ( index != 0 ) {
1284 PyErr_SetString(PyExc_SystemError,
1285 "accessing non-existent string segment");
1286 return -1;
1288 *ptr = self->ob_sval;
1289 return self->ob_size;
1292 static PySequenceMethods string_as_sequence = {
1293 (lenfunc)string_length, /*sq_length*/
1294 (binaryfunc)string_concat, /*sq_concat*/
1295 (ssizeargfunc)string_repeat, /*sq_repeat*/
1296 (ssizeargfunc)string_item, /*sq_item*/
1297 (ssizessizeargfunc)string_slice, /*sq_slice*/
1298 0, /*sq_ass_item*/
1299 0, /*sq_ass_slice*/
1300 (objobjproc)string_contains /*sq_contains*/
1303 static PyMappingMethods string_as_mapping = {
1304 (lenfunc)string_length,
1305 (binaryfunc)string_subscript,
1309 static PyBufferProcs string_as_buffer = {
1310 (readbufferproc)string_buffer_getreadbuf,
1311 (writebufferproc)string_buffer_getwritebuf,
1312 (segcountproc)string_buffer_getsegcount,
1313 (charbufferproc)string_buffer_getcharbuf,
/* Strip direction selectors; they double as indices into stripformat[]. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Argument-parsing format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for selector i: the format string minus its 3-char "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i] + 3)
/* True when the `length` bytes of `target` starting at `offset` equal
   `pattern`.  The first and last bytes are compared directly before
   memcmp() checks the interior.

   Don't call if length < 2: the interior length (length-2) would be
   negative.

   Every parameter is parenthesized in the expansion so that expression
   arguments (e.g. `buf + 1` as the target) expand correctly. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset) + (length) - 1] == (pattern)[(length) - 1] &&    \
     !memcmp((target) + (offset) + 1, (pattern) + 1, (length) - 2))
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits:
   maxsplit+1 elements (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The parameter is parenthesized so expression arguments expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
/* Build the substring data[left:right] and append it to `list`.
   Relies on the enclosing function providing the locals `str` and `list`
   and an `onError` label, which is jumped to when either the allocation or
   the append fails.  The temporary reference in `str` is always released.
   The body is enclosed in braces (as SPLIT_ADD is) so the macro acts as a
   single statement. */
#define SPLIT_APPEND(data, left, right) {                           \
    str = PyString_FromStringAndSize((data) + (left),               \
                                     (right) - (left));             \
    if (str == NULL)                                                \
        goto onError;                                               \
    if (PyList_Append(list, str)) {                                 \
        Py_DECREF(str);                                             \
        goto onError;                                               \
    }                                                               \
    else {                                                          \
        Py_DECREF(str);                                             \
    } }
/* Build the substring data[left:right] and store it as element `count` of
   `list`, then increment `count`.  While count is below MAX_PREALLOC the
   new reference is placed directly into the list slot; after that the item
   is appended and the temporary reference released.  Relies on the
   enclosing function providing `str`, `list`, `count` and an `onError`
   label. */
#define SPLIT_ADD(data, left, right) {                              \
    str = PyString_FromStringAndSize((data) + (left),               \
                                     (right) - (left));             \
    if (str == NULL)                                                \
        goto onError;                                               \
    if (count >= MAX_PREALLOC) {                                    \
        int split_add_failed = PyList_Append(list, str);            \
        Py_DECREF(str);                                             \
        if (split_add_failed)                                       \
            goto onError;                                           \
    }                                                               \
    else {                                                          \
        PyList_SET_ITEM(list, count, str);                          \
    }                                                               \
    count++; }
/* Always force the list to the expected size: overwrite the list's ob_size
   with the enclosing function's `count`. */
#define FIX_PREALLOC_SIZE(list) \
    (((PyListObject *)list)->ob_size = count)
/* Cursor helpers: each advances (or, for the R variants, retreats) the
   index variable `i` in place while the character at s[i] is / is not
   whitespace, stopping at `len` (forward) or below 0 (backward). */
#define SKIP_SPACE(s, i, len) \
    { for (; i<len && isspace(Py_CHARMASK(s[i])); i++) ; }
#define SKIP_NONSPACE(s, i, len) \
    { for (; i<len && !isspace(Py_CHARMASK(s[i])); i++) ; }
#define RSKIP_SPACE(s, i) \
    { for (; i>=0 && isspace(Py_CHARMASK(s[i])); i--) ; }
#define RSKIP_NONSPACE(s, i) \
    { for (; i>=0 && !isspace(Py_CHARMASK(s[i])); i--) ; }
1385 Py_LOCAL_INLINE(PyObject *)
1386 split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1388 Py_ssize_t i, j, count=0;
1389 PyObject *str;
1390 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1392 if (list == NULL)
1393 return NULL;
1395 i = j = 0;
1397 while (maxsplit-- > 0) {
1398 SKIP_SPACE(s, i, len);
1399 if (i==len) break;
1400 j = i; i++;
1401 SKIP_NONSPACE(s, i, len);
1402 SPLIT_ADD(s, j, i);
1405 if (i < len) {
1406 /* Only occurs when maxsplit was reached */
1407 /* Skip any remaining whitespace and copy to end of string */
1408 SKIP_SPACE(s, i, len);
1409 if (i != len)
1410 SPLIT_ADD(s, i, len);
1412 FIX_PREALLOC_SIZE(list);
1413 return list;
1414 onError:
1415 Py_DECREF(list);
1416 return NULL;
1419 Py_LOCAL_INLINE(PyObject *)
1420 split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1422 register Py_ssize_t i, j, count=0;
1423 PyObject *str;
1424 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1426 if (list == NULL)
1427 return NULL;
1429 i = j = 0;
1430 while ((j < len) && (maxcount-- > 0)) {
1431 for(; j<len; j++) {
1432 /* I found that using memchr makes no difference */
1433 if (s[j] == ch) {
1434 SPLIT_ADD(s, i, j);
1435 i = j = j + 1;
1436 break;
1440 if (i <= len) {
1441 SPLIT_ADD(s, i, len);
1443 FIX_PREALLOC_SIZE(list);
1444 return list;
1446 onError:
1447 Py_DECREF(list);
1448 return NULL;
1451 PyDoc_STRVAR(split__doc__,
1452 "S.split([sep [,maxsplit]]) -> list of strings\n\
1454 Return a list of the words in the string S, using sep as the\n\
1455 delimiter string. If maxsplit is given, at most maxsplit\n\
1456 splits are done. If sep is not specified or is None, any\n\
1457 whitespace string is a separator.");
1459 static PyObject *
1460 string_split(PyStringObject *self, PyObject *args)
1462 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1463 Py_ssize_t maxsplit = -1, count=0;
1464 const char *s = PyString_AS_STRING(self), *sub;
1465 PyObject *list, *str, *subobj = Py_None;
1466 #ifdef USE_FAST
1467 Py_ssize_t pos;
1468 #endif
1470 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1471 return NULL;
1472 if (maxsplit < 0)
1473 maxsplit = PY_SSIZE_T_MAX;
1474 if (subobj == Py_None)
1475 return split_whitespace(s, len, maxsplit);
1476 if (PyString_Check(subobj)) {
1477 sub = PyString_AS_STRING(subobj);
1478 n = PyString_GET_SIZE(subobj);
1480 #ifdef Py_USING_UNICODE
1481 else if (PyUnicode_Check(subobj))
1482 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1483 #endif
1484 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1485 return NULL;
1487 if (n == 0) {
1488 PyErr_SetString(PyExc_ValueError, "empty separator");
1489 return NULL;
1491 else if (n == 1)
1492 return split_char(s, len, sub[0], maxsplit);
1494 list = PyList_New(PREALLOC_SIZE(maxsplit));
1495 if (list == NULL)
1496 return NULL;
1498 #ifdef USE_FAST
1499 i = j = 0;
1500 while (maxsplit-- > 0) {
1501 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1502 if (pos < 0)
1503 break;
1504 j = i+pos;
1505 SPLIT_ADD(s, i, j);
1506 i = j + n;
1508 #else
1509 i = j = 0;
1510 while ((j+n <= len) && (maxsplit-- > 0)) {
1511 for (; j+n <= len; j++) {
1512 if (Py_STRING_MATCH(s, j, sub, n)) {
1513 SPLIT_ADD(s, i, j);
1514 i = j = j + n;
1515 break;
1519 #endif
1520 SPLIT_ADD(s, i, len);
1521 FIX_PREALLOC_SIZE(list);
1522 return list;
1524 onError:
1525 Py_DECREF(list);
1526 return NULL;
1529 PyDoc_STRVAR(partition__doc__,
1530 "S.partition(sep) -> (head, sep, tail)\n\
1532 Searches for the separator sep in S, and returns the part before it,\n\
1533 the separator itself, and the part after it. If the separator is not\n\
1534 found, returns S and two empty strings.");
1536 static PyObject *
1537 string_partition(PyStringObject *self, PyObject *sep_obj)
1539 const char *sep;
1540 Py_ssize_t sep_len;
1542 if (PyString_Check(sep_obj)) {
1543 sep = PyString_AS_STRING(sep_obj);
1544 sep_len = PyString_GET_SIZE(sep_obj);
1546 #ifdef Py_USING_UNICODE
1547 else if (PyUnicode_Check(sep_obj))
1548 return PyUnicode_Partition((PyObject *) self, sep_obj);
1549 #endif
1550 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1551 return NULL;
1553 return stringlib_partition(
1554 (PyObject*) self,
1555 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1556 sep_obj, sep, sep_len
1560 PyDoc_STRVAR(rpartition__doc__,
1561 "S.rpartition(sep) -> (tail, sep, head)\n\
1563 Searches for the separator sep in S, starting at the end of S, and returns\n\
1564 the part before it, the separator itself, and the part after it. If the\n\
1565 separator is not found, returns two empty strings and S.");
1567 static PyObject *
1568 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1570 const char *sep;
1571 Py_ssize_t sep_len;
1573 if (PyString_Check(sep_obj)) {
1574 sep = PyString_AS_STRING(sep_obj);
1575 sep_len = PyString_GET_SIZE(sep_obj);
1577 #ifdef Py_USING_UNICODE
1578 else if (PyUnicode_Check(sep_obj))
1579 return PyUnicode_Partition((PyObject *) self, sep_obj);
1580 #endif
1581 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1582 return NULL;
1584 return stringlib_rpartition(
1585 (PyObject*) self,
1586 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1587 sep_obj, sep, sep_len
1591 Py_LOCAL_INLINE(PyObject *)
1592 rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1594 Py_ssize_t i, j, count=0;
1595 PyObject *str;
1596 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1598 if (list == NULL)
1599 return NULL;
1601 i = j = len-1;
1603 while (maxsplit-- > 0) {
1604 RSKIP_SPACE(s, i);
1605 if (i<0) break;
1606 j = i; i--;
1607 RSKIP_NONSPACE(s, i);
1608 SPLIT_ADD(s, i + 1, j + 1);
1610 if (i >= 0) {
1611 /* Only occurs when maxsplit was reached */
1612 /* Skip any remaining whitespace and copy to beginning of string */
1613 RSKIP_SPACE(s, i);
1614 if (i >= 0)
1615 SPLIT_ADD(s, 0, i + 1);
1618 FIX_PREALLOC_SIZE(list);
1619 if (PyList_Reverse(list) < 0)
1620 goto onError;
1621 return list;
1622 onError:
1623 Py_DECREF(list);
1624 return NULL;
1627 Py_LOCAL_INLINE(PyObject *)
1628 rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1630 register Py_ssize_t i, j, count=0;
1631 PyObject *str;
1632 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1634 if (list == NULL)
1635 return NULL;
1637 i = j = len - 1;
1638 while ((i >= 0) && (maxcount-- > 0)) {
1639 for (; i >= 0; i--) {
1640 if (s[i] == ch) {
1641 SPLIT_ADD(s, i + 1, j + 1);
1642 j = i = i - 1;
1643 break;
1647 if (j >= -1) {
1648 SPLIT_ADD(s, 0, j + 1);
1650 FIX_PREALLOC_SIZE(list);
1651 if (PyList_Reverse(list) < 0)
1652 goto onError;
1653 return list;
1655 onError:
1656 Py_DECREF(list);
1657 return NULL;
1660 PyDoc_STRVAR(rsplit__doc__,
1661 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1663 Return a list of the words in the string S, using sep as the\n\
1664 delimiter string, starting at the end of the string and working\n\
1665 to the front. If maxsplit is given, at most maxsplit splits are\n\
1666 done. If sep is not specified or is None, any whitespace string\n\
1667 is a separator.");
1669 static PyObject *
1670 string_rsplit(PyStringObject *self, PyObject *args)
1672 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1673 Py_ssize_t maxsplit = -1, count=0;
1674 const char *s = PyString_AS_STRING(self), *sub;
1675 PyObject *list, *str, *subobj = Py_None;
1677 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1678 return NULL;
1679 if (maxsplit < 0)
1680 maxsplit = PY_SSIZE_T_MAX;
1681 if (subobj == Py_None)
1682 return rsplit_whitespace(s, len, maxsplit);
1683 if (PyString_Check(subobj)) {
1684 sub = PyString_AS_STRING(subobj);
1685 n = PyString_GET_SIZE(subobj);
1687 #ifdef Py_USING_UNICODE
1688 else if (PyUnicode_Check(subobj))
1689 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1690 #endif
1691 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1692 return NULL;
1694 if (n == 0) {
1695 PyErr_SetString(PyExc_ValueError, "empty separator");
1696 return NULL;
1698 else if (n == 1)
1699 return rsplit_char(s, len, sub[0], maxsplit);
1701 list = PyList_New(PREALLOC_SIZE(maxsplit));
1702 if (list == NULL)
1703 return NULL;
1705 j = len;
1706 i = j - n;
1708 while ( (i >= 0) && (maxsplit-- > 0) ) {
1709 for (; i>=0; i--) {
1710 if (Py_STRING_MATCH(s, i, sub, n)) {
1711 SPLIT_ADD(s, i + n, j);
1712 j = i;
1713 i -= n;
1714 break;
1718 SPLIT_ADD(s, 0, j);
1719 FIX_PREALLOC_SIZE(list);
1720 if (PyList_Reverse(list) < 0)
1721 goto onError;
1722 return list;
1724 onError:
1725 Py_DECREF(list);
1726 return NULL;
1730 PyDoc_STRVAR(join__doc__,
1731 "S.join(sequence) -> string\n\
1733 Return a string which is the concatenation of the strings in the\n\
1734 sequence. The separator between elements is S.");
1736 static PyObject *
1737 string_join(PyStringObject *self, PyObject *orig)
1739 char *sep = PyString_AS_STRING(self);
1740 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1741 PyObject *res = NULL;
1742 char *p;
1743 Py_ssize_t seqlen = 0;
1744 size_t sz = 0;
1745 Py_ssize_t i;
1746 PyObject *seq, *item;
1748 seq = PySequence_Fast(orig, "");
1749 if (seq == NULL) {
1750 return NULL;
1753 seqlen = PySequence_Size(seq);
1754 if (seqlen == 0) {
1755 Py_DECREF(seq);
1756 return PyString_FromString("");
1758 if (seqlen == 1) {
1759 item = PySequence_Fast_GET_ITEM(seq, 0);
1760 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1761 Py_INCREF(item);
1762 Py_DECREF(seq);
1763 return item;
1767 /* There are at least two things to join, or else we have a subclass
1768 * of the builtin types in the sequence.
1769 * Do a pre-pass to figure out the total amount of space we'll
1770 * need (sz), see whether any argument is absurd, and defer to
1771 * the Unicode join if appropriate.
1773 for (i = 0; i < seqlen; i++) {
1774 const size_t old_sz = sz;
1775 item = PySequence_Fast_GET_ITEM(seq, i);
1776 if (!PyString_Check(item)){
1777 #ifdef Py_USING_UNICODE
1778 if (PyUnicode_Check(item)) {
1779 /* Defer to Unicode join.
1780 * CAUTION: There's no gurantee that the
1781 * original sequence can be iterated over
1782 * again, so we must pass seq here.
1784 PyObject *result;
1785 result = PyUnicode_Join((PyObject *)self, seq);
1786 Py_DECREF(seq);
1787 return result;
1789 #endif
1790 PyErr_Format(PyExc_TypeError,
1791 "sequence item %zd: expected string,"
1792 " %.80s found",
1793 i, item->ob_type->tp_name);
1794 Py_DECREF(seq);
1795 return NULL;
1797 sz += PyString_GET_SIZE(item);
1798 if (i != 0)
1799 sz += seplen;
1800 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1801 PyErr_SetString(PyExc_OverflowError,
1802 "join() result is too long for a Python string");
1803 Py_DECREF(seq);
1804 return NULL;
1808 /* Allocate result space. */
1809 res = PyString_FromStringAndSize((char*)NULL, sz);
1810 if (res == NULL) {
1811 Py_DECREF(seq);
1812 return NULL;
1815 /* Catenate everything. */
1816 p = PyString_AS_STRING(res);
1817 for (i = 0; i < seqlen; ++i) {
1818 size_t n;
1819 item = PySequence_Fast_GET_ITEM(seq, i);
1820 n = PyString_GET_SIZE(item);
1821 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1822 p += n;
1823 if (i < seqlen - 1) {
1824 Py_MEMCPY(p, sep, seplen);
1825 p += seplen;
1829 Py_DECREF(seq);
1830 return res;
1833 PyObject *
1834 _PyString_Join(PyObject *sep, PyObject *x)
1836 assert(sep != NULL && PyString_Check(sep));
1837 assert(x != NULL);
1838 return string_join((PyStringObject *)sep, x);
1841 Py_LOCAL_INLINE(void)
1842 string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1844 if (*end > len)
1845 *end = len;
1846 else if (*end < 0)
1847 *end += len;
1848 if (*end < 0)
1849 *end = 0;
1850 if (*start < 0)
1851 *start += len;
1852 if (*start < 0)
1853 *start = 0;
1856 Py_LOCAL_INLINE(Py_ssize_t)
1857 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1859 PyObject *subobj;
1860 const char *sub;
1861 Py_ssize_t sub_len;
1862 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1864 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1865 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1866 return -2;
1867 if (PyString_Check(subobj)) {
1868 sub = PyString_AS_STRING(subobj);
1869 sub_len = PyString_GET_SIZE(subobj);
1871 #ifdef Py_USING_UNICODE
1872 else if (PyUnicode_Check(subobj))
1873 return PyUnicode_Find(
1874 (PyObject *)self, subobj, start, end, dir);
1875 #endif
1876 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1877 /* XXX - the "expected a character buffer object" is pretty
1878 confusing for a non-expert. remap to something else ? */
1879 return -2;
1881 if (dir > 0)
1882 return stringlib_find_slice(
1883 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1884 sub, sub_len, start, end);
1885 else
1886 return stringlib_rfind_slice(
1887 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1888 sub, sub_len, start, end);
1892 PyDoc_STRVAR(find__doc__,
1893 "S.find(sub [,start [,end]]) -> int\n\
1895 Return the lowest index in S where substring sub is found,\n\
1896 such that sub is contained within s[start:end]. Optional\n\
1897 arguments start and end are interpreted as in slice notation.\n\
1899 Return -1 on failure.");
1901 static PyObject *
1902 string_find(PyStringObject *self, PyObject *args)
1904 Py_ssize_t result = string_find_internal(self, args, +1);
1905 if (result == -2)
1906 return NULL;
1907 return PyInt_FromSsize_t(result);
1911 PyDoc_STRVAR(index__doc__,
1912 "S.index(sub [,start [,end]]) -> int\n\
1914 Like S.find() but raise ValueError when the substring is not found.");
1916 static PyObject *
1917 string_index(PyStringObject *self, PyObject *args)
1919 Py_ssize_t result = string_find_internal(self, args, +1);
1920 if (result == -2)
1921 return NULL;
1922 if (result == -1) {
1923 PyErr_SetString(PyExc_ValueError,
1924 "substring not found");
1925 return NULL;
1927 return PyInt_FromSsize_t(result);
1931 PyDoc_STRVAR(rfind__doc__,
1932 "S.rfind(sub [,start [,end]]) -> int\n\
1934 Return the highest index in S where substring sub is found,\n\
1935 such that sub is contained within s[start:end]. Optional\n\
1936 arguments start and end are interpreted as in slice notation.\n\
1938 Return -1 on failure.");
1940 static PyObject *
1941 string_rfind(PyStringObject *self, PyObject *args)
1943 Py_ssize_t result = string_find_internal(self, args, -1);
1944 if (result == -2)
1945 return NULL;
1946 return PyInt_FromSsize_t(result);
1950 PyDoc_STRVAR(rindex__doc__,
1951 "S.rindex(sub [,start [,end]]) -> int\n\
1953 Like S.rfind() but raise ValueError when the substring is not found.");
1955 static PyObject *
1956 string_rindex(PyStringObject *self, PyObject *args)
1958 Py_ssize_t result = string_find_internal(self, args, -1);
1959 if (result == -2)
1960 return NULL;
1961 if (result == -1) {
1962 PyErr_SetString(PyExc_ValueError,
1963 "substring not found");
1964 return NULL;
1966 return PyInt_FromSsize_t(result);
1970 Py_LOCAL_INLINE(PyObject *)
1971 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1973 char *s = PyString_AS_STRING(self);
1974 Py_ssize_t len = PyString_GET_SIZE(self);
1975 char *sep = PyString_AS_STRING(sepobj);
1976 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1977 Py_ssize_t i, j;
1979 i = 0;
1980 if (striptype != RIGHTSTRIP) {
1981 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1982 i++;
1986 j = len;
1987 if (striptype != LEFTSTRIP) {
1988 do {
1989 j--;
1990 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1991 j++;
1994 if (i == 0 && j == len && PyString_CheckExact(self)) {
1995 Py_INCREF(self);
1996 return (PyObject*)self;
1998 else
1999 return PyString_FromStringAndSize(s+i, j-i);
2003 Py_LOCAL_INLINE(PyObject *)
2004 do_strip(PyStringObject *self, int striptype)
2006 char *s = PyString_AS_STRING(self);
2007 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
2009 i = 0;
2010 if (striptype != RIGHTSTRIP) {
2011 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2012 i++;
2016 j = len;
2017 if (striptype != LEFTSTRIP) {
2018 do {
2019 j--;
2020 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2021 j++;
2024 if (i == 0 && j == len && PyString_CheckExact(self)) {
2025 Py_INCREF(self);
2026 return (PyObject*)self;
2028 else
2029 return PyString_FromStringAndSize(s+i, j-i);
2033 Py_LOCAL_INLINE(PyObject *)
2034 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2036 PyObject *sep = NULL;
2038 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2039 return NULL;
2041 if (sep != NULL && sep != Py_None) {
2042 if (PyString_Check(sep))
2043 return do_xstrip(self, striptype, sep);
2044 #ifdef Py_USING_UNICODE
2045 else if (PyUnicode_Check(sep)) {
2046 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2047 PyObject *res;
2048 if (uniself==NULL)
2049 return NULL;
2050 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2051 striptype, sep);
2052 Py_DECREF(uniself);
2053 return res;
2055 #endif
2056 PyErr_Format(PyExc_TypeError,
2057 #ifdef Py_USING_UNICODE
2058 "%s arg must be None, str or unicode",
2059 #else
2060 "%s arg must be None or str",
2061 #endif
2062 STRIPNAME(striptype));
2063 return NULL;
2066 return do_strip(self, striptype);
2070 PyDoc_STRVAR(strip__doc__,
2071 "S.strip([chars]) -> string or unicode\n\
2073 Return a copy of the string S with leading and trailing\n\
2074 whitespace removed.\n\
2075 If chars is given and not None, remove characters in chars instead.\n\
2076 If chars is unicode, S will be converted to unicode before stripping");
2078 static PyObject *
2079 string_strip(PyStringObject *self, PyObject *args)
2081 if (PyTuple_GET_SIZE(args) == 0)
2082 return do_strip(self, BOTHSTRIP); /* Common case */
2083 else
2084 return do_argstrip(self, BOTHSTRIP, args);
2088 PyDoc_STRVAR(lstrip__doc__,
2089 "S.lstrip([chars]) -> string or unicode\n\
2091 Return a copy of the string S with leading whitespace removed.\n\
2092 If chars is given and not None, remove characters in chars instead.\n\
2093 If chars is unicode, S will be converted to unicode before stripping");
2095 static PyObject *
2096 string_lstrip(PyStringObject *self, PyObject *args)
2098 if (PyTuple_GET_SIZE(args) == 0)
2099 return do_strip(self, LEFTSTRIP); /* Common case */
2100 else
2101 return do_argstrip(self, LEFTSTRIP, args);
2105 PyDoc_STRVAR(rstrip__doc__,
2106 "S.rstrip([chars]) -> string or unicode\n\
2108 Return a copy of the string S with trailing whitespace removed.\n\
2109 If chars is given and not None, remove characters in chars instead.\n\
2110 If chars is unicode, S will be converted to unicode before stripping");
2112 static PyObject *
2113 string_rstrip(PyStringObject *self, PyObject *args)
2115 if (PyTuple_GET_SIZE(args) == 0)
2116 return do_strip(self, RIGHTSTRIP); /* Common case */
2117 else
2118 return do_argstrip(self, RIGHTSTRIP, args);
2122 PyDoc_STRVAR(lower__doc__,
2123 "S.lower() -> string\n\
2125 Return a copy of the string S converted to lowercase.");
2127 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2128 #ifndef _tolower
2129 #define _tolower tolower
2130 #endif
2132 static PyObject *
2133 string_lower(PyStringObject *self)
2135 char *s;
2136 Py_ssize_t i, n = PyString_GET_SIZE(self);
2137 PyObject *newobj;
2139 newobj = PyString_FromStringAndSize(NULL, n);
2140 if (!newobj)
2141 return NULL;
2143 s = PyString_AS_STRING(newobj);
2145 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2147 for (i = 0; i < n; i++) {
2148 int c = Py_CHARMASK(s[i]);
2149 if (isupper(c))
2150 s[i] = _tolower(c);
2153 return newobj;
2156 PyDoc_STRVAR(upper__doc__,
2157 "S.upper() -> string\n\
2159 Return a copy of the string S converted to uppercase.");
2161 #ifndef _toupper
2162 #define _toupper toupper
2163 #endif
2165 static PyObject *
2166 string_upper(PyStringObject *self)
2168 char *s;
2169 Py_ssize_t i, n = PyString_GET_SIZE(self);
2170 PyObject *newobj;
2172 newobj = PyString_FromStringAndSize(NULL, n);
2173 if (!newobj)
2174 return NULL;
2176 s = PyString_AS_STRING(newobj);
2178 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2180 for (i = 0; i < n; i++) {
2181 int c = Py_CHARMASK(s[i]);
2182 if (islower(c))
2183 s[i] = _toupper(c);
2186 return newobj;
2189 PyDoc_STRVAR(title__doc__,
2190 "S.title() -> string\n\
2192 Return a titlecased version of S, i.e. words start with uppercase\n\
2193 characters, all remaining cased characters have lowercase.");
2195 static PyObject*
2196 string_title(PyStringObject *self)
2198 char *s = PyString_AS_STRING(self), *s_new;
2199 Py_ssize_t i, n = PyString_GET_SIZE(self);
2200 int previous_is_cased = 0;
2201 PyObject *newobj;
2203 newobj = PyString_FromStringAndSize(NULL, n);
2204 if (newobj == NULL)
2205 return NULL;
2206 s_new = PyString_AsString(newobj);
2207 for (i = 0; i < n; i++) {
2208 int c = Py_CHARMASK(*s++);
2209 if (islower(c)) {
2210 if (!previous_is_cased)
2211 c = toupper(c);
2212 previous_is_cased = 1;
2213 } else if (isupper(c)) {
2214 if (previous_is_cased)
2215 c = tolower(c);
2216 previous_is_cased = 1;
2217 } else
2218 previous_is_cased = 0;
2219 *s_new++ = c;
2221 return newobj;
2224 PyDoc_STRVAR(capitalize__doc__,
2225 "S.capitalize() -> string\n\
2227 Return a copy of the string S with only its first character\n\
2228 capitalized.");
2230 static PyObject *
2231 string_capitalize(PyStringObject *self)
2233 char *s = PyString_AS_STRING(self), *s_new;
2234 Py_ssize_t i, n = PyString_GET_SIZE(self);
2235 PyObject *newobj;
2237 newobj = PyString_FromStringAndSize(NULL, n);
2238 if (newobj == NULL)
2239 return NULL;
2240 s_new = PyString_AsString(newobj);
2241 if (0 < n) {
2242 int c = Py_CHARMASK(*s++);
2243 if (islower(c))
2244 *s_new = toupper(c);
2245 else
2246 *s_new = c;
2247 s_new++;
2249 for (i = 1; i < n; i++) {
2250 int c = Py_CHARMASK(*s++);
2251 if (isupper(c))
2252 *s_new = tolower(c);
2253 else
2254 *s_new = c;
2255 s_new++;
2257 return newobj;
2261 PyDoc_STRVAR(count__doc__,
2262 "S.count(sub[, start[, end]]) -> int\n\
2264 Return the number of non-overlapping occurrences of substring sub in\n\
2265 string S[start:end]. Optional arguments start and end are interpreted\n\
2266 as in slice notation.");
2268 static PyObject *
2269 string_count(PyStringObject *self, PyObject *args)
2271 PyObject *sub_obj;
2272 const char *str = PyString_AS_STRING(self), *sub;
2273 Py_ssize_t sub_len;
2274 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2276 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2277 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2278 return NULL;
2280 if (PyString_Check(sub_obj)) {
2281 sub = PyString_AS_STRING(sub_obj);
2282 sub_len = PyString_GET_SIZE(sub_obj);
2284 #ifdef Py_USING_UNICODE
2285 else if (PyUnicode_Check(sub_obj)) {
2286 Py_ssize_t count;
2287 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2288 if (count == -1)
2289 return NULL;
2290 else
2291 return PyInt_FromSsize_t(count);
2293 #endif
2294 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2295 return NULL;
2297 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
2299 return PyInt_FromSsize_t(
2300 stringlib_count(str + start, end - start, sub, sub_len)
2304 PyDoc_STRVAR(swapcase__doc__,
2305 "S.swapcase() -> string\n\
2307 Return a copy of the string S with uppercase characters\n\
2308 converted to lowercase and vice versa.");
2310 static PyObject *
2311 string_swapcase(PyStringObject *self)
2313 char *s = PyString_AS_STRING(self), *s_new;
2314 Py_ssize_t i, n = PyString_GET_SIZE(self);
2315 PyObject *newobj;
2317 newobj = PyString_FromStringAndSize(NULL, n);
2318 if (newobj == NULL)
2319 return NULL;
2320 s_new = PyString_AsString(newobj);
2321 for (i = 0; i < n; i++) {
2322 int c = Py_CHARMASK(*s++);
2323 if (islower(c)) {
2324 *s_new = toupper(c);
2326 else if (isupper(c)) {
2327 *s_new = tolower(c);
2329 else
2330 *s_new = c;
2331 s_new++;
2333 return newobj;
2337 PyDoc_STRVAR(translate__doc__,
2338 "S.translate(table [,deletechars]) -> string\n\
2340 Return a copy of the string S, where all characters occurring\n\
2341 in the optional argument deletechars are removed, and the\n\
2342 remaining characters have been mapped through the given\n\
2343 translation table, which must be a string of length 256.");
2345 static PyObject *
2346 string_translate(PyStringObject *self, PyObject *args)
2348 register char *input, *output;
2349 register const char *table;
2350 register Py_ssize_t i, c, changed = 0;
2351 PyObject *input_obj = (PyObject*)self;
2352 const char *table1, *output_start, *del_table=NULL;
2353 Py_ssize_t inlen, tablen, dellen = 0;
2354 PyObject *result;
2355 int trans_table[256];
2356 PyObject *tableobj, *delobj = NULL;
2358 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2359 &tableobj, &delobj))
2360 return NULL;
2362 if (PyString_Check(tableobj)) {
2363 table1 = PyString_AS_STRING(tableobj);
2364 tablen = PyString_GET_SIZE(tableobj);
2366 #ifdef Py_USING_UNICODE
2367 else if (PyUnicode_Check(tableobj)) {
2368 /* Unicode .translate() does not support the deletechars
2369 parameter; instead a mapping to None will cause characters
2370 to be deleted. */
2371 if (delobj != NULL) {
2372 PyErr_SetString(PyExc_TypeError,
2373 "deletions are implemented differently for unicode");
2374 return NULL;
2376 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2378 #endif
2379 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
2380 return NULL;
2382 if (tablen != 256) {
2383 PyErr_SetString(PyExc_ValueError,
2384 "translation table must be 256 characters long");
2385 return NULL;
2388 if (delobj != NULL) {
2389 if (PyString_Check(delobj)) {
2390 del_table = PyString_AS_STRING(delobj);
2391 dellen = PyString_GET_SIZE(delobj);
2393 #ifdef Py_USING_UNICODE
2394 else if (PyUnicode_Check(delobj)) {
2395 PyErr_SetString(PyExc_TypeError,
2396 "deletions are implemented differently for unicode");
2397 return NULL;
2399 #endif
2400 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2401 return NULL;
2403 else {
2404 del_table = NULL;
2405 dellen = 0;
2408 table = table1;
2409 inlen = PyString_GET_SIZE(input_obj);
2410 result = PyString_FromStringAndSize((char *)NULL, inlen);
2411 if (result == NULL)
2412 return NULL;
2413 output_start = output = PyString_AsString(result);
2414 input = PyString_AS_STRING(input_obj);
2416 if (dellen == 0) {
2417 /* If no deletions are required, use faster code */
2418 for (i = inlen; --i >= 0; ) {
2419 c = Py_CHARMASK(*input++);
2420 if (Py_CHARMASK((*output++ = table[c])) != c)
2421 changed = 1;
2423 if (changed || !PyString_CheckExact(input_obj))
2424 return result;
2425 Py_DECREF(result);
2426 Py_INCREF(input_obj);
2427 return input_obj;
2430 for (i = 0; i < 256; i++)
2431 trans_table[i] = Py_CHARMASK(table[i]);
2433 for (i = 0; i < dellen; i++)
2434 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2436 for (i = inlen; --i >= 0; ) {
2437 c = Py_CHARMASK(*input++);
2438 if (trans_table[c] != -1)
2439 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2440 continue;
2441 changed = 1;
2443 if (!changed && PyString_CheckExact(input_obj)) {
2444 Py_DECREF(result);
2445 Py_INCREF(input_obj);
2446 return input_obj;
2448 /* Fix the size of the resulting string */
2449 if (inlen > 0)
2450 _PyString_Resize(&result, output - output_start);
2451 return result;
/* Search direction selectors. */
#define FORWARD  1
#define REVERSE  -1

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c within the first target_len
   bytes of target, or NULL; a thin cast wrapper around memchr(). */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), c, target_len))
2463 /* String ops must return a string. */
2464 /* If the object is subclass of string, create a copy */
2465 Py_LOCAL(PyStringObject *)
2466 return_self(PyStringObject *self)
2468 if (PyString_CheckExact(self)) {
2469 Py_INCREF(self);
2470 return self;
2472 return (PyStringObject *)PyString_FromStringAndSize(
2473 PyString_AS_STRING(self),
2474 PyString_GET_SIZE(self));
2477 Py_LOCAL_INLINE(Py_ssize_t)
2478 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2480 Py_ssize_t count=0;
2481 const char *start=target;
2482 const char *end=target+target_len;
2484 while ( (start=findchar(start, end-start, c)) != NULL ) {
2485 count++;
2486 if (count >= maxcount)
2487 break;
2488 start += 1;
2490 return count;
2493 Py_LOCAL(Py_ssize_t)
2494 findstring(const char *target, Py_ssize_t target_len,
2495 const char *pattern, Py_ssize_t pattern_len,
2496 Py_ssize_t start,
2497 Py_ssize_t end,
2498 int direction)
2500 if (start < 0) {
2501 start += target_len;
2502 if (start < 0)
2503 start = 0;
2505 if (end > target_len) {
2506 end = target_len;
2507 } else if (end < 0) {
2508 end += target_len;
2509 if (end < 0)
2510 end = 0;
2513 /* zero-length substrings always match at the first attempt */
2514 if (pattern_len == 0)
2515 return (direction > 0) ? start : end;
2517 end -= pattern_len;
2519 if (direction < 0) {
2520 for (; end >= start; end--)
2521 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2522 return end;
2523 } else {
2524 for (; start <= end; start++)
2525 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2526 return start;
2528 return -1;
2531 Py_LOCAL_INLINE(Py_ssize_t)
2532 countstring(const char *target, Py_ssize_t target_len,
2533 const char *pattern, Py_ssize_t pattern_len,
2534 Py_ssize_t start,
2535 Py_ssize_t end,
2536 int direction, Py_ssize_t maxcount)
2538 Py_ssize_t count=0;
2540 if (start < 0) {
2541 start += target_len;
2542 if (start < 0)
2543 start = 0;
2545 if (end > target_len) {
2546 end = target_len;
2547 } else if (end < 0) {
2548 end += target_len;
2549 if (end < 0)
2550 end = 0;
2553 /* zero-length substrings match everywhere */
2554 if (pattern_len == 0 || maxcount == 0) {
2555 if (target_len+1 < maxcount)
2556 return target_len+1;
2557 return maxcount;
2560 end -= pattern_len;
2561 if (direction < 0) {
2562 for (; (end >= start); end--)
2563 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2564 count++;
2565 if (--maxcount <= 0) break;
2566 end -= pattern_len-1;
2568 } else {
2569 for (; (start <= end); start++)
2570 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2571 count++;
2572 if (--maxcount <= 0)
2573 break;
2574 start += pattern_len-1;
2577 return count;
2581 /* Algorithms for different cases of string replacement */
2583 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2584 Py_LOCAL(PyStringObject *)
2585 replace_interleave(PyStringObject *self,
2586 const char *to_s, Py_ssize_t to_len,
2587 Py_ssize_t maxcount)
2589 char *self_s, *result_s;
2590 Py_ssize_t self_len, result_len;
2591 Py_ssize_t count, i, product;
2592 PyStringObject *result;
2594 self_len = PyString_GET_SIZE(self);
2596 /* 1 at the end plus 1 after every character */
2597 count = self_len+1;
2598 if (maxcount < count)
2599 count = maxcount;
2601 /* Check for overflow */
2602 /* result_len = count * to_len + self_len; */
2603 product = count * to_len;
2604 if (product / to_len != count) {
2605 PyErr_SetString(PyExc_OverflowError,
2606 "replace string is too long");
2607 return NULL;
2609 result_len = product + self_len;
2610 if (result_len < 0) {
2611 PyErr_SetString(PyExc_OverflowError,
2612 "replace string is too long");
2613 return NULL;
2616 if (! (result = (PyStringObject *)
2617 PyString_FromStringAndSize(NULL, result_len)) )
2618 return NULL;
2620 self_s = PyString_AS_STRING(self);
2621 result_s = PyString_AS_STRING(result);
2623 /* TODO: special case single character, which doesn't need memcpy */
2625 /* Lay the first one down (guaranteed this will occur) */
2626 Py_MEMCPY(result_s, to_s, to_len);
2627 result_s += to_len;
2628 count -= 1;
2630 for (i=0; i<count; i++) {
2631 *result_s++ = *self_s++;
2632 Py_MEMCPY(result_s, to_s, to_len);
2633 result_s += to_len;
2636 /* Copy the rest of the original string */
2637 Py_MEMCPY(result_s, self_s, self_len-i);
2639 return result;
2642 /* Special case for deleting a single character */
2643 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2644 Py_LOCAL(PyStringObject *)
2645 replace_delete_single_character(PyStringObject *self,
2646 char from_c, Py_ssize_t maxcount)
2648 char *self_s, *result_s;
2649 char *start, *next, *end;
2650 Py_ssize_t self_len, result_len;
2651 Py_ssize_t count;
2652 PyStringObject *result;
2654 self_len = PyString_GET_SIZE(self);
2655 self_s = PyString_AS_STRING(self);
2657 count = countchar(self_s, self_len, from_c, maxcount);
2658 if (count == 0) {
2659 return return_self(self);
2662 result_len = self_len - count; /* from_len == 1 */
2663 assert(result_len>=0);
2665 if ( (result = (PyStringObject *)
2666 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2667 return NULL;
2668 result_s = PyString_AS_STRING(result);
2670 start = self_s;
2671 end = self_s + self_len;
2672 while (count-- > 0) {
2673 next = findchar(start, end-start, from_c);
2674 if (next == NULL)
2675 break;
2676 Py_MEMCPY(result_s, start, next-start);
2677 result_s += (next-start);
2678 start = next+1;
2680 Py_MEMCPY(result_s, start, end-start);
2682 return result;
2685 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2687 Py_LOCAL(PyStringObject *)
2688 replace_delete_substring(PyStringObject *self,
2689 const char *from_s, Py_ssize_t from_len,
2690 Py_ssize_t maxcount) {
2691 char *self_s, *result_s;
2692 char *start, *next, *end;
2693 Py_ssize_t self_len, result_len;
2694 Py_ssize_t count, offset;
2695 PyStringObject *result;
2697 self_len = PyString_GET_SIZE(self);
2698 self_s = PyString_AS_STRING(self);
2700 count = countstring(self_s, self_len,
2701 from_s, from_len,
2702 0, self_len, 1,
2703 maxcount);
2705 if (count == 0) {
2706 /* no matches */
2707 return return_self(self);
2710 result_len = self_len - (count * from_len);
2711 assert (result_len>=0);
2713 if ( (result = (PyStringObject *)
2714 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2715 return NULL;
2717 result_s = PyString_AS_STRING(result);
2719 start = self_s;
2720 end = self_s + self_len;
2721 while (count-- > 0) {
2722 offset = findstring(start, end-start,
2723 from_s, from_len,
2724 0, end-start, FORWARD);
2725 if (offset == -1)
2726 break;
2727 next = start + offset;
2729 Py_MEMCPY(result_s, start, next-start);
2731 result_s += (next-start);
2732 start = next+from_len;
2734 Py_MEMCPY(result_s, start, end-start);
2735 return result;
2738 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2739 Py_LOCAL(PyStringObject *)
2740 replace_single_character_in_place(PyStringObject *self,
2741 char from_c, char to_c,
2742 Py_ssize_t maxcount)
2744 char *self_s, *result_s, *start, *end, *next;
2745 Py_ssize_t self_len;
2746 PyStringObject *result;
2748 /* The result string will be the same size */
2749 self_s = PyString_AS_STRING(self);
2750 self_len = PyString_GET_SIZE(self);
2752 next = findchar(self_s, self_len, from_c);
2754 if (next == NULL) {
2755 /* No matches; return the original string */
2756 return return_self(self);
2759 /* Need to make a new string */
2760 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2761 if (result == NULL)
2762 return NULL;
2763 result_s = PyString_AS_STRING(result);
2764 Py_MEMCPY(result_s, self_s, self_len);
2766 /* change everything in-place, starting with this one */
2767 start = result_s + (next-self_s);
2768 *start = to_c;
2769 start++;
2770 end = result_s + self_len;
2772 while (--maxcount > 0) {
2773 next = findchar(start, end-start, from_c);
2774 if (next == NULL)
2775 break;
2776 *next = to_c;
2777 start = next+1;
2780 return result;
2783 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2784 Py_LOCAL(PyStringObject *)
2785 replace_substring_in_place(PyStringObject *self,
2786 const char *from_s, Py_ssize_t from_len,
2787 const char *to_s, Py_ssize_t to_len,
2788 Py_ssize_t maxcount)
2790 char *result_s, *start, *end;
2791 char *self_s;
2792 Py_ssize_t self_len, offset;
2793 PyStringObject *result;
2795 /* The result string will be the same size */
2797 self_s = PyString_AS_STRING(self);
2798 self_len = PyString_GET_SIZE(self);
2800 offset = findstring(self_s, self_len,
2801 from_s, from_len,
2802 0, self_len, FORWARD);
2803 if (offset == -1) {
2804 /* No matches; return the original string */
2805 return return_self(self);
2808 /* Need to make a new string */
2809 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2810 if (result == NULL)
2811 return NULL;
2812 result_s = PyString_AS_STRING(result);
2813 Py_MEMCPY(result_s, self_s, self_len);
2815 /* change everything in-place, starting with this one */
2816 start = result_s + offset;
2817 Py_MEMCPY(start, to_s, from_len);
2818 start += from_len;
2819 end = result_s + self_len;
2821 while ( --maxcount > 0) {
2822 offset = findstring(start, end-start,
2823 from_s, from_len,
2824 0, end-start, FORWARD);
2825 if (offset==-1)
2826 break;
2827 Py_MEMCPY(start+offset, to_s, from_len);
2828 start += offset+from_len;
2831 return result;
2834 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2835 Py_LOCAL(PyStringObject *)
2836 replace_single_character(PyStringObject *self,
2837 char from_c,
2838 const char *to_s, Py_ssize_t to_len,
2839 Py_ssize_t maxcount)
2841 char *self_s, *result_s;
2842 char *start, *next, *end;
2843 Py_ssize_t self_len, result_len;
2844 Py_ssize_t count, product;
2845 PyStringObject *result;
2847 self_s = PyString_AS_STRING(self);
2848 self_len = PyString_GET_SIZE(self);
2850 count = countchar(self_s, self_len, from_c, maxcount);
2851 if (count == 0) {
2852 /* no matches, return unchanged */
2853 return return_self(self);
2856 /* use the difference between current and new, hence the "-1" */
2857 /* result_len = self_len + count * (to_len-1) */
2858 product = count * (to_len-1);
2859 if (product / (to_len-1) != count) {
2860 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2861 return NULL;
2863 result_len = self_len + product;
2864 if (result_len < 0) {
2865 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2866 return NULL;
2869 if ( (result = (PyStringObject *)
2870 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2871 return NULL;
2872 result_s = PyString_AS_STRING(result);
2874 start = self_s;
2875 end = self_s + self_len;
2876 while (count-- > 0) {
2877 next = findchar(start, end-start, from_c);
2878 if (next == NULL)
2879 break;
2881 if (next == start) {
2882 /* replace with the 'to' */
2883 Py_MEMCPY(result_s, to_s, to_len);
2884 result_s += to_len;
2885 start += 1;
2886 } else {
2887 /* copy the unchanged old then the 'to' */
2888 Py_MEMCPY(result_s, start, next-start);
2889 result_s += (next-start);
2890 Py_MEMCPY(result_s, to_s, to_len);
2891 result_s += to_len;
2892 start = next+1;
2895 /* Copy the remainder of the remaining string */
2896 Py_MEMCPY(result_s, start, end-start);
2898 return result;
2901 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2902 Py_LOCAL(PyStringObject *)
2903 replace_substring(PyStringObject *self,
2904 const char *from_s, Py_ssize_t from_len,
2905 const char *to_s, Py_ssize_t to_len,
2906 Py_ssize_t maxcount) {
2907 char *self_s, *result_s;
2908 char *start, *next, *end;
2909 Py_ssize_t self_len, result_len;
2910 Py_ssize_t count, offset, product;
2911 PyStringObject *result;
2913 self_s = PyString_AS_STRING(self);
2914 self_len = PyString_GET_SIZE(self);
2916 count = countstring(self_s, self_len,
2917 from_s, from_len,
2918 0, self_len, FORWARD, maxcount);
2919 if (count == 0) {
2920 /* no matches, return unchanged */
2921 return return_self(self);
2924 /* Check for overflow */
2925 /* result_len = self_len + count * (to_len-from_len) */
2926 product = count * (to_len-from_len);
2927 if (product / (to_len-from_len) != count) {
2928 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2929 return NULL;
2931 result_len = self_len + product;
2932 if (result_len < 0) {
2933 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2934 return NULL;
2937 if ( (result = (PyStringObject *)
2938 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2939 return NULL;
2940 result_s = PyString_AS_STRING(result);
2942 start = self_s;
2943 end = self_s + self_len;
2944 while (count-- > 0) {
2945 offset = findstring(start, end-start,
2946 from_s, from_len,
2947 0, end-start, FORWARD);
2948 if (offset == -1)
2949 break;
2950 next = start+offset;
2951 if (next == start) {
2952 /* replace with the 'to' */
2953 Py_MEMCPY(result_s, to_s, to_len);
2954 result_s += to_len;
2955 start += from_len;
2956 } else {
2957 /* copy the unchanged old then the 'to' */
2958 Py_MEMCPY(result_s, start, next-start);
2959 result_s += (next-start);
2960 Py_MEMCPY(result_s, to_s, to_len);
2961 result_s += to_len;
2962 start = next+from_len;
2965 /* Copy the remainder of the remaining string */
2966 Py_MEMCPY(result_s, start, end-start);
2968 return result;
2972 Py_LOCAL(PyStringObject *)
2973 replace(PyStringObject *self,
2974 const char *from_s, Py_ssize_t from_len,
2975 const char *to_s, Py_ssize_t to_len,
2976 Py_ssize_t maxcount)
2978 if (maxcount < 0) {
2979 maxcount = PY_SSIZE_T_MAX;
2980 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2981 /* nothing to do; return the original string */
2982 return return_self(self);
2985 if (maxcount == 0 ||
2986 (from_len == 0 && to_len == 0)) {
2987 /* nothing to do; return the original string */
2988 return return_self(self);
2991 /* Handle zero-length special cases */
2993 if (from_len == 0) {
2994 /* insert the 'to' string everywhere. */
2995 /* >>> "Python".replace("", ".") */
2996 /* '.P.y.t.h.o.n.' */
2997 return replace_interleave(self, to_s, to_len, maxcount);
3000 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3001 /* point for an empty self string to generate a non-empty string */
3002 /* Special case so the remaining code always gets a non-empty string */
3003 if (PyString_GET_SIZE(self) == 0) {
3004 return return_self(self);
3007 if (to_len == 0) {
3008 /* delete all occurances of 'from' string */
3009 if (from_len == 1) {
3010 return replace_delete_single_character(
3011 self, from_s[0], maxcount);
3012 } else {
3013 return replace_delete_substring(self, from_s, from_len, maxcount);
3017 /* Handle special case where both strings have the same length */
3019 if (from_len == to_len) {
3020 if (from_len == 1) {
3021 return replace_single_character_in_place(
3022 self,
3023 from_s[0],
3024 to_s[0],
3025 maxcount);
3026 } else {
3027 return replace_substring_in_place(
3028 self, from_s, from_len, to_s, to_len, maxcount);
3032 /* Otherwise use the more generic algorithms */
3033 if (from_len == 1) {
3034 return replace_single_character(self, from_s[0],
3035 to_s, to_len, maxcount);
3036 } else {
3037 /* len('from')>=2, len('to')>=1 */
3038 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3042 PyDoc_STRVAR(replace__doc__,
3043 "S.replace (old, new[, count]) -> string\n\
3045 Return a copy of string S with all occurrences of substring\n\
3046 old replaced by new. If the optional argument count is\n\
3047 given, only the first count occurrences are replaced.");
3049 static PyObject *
3050 string_replace(PyStringObject *self, PyObject *args)
3052 Py_ssize_t count = -1;
3053 PyObject *from, *to;
3054 const char *from_s, *to_s;
3055 Py_ssize_t from_len, to_len;
3057 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3058 return NULL;
3060 if (PyString_Check(from)) {
3061 from_s = PyString_AS_STRING(from);
3062 from_len = PyString_GET_SIZE(from);
3064 #ifdef Py_USING_UNICODE
3065 if (PyUnicode_Check(from))
3066 return PyUnicode_Replace((PyObject *)self,
3067 from, to, count);
3068 #endif
3069 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3070 return NULL;
3072 if (PyString_Check(to)) {
3073 to_s = PyString_AS_STRING(to);
3074 to_len = PyString_GET_SIZE(to);
3076 #ifdef Py_USING_UNICODE
3077 else if (PyUnicode_Check(to))
3078 return PyUnicode_Replace((PyObject *)self,
3079 from, to, count);
3080 #endif
3081 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3082 return NULL;
3084 return (PyObject *)replace((PyStringObject *) self,
3085 from_s, from_len,
3086 to_s, to_len, count);
3089 /** End DALKE **/
3091 /* Matches the end (direction >= 0) or start (direction < 0) of self
3092 * against substr, using the start and end arguments. Returns
3093 * -1 on error, 0 if not found and 1 if found.
3095 Py_LOCAL(int)
3096 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3097 Py_ssize_t end, int direction)
3099 Py_ssize_t len = PyString_GET_SIZE(self);
3100 Py_ssize_t slen;
3101 const char* sub;
3102 const char* str;
3104 if (PyString_Check(substr)) {
3105 sub = PyString_AS_STRING(substr);
3106 slen = PyString_GET_SIZE(substr);
3108 #ifdef Py_USING_UNICODE
3109 else if (PyUnicode_Check(substr))
3110 return PyUnicode_Tailmatch((PyObject *)self,
3111 substr, start, end, direction);
3112 #endif
3113 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3114 return -1;
3115 str = PyString_AS_STRING(self);
3117 string_adjust_indices(&start, &end, len);
3119 if (direction < 0) {
3120 /* startswith */
3121 if (start+slen > len)
3122 return 0;
3123 } else {
3124 /* endswith */
3125 if (end-start < slen || start > len)
3126 return 0;
3128 if (end-slen > start)
3129 start = end - slen;
3131 if (end-start >= slen)
3132 return ! memcmp(str+start, sub, slen);
3133 return 0;
3137 PyDoc_STRVAR(startswith__doc__,
3138 "S.startswith(prefix[, start[, end]]) -> bool\n\
3140 Return True if S starts with the specified prefix, False otherwise.\n\
3141 With optional start, test S beginning at that position.\n\
3142 With optional end, stop comparing S at that position.\n\
3143 prefix can also be a tuple of strings to try.");
3145 static PyObject *
3146 string_startswith(PyStringObject *self, PyObject *args)
3148 Py_ssize_t start = 0;
3149 Py_ssize_t end = PY_SSIZE_T_MAX;
3150 PyObject *subobj;
3151 int result;
3153 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3154 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3155 return NULL;
3156 if (PyTuple_Check(subobj)) {
3157 Py_ssize_t i;
3158 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3159 result = _string_tailmatch(self,
3160 PyTuple_GET_ITEM(subobj, i),
3161 start, end, -1);
3162 if (result == -1)
3163 return NULL;
3164 else if (result) {
3165 Py_RETURN_TRUE;
3168 Py_RETURN_FALSE;
3170 result = _string_tailmatch(self, subobj, start, end, -1);
3171 if (result == -1)
3172 return NULL;
3173 else
3174 return PyBool_FromLong(result);
3178 PyDoc_STRVAR(endswith__doc__,
3179 "S.endswith(suffix[, start[, end]]) -> bool\n\
3181 Return True if S ends with the specified suffix, False otherwise.\n\
3182 With optional start, test S beginning at that position.\n\
3183 With optional end, stop comparing S at that position.\n\
3184 suffix can also be a tuple of strings to try.");
3186 static PyObject *
3187 string_endswith(PyStringObject *self, PyObject *args)
3189 Py_ssize_t start = 0;
3190 Py_ssize_t end = PY_SSIZE_T_MAX;
3191 PyObject *subobj;
3192 int result;
3194 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3195 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3196 return NULL;
3197 if (PyTuple_Check(subobj)) {
3198 Py_ssize_t i;
3199 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3200 result = _string_tailmatch(self,
3201 PyTuple_GET_ITEM(subobj, i),
3202 start, end, +1);
3203 if (result == -1)
3204 return NULL;
3205 else if (result) {
3206 Py_RETURN_TRUE;
3209 Py_RETURN_FALSE;
3211 result = _string_tailmatch(self, subobj, start, end, +1);
3212 if (result == -1)
3213 return NULL;
3214 else
3215 return PyBool_FromLong(result);
3219 PyDoc_STRVAR(encode__doc__,
3220 "S.encode([encoding[,errors]]) -> object\n\
3222 Encodes S using the codec registered for encoding. encoding defaults\n\
3223 to the default encoding. errors may be given to set a different error\n\
3224 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3225 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3226 'xmlcharrefreplace' as well as any other name registered with\n\
3227 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3229 static PyObject *
3230 string_encode(PyStringObject *self, PyObject *args)
3232 char *encoding = NULL;
3233 char *errors = NULL;
3234 PyObject *v;
3236 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3237 return NULL;
3238 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3239 if (v == NULL)
3240 goto onError;
3241 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3242 PyErr_Format(PyExc_TypeError,
3243 "encoder did not return a string/unicode object "
3244 "(type=%.400s)",
3245 v->ob_type->tp_name);
3246 Py_DECREF(v);
3247 return NULL;
3249 return v;
3251 onError:
3252 return NULL;
3256 PyDoc_STRVAR(decode__doc__,
3257 "S.decode([encoding[,errors]]) -> object\n\
3259 Decodes S using the codec registered for encoding. encoding defaults\n\
3260 to the default encoding. errors may be given to set a different error\n\
3261 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3262 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3263 as well as any other name registerd with codecs.register_error that is\n\
3264 able to handle UnicodeDecodeErrors.");
3266 static PyObject *
3267 string_decode(PyStringObject *self, PyObject *args)
3269 char *encoding = NULL;
3270 char *errors = NULL;
3271 PyObject *v;
3273 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3274 return NULL;
3275 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3276 if (v == NULL)
3277 goto onError;
3278 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3279 PyErr_Format(PyExc_TypeError,
3280 "decoder did not return a string/unicode object "
3281 "(type=%.400s)",
3282 v->ob_type->tp_name);
3283 Py_DECREF(v);
3284 return NULL;
3286 return v;
3288 onError:
3289 return NULL;
3293 PyDoc_STRVAR(expandtabs__doc__,
3294 "S.expandtabs([tabsize]) -> string\n\
3296 Return a copy of S where all tab characters are expanded using spaces.\n\
3297 If tabsize is not given, a tab size of 8 characters is assumed.");
3299 static PyObject*
3300 string_expandtabs(PyStringObject *self, PyObject *args)
3302 const char *e, *p;
3303 char *q;
3304 Py_ssize_t i, j, old_j;
3305 PyObject *u;
3306 int tabsize = 8;
3308 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3309 return NULL;
3311 /* First pass: determine size of output string */
3312 i = j = old_j = 0;
3313 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3314 for (p = PyString_AS_STRING(self); p < e; p++)
3315 if (*p == '\t') {
3316 if (tabsize > 0) {
3317 j += tabsize - (j % tabsize);
3318 if (old_j > j) {
3319 PyErr_SetString(PyExc_OverflowError,
3320 "new string is too long");
3321 return NULL;
3323 old_j = j;
3326 else {
3327 j++;
3328 if (*p == '\n' || *p == '\r') {
3329 i += j;
3330 old_j = j = 0;
3331 if (i < 0) {
3332 PyErr_SetString(PyExc_OverflowError,
3333 "new string is too long");
3334 return NULL;
3339 if ((i + j) < 0) {
3340 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3341 return NULL;
3344 /* Second pass: create output string and fill it */
3345 u = PyString_FromStringAndSize(NULL, i + j);
3346 if (!u)
3347 return NULL;
3349 j = 0;
3350 q = PyString_AS_STRING(u);
3352 for (p = PyString_AS_STRING(self); p < e; p++)
3353 if (*p == '\t') {
3354 if (tabsize > 0) {
3355 i = tabsize - (j % tabsize);
3356 j += i;
3357 while (i--)
3358 *q++ = ' ';
3361 else {
3362 j++;
3363 *q++ = *p;
3364 if (*p == '\n' || *p == '\r')
3365 j = 0;
3368 return u;
3371 Py_LOCAL_INLINE(PyObject *)
3372 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3374 PyObject *u;
3376 if (left < 0)
3377 left = 0;
3378 if (right < 0)
3379 right = 0;
3381 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3382 Py_INCREF(self);
3383 return (PyObject *)self;
3386 u = PyString_FromStringAndSize(NULL,
3387 left + PyString_GET_SIZE(self) + right);
3388 if (u) {
3389 if (left)
3390 memset(PyString_AS_STRING(u), fill, left);
3391 Py_MEMCPY(PyString_AS_STRING(u) + left,
3392 PyString_AS_STRING(self),
3393 PyString_GET_SIZE(self));
3394 if (right)
3395 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3396 fill, right);
3399 return u;
3402 PyDoc_STRVAR(ljust__doc__,
3403 "S.ljust(width[, fillchar]) -> string\n"
3404 "\n"
3405 "Return S left justified in a string of length width. Padding is\n"
3406 "done using the specified fill character (default is a space).");
3408 static PyObject *
3409 string_ljust(PyStringObject *self, PyObject *args)
3411 Py_ssize_t width;
3412 char fillchar = ' ';
3414 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3415 return NULL;
3417 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3418 Py_INCREF(self);
3419 return (PyObject*) self;
3422 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3426 PyDoc_STRVAR(rjust__doc__,
3427 "S.rjust(width[, fillchar]) -> string\n"
3428 "\n"
3429 "Return S right justified in a string of length width. Padding is\n"
3430 "done using the specified fill character (default is a space)");
3432 static PyObject *
3433 string_rjust(PyStringObject *self, PyObject *args)
3435 Py_ssize_t width;
3436 char fillchar = ' ';
3438 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3439 return NULL;
3441 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3442 Py_INCREF(self);
3443 return (PyObject*) self;
3446 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3450 PyDoc_STRVAR(center__doc__,
3451 "S.center(width[, fillchar]) -> string\n"
3452 "\n"
3453 "Return S centered in a string of length width. Padding is\n"
3454 "done using the specified fill character (default is a space)");
3456 static PyObject *
3457 string_center(PyStringObject *self, PyObject *args)
3459 Py_ssize_t marg, left;
3460 Py_ssize_t width;
3461 char fillchar = ' ';
3463 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3464 return NULL;
3466 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3467 Py_INCREF(self);
3468 return (PyObject*) self;
3471 marg = width - PyString_GET_SIZE(self);
3472 left = marg / 2 + (marg & width & 1);
3474 return pad(self, left, marg - left, fillchar);
3477 PyDoc_STRVAR(zfill__doc__,
3478 "S.zfill(width) -> string\n"
3479 "\n"
3480 "Pad a numeric string S with zeros on the left, to fill a field\n"
3481 "of the specified width. The string S is never truncated.");
3483 static PyObject *
3484 string_zfill(PyStringObject *self, PyObject *args)
3486 Py_ssize_t fill;
3487 PyObject *s;
3488 char *p;
3489 Py_ssize_t width;
3491 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3492 return NULL;
3494 if (PyString_GET_SIZE(self) >= width) {
3495 if (PyString_CheckExact(self)) {
3496 Py_INCREF(self);
3497 return (PyObject*) self;
3499 else
3500 return PyString_FromStringAndSize(
3501 PyString_AS_STRING(self),
3502 PyString_GET_SIZE(self)
3506 fill = width - PyString_GET_SIZE(self);
3508 s = pad(self, fill, 0, '0');
3510 if (s == NULL)
3511 return NULL;
3513 p = PyString_AS_STRING(s);
3514 if (p[fill] == '+' || p[fill] == '-') {
3515 /* move sign to beginning of string */
3516 p[0] = p[fill];
3517 p[fill] = '0';
3520 return (PyObject*) s;
3523 PyDoc_STRVAR(isspace__doc__,
3524 "S.isspace() -> bool\n\
3526 Return True if all characters in S are whitespace\n\
3527 and there is at least one character in S, False otherwise.");
3529 static PyObject*
3530 string_isspace(PyStringObject *self)
3532 register const unsigned char *p
3533 = (unsigned char *) PyString_AS_STRING(self);
3534 register const unsigned char *e;
3536 /* Shortcut for single character strings */
3537 if (PyString_GET_SIZE(self) == 1 &&
3538 isspace(*p))
3539 return PyBool_FromLong(1);
3541 /* Special case for empty strings */
3542 if (PyString_GET_SIZE(self) == 0)
3543 return PyBool_FromLong(0);
3545 e = p + PyString_GET_SIZE(self);
3546 for (; p < e; p++) {
3547 if (!isspace(*p))
3548 return PyBool_FromLong(0);
3550 return PyBool_FromLong(1);
3554 PyDoc_STRVAR(isalpha__doc__,
3555 "S.isalpha() -> bool\n\
3557 Return True if all characters in S are alphabetic\n\
3558 and there is at least one character in S, False otherwise.");
3560 static PyObject*
3561 string_isalpha(PyStringObject *self)
3563 register const unsigned char *p
3564 = (unsigned char *) PyString_AS_STRING(self);
3565 register const unsigned char *e;
3567 /* Shortcut for single character strings */
3568 if (PyString_GET_SIZE(self) == 1 &&
3569 isalpha(*p))
3570 return PyBool_FromLong(1);
3572 /* Special case for empty strings */
3573 if (PyString_GET_SIZE(self) == 0)
3574 return PyBool_FromLong(0);
3576 e = p + PyString_GET_SIZE(self);
3577 for (; p < e; p++) {
3578 if (!isalpha(*p))
3579 return PyBool_FromLong(0);
3581 return PyBool_FromLong(1);
3585 PyDoc_STRVAR(isalnum__doc__,
3586 "S.isalnum() -> bool\n\
3588 Return True if all characters in S are alphanumeric\n\
3589 and there is at least one character in S, False otherwise.");
3591 static PyObject*
3592 string_isalnum(PyStringObject *self)
3594 register const unsigned char *p
3595 = (unsigned char *) PyString_AS_STRING(self);
3596 register const unsigned char *e;
3598 /* Shortcut for single character strings */
3599 if (PyString_GET_SIZE(self) == 1 &&
3600 isalnum(*p))
3601 return PyBool_FromLong(1);
3603 /* Special case for empty strings */
3604 if (PyString_GET_SIZE(self) == 0)
3605 return PyBool_FromLong(0);
3607 e = p + PyString_GET_SIZE(self);
3608 for (; p < e; p++) {
3609 if (!isalnum(*p))
3610 return PyBool_FromLong(0);
3612 return PyBool_FromLong(1);
3616 PyDoc_STRVAR(isdigit__doc__,
3617 "S.isdigit() -> bool\n\
3619 Return True if all characters in S are digits\n\
3620 and there is at least one character in S, False otherwise.");
3622 static PyObject*
3623 string_isdigit(PyStringObject *self)
3625 register const unsigned char *p
3626 = (unsigned char *) PyString_AS_STRING(self);
3627 register const unsigned char *e;
3629 /* Shortcut for single character strings */
3630 if (PyString_GET_SIZE(self) == 1 &&
3631 isdigit(*p))
3632 return PyBool_FromLong(1);
3634 /* Special case for empty strings */
3635 if (PyString_GET_SIZE(self) == 0)
3636 return PyBool_FromLong(0);
3638 e = p + PyString_GET_SIZE(self);
3639 for (; p < e; p++) {
3640 if (!isdigit(*p))
3641 return PyBool_FromLong(0);
3643 return PyBool_FromLong(1);
3647 PyDoc_STRVAR(islower__doc__,
3648 "S.islower() -> bool\n\
3650 Return True if all cased characters in S are lowercase and there is\n\
3651 at least one cased character in S, False otherwise.");
3653 static PyObject*
3654 string_islower(PyStringObject *self)
3656 register const unsigned char *p
3657 = (unsigned char *) PyString_AS_STRING(self);
3658 register const unsigned char *e;
3659 int cased;
3661 /* Shortcut for single character strings */
3662 if (PyString_GET_SIZE(self) == 1)
3663 return PyBool_FromLong(islower(*p) != 0);
3665 /* Special case for empty strings */
3666 if (PyString_GET_SIZE(self) == 0)
3667 return PyBool_FromLong(0);
3669 e = p + PyString_GET_SIZE(self);
3670 cased = 0;
3671 for (; p < e; p++) {
3672 if (isupper(*p))
3673 return PyBool_FromLong(0);
3674 else if (!cased && islower(*p))
3675 cased = 1;
3677 return PyBool_FromLong(cased);
3681 PyDoc_STRVAR(isupper__doc__,
3682 "S.isupper() -> bool\n\
3684 Return True if all cased characters in S are uppercase and there is\n\
3685 at least one cased character in S, False otherwise.");
3687 static PyObject*
3688 string_isupper(PyStringObject *self)
3690 register const unsigned char *p
3691 = (unsigned char *) PyString_AS_STRING(self);
3692 register const unsigned char *e;
3693 int cased;
3695 /* Shortcut for single character strings */
3696 if (PyString_GET_SIZE(self) == 1)
3697 return PyBool_FromLong(isupper(*p) != 0);
3699 /* Special case for empty strings */
3700 if (PyString_GET_SIZE(self) == 0)
3701 return PyBool_FromLong(0);
3703 e = p + PyString_GET_SIZE(self);
3704 cased = 0;
3705 for (; p < e; p++) {
3706 if (islower(*p))
3707 return PyBool_FromLong(0);
3708 else if (!cased && isupper(*p))
3709 cased = 1;
3711 return PyBool_FromLong(cased);
3715 PyDoc_STRVAR(istitle__doc__,
3716 "S.istitle() -> bool\n\
3718 Return True if S is a titlecased string and there is at least one\n\
3719 character in S, i.e. uppercase characters may only follow uncased\n\
3720 characters and lowercase characters only cased ones. Return False\n\
3721 otherwise.");
3723 static PyObject*
3724 string_istitle(PyStringObject *self, PyObject *uncased)
3726 register const unsigned char *p
3727 = (unsigned char *) PyString_AS_STRING(self);
3728 register const unsigned char *e;
3729 int cased, previous_is_cased;
3731 /* Shortcut for single character strings */
3732 if (PyString_GET_SIZE(self) == 1)
3733 return PyBool_FromLong(isupper(*p) != 0);
3735 /* Special case for empty strings */
3736 if (PyString_GET_SIZE(self) == 0)
3737 return PyBool_FromLong(0);
3739 e = p + PyString_GET_SIZE(self);
3740 cased = 0;
3741 previous_is_cased = 0;
3742 for (; p < e; p++) {
3743 register const unsigned char ch = *p;
3745 if (isupper(ch)) {
3746 if (previous_is_cased)
3747 return PyBool_FromLong(0);
3748 previous_is_cased = 1;
3749 cased = 1;
3751 else if (islower(ch)) {
3752 if (!previous_is_cased)
3753 return PyBool_FromLong(0);
3754 previous_is_cased = 1;
3755 cased = 1;
3757 else
3758 previous_is_cased = 0;
3760 return PyBool_FromLong(cased);
3764 PyDoc_STRVAR(splitlines__doc__,
3765 "S.splitlines([keepends]) -> list of strings\n\
3767 Return a list of the lines in S, breaking at line boundaries.\n\
3768 Line breaks are not included in the resulting list unless keepends\n\
3769 is given and true.");
/* Implementation of S.splitlines([keepends]).
   Scans the bytes of self for '\n' and '\r', treating "\r\n" as a
   single line break, and hands each line to SPLIT_APPEND.  The line
   break is included in the appended range only when keepends is true.
   Returns the list, or NULL after releasing it via the onError label.
   NOTE(review): SPLIT_APPEND is a macro defined outside this view; it
   presumably appends data[left:right] to `list` using the local `str`
   and jumps to `onError` on failure -- confirm against its definition
   before renaming any of these locals or the label. */
3771 static PyObject*
3772 string_splitlines(PyStringObject *self, PyObject *args)
/* i: scan position; j: start of the line currently being collected */
3774 register Py_ssize_t i;
3775 register Py_ssize_t j;
3776 Py_ssize_t len;
3777 int keepends = 0;
3778 PyObject *list;
/* str is not referenced directly below; presumably used by SPLIT_APPEND */
3779 PyObject *str;
3780 char *data;
3782 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3783 return NULL;
3785 data = PyString_AS_STRING(self);
3786 len = PyString_GET_SIZE(self);
3788 /* This does not use the preallocated list because splitlines is
3789 usually run with hundreds of newlines. The overhead of
3790 switching between PyList_SET_ITEM and append causes about a
3791 2-3% slowdown for that common case. A smarter implementation
3792 could move the if check out, so the SET_ITEMs are done first
3793 and the appends only done when the prealloc buffer is full.
3794 That's too much work for little gain.*/
3796 list = PyList_New(0);
3797 if (!list)
3798 goto onError;
3800 for (i = j = 0; i < len; ) {
/* eol: end of the range handed to SPLIT_APPEND for this line */
3801 Py_ssize_t eol;
3803 /* Find a line and append it */
3804 while (i < len && data[i] != '\n' && data[i] != '\r')
3805 i++;
3807 /* Skip the line break reading CRLF as one line break */
3808 eol = i;
3809 if (i < len) {
3810 if (data[i] == '\r' && i + 1 < len &&
3811 data[i+1] == '\n')
3812 i += 2;
3813 else
3814 i++;
/* with keepends the appended range extends over the line break */
3815 if (keepends)
3816 eol = i;
3818 SPLIT_APPEND(data, j, eol);
3819 j = i;
/* NOTE(review): j equals i when the loop above exits, so this branch
   looks unreachable -- confirm before removing */
3821 if (j < len) {
3822 SPLIT_APPEND(data, j, len);
3825 return list;
/* error exit: list may still be NULL here, hence Py_XDECREF */
3827 onError:
3828 Py_XDECREF(list);
3829 return NULL;
3832 #undef SPLIT_APPEND
3833 #undef SPLIT_ADD
3834 #undef MAX_PREALLOC
3835 #undef PREALLOC_SIZE
3837 static PyObject *
3838 string_getnewargs(PyStringObject *v)
3840 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3844 static PyMethodDef
3845 string_methods[] = {
3846 /* Counterparts of the obsolete stropmodule functions; except
3847 string.maketrans(). */
3848 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3849 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3850 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3851 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3852 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3853 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3854 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3855 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3856 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3857 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3858 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3859 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3860 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3861 capitalize__doc__},
3862 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3863 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3864 endswith__doc__},
3865 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3866 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3867 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3868 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3869 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3870 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3871 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3872 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3873 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3874 rpartition__doc__},
3875 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3876 startswith__doc__},
3877 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3878 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3879 swapcase__doc__},
3880 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3881 translate__doc__},
3882 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3883 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3884 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3885 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3886 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3887 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3888 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3889 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3890 expandtabs__doc__},
3891 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3892 splitlines__doc__},
3893 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3894 {NULL, NULL} /* sentinel */
3897 static PyObject *
3898 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3900 static PyObject *
3901 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3903 PyObject *x = NULL;
3904 static char *kwlist[] = {"object", 0};
3906 if (type != &PyString_Type)
3907 return str_subtype_new(type, args, kwds);
3908 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3909 return NULL;
3910 if (x == NULL)
3911 return PyString_FromString("");
3912 return PyObject_Str(x);
3915 static PyObject *
3916 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3918 PyObject *tmp, *pnew;
3919 Py_ssize_t n;
3921 assert(PyType_IsSubtype(type, &PyString_Type));
3922 tmp = string_new(&PyString_Type, args, kwds);
3923 if (tmp == NULL)
3924 return NULL;
3925 assert(PyString_CheckExact(tmp));
3926 n = PyString_GET_SIZE(tmp);
3927 pnew = type->tp_alloc(type, n);
3928 if (pnew != NULL) {
3929 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3930 ((PyStringObject *)pnew)->ob_shash =
3931 ((PyStringObject *)tmp)->ob_shash;
3932 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3934 Py_DECREF(tmp);
3935 return pnew;
3938 static PyObject *
3939 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3941 PyErr_SetString(PyExc_TypeError,
3942 "The basestring type cannot be instantiated");
3943 return NULL;
3946 static PyObject *
3947 string_mod(PyObject *v, PyObject *w)
3949 if (!PyString_Check(v)) {
3950 Py_INCREF(Py_NotImplemented);
3951 return Py_NotImplemented;
3953 return PyString_Format(v, w);
3956 PyDoc_STRVAR(basestring_doc,
3957 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3959 static PyNumberMethods string_as_number = {
3960 0, /*nb_add*/
3961 0, /*nb_subtract*/
3962 0, /*nb_multiply*/
3963 0, /*nb_divide*/
3964 string_mod, /*nb_remainder*/
3968 PyTypeObject PyBaseString_Type = {
3969 PyObject_HEAD_INIT(&PyType_Type)
3971 "basestring",
3974 0, /* tp_dealloc */
3975 0, /* tp_print */
3976 0, /* tp_getattr */
3977 0, /* tp_setattr */
3978 0, /* tp_compare */
3979 0, /* tp_repr */
3980 0, /* tp_as_number */
3981 0, /* tp_as_sequence */
3982 0, /* tp_as_mapping */
3983 0, /* tp_hash */
3984 0, /* tp_call */
3985 0, /* tp_str */
3986 0, /* tp_getattro */
3987 0, /* tp_setattro */
3988 0, /* tp_as_buffer */
3989 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3990 basestring_doc, /* tp_doc */
3991 0, /* tp_traverse */
3992 0, /* tp_clear */
3993 0, /* tp_richcompare */
3994 0, /* tp_weaklistoffset */
3995 0, /* tp_iter */
3996 0, /* tp_iternext */
3997 0, /* tp_methods */
3998 0, /* tp_members */
3999 0, /* tp_getset */
4000 &PyBaseObject_Type, /* tp_base */
4001 0, /* tp_dict */
4002 0, /* tp_descr_get */
4003 0, /* tp_descr_set */
4004 0, /* tp_dictoffset */
4005 0, /* tp_init */
4006 0, /* tp_alloc */
4007 basestring_new, /* tp_new */
4008 0, /* tp_free */
4011 PyDoc_STRVAR(string_doc,
4012 "str(object) -> string\n\
4014 Return a nice string representation of the object.\n\
4015 If the argument is a string, the return value is the same object.");
4017 PyTypeObject PyString_Type = {
4018 PyObject_HEAD_INIT(&PyType_Type)
4020 "str",
4021 sizeof(PyStringObject),
4022 sizeof(char),
4023 string_dealloc, /* tp_dealloc */
4024 (printfunc)string_print, /* tp_print */
4025 0, /* tp_getattr */
4026 0, /* tp_setattr */
4027 0, /* tp_compare */
4028 string_repr, /* tp_repr */
4029 &string_as_number, /* tp_as_number */
4030 &string_as_sequence, /* tp_as_sequence */
4031 &string_as_mapping, /* tp_as_mapping */
4032 (hashfunc)string_hash, /* tp_hash */
4033 0, /* tp_call */
4034 string_str, /* tp_str */
4035 PyObject_GenericGetAttr, /* tp_getattro */
4036 0, /* tp_setattro */
4037 &string_as_buffer, /* tp_as_buffer */
4038 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4039 Py_TPFLAGS_BASETYPE, /* tp_flags */
4040 string_doc, /* tp_doc */
4041 0, /* tp_traverse */
4042 0, /* tp_clear */
4043 (richcmpfunc)string_richcompare, /* tp_richcompare */
4044 0, /* tp_weaklistoffset */
4045 0, /* tp_iter */
4046 0, /* tp_iternext */
4047 string_methods, /* tp_methods */
4048 0, /* tp_members */
4049 0, /* tp_getset */
4050 &PyBaseString_Type, /* tp_base */
4051 0, /* tp_dict */
4052 0, /* tp_descr_get */
4053 0, /* tp_descr_set */
4054 0, /* tp_dictoffset */
4055 0, /* tp_init */
4056 0, /* tp_alloc */
4057 string_new, /* tp_new */
4058 PyObject_Del, /* tp_free */
4061 void
4062 PyString_Concat(register PyObject **pv, register PyObject *w)
4064 register PyObject *v;
4065 if (*pv == NULL)
4066 return;
4067 if (w == NULL || !PyString_Check(*pv)) {
4068 Py_DECREF(*pv);
4069 *pv = NULL;
4070 return;
4072 v = string_concat((PyStringObject *) *pv, w);
4073 Py_DECREF(*pv);
4074 *pv = v;
4077 void
4078 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
4080 PyString_Concat(pv, w);
4081 Py_XDECREF(w);
4085 /* The following function breaks the notion that strings are immutable:
4086 it changes the size of a string. We get away with this only if there
4087 is only one module referencing the object. You can also think of it
4088 as creating a new string object and destroying the old one, only
4089 more efficiently. In any case, don't use this if the string may
4090 already be known to some other part of the code...
4091 Note that if there's not enough memory to resize the string, the original
4092 string object at *pv is deallocated, *pv is set to NULL, an "out of
4093 memory" exception is set, and -1 is returned. Else (on success) 0 is
4094 returned, and the value in *pv may or may not be the same as on input.
4095 As always, an extra byte is allocated for a trailing \0 byte (newsize
4096 does *not* include that), and a trailing \0 byte is stored.
4100 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
4102 register PyObject *v;
4103 register PyStringObject *sv;
4104 v = *pv;
4105 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4106 PyString_CHECK_INTERNED(v)) {
4107 *pv = 0;
4108 Py_DECREF(v);
4109 PyErr_BadInternalCall();
4110 return -1;
4112 /* XXX UNREF/NEWREF interface should be more symmetrical */
4113 _Py_DEC_REFTOTAL;
4114 _Py_ForgetReference(v);
4115 *pv = (PyObject *)
4116 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
4117 if (*pv == NULL) {
4118 PyObject_Del(v);
4119 PyErr_NoMemory();
4120 return -1;
4122 _Py_NewReference(*pv);
4123 sv = (PyStringObject *) *pv;
4124 sv->ob_size = newsize;
4125 sv->ob_sval[newsize] = '\0';
4126 sv->ob_shash = -1; /* invalidate cached hash value */
4127 return 0;
4130 /* Helpers for formatstring */
4132 Py_LOCAL_INLINE(PyObject *)
4133 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4135 Py_ssize_t argidx = *p_argidx;
4136 if (argidx < arglen) {
4137 (*p_argidx)++;
4138 if (arglen < 0)
4139 return args;
4140 else
4141 return PyTuple_GetItem(args, argidx);
4143 PyErr_SetString(PyExc_TypeError,
4144 "not enough arguments for format string");
4145 return NULL;
/* Format flag bits, one per flag character of a % conversion:
 *
 *   '-'  F_LJUST
 *   '+'  F_SIGN
 *   ' '  F_BLANK
 *   '#'  F_ALT
 *   '0'  F_ZERO
 */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
4161 Py_LOCAL_INLINE(int)
4162 formatfloat(char *buf, size_t buflen, int flags,
4163 int prec, int type, PyObject *v)
4165 /* fmt = '%#.' + `prec` + `type`
4166 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4167 char fmt[20];
4168 double x;
4169 x = PyFloat_AsDouble(v);
4170 if (x == -1.0 && PyErr_Occurred()) {
4171 PyErr_SetString(PyExc_TypeError, "float argument required");
4172 return -1;
4174 if (prec < 0)
4175 prec = 6;
4176 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4177 type = 'g';
4178 /* Worst case length calc to ensure no buffer overrun:
4180 'g' formats:
4181 fmt = %#.<prec>g
4182 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4183 for any double rep.)
4184 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4186 'f' formats:
4187 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4188 len = 1 + 50 + 1 + prec = 52 + prec
4190 If prec=0 the effective precision is 1 (the leading digit is
4191 always given), therefore increase the length by one.
4194 if (((type == 'g' || type == 'G') &&
4195 buflen <= (size_t)10 + (size_t)prec) ||
4196 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4197 PyErr_SetString(PyExc_OverflowError,
4198 "formatted float is too long (precision too large?)");
4199 return -1;
4201 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4202 (flags&F_ALT) ? "#" : "",
4203 prec, type);
4204 PyOS_ascii_formatd(buf, buflen, fmt, x);
4205 return (int)strlen(buf);
4208 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4209 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4210 * Python's regular ints.
4211 * Return value: a new PyString*, or NULL if error.
4212 * . *pbuf is set to point into it,
4213 * *plen set to the # of chars following that.
4214 * Caller must decref it when done using pbuf.
4215 * The string starting at *pbuf is of the form
4216 * "-"? ("0x" | "0X")? digit+
4217 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4218 * set in flags. The case of hex digits will be correct,
4219 * There will be at least prec digits, zero-filled on the left if
4220 * necessary to get that many.
4221 * val object to be converted
4222 * flags bitmask of format flags; only F_ALT is looked at
4223 * prec minimum number of digits; 0-fill on left if needed
4224 * type a character in [duoxX]; u acts the same as d
4226 * CAUTION: o, x and X conversions on regular ints can never
4227 * produce a '-' sign, but can for Python's unbounded ints.
4229 PyObject*
4230 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4231 char **pbuf, int *plen)
4233 PyObject *result = NULL;
4234 char *buf;
4235 Py_ssize_t i;
4236 int sign; /* 1 if '-', else 0 */
4237 int len; /* number of characters */
4238 Py_ssize_t llen;
4239 int numdigits; /* len == numnondigits + numdigits */
4240 int numnondigits = 0;
4242 switch (type) {
4243 case 'd':
4244 case 'u':
4245 result = val->ob_type->tp_str(val);
4246 break;
4247 case 'o':
4248 result = val->ob_type->tp_as_number->nb_oct(val);
4249 break;
4250 case 'x':
4251 case 'X':
4252 numnondigits = 2;
4253 result = val->ob_type->tp_as_number->nb_hex(val);
4254 break;
4255 default:
4256 assert(!"'type' not in [duoxX]");
4258 if (!result)
4259 return NULL;
4261 buf = PyString_AsString(result);
4262 if (!buf) {
4263 Py_DECREF(result);
4264 return NULL;
4267 /* To modify the string in-place, there can only be one reference. */
4268 if (result->ob_refcnt != 1) {
4269 PyErr_BadInternalCall();
4270 return NULL;
4272 llen = PyString_Size(result);
4273 if (llen > INT_MAX) {
4274 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4275 return NULL;
4277 len = (int)llen;
4278 if (buf[len-1] == 'L') {
4279 --len;
4280 buf[len] = '\0';
4282 sign = buf[0] == '-';
4283 numnondigits += sign;
4284 numdigits = len - numnondigits;
4285 assert(numdigits > 0);
4287 /* Get rid of base marker unless F_ALT */
4288 if ((flags & F_ALT) == 0) {
4289 /* Need to skip 0x, 0X or 0. */
4290 int skipped = 0;
4291 switch (type) {
4292 case 'o':
4293 assert(buf[sign] == '0');
4294 /* If 0 is only digit, leave it alone. */
4295 if (numdigits > 1) {
4296 skipped = 1;
4297 --numdigits;
4299 break;
4300 case 'x':
4301 case 'X':
4302 assert(buf[sign] == '0');
4303 assert(buf[sign + 1] == 'x');
4304 skipped = 2;
4305 numnondigits -= 2;
4306 break;
4308 if (skipped) {
4309 buf += skipped;
4310 len -= skipped;
4311 if (sign)
4312 buf[0] = '-';
4314 assert(len == numnondigits + numdigits);
4315 assert(numdigits > 0);
4318 /* Fill with leading zeroes to meet minimum width. */
4319 if (prec > numdigits) {
4320 PyObject *r1 = PyString_FromStringAndSize(NULL,
4321 numnondigits + prec);
4322 char *b1;
4323 if (!r1) {
4324 Py_DECREF(result);
4325 return NULL;
4327 b1 = PyString_AS_STRING(r1);
4328 for (i = 0; i < numnondigits; ++i)
4329 *b1++ = *buf++;
4330 for (i = 0; i < prec - numdigits; i++)
4331 *b1++ = '0';
4332 for (i = 0; i < numdigits; i++)
4333 *b1++ = *buf++;
4334 *b1 = '\0';
4335 Py_DECREF(result);
4336 result = r1;
4337 buf = PyString_AS_STRING(result);
4338 len = numnondigits + prec;
4341 /* Fix up case for hex conversions. */
4342 if (type == 'X') {
4343 /* Need to convert all lower case letters to upper case.
4344 and need to convert 0x to 0X (and -0x to -0X). */
4345 for (i = 0; i < len; i++)
4346 if (buf[i] >= 'a' && buf[i] <= 'x')
4347 buf[i] -= 'a'-'A';
4349 *pbuf = buf;
4350 *plen = len;
4351 return result;
4354 Py_LOCAL_INLINE(int)
4355 formatint(char *buf, size_t buflen, int flags,
4356 int prec, int type, PyObject *v)
4358 /* fmt = '%#.' + `prec` + 'l' + `type`
4359 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4360 + 1 + 1 = 24 */
4361 char fmt[64]; /* plenty big enough! */
4362 char *sign;
4363 long x;
4365 x = PyInt_AsLong(v);
4366 if (x == -1 && PyErr_Occurred()) {
4367 PyErr_SetString(PyExc_TypeError, "int argument required");
4368 return -1;
4370 if (x < 0 && type == 'u') {
4371 type = 'd';
4373 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4374 sign = "-";
4375 else
4376 sign = "";
4377 if (prec < 0)
4378 prec = 1;
4380 if ((flags & F_ALT) &&
4381 (type == 'x' || type == 'X')) {
4382 /* When converting under %#x or %#X, there are a number
4383 * of issues that cause pain:
4384 * - when 0 is being converted, the C standard leaves off
4385 * the '0x' or '0X', which is inconsistent with other
4386 * %#x/%#X conversions and inconsistent with Python's
4387 * hex() function
4388 * - there are platforms that violate the standard and
4389 * convert 0 with the '0x' or '0X'
4390 * (Metrowerks, Compaq Tru64)
4391 * - there are platforms that give '0x' when converting
4392 * under %#X, but convert 0 in accordance with the
4393 * standard (OS/2 EMX)
4395 * We can achieve the desired consistency by inserting our
4396 * own '0x' or '0X' prefix, and substituting %x/%X in place
4397 * of %#x/%#X.
4399 * Note that this is the same approach as used in
4400 * formatint() in unicodeobject.c
4402 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4403 sign, type, prec, type);
4405 else {
4406 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4407 sign, (flags&F_ALT) ? "#" : "",
4408 prec, type);
4411 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4412 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4414 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4415 PyErr_SetString(PyExc_OverflowError,
4416 "formatted integer is too long (precision too large?)");
4417 return -1;
4419 if (sign[0])
4420 PyOS_snprintf(buf, buflen, fmt, -x);
4421 else
4422 PyOS_snprintf(buf, buflen, fmt, x);
4423 return (int)strlen(buf);
4426 Py_LOCAL_INLINE(int)
4427 formatchar(char *buf, size_t buflen, PyObject *v)
4429 /* presume that the buffer is at least 2 characters long */
4430 if (PyString_Check(v)) {
4431 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4432 return -1;
4434 else {
4435 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4436 return -1;
4438 buf[1] = '\0';
4439 return 1;
4442 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4444 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4445 chars are formatted. XXX This is a magic number. Each formatting
4446 routine does bounds checking to ensure no overflow, but a better
4447 solution may be to malloc a buffer of appropriate size for each
4448 format. For now, the current solution is sufficient.
4450 #define FORMATBUFLEN (size_t)120
4452 PyObject *
4453 PyString_Format(PyObject *format, PyObject *args)
4455 char *fmt, *res;
4456 Py_ssize_t arglen, argidx;
4457 Py_ssize_t reslen, rescnt, fmtcnt;
4458 int args_owned = 0;
4459 PyObject *result, *orig_args;
4460 #ifdef Py_USING_UNICODE
4461 PyObject *v, *w;
4462 #endif
4463 PyObject *dict = NULL;
4464 if (format == NULL || !PyString_Check(format) || args == NULL) {
4465 PyErr_BadInternalCall();
4466 return NULL;
4468 orig_args = args;
4469 fmt = PyString_AS_STRING(format);
4470 fmtcnt = PyString_GET_SIZE(format);
4471 reslen = rescnt = fmtcnt + 100;
4472 result = PyString_FromStringAndSize((char *)NULL, reslen);
4473 if (result == NULL)
4474 return NULL;
4475 res = PyString_AsString(result);
4476 if (PyTuple_Check(args)) {
4477 arglen = PyTuple_GET_SIZE(args);
4478 argidx = 0;
4480 else {
4481 arglen = -1;
4482 argidx = -2;
4484 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4485 !PyObject_TypeCheck(args, &PyBaseString_Type))
4486 dict = args;
4487 while (--fmtcnt >= 0) {
4488 if (*fmt != '%') {
4489 if (--rescnt < 0) {
4490 rescnt = fmtcnt + 100;
4491 reslen += rescnt;
4492 if (_PyString_Resize(&result, reslen) < 0)
4493 return NULL;
4494 res = PyString_AS_STRING(result)
4495 + reslen - rescnt;
4496 --rescnt;
4498 *res++ = *fmt++;
4500 else {
4501 /* Got a format specifier */
4502 int flags = 0;
4503 Py_ssize_t width = -1;
4504 int prec = -1;
4505 int c = '\0';
4506 int fill;
4507 PyObject *v = NULL;
4508 PyObject *temp = NULL;
4509 char *pbuf;
4510 int sign;
4511 Py_ssize_t len;
4512 char formatbuf[FORMATBUFLEN];
4513 /* For format{float,int,char}() */
4514 #ifdef Py_USING_UNICODE
4515 char *fmt_start = fmt;
4516 Py_ssize_t argidx_start = argidx;
4517 #endif
4519 fmt++;
4520 if (*fmt == '(') {
4521 char *keystart;
4522 Py_ssize_t keylen;
4523 PyObject *key;
4524 int pcount = 1;
4526 if (dict == NULL) {
4527 PyErr_SetString(PyExc_TypeError,
4528 "format requires a mapping");
4529 goto error;
4531 ++fmt;
4532 --fmtcnt;
4533 keystart = fmt;
4534 /* Skip over balanced parentheses */
4535 while (pcount > 0 && --fmtcnt >= 0) {
4536 if (*fmt == ')')
4537 --pcount;
4538 else if (*fmt == '(')
4539 ++pcount;
4540 fmt++;
4542 keylen = fmt - keystart - 1;
4543 if (fmtcnt < 0 || pcount > 0) {
4544 PyErr_SetString(PyExc_ValueError,
4545 "incomplete format key");
4546 goto error;
4548 key = PyString_FromStringAndSize(keystart,
4549 keylen);
4550 if (key == NULL)
4551 goto error;
4552 if (args_owned) {
4553 Py_DECREF(args);
4554 args_owned = 0;
4556 args = PyObject_GetItem(dict, key);
4557 Py_DECREF(key);
4558 if (args == NULL) {
4559 goto error;
4561 args_owned = 1;
4562 arglen = -1;
4563 argidx = -2;
4565 while (--fmtcnt >= 0) {
4566 switch (c = *fmt++) {
4567 case '-': flags |= F_LJUST; continue;
4568 case '+': flags |= F_SIGN; continue;
4569 case ' ': flags |= F_BLANK; continue;
4570 case '#': flags |= F_ALT; continue;
4571 case '0': flags |= F_ZERO; continue;
4573 break;
4575 if (c == '*') {
4576 v = getnextarg(args, arglen, &argidx);
4577 if (v == NULL)
4578 goto error;
4579 if (!PyInt_Check(v)) {
4580 PyErr_SetString(PyExc_TypeError,
4581 "* wants int");
4582 goto error;
4584 width = PyInt_AsLong(v);
4585 if (width < 0) {
4586 flags |= F_LJUST;
4587 width = -width;
4589 if (--fmtcnt >= 0)
4590 c = *fmt++;
4592 else if (c >= 0 && isdigit(c)) {
4593 width = c - '0';
4594 while (--fmtcnt >= 0) {
4595 c = Py_CHARMASK(*fmt++);
4596 if (!isdigit(c))
4597 break;
4598 if ((width*10) / 10 != width) {
4599 PyErr_SetString(
4600 PyExc_ValueError,
4601 "width too big");
4602 goto error;
4604 width = width*10 + (c - '0');
4607 if (c == '.') {
4608 prec = 0;
4609 if (--fmtcnt >= 0)
4610 c = *fmt++;
4611 if (c == '*') {
4612 v = getnextarg(args, arglen, &argidx);
4613 if (v == NULL)
4614 goto error;
4615 if (!PyInt_Check(v)) {
4616 PyErr_SetString(
4617 PyExc_TypeError,
4618 "* wants int");
4619 goto error;
4621 prec = PyInt_AsLong(v);
4622 if (prec < 0)
4623 prec = 0;
4624 if (--fmtcnt >= 0)
4625 c = *fmt++;
4627 else if (c >= 0 && isdigit(c)) {
4628 prec = c - '0';
4629 while (--fmtcnt >= 0) {
4630 c = Py_CHARMASK(*fmt++);
4631 if (!isdigit(c))
4632 break;
4633 if ((prec*10) / 10 != prec) {
4634 PyErr_SetString(
4635 PyExc_ValueError,
4636 "prec too big");
4637 goto error;
4639 prec = prec*10 + (c - '0');
4642 } /* prec */
4643 if (fmtcnt >= 0) {
4644 if (c == 'h' || c == 'l' || c == 'L') {
4645 if (--fmtcnt >= 0)
4646 c = *fmt++;
4649 if (fmtcnt < 0) {
4650 PyErr_SetString(PyExc_ValueError,
4651 "incomplete format");
4652 goto error;
4654 if (c != '%') {
4655 v = getnextarg(args, arglen, &argidx);
4656 if (v == NULL)
4657 goto error;
4659 sign = 0;
4660 fill = ' ';
4661 switch (c) {
4662 case '%':
4663 pbuf = "%";
4664 len = 1;
4665 break;
4666 case 's':
4667 #ifdef Py_USING_UNICODE
4668 if (PyUnicode_Check(v)) {
4669 fmt = fmt_start;
4670 argidx = argidx_start;
4671 goto unicode;
4673 #endif
4674 temp = _PyObject_Str(v);
4675 #ifdef Py_USING_UNICODE
4676 if (temp != NULL && PyUnicode_Check(temp)) {
4677 Py_DECREF(temp);
4678 fmt = fmt_start;
4679 argidx = argidx_start;
4680 goto unicode;
4682 #endif
4683 /* Fall through */
4684 case 'r':
4685 if (c == 'r')
4686 temp = PyObject_Repr(v);
4687 if (temp == NULL)
4688 goto error;
4689 if (!PyString_Check(temp)) {
4690 PyErr_SetString(PyExc_TypeError,
4691 "%s argument has non-string str()");
4692 Py_DECREF(temp);
4693 goto error;
4695 pbuf = PyString_AS_STRING(temp);
4696 len = PyString_GET_SIZE(temp);
4697 if (prec >= 0 && len > prec)
4698 len = prec;
4699 break;
4700 case 'i':
4701 case 'd':
4702 case 'u':
4703 case 'o':
4704 case 'x':
4705 case 'X':
4706 if (c == 'i')
4707 c = 'd';
4708 if (PyLong_Check(v)) {
4709 int ilen;
4710 temp = _PyString_FormatLong(v, flags,
4711 prec, c, &pbuf, &ilen);
4712 len = ilen;
4713 if (!temp)
4714 goto error;
4715 sign = 1;
4717 else {
4718 pbuf = formatbuf;
4719 len = formatint(pbuf,
4720 sizeof(formatbuf),
4721 flags, prec, c, v);
4722 if (len < 0)
4723 goto error;
4724 sign = 1;
4726 if (flags & F_ZERO)
4727 fill = '0';
4728 break;
4729 case 'e':
4730 case 'E':
4731 case 'f':
4732 case 'F':
4733 case 'g':
4734 case 'G':
4735 if (c == 'F')
4736 c = 'f';
4737 pbuf = formatbuf;
4738 len = formatfloat(pbuf, sizeof(formatbuf),
4739 flags, prec, c, v);
4740 if (len < 0)
4741 goto error;
4742 sign = 1;
4743 if (flags & F_ZERO)
4744 fill = '0';
4745 break;
4746 case 'c':
4747 #ifdef Py_USING_UNICODE
4748 if (PyUnicode_Check(v)) {
4749 fmt = fmt_start;
4750 argidx = argidx_start;
4751 goto unicode;
4753 #endif
4754 pbuf = formatbuf;
4755 len = formatchar(pbuf, sizeof(formatbuf), v);
4756 if (len < 0)
4757 goto error;
4758 break;
4759 default:
4760 PyErr_Format(PyExc_ValueError,
4761 "unsupported format character '%c' (0x%x) "
4762 "at index %zd",
4763 c, c,
4764 (Py_ssize_t)(fmt - 1 -
4765 PyString_AsString(format)));
4766 goto error;
4768 if (sign) {
4769 if (*pbuf == '-' || *pbuf == '+') {
4770 sign = *pbuf++;
4771 len--;
4773 else if (flags & F_SIGN)
4774 sign = '+';
4775 else if (flags & F_BLANK)
4776 sign = ' ';
4777 else
4778 sign = 0;
4780 if (width < len)
4781 width = len;
4782 if (rescnt - (sign != 0) < width) {
4783 reslen -= rescnt;
4784 rescnt = width + fmtcnt + 100;
4785 reslen += rescnt;
4786 if (reslen < 0) {
4787 Py_DECREF(result);
4788 Py_XDECREF(temp);
4789 return PyErr_NoMemory();
4791 if (_PyString_Resize(&result, reslen) < 0) {
4792 Py_XDECREF(temp);
4793 return NULL;
4795 res = PyString_AS_STRING(result)
4796 + reslen - rescnt;
4798 if (sign) {
4799 if (fill != ' ')
4800 *res++ = sign;
4801 rescnt--;
4802 if (width > len)
4803 width--;
4805 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4806 assert(pbuf[0] == '0');
4807 assert(pbuf[1] == c);
4808 if (fill != ' ') {
4809 *res++ = *pbuf++;
4810 *res++ = *pbuf++;
4812 rescnt -= 2;
4813 width -= 2;
4814 if (width < 0)
4815 width = 0;
4816 len -= 2;
4818 if (width > len && !(flags & F_LJUST)) {
4819 do {
4820 --rescnt;
4821 *res++ = fill;
4822 } while (--width > len);
4824 if (fill == ' ') {
4825 if (sign)
4826 *res++ = sign;
4827 if ((flags & F_ALT) &&
4828 (c == 'x' || c == 'X')) {
4829 assert(pbuf[0] == '0');
4830 assert(pbuf[1] == c);
4831 *res++ = *pbuf++;
4832 *res++ = *pbuf++;
4835 Py_MEMCPY(res, pbuf, len);
4836 res += len;
4837 rescnt -= len;
4838 while (--width >= len) {
4839 --rescnt;
4840 *res++ = ' ';
4842 if (dict && (argidx < arglen) && c != '%') {
4843 PyErr_SetString(PyExc_TypeError,
4844 "not all arguments converted during string formatting");
4845 Py_XDECREF(temp);
4846 goto error;
4848 Py_XDECREF(temp);
4849 } /* '%' */
4850 } /* until end */
4851 if (argidx < arglen && !dict) {
4852 PyErr_SetString(PyExc_TypeError,
4853 "not all arguments converted during string formatting");
4854 goto error;
4856 if (args_owned) {
4857 Py_DECREF(args);
4859 _PyString_Resize(&result, reslen - rescnt);
4860 return result;
4862 #ifdef Py_USING_UNICODE
4863 unicode:
4864 if (args_owned) {
4865 Py_DECREF(args);
4866 args_owned = 0;
4868 /* Fiddle args right (remove the first argidx arguments) */
4869 if (PyTuple_Check(orig_args) && argidx > 0) {
4870 PyObject *v;
4871 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4872 v = PyTuple_New(n);
4873 if (v == NULL)
4874 goto error;
4875 while (--n >= 0) {
4876 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4877 Py_INCREF(w);
4878 PyTuple_SET_ITEM(v, n, w);
4880 args = v;
4881 } else {
4882 Py_INCREF(orig_args);
4883 args = orig_args;
4885 args_owned = 1;
4886 /* Take what we have of the result and let the Unicode formatting
4887 function format the rest of the input. */
4888 rescnt = res - PyString_AS_STRING(result);
4889 if (_PyString_Resize(&result, rescnt))
4890 goto error;
4891 fmtcnt = PyString_GET_SIZE(format) - \
4892 (fmt - PyString_AS_STRING(format));
4893 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4894 if (format == NULL)
4895 goto error;
4896 v = PyUnicode_Format(format, args);
4897 Py_DECREF(format);
4898 if (v == NULL)
4899 goto error;
4900 /* Paste what we have (result) to what the Unicode formatting
4901 function returned (v) and return the result (or error) */
4902 w = PyUnicode_Concat(result, v);
4903 Py_DECREF(result);
4904 Py_DECREF(v);
4905 Py_DECREF(args);
4906 return w;
4907 #endif /* Py_USING_UNICODE */
4909 error:
4910 Py_DECREF(result);
4911 if (args_owned) {
4912 Py_DECREF(args);
4914 return NULL;
4917 void
4918 PyString_InternInPlace(PyObject **p)
4920 register PyStringObject *s = (PyStringObject *)(*p);
4921 PyObject *t;
4922 if (s == NULL || !PyString_Check(s))
4923 Py_FatalError("PyString_InternInPlace: strings only please!");
4924 /* If it's a string subclass, we don't really know what putting
4925 it in the interned dict might do. */
4926 if (!PyString_CheckExact(s))
4927 return;
4928 if (PyString_CHECK_INTERNED(s))
4929 return;
4930 if (interned == NULL) {
4931 interned = PyDict_New();
4932 if (interned == NULL) {
4933 PyErr_Clear(); /* Don't leave an exception */
4934 return;
4937 t = PyDict_GetItem(interned, (PyObject *)s);
4938 if (t) {
4939 Py_INCREF(t);
4940 Py_DECREF(*p);
4941 *p = t;
4942 return;
4945 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4946 PyErr_Clear();
4947 return;
4949 /* The two references in interned are not counted by refcnt.
4950 The string deallocator will take care of this */
4951 s->ob_refcnt -= 2;
4952 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4955 void
4956 PyString_InternImmortal(PyObject **p)
4958 PyString_InternInPlace(p);
4959 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4960 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4961 Py_INCREF(*p);
4966 PyObject *
4967 PyString_InternFromString(const char *cp)
4969 PyObject *s = PyString_FromString(cp);
4970 if (s == NULL)
4971 return NULL;
4972 PyString_InternInPlace(&s);
4973 return s;
4976 void
4977 PyString_Fini(void)
4979 int i;
4980 for (i = 0; i < UCHAR_MAX + 1; i++) {
4981 Py_XDECREF(characters[i]);
4982 characters[i] = NULL;
4984 Py_XDECREF(nullstring);
4985 nullstring = NULL;
4988 void _Py_ReleaseInternedStrings(void)
4990 PyObject *keys;
4991 PyStringObject *s;
4992 Py_ssize_t i, n;
4994 if (interned == NULL || !PyDict_Check(interned))
4995 return;
4996 keys = PyDict_Keys(interned);
4997 if (keys == NULL || !PyList_Check(keys)) {
4998 PyErr_Clear();
4999 return;
5002 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5003 detector, interned strings are not forcibly deallocated; rather, we
5004 give them their stolen references back, and then clear and DECREF
5005 the interned dict. */
5007 fprintf(stderr, "releasing interned strings\n");
5008 n = PyList_GET_SIZE(keys);
5009 for (i = 0; i < n; i++) {
5010 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5011 switch (s->ob_sstate) {
5012 case SSTATE_NOT_INTERNED:
5013 /* XXX Shouldn't happen */
5014 break;
5015 case SSTATE_INTERNED_IMMORTAL:
5016 s->ob_refcnt += 1;
5017 break;
5018 case SSTATE_INTERNED_MORTAL:
5019 s->ob_refcnt += 2;
5020 break;
5021 default:
5022 Py_FatalError("Inconsistent interned string state.");
5024 s->ob_sstate = SSTATE_NOT_INTERNED;
5026 Py_DECREF(keys);
5027 PyDict_Clear(interned);
5028 Py_DECREF(interned);
5029 interned = NULL;