Silence the DeprecationWarning raised by importing mimetools in BaseHTTPServer.
[python.git] / Objects / stringobject.c
blob5bf4add229c692cadc5a4a99cf5f40620a0d078e
1 /* String (str/bytes) object implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "Python.h"
6 #include <ctype.h>
8 #ifdef COUNT_ALLOCS
9 int null_strings, one_strings;
10 #endif
12 static PyStringObject *characters[UCHAR_MAX + 1];
13 static PyStringObject *nullstring;
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
23 static PyObject *interned;
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
51 PyObject *
52 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
54 register PyStringObject *op;
55 if (size < 0) {
56 PyErr_SetString(PyExc_SystemError,
57 "Negative size passed to PyString_FromStringAndSize");
58 return NULL;
60 if (size == 0 && (op = nullstring) != NULL) {
61 #ifdef COUNT_ALLOCS
62 null_strings++;
63 #endif
64 Py_INCREF(op);
65 return (PyObject *)op;
67 if (size == 1 && str != NULL &&
68 (op = characters[*str & UCHAR_MAX]) != NULL)
70 #ifdef COUNT_ALLOCS
71 one_strings++;
72 #endif
73 Py_INCREF(op);
74 return (PyObject *)op;
77 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
78 PyErr_SetString(PyExc_OverflowError, "string is too large");
79 return NULL;
82 /* Inline PyObject_NewVar */
83 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
84 if (op == NULL)
85 return PyErr_NoMemory();
86 PyObject_INIT_VAR(op, &PyString_Type, size);
87 op->ob_shash = -1;
88 op->ob_sstate = SSTATE_NOT_INTERNED;
89 if (str != NULL)
90 Py_MEMCPY(op->ob_sval, str, size);
91 op->ob_sval[size] = '\0';
92 /* share short strings */
93 if (size == 0) {
94 PyObject *t = (PyObject *)op;
95 PyString_InternInPlace(&t);
96 op = (PyStringObject *)t;
97 nullstring = op;
98 Py_INCREF(op);
99 } else if (size == 1 && str != NULL) {
100 PyObject *t = (PyObject *)op;
101 PyString_InternInPlace(&t);
102 op = (PyStringObject *)t;
103 characters[*str & UCHAR_MAX] = op;
104 Py_INCREF(op);
106 return (PyObject *) op;
109 PyObject *
110 PyString_FromString(const char *str)
112 register size_t size;
113 register PyStringObject *op;
115 assert(str != NULL);
116 size = strlen(str);
117 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
118 PyErr_SetString(PyExc_OverflowError,
119 "string is too long for a Python string");
120 return NULL;
122 if (size == 0 && (op = nullstring) != NULL) {
123 #ifdef COUNT_ALLOCS
124 null_strings++;
125 #endif
126 Py_INCREF(op);
127 return (PyObject *)op;
129 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
130 #ifdef COUNT_ALLOCS
131 one_strings++;
132 #endif
133 Py_INCREF(op);
134 return (PyObject *)op;
137 /* Inline PyObject_NewVar */
138 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
139 if (op == NULL)
140 return PyErr_NoMemory();
141 PyObject_INIT_VAR(op, &PyString_Type, size);
142 op->ob_shash = -1;
143 op->ob_sstate = SSTATE_NOT_INTERNED;
144 Py_MEMCPY(op->ob_sval, str, size+1);
145 /* share short strings */
146 if (size == 0) {
147 PyObject *t = (PyObject *)op;
148 PyString_InternInPlace(&t);
149 op = (PyStringObject *)t;
150 nullstring = op;
151 Py_INCREF(op);
152 } else if (size == 1) {
153 PyObject *t = (PyObject *)op;
154 PyString_InternInPlace(&t);
155 op = (PyStringObject *)t;
156 characters[*str & UCHAR_MAX] = op;
157 Py_INCREF(op);
159 return (PyObject *) op;
162 PyObject *
163 PyString_FromFormatV(const char *format, va_list vargs)
165 va_list count;
166 Py_ssize_t n = 0;
167 const char* f;
168 char *s;
169 PyObject* string;
171 #ifdef VA_LIST_IS_ARRAY
172 Py_MEMCPY(count, vargs, sizeof(va_list));
173 #else
174 #ifdef __va_copy
175 __va_copy(count, vargs);
176 #else
177 count = vargs;
178 #endif
179 #endif
180 /* step 1: figure out how large a buffer we need */
181 for (f = format; *f; f++) {
182 if (*f == '%') {
183 const char* p = f;
184 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
187 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
188 * they don't affect the amount of space we reserve.
190 if ((*f == 'l' || *f == 'z') &&
191 (f[1] == 'd' || f[1] == 'u'))
192 ++f;
194 switch (*f) {
195 case 'c':
196 (void)va_arg(count, int);
197 /* fall through... */
198 case '%':
199 n++;
200 break;
201 case 'd': case 'u': case 'i': case 'x':
202 (void) va_arg(count, int);
203 /* 20 bytes is enough to hold a 64-bit
204 integer. Decimal takes the most space.
205 This isn't enough for octal. */
206 n += 20;
207 break;
208 case 's':
209 s = va_arg(count, char*);
210 n += strlen(s);
211 break;
212 case 'p':
213 (void) va_arg(count, int);
214 /* maximum 64-bit pointer representation:
215 * 0xffffffffffffffff
216 * so 19 characters is enough.
217 * XXX I count 18 -- what's the extra for?
219 n += 19;
220 break;
221 default:
222 /* if we stumble upon an unknown
223 formatting code, copy the rest of
224 the format string to the output
225 string. (we cannot just skip the
226 code, since there's no way to know
227 what's in the argument list) */
228 n += strlen(p);
229 goto expand;
231 } else
232 n++;
234 expand:
235 /* step 2: fill the buffer */
236 /* Since we've analyzed how much space we need for the worst case,
237 use sprintf directly instead of the slower PyOS_snprintf. */
238 string = PyString_FromStringAndSize(NULL, n);
239 if (!string)
240 return NULL;
242 s = PyString_AsString(string);
244 for (f = format; *f; f++) {
245 if (*f == '%') {
246 const char* p = f++;
247 Py_ssize_t i;
248 int longflag = 0;
249 int size_tflag = 0;
250 /* parse the width.precision part (we're only
251 interested in the precision value, if any) */
252 n = 0;
253 while (isdigit(Py_CHARMASK(*f)))
254 n = (n*10) + *f++ - '0';
255 if (*f == '.') {
256 f++;
257 n = 0;
258 while (isdigit(Py_CHARMASK(*f)))
259 n = (n*10) + *f++ - '0';
261 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
262 f++;
263 /* handle the long flag, but only for %ld and %lu.
264 others can be added when necessary. */
265 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
266 longflag = 1;
267 ++f;
269 /* handle the size_t flag. */
270 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
271 size_tflag = 1;
272 ++f;
275 switch (*f) {
276 case 'c':
277 *s++ = va_arg(vargs, int);
278 break;
279 case 'd':
280 if (longflag)
281 sprintf(s, "%ld", va_arg(vargs, long));
282 else if (size_tflag)
283 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
284 va_arg(vargs, Py_ssize_t));
285 else
286 sprintf(s, "%d", va_arg(vargs, int));
287 s += strlen(s);
288 break;
289 case 'u':
290 if (longflag)
291 sprintf(s, "%lu",
292 va_arg(vargs, unsigned long));
293 else if (size_tflag)
294 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
295 va_arg(vargs, size_t));
296 else
297 sprintf(s, "%u",
298 va_arg(vargs, unsigned int));
299 s += strlen(s);
300 break;
301 case 'i':
302 sprintf(s, "%i", va_arg(vargs, int));
303 s += strlen(s);
304 break;
305 case 'x':
306 sprintf(s, "%x", va_arg(vargs, int));
307 s += strlen(s);
308 break;
309 case 's':
310 p = va_arg(vargs, char*);
311 i = strlen(p);
312 if (n > 0 && i > n)
313 i = n;
314 Py_MEMCPY(s, p, i);
315 s += i;
316 break;
317 case 'p':
318 sprintf(s, "%p", va_arg(vargs, void*));
319 /* %p is ill-defined: ensure leading 0x. */
320 if (s[1] == 'X')
321 s[1] = 'x';
322 else if (s[1] != 'x') {
323 memmove(s+2, s, strlen(s)+1);
324 s[0] = '0';
325 s[1] = 'x';
327 s += strlen(s);
328 break;
329 case '%':
330 *s++ = '%';
331 break;
332 default:
333 strcpy(s, p);
334 s += strlen(s);
335 goto end;
337 } else
338 *s++ = *f;
341 end:
342 _PyString_Resize(&string, s - PyString_AS_STRING(string));
343 return string;
346 PyObject *
347 PyString_FromFormat(const char *format, ...)
349 PyObject* ret;
350 va_list vargs;
352 #ifdef HAVE_STDARG_PROTOTYPES
353 va_start(vargs, format);
354 #else
355 va_start(vargs);
356 #endif
357 ret = PyString_FromFormatV(format, vargs);
358 va_end(vargs);
359 return ret;
363 PyObject *PyString_Decode(const char *s,
364 Py_ssize_t size,
365 const char *encoding,
366 const char *errors)
368 PyObject *v, *str;
370 str = PyString_FromStringAndSize(s, size);
371 if (str == NULL)
372 return NULL;
373 v = PyString_AsDecodedString(str, encoding, errors);
374 Py_DECREF(str);
375 return v;
378 PyObject *PyString_AsDecodedObject(PyObject *str,
379 const char *encoding,
380 const char *errors)
382 PyObject *v;
384 if (!PyString_Check(str)) {
385 PyErr_BadArgument();
386 goto onError;
389 if (encoding == NULL) {
390 #ifdef Py_USING_UNICODE
391 encoding = PyUnicode_GetDefaultEncoding();
392 #else
393 PyErr_SetString(PyExc_ValueError, "no encoding specified");
394 goto onError;
395 #endif
398 /* Decode via the codec registry */
399 v = PyCodec_Decode(str, encoding, errors);
400 if (v == NULL)
401 goto onError;
403 return v;
405 onError:
406 return NULL;
409 PyObject *PyString_AsDecodedString(PyObject *str,
410 const char *encoding,
411 const char *errors)
413 PyObject *v;
415 v = PyString_AsDecodedObject(str, encoding, errors);
416 if (v == NULL)
417 goto onError;
419 #ifdef Py_USING_UNICODE
420 /* Convert Unicode to a string using the default encoding */
421 if (PyUnicode_Check(v)) {
422 PyObject *temp = v;
423 v = PyUnicode_AsEncodedString(v, NULL, NULL);
424 Py_DECREF(temp);
425 if (v == NULL)
426 goto onError;
428 #endif
429 if (!PyString_Check(v)) {
430 PyErr_Format(PyExc_TypeError,
431 "decoder did not return a string object (type=%.400s)",
432 Py_TYPE(v)->tp_name);
433 Py_DECREF(v);
434 goto onError;
437 return v;
439 onError:
440 return NULL;
443 PyObject *PyString_Encode(const char *s,
444 Py_ssize_t size,
445 const char *encoding,
446 const char *errors)
448 PyObject *v, *str;
450 str = PyString_FromStringAndSize(s, size);
451 if (str == NULL)
452 return NULL;
453 v = PyString_AsEncodedString(str, encoding, errors);
454 Py_DECREF(str);
455 return v;
458 PyObject *PyString_AsEncodedObject(PyObject *str,
459 const char *encoding,
460 const char *errors)
462 PyObject *v;
464 if (!PyString_Check(str)) {
465 PyErr_BadArgument();
466 goto onError;
469 if (encoding == NULL) {
470 #ifdef Py_USING_UNICODE
471 encoding = PyUnicode_GetDefaultEncoding();
472 #else
473 PyErr_SetString(PyExc_ValueError, "no encoding specified");
474 goto onError;
475 #endif
478 /* Encode via the codec registry */
479 v = PyCodec_Encode(str, encoding, errors);
480 if (v == NULL)
481 goto onError;
483 return v;
485 onError:
486 return NULL;
489 PyObject *PyString_AsEncodedString(PyObject *str,
490 const char *encoding,
491 const char *errors)
493 PyObject *v;
495 v = PyString_AsEncodedObject(str, encoding, errors);
496 if (v == NULL)
497 goto onError;
499 #ifdef Py_USING_UNICODE
500 /* Convert Unicode to a string using the default encoding */
501 if (PyUnicode_Check(v)) {
502 PyObject *temp = v;
503 v = PyUnicode_AsEncodedString(v, NULL, NULL);
504 Py_DECREF(temp);
505 if (v == NULL)
506 goto onError;
508 #endif
509 if (!PyString_Check(v)) {
510 PyErr_Format(PyExc_TypeError,
511 "encoder did not return a string object (type=%.400s)",
512 Py_TYPE(v)->tp_name);
513 Py_DECREF(v);
514 goto onError;
517 return v;
519 onError:
520 return NULL;
523 static void
524 string_dealloc(PyObject *op)
526 switch (PyString_CHECK_INTERNED(op)) {
527 case SSTATE_NOT_INTERNED:
528 break;
530 case SSTATE_INTERNED_MORTAL:
531 /* revive dead object temporarily for DelItem */
532 Py_REFCNT(op) = 3;
533 if (PyDict_DelItem(interned, op) != 0)
534 Py_FatalError(
535 "deletion of interned string failed");
536 break;
538 case SSTATE_INTERNED_IMMORTAL:
539 Py_FatalError("Immortal interned string died.");
541 default:
542 Py_FatalError("Inconsistent interned string state.");
544 Py_TYPE(op)->tp_free(op);
547 /* Unescape a backslash-escaped string. If unicode is non-zero,
548 the string is a u-literal. If recode_encoding is non-zero,
549 the string is UTF-8 encoded and should be re-encoded in the
550 specified encoding. */
552 PyObject *PyString_DecodeEscape(const char *s,
553 Py_ssize_t len,
554 const char *errors,
555 Py_ssize_t unicode,
556 const char *recode_encoding)
558 int c;
559 char *p, *buf;
560 const char *end;
561 PyObject *v;
562 Py_ssize_t newlen = recode_encoding ? 4*len:len;
563 v = PyString_FromStringAndSize((char *)NULL, newlen);
564 if (v == NULL)
565 return NULL;
566 p = buf = PyString_AsString(v);
567 end = s + len;
568 while (s < end) {
569 if (*s != '\\') {
570 non_esc:
571 #ifdef Py_USING_UNICODE
572 if (recode_encoding && (*s & 0x80)) {
573 PyObject *u, *w;
574 char *r;
575 const char* t;
576 Py_ssize_t rn;
577 t = s;
578 /* Decode non-ASCII bytes as UTF-8. */
579 while (t < end && (*t & 0x80)) t++;
580 u = PyUnicode_DecodeUTF8(s, t - s, errors);
581 if(!u) goto failed;
583 /* Recode them in target encoding. */
584 w = PyUnicode_AsEncodedString(
585 u, recode_encoding, errors);
586 Py_DECREF(u);
587 if (!w) goto failed;
589 /* Append bytes to output buffer. */
590 assert(PyString_Check(w));
591 r = PyString_AS_STRING(w);
592 rn = PyString_GET_SIZE(w);
593 Py_MEMCPY(p, r, rn);
594 p += rn;
595 Py_DECREF(w);
596 s = t;
597 } else {
598 *p++ = *s++;
600 #else
601 *p++ = *s++;
602 #endif
603 continue;
605 s++;
606 if (s==end) {
607 PyErr_SetString(PyExc_ValueError,
608 "Trailing \\ in string");
609 goto failed;
611 switch (*s++) {
612 /* XXX This assumes ASCII! */
613 case '\n': break;
614 case '\\': *p++ = '\\'; break;
615 case '\'': *p++ = '\''; break;
616 case '\"': *p++ = '\"'; break;
617 case 'b': *p++ = '\b'; break;
618 case 'f': *p++ = '\014'; break; /* FF */
619 case 't': *p++ = '\t'; break;
620 case 'n': *p++ = '\n'; break;
621 case 'r': *p++ = '\r'; break;
622 case 'v': *p++ = '\013'; break; /* VT */
623 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
624 case '0': case '1': case '2': case '3':
625 case '4': case '5': case '6': case '7':
626 c = s[-1] - '0';
627 if (s < end && '0' <= *s && *s <= '7') {
628 c = (c<<3) + *s++ - '0';
629 if (s < end && '0' <= *s && *s <= '7')
630 c = (c<<3) + *s++ - '0';
632 *p++ = c;
633 break;
634 case 'x':
635 if (s+1 < end &&
636 isxdigit(Py_CHARMASK(s[0])) &&
637 isxdigit(Py_CHARMASK(s[1])))
639 unsigned int x = 0;
640 c = Py_CHARMASK(*s);
641 s++;
642 if (isdigit(c))
643 x = c - '0';
644 else if (islower(c))
645 x = 10 + c - 'a';
646 else
647 x = 10 + c - 'A';
648 x = x << 4;
649 c = Py_CHARMASK(*s);
650 s++;
651 if (isdigit(c))
652 x += c - '0';
653 else if (islower(c))
654 x += 10 + c - 'a';
655 else
656 x += 10 + c - 'A';
657 *p++ = x;
658 break;
660 if (!errors || strcmp(errors, "strict") == 0) {
661 PyErr_SetString(PyExc_ValueError,
662 "invalid \\x escape");
663 goto failed;
665 if (strcmp(errors, "replace") == 0) {
666 *p++ = '?';
667 } else if (strcmp(errors, "ignore") == 0)
668 /* do nothing */;
669 else {
670 PyErr_Format(PyExc_ValueError,
671 "decoding error; "
672 "unknown error handling code: %.400s",
673 errors);
674 goto failed;
676 #ifndef Py_USING_UNICODE
677 case 'u':
678 case 'U':
679 case 'N':
680 if (unicode) {
681 PyErr_SetString(PyExc_ValueError,
682 "Unicode escapes not legal "
683 "when Unicode disabled");
684 goto failed;
686 #endif
687 default:
688 *p++ = '\\';
689 s--;
690 goto non_esc; /* an arbitry number of unescaped
691 UTF-8 bytes may follow. */
694 if (p-buf < newlen)
695 _PyString_Resize(&v, p - buf);
696 return v;
697 failed:
698 Py_DECREF(v);
699 return NULL;
702 /* -------------------------------------------------------------------- */
703 /* object api */
705 static Py_ssize_t
706 string_getsize(register PyObject *op)
708 char *s;
709 Py_ssize_t len;
710 if (PyString_AsStringAndSize(op, &s, &len))
711 return -1;
712 return len;
715 static /*const*/ char *
716 string_getbuffer(register PyObject *op)
718 char *s;
719 Py_ssize_t len;
720 if (PyString_AsStringAndSize(op, &s, &len))
721 return NULL;
722 return s;
725 Py_ssize_t
726 PyString_Size(register PyObject *op)
728 if (!PyString_Check(op))
729 return string_getsize(op);
730 return Py_SIZE(op);
733 /*const*/ char *
734 PyString_AsString(register PyObject *op)
736 if (!PyString_Check(op))
737 return string_getbuffer(op);
738 return ((PyStringObject *)op) -> ob_sval;
742 PyString_AsStringAndSize(register PyObject *obj,
743 register char **s,
744 register Py_ssize_t *len)
746 if (s == NULL) {
747 PyErr_BadInternalCall();
748 return -1;
751 if (!PyString_Check(obj)) {
752 #ifdef Py_USING_UNICODE
753 if (PyUnicode_Check(obj)) {
754 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
755 if (obj == NULL)
756 return -1;
758 else
759 #endif
761 PyErr_Format(PyExc_TypeError,
762 "expected string or Unicode object, "
763 "%.200s found", Py_TYPE(obj)->tp_name);
764 return -1;
768 *s = PyString_AS_STRING(obj);
769 if (len != NULL)
770 *len = PyString_GET_SIZE(obj);
771 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
772 PyErr_SetString(PyExc_TypeError,
773 "expected string without null bytes");
774 return -1;
776 return 0;
779 /* -------------------------------------------------------------------- */
780 /* Methods */
782 #include "stringlib/stringdefs.h"
783 #include "stringlib/fastsearch.h"
785 #include "stringlib/count.h"
786 #include "stringlib/find.h"
787 #include "stringlib/partition.h"
789 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
790 #include "stringlib/localeutil.h"
794 static int
795 string_print(PyStringObject *op, FILE *fp, int flags)
797 Py_ssize_t i, str_len;
798 char c;
799 int quote;
801 /* XXX Ought to check for interrupts when writing long strings */
802 if (! PyString_CheckExact(op)) {
803 int ret;
804 /* A str subclass may have its own __str__ method. */
805 op = (PyStringObject *) PyObject_Str((PyObject *)op);
806 if (op == NULL)
807 return -1;
808 ret = string_print(op, fp, flags);
809 Py_DECREF(op);
810 return ret;
812 if (flags & Py_PRINT_RAW) {
813 char *data = op->ob_sval;
814 Py_ssize_t size = Py_SIZE(op);
815 Py_BEGIN_ALLOW_THREADS
816 while (size > INT_MAX) {
817 /* Very long strings cannot be written atomically.
818 * But don't write exactly INT_MAX bytes at a time
819 * to avoid memory aligment issues.
821 const int chunk_size = INT_MAX & ~0x3FFF;
822 fwrite(data, 1, chunk_size, fp);
823 data += chunk_size;
824 size -= chunk_size;
826 #ifdef __VMS
827 if (size) fwrite(data, (int)size, 1, fp);
828 #else
829 fwrite(data, 1, (int)size, fp);
830 #endif
831 Py_END_ALLOW_THREADS
832 return 0;
835 /* figure out which quote to use; single is preferred */
836 quote = '\'';
837 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
838 !memchr(op->ob_sval, '"', Py_SIZE(op)))
839 quote = '"';
841 str_len = Py_SIZE(op);
842 Py_BEGIN_ALLOW_THREADS
843 fputc(quote, fp);
844 for (i = 0; i < str_len; i++) {
845 /* Since strings are immutable and the caller should have a
846 reference, accessing the interal buffer should not be an issue
847 with the GIL released. */
848 c = op->ob_sval[i];
849 if (c == quote || c == '\\')
850 fprintf(fp, "\\%c", c);
851 else if (c == '\t')
852 fprintf(fp, "\\t");
853 else if (c == '\n')
854 fprintf(fp, "\\n");
855 else if (c == '\r')
856 fprintf(fp, "\\r");
857 else if (c < ' ' || c >= 0x7f)
858 fprintf(fp, "\\x%02x", c & 0xff);
859 else
860 fputc(c, fp);
862 fputc(quote, fp);
863 Py_END_ALLOW_THREADS
864 return 0;
867 PyObject *
868 PyString_Repr(PyObject *obj, int smartquotes)
870 register PyStringObject* op = (PyStringObject*) obj;
871 size_t newsize = 2 + 4 * Py_SIZE(op);
872 PyObject *v;
873 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
874 PyErr_SetString(PyExc_OverflowError,
875 "string is too large to make repr");
876 return NULL;
878 v = PyString_FromStringAndSize((char *)NULL, newsize);
879 if (v == NULL) {
880 return NULL;
882 else {
883 register Py_ssize_t i;
884 register char c;
885 register char *p;
886 int quote;
888 /* figure out which quote to use; single is preferred */
889 quote = '\'';
890 if (smartquotes &&
891 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892 !memchr(op->ob_sval, '"', Py_SIZE(op)))
893 quote = '"';
895 p = PyString_AS_STRING(v);
896 *p++ = quote;
897 for (i = 0; i < Py_SIZE(op); i++) {
898 /* There's at least enough room for a hex escape
899 and a closing quote. */
900 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
901 c = op->ob_sval[i];
902 if (c == quote || c == '\\')
903 *p++ = '\\', *p++ = c;
904 else if (c == '\t')
905 *p++ = '\\', *p++ = 't';
906 else if (c == '\n')
907 *p++ = '\\', *p++ = 'n';
908 else if (c == '\r')
909 *p++ = '\\', *p++ = 'r';
910 else if (c < ' ' || c >= 0x7f) {
911 /* For performance, we don't want to call
912 PyOS_snprintf here (extra layers of
913 function call). */
914 sprintf(p, "\\x%02x", c & 0xff);
915 p += 4;
917 else
918 *p++ = c;
920 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
921 *p++ = quote;
922 *p = '\0';
923 _PyString_Resize(
924 &v, (p - PyString_AS_STRING(v)));
925 return v;
929 static PyObject *
930 string_repr(PyObject *op)
932 return PyString_Repr(op, 1);
935 static PyObject *
936 string_str(PyObject *s)
938 assert(PyString_Check(s));
939 if (PyString_CheckExact(s)) {
940 Py_INCREF(s);
941 return s;
943 else {
944 /* Subtype -- return genuine string with the same value. */
945 PyStringObject *t = (PyStringObject *) s;
946 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
950 static Py_ssize_t
951 string_length(PyStringObject *a)
953 return Py_SIZE(a);
956 static PyObject *
957 string_concat(register PyStringObject *a, register PyObject *bb)
959 register Py_ssize_t size;
960 register PyStringObject *op;
961 if (!PyString_Check(bb)) {
962 #ifdef Py_USING_UNICODE
963 if (PyUnicode_Check(bb))
964 return PyUnicode_Concat((PyObject *)a, bb);
965 #endif
966 if (PyByteArray_Check(bb))
967 return PyByteArray_Concat((PyObject *)a, bb);
968 PyErr_Format(PyExc_TypeError,
969 "cannot concatenate 'str' and '%.200s' objects",
970 Py_TYPE(bb)->tp_name);
971 return NULL;
973 #define b ((PyStringObject *)bb)
974 /* Optimize cases with empty left or right operand */
975 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
976 PyString_CheckExact(a) && PyString_CheckExact(b)) {
977 if (Py_SIZE(a) == 0) {
978 Py_INCREF(bb);
979 return bb;
981 Py_INCREF(a);
982 return (PyObject *)a;
984 size = Py_SIZE(a) + Py_SIZE(b);
985 /* Check that string sizes are not negative, to prevent an
986 overflow in cases where we are passed incorrectly-created
987 strings with negative lengths (due to a bug in other code).
989 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
990 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
991 PyErr_SetString(PyExc_OverflowError,
992 "strings are too large to concat");
993 return NULL;
996 /* Inline PyObject_NewVar */
997 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
998 PyErr_SetString(PyExc_OverflowError,
999 "strings are too large to concat");
1000 return NULL;
1002 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
1003 if (op == NULL)
1004 return PyErr_NoMemory();
1005 PyObject_INIT_VAR(op, &PyString_Type, size);
1006 op->ob_shash = -1;
1007 op->ob_sstate = SSTATE_NOT_INTERNED;
1008 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1009 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1010 op->ob_sval[size] = '\0';
1011 return (PyObject *) op;
1012 #undef b
1015 static PyObject *
1016 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1018 register Py_ssize_t i;
1019 register Py_ssize_t j;
1020 register Py_ssize_t size;
1021 register PyStringObject *op;
1022 size_t nbytes;
1023 if (n < 0)
1024 n = 0;
1025 /* watch out for overflows: the size can overflow int,
1026 * and the # of bytes needed can overflow size_t
1028 size = Py_SIZE(a) * n;
1029 if (n && size / n != Py_SIZE(a)) {
1030 PyErr_SetString(PyExc_OverflowError,
1031 "repeated string is too long");
1032 return NULL;
1034 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1035 Py_INCREF(a);
1036 return (PyObject *)a;
1038 nbytes = (size_t)size;
1039 if (nbytes + sizeof(PyStringObject) <= nbytes) {
1040 PyErr_SetString(PyExc_OverflowError,
1041 "repeated string is too long");
1042 return NULL;
1044 op = (PyStringObject *)
1045 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1046 if (op == NULL)
1047 return PyErr_NoMemory();
1048 PyObject_INIT_VAR(op, &PyString_Type, size);
1049 op->ob_shash = -1;
1050 op->ob_sstate = SSTATE_NOT_INTERNED;
1051 op->ob_sval[size] = '\0';
1052 if (Py_SIZE(a) == 1 && n > 0) {
1053 memset(op->ob_sval, a->ob_sval[0] , n);
1054 return (PyObject *) op;
1056 i = 0;
1057 if (i < size) {
1058 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1059 i = Py_SIZE(a);
1061 while (i < size) {
1062 j = (i <= size-i) ? i : size-i;
1063 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1064 i += j;
1066 return (PyObject *) op;
1069 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1071 static PyObject *
1072 string_slice(register PyStringObject *a, register Py_ssize_t i,
1073 register Py_ssize_t j)
1074 /* j -- may be negative! */
1076 if (i < 0)
1077 i = 0;
1078 if (j < 0)
1079 j = 0; /* Avoid signed/unsigned bug in next line */
1080 if (j > Py_SIZE(a))
1081 j = Py_SIZE(a);
1082 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1083 /* It's the same as a */
1084 Py_INCREF(a);
1085 return (PyObject *)a;
1087 if (j < i)
1088 j = i;
1089 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1092 static int
1093 string_contains(PyObject *str_obj, PyObject *sub_obj)
1095 if (!PyString_CheckExact(sub_obj)) {
1096 #ifdef Py_USING_UNICODE
1097 if (PyUnicode_Check(sub_obj))
1098 return PyUnicode_Contains(str_obj, sub_obj);
1099 #endif
1100 if (!PyString_Check(sub_obj)) {
1101 PyErr_Format(PyExc_TypeError,
1102 "'in <string>' requires string as left operand, "
1103 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1104 return -1;
1108 return stringlib_contains_obj(str_obj, sub_obj);
1111 static PyObject *
1112 string_item(PyStringObject *a, register Py_ssize_t i)
1114 char pchar;
1115 PyObject *v;
1116 if (i < 0 || i >= Py_SIZE(a)) {
1117 PyErr_SetString(PyExc_IndexError, "string index out of range");
1118 return NULL;
1120 pchar = a->ob_sval[i];
1121 v = (PyObject *)characters[pchar & UCHAR_MAX];
1122 if (v == NULL)
1123 v = PyString_FromStringAndSize(&pchar, 1);
1124 else {
1125 #ifdef COUNT_ALLOCS
1126 one_strings++;
1127 #endif
1128 Py_INCREF(v);
1130 return v;
1133 static PyObject*
1134 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1136 int c;
1137 Py_ssize_t len_a, len_b;
1138 Py_ssize_t min_len;
1139 PyObject *result;
1141 /* Make sure both arguments are strings. */
1142 if (!(PyString_Check(a) && PyString_Check(b))) {
1143 result = Py_NotImplemented;
1144 goto out;
1146 if (a == b) {
1147 switch (op) {
1148 case Py_EQ:case Py_LE:case Py_GE:
1149 result = Py_True;
1150 goto out;
1151 case Py_NE:case Py_LT:case Py_GT:
1152 result = Py_False;
1153 goto out;
1156 if (op == Py_EQ) {
1157 /* Supporting Py_NE here as well does not save
1158 much time, since Py_NE is rarely used. */
1159 if (Py_SIZE(a) == Py_SIZE(b)
1160 && (a->ob_sval[0] == b->ob_sval[0]
1161 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1162 result = Py_True;
1163 } else {
1164 result = Py_False;
1166 goto out;
1168 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1169 min_len = (len_a < len_b) ? len_a : len_b;
1170 if (min_len > 0) {
1171 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1172 if (c==0)
1173 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1174 } else
1175 c = 0;
1176 if (c == 0)
1177 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1178 switch (op) {
1179 case Py_LT: c = c < 0; break;
1180 case Py_LE: c = c <= 0; break;
1181 case Py_EQ: assert(0); break; /* unreachable */
1182 case Py_NE: c = c != 0; break;
1183 case Py_GT: c = c > 0; break;
1184 case Py_GE: c = c >= 0; break;
1185 default:
1186 result = Py_NotImplemented;
1187 goto out;
1189 result = c ? Py_True : Py_False;
1190 out:
1191 Py_INCREF(result);
1192 return result;
1196 _PyString_Eq(PyObject *o1, PyObject *o2)
1198 PyStringObject *a = (PyStringObject*) o1;
1199 PyStringObject *b = (PyStringObject*) o2;
1200 return Py_SIZE(a) == Py_SIZE(b)
1201 && *a->ob_sval == *b->ob_sval
1202 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1205 static long
1206 string_hash(PyStringObject *a)
1208 register Py_ssize_t len;
1209 register unsigned char *p;
1210 register long x;
1212 if (a->ob_shash != -1)
1213 return a->ob_shash;
1214 len = Py_SIZE(a);
1215 p = (unsigned char *) a->ob_sval;
1216 x = *p << 7;
1217 while (--len >= 0)
1218 x = (1000003*x) ^ *p++;
1219 x ^= Py_SIZE(a);
1220 if (x == -1)
1221 x = -2;
1222 a->ob_shash = x;
1223 return x;
1226 static PyObject*
1227 string_subscript(PyStringObject* self, PyObject* item)
1229 if (PyIndex_Check(item)) {
1230 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1231 if (i == -1 && PyErr_Occurred())
1232 return NULL;
1233 if (i < 0)
1234 i += PyString_GET_SIZE(self);
1235 return string_item(self, i);
1237 else if (PySlice_Check(item)) {
1238 Py_ssize_t start, stop, step, slicelength, cur, i;
1239 char* source_buf;
1240 char* result_buf;
1241 PyObject* result;
1243 if (PySlice_GetIndicesEx((PySliceObject*)item,
1244 PyString_GET_SIZE(self),
1245 &start, &stop, &step, &slicelength) < 0) {
1246 return NULL;
1249 if (slicelength <= 0) {
1250 return PyString_FromStringAndSize("", 0);
1252 else if (start == 0 && step == 1 &&
1253 slicelength == PyString_GET_SIZE(self) &&
1254 PyString_CheckExact(self)) {
1255 Py_INCREF(self);
1256 return (PyObject *)self;
1258 else if (step == 1) {
1259 return PyString_FromStringAndSize(
1260 PyString_AS_STRING(self) + start,
1261 slicelength);
1263 else {
1264 source_buf = PyString_AsString((PyObject*)self);
1265 result_buf = (char *)PyMem_Malloc(slicelength);
1266 if (result_buf == NULL)
1267 return PyErr_NoMemory();
1269 for (cur = start, i = 0; i < slicelength;
1270 cur += step, i++) {
1271 result_buf[i] = source_buf[cur];
1274 result = PyString_FromStringAndSize(result_buf,
1275 slicelength);
1276 PyMem_Free(result_buf);
1277 return result;
1280 else {
1281 PyErr_Format(PyExc_TypeError,
1282 "string indices must be integers, not %.200s",
1283 Py_TYPE(item)->tp_name);
1284 return NULL;
1288 static Py_ssize_t
1289 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1291 if ( index != 0 ) {
1292 PyErr_SetString(PyExc_SystemError,
1293 "accessing non-existent string segment");
1294 return -1;
1296 *ptr = (void *)self->ob_sval;
1297 return Py_SIZE(self);
1300 static Py_ssize_t
1301 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1303 PyErr_SetString(PyExc_TypeError,
1304 "Cannot use string as modifiable buffer");
1305 return -1;
1308 static Py_ssize_t
1309 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1311 if ( lenp )
1312 *lenp = Py_SIZE(self);
1313 return 1;
1316 static Py_ssize_t
1317 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1319 if ( index != 0 ) {
1320 PyErr_SetString(PyExc_SystemError,
1321 "accessing non-existent string segment");
1322 return -1;
1324 *ptr = self->ob_sval;
1325 return Py_SIZE(self);
1328 static int
1329 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1331 return PyBuffer_FillInfo(view, (PyObject*)self,
1332 (void *)self->ob_sval, Py_SIZE(self),
1333 1, flags);
1336 static PySequenceMethods string_as_sequence = {
1337 (lenfunc)string_length, /*sq_length*/
1338 (binaryfunc)string_concat, /*sq_concat*/
1339 (ssizeargfunc)string_repeat, /*sq_repeat*/
1340 (ssizeargfunc)string_item, /*sq_item*/
1341 (ssizessizeargfunc)string_slice, /*sq_slice*/
1342 0, /*sq_ass_item*/
1343 0, /*sq_ass_slice*/
1344 (objobjproc)string_contains /*sq_contains*/
1347 static PyMappingMethods string_as_mapping = {
1348 (lenfunc)string_length,
1349 (binaryfunc)string_subscript,
1353 static PyBufferProcs string_as_buffer = {
1354 (readbufferproc)string_buffer_getreadbuf,
1355 (writebufferproc)string_buffer_getwritebuf,
1356 (segcountproc)string_buffer_getsegcount,
1357 (charbufferproc)string_buffer_getcharbuf,
1358 (getbufferproc)string_buffer_getbuffer,
1359 0, /* XXX */
/* Selectors for the strip helpers; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",        /* LEFTSTRIP */
    "|O:rstrip",        /* RIGHTSTRIP */
    "|O:strip",         /* BOTHSTRIP */
};

/* Method name for selector i: the format string minus its "|O:" prefix. */
#define STRIPNAME(i) (&stripformat[i][3])
/* Nonzero when pattern[0:length] occurs in target at position offset.
 *
 * The first and last bytes are compared directly before memcmp() is
 * asked to check the bytes in between.
 *
 * Don't call if length < 2: the interior length (length - 2) would be
 * negative.
 *
 * Every parameter is parenthesised so that expression arguments
 * (e.g. a conditional) expand with the intended precedence.
 */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2))
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for at most maxsplit splits:
   maxsplit+1 pieces, capped at MAX_PREALLOC (5 splits gives 6 elements).
   The parameter is parenthesised so expression arguments expand safely. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
/* Build a str from data[left:right] and append it to `list`.
 *
 * Uses names from the expanding function: `str` (PyObject * scratch),
 * `list` (the result list) and the label `onError`, which is jumped to
 * when the allocation or the append fails.  The local reference to the
 * new string is released on both the success and the failure path.
 *
 * Wrapped in do { } while (0) so that an invocation followed by ';' is
 * exactly one statement, even as the unbraced body of an if/else.
 */
#define SPLIT_APPEND(data, left, right)                         \
    do {                                                        \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } while (0)
/* Build a str from data[left:right] and store it as the next result item.
 *
 * Uses names from the expanding function: `str` (PyObject * scratch),
 * `list` (the result list), `count` (number of items stored so far,
 * incremented here) and the label `onError`.
 *
 * While count is below MAX_PREALLOC the item is written straight into
 * slot `count` with PyList_SET_ITEM(); after that it is appended with
 * PyList_Append() and the local reference is released.
 *
 * Wrapped in do { } while (0) so that an invocation followed by ';' is
 * exactly one statement, even as the unbraced body of an if/else.
 */
#define SPLIT_ADD(data, left, right)                            \
    do {                                                        \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (count < MAX_PREALLOC) {                             \
            PyList_SET_ITEM(list, count, str);                  \
        } else {                                                \
            if (PyList_Append(list, str)) {                     \
                Py_DECREF(str);                                 \
                goto onError;                                   \
            }                                                   \
            Py_DECREF(str);                                     \
        }                                                       \
        count++;                                                \
    } while (0)
/* Always force the list to the expected size: the list's size field is
   set to the expanding function's `count`. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)

/* Cursor helpers for whitespace splitting.  Each one moves index i over
   s while the test holds: the forward forms stop at len, the reverse
   forms stop once i drops below 0.  Parameters are parenthesised and
   the bodies are single statements (do { } while (0)). */
#define SKIP_SPACE(s, i, len)                                           \
    do {                                                                \
        while ((i) < (len) && isspace(Py_CHARMASK((s)[(i)])))           \
            (i)++;                                                      \
    } while (0)
#define SKIP_NONSPACE(s, i, len)                                        \
    do {                                                                \
        while ((i) < (len) && !isspace(Py_CHARMASK((s)[(i)])))          \
            (i)++;                                                      \
    } while (0)
#define RSKIP_SPACE(s, i)                                               \
    do {                                                                \
        while ((i) >= 0 && isspace(Py_CHARMASK((s)[(i)])))              \
            (i)--;                                                      \
    } while (0)
#define RSKIP_NONSPACE(s, i)                                            \
    do {                                                                \
        while ((i) >= 0 && !isspace(Py_CHARMASK((s)[(i)])))             \
            (i)--;                                                      \
    } while (0)
1431 Py_LOCAL_INLINE(PyObject *)
1432 split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
1434 const char *s = PyString_AS_STRING(self);
1435 Py_ssize_t i, j, count=0;
1436 PyObject *str;
1437 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1439 if (list == NULL)
1440 return NULL;
1442 i = j = 0;
1444 while (maxsplit-- > 0) {
1445 SKIP_SPACE(s, i, len);
1446 if (i==len) break;
1447 j = i; i++;
1448 SKIP_NONSPACE(s, i, len);
1449 if (j == 0 && i == len && PyString_CheckExact(self)) {
1450 /* No whitespace in self, so just use it as list[0] */
1451 Py_INCREF(self);
1452 PyList_SET_ITEM(list, 0, (PyObject *)self);
1453 count++;
1454 break;
1456 SPLIT_ADD(s, j, i);
1459 if (i < len) {
1460 /* Only occurs when maxsplit was reached */
1461 /* Skip any remaining whitespace and copy to end of string */
1462 SKIP_SPACE(s, i, len);
1463 if (i != len)
1464 SPLIT_ADD(s, i, len);
1466 FIX_PREALLOC_SIZE(list);
1467 return list;
1468 onError:
1469 Py_DECREF(list);
1470 return NULL;
1473 Py_LOCAL_INLINE(PyObject *)
1474 split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1476 const char *s = PyString_AS_STRING(self);
1477 register Py_ssize_t i, j, count=0;
1478 PyObject *str;
1479 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1481 if (list == NULL)
1482 return NULL;
1484 i = j = 0;
1485 while ((j < len) && (maxcount-- > 0)) {
1486 for(; j<len; j++) {
1487 /* I found that using memchr makes no difference */
1488 if (s[j] == ch) {
1489 SPLIT_ADD(s, i, j);
1490 i = j = j + 1;
1491 break;
1495 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1496 /* ch not in self, so just use self as list[0] */
1497 Py_INCREF(self);
1498 PyList_SET_ITEM(list, 0, (PyObject *)self);
1499 count++;
1501 else if (i <= len) {
1502 SPLIT_ADD(s, i, len);
1504 FIX_PREALLOC_SIZE(list);
1505 return list;
1507 onError:
1508 Py_DECREF(list);
1509 return NULL;
1512 PyDoc_STRVAR(split__doc__,
1513 "S.split([sep [,maxsplit]]) -> list of strings\n\
1515 Return a list of the words in the string S, using sep as the\n\
1516 delimiter string. If maxsplit is given, at most maxsplit\n\
1517 splits are done. If sep is not specified or is None, any\n\
1518 whitespace string is a separator and empty strings are removed\n\
1519 from the result.");
1521 static PyObject *
1522 string_split(PyStringObject *self, PyObject *args)
1524 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1525 Py_ssize_t maxsplit = -1, count=0;
1526 const char *s = PyString_AS_STRING(self), *sub;
1527 PyObject *list, *str, *subobj = Py_None;
1528 #ifdef USE_FAST
1529 Py_ssize_t pos;
1530 #endif
1532 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1533 return NULL;
1534 if (maxsplit < 0)
1535 maxsplit = PY_SSIZE_T_MAX;
1536 if (subobj == Py_None)
1537 return split_whitespace(self, len, maxsplit);
1538 if (PyString_Check(subobj)) {
1539 sub = PyString_AS_STRING(subobj);
1540 n = PyString_GET_SIZE(subobj);
1542 #ifdef Py_USING_UNICODE
1543 else if (PyUnicode_Check(subobj))
1544 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1545 #endif
1546 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1547 return NULL;
1549 if (n == 0) {
1550 PyErr_SetString(PyExc_ValueError, "empty separator");
1551 return NULL;
1553 else if (n == 1)
1554 return split_char(self, len, sub[0], maxsplit);
1556 list = PyList_New(PREALLOC_SIZE(maxsplit));
1557 if (list == NULL)
1558 return NULL;
1560 #ifdef USE_FAST
1561 i = j = 0;
1562 while (maxsplit-- > 0) {
1563 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1564 if (pos < 0)
1565 break;
1566 j = i+pos;
1567 SPLIT_ADD(s, i, j);
1568 i = j + n;
1570 #else
1571 i = j = 0;
1572 while ((j+n <= len) && (maxsplit-- > 0)) {
1573 for (; j+n <= len; j++) {
1574 if (Py_STRING_MATCH(s, j, sub, n)) {
1575 SPLIT_ADD(s, i, j);
1576 i = j = j + n;
1577 break;
1581 #endif
1582 SPLIT_ADD(s, i, len);
1583 FIX_PREALLOC_SIZE(list);
1584 return list;
1586 onError:
1587 Py_DECREF(list);
1588 return NULL;
1591 PyDoc_STRVAR(partition__doc__,
1592 "S.partition(sep) -> (head, sep, tail)\n\
1594 Searches for the separator sep in S, and returns the part before it,\n\
1595 the separator itself, and the part after it. If the separator is not\n\
1596 found, returns S and two empty strings.");
1598 static PyObject *
1599 string_partition(PyStringObject *self, PyObject *sep_obj)
1601 const char *sep;
1602 Py_ssize_t sep_len;
1604 if (PyString_Check(sep_obj)) {
1605 sep = PyString_AS_STRING(sep_obj);
1606 sep_len = PyString_GET_SIZE(sep_obj);
1608 #ifdef Py_USING_UNICODE
1609 else if (PyUnicode_Check(sep_obj))
1610 return PyUnicode_Partition((PyObject *) self, sep_obj);
1611 #endif
1612 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1613 return NULL;
1615 return stringlib_partition(
1616 (PyObject*) self,
1617 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1618 sep_obj, sep, sep_len
1622 PyDoc_STRVAR(rpartition__doc__,
1623 "S.rpartition(sep) -> (tail, sep, head)\n\
1625 Searches for the separator sep in S, starting at the end of S, and returns\n\
1626 the part before it, the separator itself, and the part after it. If the\n\
1627 separator is not found, returns two empty strings and S.");
1629 static PyObject *
1630 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1632 const char *sep;
1633 Py_ssize_t sep_len;
1635 if (PyString_Check(sep_obj)) {
1636 sep = PyString_AS_STRING(sep_obj);
1637 sep_len = PyString_GET_SIZE(sep_obj);
1639 #ifdef Py_USING_UNICODE
1640 else if (PyUnicode_Check(sep_obj))
1641 return PyUnicode_Partition((PyObject *) self, sep_obj);
1642 #endif
1643 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1644 return NULL;
1646 return stringlib_rpartition(
1647 (PyObject*) self,
1648 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1649 sep_obj, sep, sep_len
1653 Py_LOCAL_INLINE(PyObject *)
1654 rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
1656 const char *s = PyString_AS_STRING(self);
1657 Py_ssize_t i, j, count=0;
1658 PyObject *str;
1659 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1661 if (list == NULL)
1662 return NULL;
1664 i = j = len-1;
1666 while (maxsplit-- > 0) {
1667 RSKIP_SPACE(s, i);
1668 if (i<0) break;
1669 j = i; i--;
1670 RSKIP_NONSPACE(s, i);
1671 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1672 /* No whitespace in self, so just use it as list[0] */
1673 Py_INCREF(self);
1674 PyList_SET_ITEM(list, 0, (PyObject *)self);
1675 count++;
1676 break;
1678 SPLIT_ADD(s, i + 1, j + 1);
1680 if (i >= 0) {
1681 /* Only occurs when maxsplit was reached */
1682 /* Skip any remaining whitespace and copy to beginning of string */
1683 RSKIP_SPACE(s, i);
1684 if (i >= 0)
1685 SPLIT_ADD(s, 0, i + 1);
1688 FIX_PREALLOC_SIZE(list);
1689 if (PyList_Reverse(list) < 0)
1690 goto onError;
1691 return list;
1692 onError:
1693 Py_DECREF(list);
1694 return NULL;
1697 Py_LOCAL_INLINE(PyObject *)
1698 rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1700 const char *s = PyString_AS_STRING(self);
1701 register Py_ssize_t i, j, count=0;
1702 PyObject *str;
1703 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1705 if (list == NULL)
1706 return NULL;
1708 i = j = len - 1;
1709 while ((i >= 0) && (maxcount-- > 0)) {
1710 for (; i >= 0; i--) {
1711 if (s[i] == ch) {
1712 SPLIT_ADD(s, i + 1, j + 1);
1713 j = i = i - 1;
1714 break;
1718 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1719 /* ch not in self, so just use self as list[0] */
1720 Py_INCREF(self);
1721 PyList_SET_ITEM(list, 0, (PyObject *)self);
1722 count++;
1724 else if (j >= -1) {
1725 SPLIT_ADD(s, 0, j + 1);
1727 FIX_PREALLOC_SIZE(list);
1728 if (PyList_Reverse(list) < 0)
1729 goto onError;
1730 return list;
1732 onError:
1733 Py_DECREF(list);
1734 return NULL;
1737 PyDoc_STRVAR(rsplit__doc__,
1738 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1740 Return a list of the words in the string S, using sep as the\n\
1741 delimiter string, starting at the end of the string and working\n\
1742 to the front. If maxsplit is given, at most maxsplit splits are\n\
1743 done. If sep is not specified or is None, any whitespace string\n\
1744 is a separator.");
1746 static PyObject *
1747 string_rsplit(PyStringObject *self, PyObject *args)
1749 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1750 Py_ssize_t maxsplit = -1, count=0;
1751 const char *s, *sub;
1752 PyObject *list, *str, *subobj = Py_None;
1754 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1755 return NULL;
1756 if (maxsplit < 0)
1757 maxsplit = PY_SSIZE_T_MAX;
1758 if (subobj == Py_None)
1759 return rsplit_whitespace(self, len, maxsplit);
1760 if (PyString_Check(subobj)) {
1761 sub = PyString_AS_STRING(subobj);
1762 n = PyString_GET_SIZE(subobj);
1764 #ifdef Py_USING_UNICODE
1765 else if (PyUnicode_Check(subobj))
1766 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1767 #endif
1768 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1769 return NULL;
1771 if (n == 0) {
1772 PyErr_SetString(PyExc_ValueError, "empty separator");
1773 return NULL;
1775 else if (n == 1)
1776 return rsplit_char(self, len, sub[0], maxsplit);
1778 list = PyList_New(PREALLOC_SIZE(maxsplit));
1779 if (list == NULL)
1780 return NULL;
1782 j = len;
1783 i = j - n;
1785 s = PyString_AS_STRING(self);
1786 while ( (i >= 0) && (maxsplit-- > 0) ) {
1787 for (; i>=0; i--) {
1788 if (Py_STRING_MATCH(s, i, sub, n)) {
1789 SPLIT_ADD(s, i + n, j);
1790 j = i;
1791 i -= n;
1792 break;
1796 SPLIT_ADD(s, 0, j);
1797 FIX_PREALLOC_SIZE(list);
1798 if (PyList_Reverse(list) < 0)
1799 goto onError;
1800 return list;
1802 onError:
1803 Py_DECREF(list);
1804 return NULL;
1808 PyDoc_STRVAR(join__doc__,
1809 "S.join(sequence) -> string\n\
1811 Return a string which is the concatenation of the strings in the\n\
1812 sequence. The separator between elements is S.");
1814 static PyObject *
1815 string_join(PyStringObject *self, PyObject *orig)
1817 char *sep = PyString_AS_STRING(self);
1818 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1819 PyObject *res = NULL;
1820 char *p;
1821 Py_ssize_t seqlen = 0;
1822 size_t sz = 0;
1823 Py_ssize_t i;
1824 PyObject *seq, *item;
1826 seq = PySequence_Fast(orig, "");
1827 if (seq == NULL) {
1828 return NULL;
1831 seqlen = PySequence_Size(seq);
1832 if (seqlen == 0) {
1833 Py_DECREF(seq);
1834 return PyString_FromString("");
1836 if (seqlen == 1) {
1837 item = PySequence_Fast_GET_ITEM(seq, 0);
1838 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1839 Py_INCREF(item);
1840 Py_DECREF(seq);
1841 return item;
1845 /* There are at least two things to join, or else we have a subclass
1846 * of the builtin types in the sequence.
1847 * Do a pre-pass to figure out the total amount of space we'll
1848 * need (sz), see whether any argument is absurd, and defer to
1849 * the Unicode join if appropriate.
1851 for (i = 0; i < seqlen; i++) {
1852 const size_t old_sz = sz;
1853 item = PySequence_Fast_GET_ITEM(seq, i);
1854 if (!PyString_Check(item)){
1855 #ifdef Py_USING_UNICODE
1856 if (PyUnicode_Check(item)) {
1857 /* Defer to Unicode join.
1858 * CAUTION: There's no gurantee that the
1859 * original sequence can be iterated over
1860 * again, so we must pass seq here.
1862 PyObject *result;
1863 result = PyUnicode_Join((PyObject *)self, seq);
1864 Py_DECREF(seq);
1865 return result;
1867 #endif
1868 PyErr_Format(PyExc_TypeError,
1869 "sequence item %zd: expected string,"
1870 " %.80s found",
1871 i, Py_TYPE(item)->tp_name);
1872 Py_DECREF(seq);
1873 return NULL;
1875 sz += PyString_GET_SIZE(item);
1876 if (i != 0)
1877 sz += seplen;
1878 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1879 PyErr_SetString(PyExc_OverflowError,
1880 "join() result is too long for a Python string");
1881 Py_DECREF(seq);
1882 return NULL;
1886 /* Allocate result space. */
1887 res = PyString_FromStringAndSize((char*)NULL, sz);
1888 if (res == NULL) {
1889 Py_DECREF(seq);
1890 return NULL;
1893 /* Catenate everything. */
1894 p = PyString_AS_STRING(res);
1895 for (i = 0; i < seqlen; ++i) {
1896 size_t n;
1897 item = PySequence_Fast_GET_ITEM(seq, i);
1898 n = PyString_GET_SIZE(item);
1899 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1900 p += n;
1901 if (i < seqlen - 1) {
1902 Py_MEMCPY(p, sep, seplen);
1903 p += seplen;
1907 Py_DECREF(seq);
1908 return res;
1911 PyObject *
1912 _PyString_Join(PyObject *sep, PyObject *x)
1914 assert(sep != NULL && PyString_Check(sep));
1915 assert(x != NULL);
1916 return string_join((PyStringObject *)sep, x);
1919 Py_LOCAL_INLINE(void)
1920 string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1922 if (*end > len)
1923 *end = len;
1924 else if (*end < 0)
1925 *end += len;
1926 if (*end < 0)
1927 *end = 0;
1928 if (*start < 0)
1929 *start += len;
1930 if (*start < 0)
1931 *start = 0;
1934 Py_LOCAL_INLINE(Py_ssize_t)
1935 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1937 PyObject *subobj;
1938 const char *sub;
1939 Py_ssize_t sub_len;
1940 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1941 PyObject *obj_start=Py_None, *obj_end=Py_None;
1943 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1944 &obj_start, &obj_end))
1945 return -2;
1946 /* To support None in "start" and "end" arguments, meaning
1947 the same as if they were not passed.
1949 if (obj_start != Py_None)
1950 if (!_PyEval_SliceIndex(obj_start, &start))
1951 return -2;
1952 if (obj_end != Py_None)
1953 if (!_PyEval_SliceIndex(obj_end, &end))
1954 return -2;
1956 if (PyString_Check(subobj)) {
1957 sub = PyString_AS_STRING(subobj);
1958 sub_len = PyString_GET_SIZE(subobj);
1960 #ifdef Py_USING_UNICODE
1961 else if (PyUnicode_Check(subobj))
1962 return PyUnicode_Find(
1963 (PyObject *)self, subobj, start, end, dir);
1964 #endif
1965 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1966 /* XXX - the "expected a character buffer object" is pretty
1967 confusing for a non-expert. remap to something else ? */
1968 return -2;
1970 if (dir > 0)
1971 return stringlib_find_slice(
1972 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1973 sub, sub_len, start, end);
1974 else
1975 return stringlib_rfind_slice(
1976 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1977 sub, sub_len, start, end);
1981 PyDoc_STRVAR(find__doc__,
1982 "S.find(sub [,start [,end]]) -> int\n\
1984 Return the lowest index in S where substring sub is found,\n\
1985 such that sub is contained within s[start:end]. Optional\n\
1986 arguments start and end are interpreted as in slice notation.\n\
1988 Return -1 on failure.");
1990 static PyObject *
1991 string_find(PyStringObject *self, PyObject *args)
1993 Py_ssize_t result = string_find_internal(self, args, +1);
1994 if (result == -2)
1995 return NULL;
1996 return PyInt_FromSsize_t(result);
2000 PyDoc_STRVAR(index__doc__,
2001 "S.index(sub [,start [,end]]) -> int\n\
2003 Like S.find() but raise ValueError when the substring is not found.");
2005 static PyObject *
2006 string_index(PyStringObject *self, PyObject *args)
2008 Py_ssize_t result = string_find_internal(self, args, +1);
2009 if (result == -2)
2010 return NULL;
2011 if (result == -1) {
2012 PyErr_SetString(PyExc_ValueError,
2013 "substring not found");
2014 return NULL;
2016 return PyInt_FromSsize_t(result);
2020 PyDoc_STRVAR(rfind__doc__,
2021 "S.rfind(sub [,start [,end]]) -> int\n\
2023 Return the highest index in S where substring sub is found,\n\
2024 such that sub is contained within s[start:end]. Optional\n\
2025 arguments start and end are interpreted as in slice notation.\n\
2027 Return -1 on failure.");
2029 static PyObject *
2030 string_rfind(PyStringObject *self, PyObject *args)
2032 Py_ssize_t result = string_find_internal(self, args, -1);
2033 if (result == -2)
2034 return NULL;
2035 return PyInt_FromSsize_t(result);
2039 PyDoc_STRVAR(rindex__doc__,
2040 "S.rindex(sub [,start [,end]]) -> int\n\
2042 Like S.rfind() but raise ValueError when the substring is not found.");
2044 static PyObject *
2045 string_rindex(PyStringObject *self, PyObject *args)
2047 Py_ssize_t result = string_find_internal(self, args, -1);
2048 if (result == -2)
2049 return NULL;
2050 if (result == -1) {
2051 PyErr_SetString(PyExc_ValueError,
2052 "substring not found");
2053 return NULL;
2055 return PyInt_FromSsize_t(result);
2059 Py_LOCAL_INLINE(PyObject *)
2060 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2062 char *s = PyString_AS_STRING(self);
2063 Py_ssize_t len = PyString_GET_SIZE(self);
2064 char *sep = PyString_AS_STRING(sepobj);
2065 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2066 Py_ssize_t i, j;
2068 i = 0;
2069 if (striptype != RIGHTSTRIP) {
2070 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2071 i++;
2075 j = len;
2076 if (striptype != LEFTSTRIP) {
2077 do {
2078 j--;
2079 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2080 j++;
2083 if (i == 0 && j == len && PyString_CheckExact(self)) {
2084 Py_INCREF(self);
2085 return (PyObject*)self;
2087 else
2088 return PyString_FromStringAndSize(s+i, j-i);
2092 Py_LOCAL_INLINE(PyObject *)
2093 do_strip(PyStringObject *self, int striptype)
2095 char *s = PyString_AS_STRING(self);
2096 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
2098 i = 0;
2099 if (striptype != RIGHTSTRIP) {
2100 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2101 i++;
2105 j = len;
2106 if (striptype != LEFTSTRIP) {
2107 do {
2108 j--;
2109 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2110 j++;
2113 if (i == 0 && j == len && PyString_CheckExact(self)) {
2114 Py_INCREF(self);
2115 return (PyObject*)self;
2117 else
2118 return PyString_FromStringAndSize(s+i, j-i);
2122 Py_LOCAL_INLINE(PyObject *)
2123 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2125 PyObject *sep = NULL;
2127 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2128 return NULL;
2130 if (sep != NULL && sep != Py_None) {
2131 if (PyString_Check(sep))
2132 return do_xstrip(self, striptype, sep);
2133 #ifdef Py_USING_UNICODE
2134 else if (PyUnicode_Check(sep)) {
2135 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2136 PyObject *res;
2137 if (uniself==NULL)
2138 return NULL;
2139 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2140 striptype, sep);
2141 Py_DECREF(uniself);
2142 return res;
2144 #endif
2145 PyErr_Format(PyExc_TypeError,
2146 #ifdef Py_USING_UNICODE
2147 "%s arg must be None, str or unicode",
2148 #else
2149 "%s arg must be None or str",
2150 #endif
2151 STRIPNAME(striptype));
2152 return NULL;
2155 return do_strip(self, striptype);
2159 PyDoc_STRVAR(strip__doc__,
2160 "S.strip([chars]) -> string or unicode\n\
2162 Return a copy of the string S with leading and trailing\n\
2163 whitespace removed.\n\
2164 If chars is given and not None, remove characters in chars instead.\n\
2165 If chars is unicode, S will be converted to unicode before stripping");
2167 static PyObject *
2168 string_strip(PyStringObject *self, PyObject *args)
2170 if (PyTuple_GET_SIZE(args) == 0)
2171 return do_strip(self, BOTHSTRIP); /* Common case */
2172 else
2173 return do_argstrip(self, BOTHSTRIP, args);
2177 PyDoc_STRVAR(lstrip__doc__,
2178 "S.lstrip([chars]) -> string or unicode\n\
2180 Return a copy of the string S with leading whitespace removed.\n\
2181 If chars is given and not None, remove characters in chars instead.\n\
2182 If chars is unicode, S will be converted to unicode before stripping");
2184 static PyObject *
2185 string_lstrip(PyStringObject *self, PyObject *args)
2187 if (PyTuple_GET_SIZE(args) == 0)
2188 return do_strip(self, LEFTSTRIP); /* Common case */
2189 else
2190 return do_argstrip(self, LEFTSTRIP, args);
2194 PyDoc_STRVAR(rstrip__doc__,
2195 "S.rstrip([chars]) -> string or unicode\n\
2197 Return a copy of the string S with trailing whitespace removed.\n\
2198 If chars is given and not None, remove characters in chars instead.\n\
2199 If chars is unicode, S will be converted to unicode before stripping");
2201 static PyObject *
2202 string_rstrip(PyStringObject *self, PyObject *args)
2204 if (PyTuple_GET_SIZE(args) == 0)
2205 return do_strip(self, RIGHTSTRIP); /* Common case */
2206 else
2207 return do_argstrip(self, RIGHTSTRIP, args);
2211 PyDoc_STRVAR(lower__doc__,
2212 "S.lower() -> string\n\
2214 Return a copy of the string S converted to lowercase.");
2216 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2217 #ifndef _tolower
2218 #define _tolower tolower
2219 #endif
2221 static PyObject *
2222 string_lower(PyStringObject *self)
2224 char *s;
2225 Py_ssize_t i, n = PyString_GET_SIZE(self);
2226 PyObject *newobj;
2228 newobj = PyString_FromStringAndSize(NULL, n);
2229 if (!newobj)
2230 return NULL;
2232 s = PyString_AS_STRING(newobj);
2234 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2236 for (i = 0; i < n; i++) {
2237 int c = Py_CHARMASK(s[i]);
2238 if (isupper(c))
2239 s[i] = _tolower(c);
2242 return newobj;
2245 PyDoc_STRVAR(upper__doc__,
2246 "S.upper() -> string\n\
2248 Return a copy of the string S converted to uppercase.");
2250 #ifndef _toupper
2251 #define _toupper toupper
2252 #endif
2254 static PyObject *
2255 string_upper(PyStringObject *self)
2257 char *s;
2258 Py_ssize_t i, n = PyString_GET_SIZE(self);
2259 PyObject *newobj;
2261 newobj = PyString_FromStringAndSize(NULL, n);
2262 if (!newobj)
2263 return NULL;
2265 s = PyString_AS_STRING(newobj);
2267 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2269 for (i = 0; i < n; i++) {
2270 int c = Py_CHARMASK(s[i]);
2271 if (islower(c))
2272 s[i] = _toupper(c);
2275 return newobj;
2278 PyDoc_STRVAR(title__doc__,
2279 "S.title() -> string\n\
2281 Return a titlecased version of S, i.e. words start with uppercase\n\
2282 characters, all remaining cased characters have lowercase.");
2284 static PyObject*
2285 string_title(PyStringObject *self)
2287 char *s = PyString_AS_STRING(self), *s_new;
2288 Py_ssize_t i, n = PyString_GET_SIZE(self);
2289 int previous_is_cased = 0;
2290 PyObject *newobj;
2292 newobj = PyString_FromStringAndSize(NULL, n);
2293 if (newobj == NULL)
2294 return NULL;
2295 s_new = PyString_AsString(newobj);
2296 for (i = 0; i < n; i++) {
2297 int c = Py_CHARMASK(*s++);
2298 if (islower(c)) {
2299 if (!previous_is_cased)
2300 c = toupper(c);
2301 previous_is_cased = 1;
2302 } else if (isupper(c)) {
2303 if (previous_is_cased)
2304 c = tolower(c);
2305 previous_is_cased = 1;
2306 } else
2307 previous_is_cased = 0;
2308 *s_new++ = c;
2310 return newobj;
2313 PyDoc_STRVAR(capitalize__doc__,
2314 "S.capitalize() -> string\n\
2316 Return a copy of the string S with only its first character\n\
2317 capitalized.");
2319 static PyObject *
2320 string_capitalize(PyStringObject *self)
2322 char *s = PyString_AS_STRING(self), *s_new;
2323 Py_ssize_t i, n = PyString_GET_SIZE(self);
2324 PyObject *newobj;
2326 newobj = PyString_FromStringAndSize(NULL, n);
2327 if (newobj == NULL)
2328 return NULL;
2329 s_new = PyString_AsString(newobj);
2330 if (0 < n) {
2331 int c = Py_CHARMASK(*s++);
2332 if (islower(c))
2333 *s_new = toupper(c);
2334 else
2335 *s_new = c;
2336 s_new++;
2338 for (i = 1; i < n; i++) {
2339 int c = Py_CHARMASK(*s++);
2340 if (isupper(c))
2341 *s_new = tolower(c);
2342 else
2343 *s_new = c;
2344 s_new++;
2346 return newobj;
2350 PyDoc_STRVAR(count__doc__,
2351 "S.count(sub[, start[, end]]) -> int\n\
2353 Return the number of non-overlapping occurrences of substring sub in\n\
2354 string S[start:end]. Optional arguments start and end are interpreted\n\
2355 as in slice notation.");
2357 static PyObject *
2358 string_count(PyStringObject *self, PyObject *args)
2360 PyObject *sub_obj;
2361 const char *str = PyString_AS_STRING(self), *sub;
2362 Py_ssize_t sub_len;
2363 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2365 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2366 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2367 return NULL;
2369 if (PyString_Check(sub_obj)) {
2370 sub = PyString_AS_STRING(sub_obj);
2371 sub_len = PyString_GET_SIZE(sub_obj);
2373 #ifdef Py_USING_UNICODE
2374 else if (PyUnicode_Check(sub_obj)) {
2375 Py_ssize_t count;
2376 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2377 if (count == -1)
2378 return NULL;
2379 else
2380 return PyInt_FromSsize_t(count);
2382 #endif
2383 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2384 return NULL;
2386 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
2388 return PyInt_FromSsize_t(
2389 stringlib_count(str + start, end - start, sub, sub_len)
2393 PyDoc_STRVAR(swapcase__doc__,
2394 "S.swapcase() -> string\n\
2396 Return a copy of the string S with uppercase characters\n\
2397 converted to lowercase and vice versa.");
2399 static PyObject *
2400 string_swapcase(PyStringObject *self)
2402 char *s = PyString_AS_STRING(self), *s_new;
2403 Py_ssize_t i, n = PyString_GET_SIZE(self);
2404 PyObject *newobj;
2406 newobj = PyString_FromStringAndSize(NULL, n);
2407 if (newobj == NULL)
2408 return NULL;
2409 s_new = PyString_AsString(newobj);
2410 for (i = 0; i < n; i++) {
2411 int c = Py_CHARMASK(*s++);
2412 if (islower(c)) {
2413 *s_new = toupper(c);
2415 else if (isupper(c)) {
2416 *s_new = tolower(c);
2418 else
2419 *s_new = c;
2420 s_new++;
2422 return newobj;
2426 PyDoc_STRVAR(translate__doc__,
2427 "S.translate(table [,deletechars]) -> string\n\
2429 Return a copy of the string S, where all characters occurring\n\
2430 in the optional argument deletechars are removed, and the\n\
2431 remaining characters have been mapped through the given\n\
2432 translation table, which must be a string of length 256.");
2434 static PyObject *
2435 string_translate(PyStringObject *self, PyObject *args)
2437 register char *input, *output;
2438 const char *table;
2439 register Py_ssize_t i, c, changed = 0;
2440 PyObject *input_obj = (PyObject*)self;
2441 const char *output_start, *del_table=NULL;
2442 Py_ssize_t inlen, tablen, dellen = 0;
2443 PyObject *result;
2444 int trans_table[256];
2445 PyObject *tableobj, *delobj = NULL;
2447 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2448 &tableobj, &delobj))
2449 return NULL;
2451 if (PyString_Check(tableobj)) {
2452 table = PyString_AS_STRING(tableobj);
2453 tablen = PyString_GET_SIZE(tableobj);
2455 else if (tableobj == Py_None) {
2456 table = NULL;
2457 tablen = 256;
2459 #ifdef Py_USING_UNICODE
2460 else if (PyUnicode_Check(tableobj)) {
2461 /* Unicode .translate() does not support the deletechars
2462 parameter; instead a mapping to None will cause characters
2463 to be deleted. */
2464 if (delobj != NULL) {
2465 PyErr_SetString(PyExc_TypeError,
2466 "deletions are implemented differently for unicode");
2467 return NULL;
2469 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2471 #endif
2472 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2473 return NULL;
2475 if (tablen != 256) {
2476 PyErr_SetString(PyExc_ValueError,
2477 "translation table must be 256 characters long");
2478 return NULL;
2481 if (delobj != NULL) {
2482 if (PyString_Check(delobj)) {
2483 del_table = PyString_AS_STRING(delobj);
2484 dellen = PyString_GET_SIZE(delobj);
2486 #ifdef Py_USING_UNICODE
2487 else if (PyUnicode_Check(delobj)) {
2488 PyErr_SetString(PyExc_TypeError,
2489 "deletions are implemented differently for unicode");
2490 return NULL;
2492 #endif
2493 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2494 return NULL;
2496 else {
2497 del_table = NULL;
2498 dellen = 0;
2501 inlen = PyString_GET_SIZE(input_obj);
2502 result = PyString_FromStringAndSize((char *)NULL, inlen);
2503 if (result == NULL)
2504 return NULL;
2505 output_start = output = PyString_AsString(result);
2506 input = PyString_AS_STRING(input_obj);
2508 if (dellen == 0 && table != NULL) {
2509 /* If no deletions are required, use faster code */
2510 for (i = inlen; --i >= 0; ) {
2511 c = Py_CHARMASK(*input++);
2512 if (Py_CHARMASK((*output++ = table[c])) != c)
2513 changed = 1;
2515 if (changed || !PyString_CheckExact(input_obj))
2516 return result;
2517 Py_DECREF(result);
2518 Py_INCREF(input_obj);
2519 return input_obj;
2522 if (table == NULL) {
2523 for (i = 0; i < 256; i++)
2524 trans_table[i] = Py_CHARMASK(i);
2525 } else {
2526 for (i = 0; i < 256; i++)
2527 trans_table[i] = Py_CHARMASK(table[i]);
2530 for (i = 0; i < dellen; i++)
2531 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2533 for (i = inlen; --i >= 0; ) {
2534 c = Py_CHARMASK(*input++);
2535 if (trans_table[c] != -1)
2536 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2537 continue;
2538 changed = 1;
2540 if (!changed && PyString_CheckExact(input_obj)) {
2541 Py_DECREF(result);
2542 Py_INCREF(input_obj);
2543 return input_obj;
2545 /* Fix the size of the resulting string */
2546 if (inlen > 0)
2547 _PyString_Resize(&result, output - output_start);
2548 return result;
/* Search directions passed as the `direction' argument of findstring()
   and countstring().  REVERSE is parenthesized so that it expands safely
   inside larger expressions. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first byte equal to `c' within the first
   `target_len' bytes of `target', or NULL when no such byte exists.
   Every argument is parenthesized so arbitrary expressions can be passed. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2560 /* String ops must return a string. */
2561 /* If the object is subclass of string, create a copy */
2562 Py_LOCAL(PyStringObject *)
2563 return_self(PyStringObject *self)
2565 if (PyString_CheckExact(self)) {
2566 Py_INCREF(self);
2567 return self;
2569 return (PyStringObject *)PyString_FromStringAndSize(
2570 PyString_AS_STRING(self),
2571 PyString_GET_SIZE(self));
2574 Py_LOCAL_INLINE(Py_ssize_t)
2575 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2577 Py_ssize_t count=0;
2578 const char *start=target;
2579 const char *end=target+target_len;
2581 while ( (start=findchar(start, end-start, c)) != NULL ) {
2582 count++;
2583 if (count >= maxcount)
2584 break;
2585 start += 1;
2587 return count;
2590 Py_LOCAL(Py_ssize_t)
2591 findstring(const char *target, Py_ssize_t target_len,
2592 const char *pattern, Py_ssize_t pattern_len,
2593 Py_ssize_t start,
2594 Py_ssize_t end,
2595 int direction)
2597 if (start < 0) {
2598 start += target_len;
2599 if (start < 0)
2600 start = 0;
2602 if (end > target_len) {
2603 end = target_len;
2604 } else if (end < 0) {
2605 end += target_len;
2606 if (end < 0)
2607 end = 0;
2610 /* zero-length substrings always match at the first attempt */
2611 if (pattern_len == 0)
2612 return (direction > 0) ? start : end;
2614 end -= pattern_len;
2616 if (direction < 0) {
2617 for (; end >= start; end--)
2618 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2619 return end;
2620 } else {
2621 for (; start <= end; start++)
2622 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2623 return start;
2625 return -1;
2628 Py_LOCAL_INLINE(Py_ssize_t)
2629 countstring(const char *target, Py_ssize_t target_len,
2630 const char *pattern, Py_ssize_t pattern_len,
2631 Py_ssize_t start,
2632 Py_ssize_t end,
2633 int direction, Py_ssize_t maxcount)
2635 Py_ssize_t count=0;
2637 if (start < 0) {
2638 start += target_len;
2639 if (start < 0)
2640 start = 0;
2642 if (end > target_len) {
2643 end = target_len;
2644 } else if (end < 0) {
2645 end += target_len;
2646 if (end < 0)
2647 end = 0;
2650 /* zero-length substrings match everywhere */
2651 if (pattern_len == 0 || maxcount == 0) {
2652 if (target_len+1 < maxcount)
2653 return target_len+1;
2654 return maxcount;
2657 end -= pattern_len;
2658 if (direction < 0) {
2659 for (; (end >= start); end--)
2660 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2661 count++;
2662 if (--maxcount <= 0) break;
2663 end -= pattern_len-1;
2665 } else {
2666 for (; (start <= end); start++)
2667 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2668 count++;
2669 if (--maxcount <= 0)
2670 break;
2671 start += pattern_len-1;
2674 return count;
2678 /* Algorithms for different cases of string replacement */
2680 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2681 Py_LOCAL(PyStringObject *)
2682 replace_interleave(PyStringObject *self,
2683 const char *to_s, Py_ssize_t to_len,
2684 Py_ssize_t maxcount)
2686 char *self_s, *result_s;
2687 Py_ssize_t self_len, result_len;
2688 Py_ssize_t count, i, product;
2689 PyStringObject *result;
2691 self_len = PyString_GET_SIZE(self);
2693 /* 1 at the end plus 1 after every character */
2694 count = self_len+1;
2695 if (maxcount < count)
2696 count = maxcount;
2698 /* Check for overflow */
2699 /* result_len = count * to_len + self_len; */
2700 product = count * to_len;
2701 if (product / to_len != count) {
2702 PyErr_SetString(PyExc_OverflowError,
2703 "replace string is too long");
2704 return NULL;
2706 result_len = product + self_len;
2707 if (result_len < 0) {
2708 PyErr_SetString(PyExc_OverflowError,
2709 "replace string is too long");
2710 return NULL;
2713 if (! (result = (PyStringObject *)
2714 PyString_FromStringAndSize(NULL, result_len)) )
2715 return NULL;
2717 self_s = PyString_AS_STRING(self);
2718 result_s = PyString_AS_STRING(result);
2720 /* TODO: special case single character, which doesn't need memcpy */
2722 /* Lay the first one down (guaranteed this will occur) */
2723 Py_MEMCPY(result_s, to_s, to_len);
2724 result_s += to_len;
2725 count -= 1;
2727 for (i=0; i<count; i++) {
2728 *result_s++ = *self_s++;
2729 Py_MEMCPY(result_s, to_s, to_len);
2730 result_s += to_len;
2733 /* Copy the rest of the original string */
2734 Py_MEMCPY(result_s, self_s, self_len-i);
2736 return result;
2739 /* Special case for deleting a single character */
2740 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2741 Py_LOCAL(PyStringObject *)
2742 replace_delete_single_character(PyStringObject *self,
2743 char from_c, Py_ssize_t maxcount)
2745 char *self_s, *result_s;
2746 char *start, *next, *end;
2747 Py_ssize_t self_len, result_len;
2748 Py_ssize_t count;
2749 PyStringObject *result;
2751 self_len = PyString_GET_SIZE(self);
2752 self_s = PyString_AS_STRING(self);
2754 count = countchar(self_s, self_len, from_c, maxcount);
2755 if (count == 0) {
2756 return return_self(self);
2759 result_len = self_len - count; /* from_len == 1 */
2760 assert(result_len>=0);
2762 if ( (result = (PyStringObject *)
2763 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2764 return NULL;
2765 result_s = PyString_AS_STRING(result);
2767 start = self_s;
2768 end = self_s + self_len;
2769 while (count-- > 0) {
2770 next = findchar(start, end-start, from_c);
2771 if (next == NULL)
2772 break;
2773 Py_MEMCPY(result_s, start, next-start);
2774 result_s += (next-start);
2775 start = next+1;
2777 Py_MEMCPY(result_s, start, end-start);
2779 return result;
2782 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2784 Py_LOCAL(PyStringObject *)
2785 replace_delete_substring(PyStringObject *self,
2786 const char *from_s, Py_ssize_t from_len,
2787 Py_ssize_t maxcount) {
2788 char *self_s, *result_s;
2789 char *start, *next, *end;
2790 Py_ssize_t self_len, result_len;
2791 Py_ssize_t count, offset;
2792 PyStringObject *result;
2794 self_len = PyString_GET_SIZE(self);
2795 self_s = PyString_AS_STRING(self);
2797 count = countstring(self_s, self_len,
2798 from_s, from_len,
2799 0, self_len, 1,
2800 maxcount);
2802 if (count == 0) {
2803 /* no matches */
2804 return return_self(self);
2807 result_len = self_len - (count * from_len);
2808 assert (result_len>=0);
2810 if ( (result = (PyStringObject *)
2811 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2812 return NULL;
2814 result_s = PyString_AS_STRING(result);
2816 start = self_s;
2817 end = self_s + self_len;
2818 while (count-- > 0) {
2819 offset = findstring(start, end-start,
2820 from_s, from_len,
2821 0, end-start, FORWARD);
2822 if (offset == -1)
2823 break;
2824 next = start + offset;
2826 Py_MEMCPY(result_s, start, next-start);
2828 result_s += (next-start);
2829 start = next+from_len;
2831 Py_MEMCPY(result_s, start, end-start);
2832 return result;
2835 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2836 Py_LOCAL(PyStringObject *)
2837 replace_single_character_in_place(PyStringObject *self,
2838 char from_c, char to_c,
2839 Py_ssize_t maxcount)
2841 char *self_s, *result_s, *start, *end, *next;
2842 Py_ssize_t self_len;
2843 PyStringObject *result;
2845 /* The result string will be the same size */
2846 self_s = PyString_AS_STRING(self);
2847 self_len = PyString_GET_SIZE(self);
2849 next = findchar(self_s, self_len, from_c);
2851 if (next == NULL) {
2852 /* No matches; return the original string */
2853 return return_self(self);
2856 /* Need to make a new string */
2857 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2858 if (result == NULL)
2859 return NULL;
2860 result_s = PyString_AS_STRING(result);
2861 Py_MEMCPY(result_s, self_s, self_len);
2863 /* change everything in-place, starting with this one */
2864 start = result_s + (next-self_s);
2865 *start = to_c;
2866 start++;
2867 end = result_s + self_len;
2869 while (--maxcount > 0) {
2870 next = findchar(start, end-start, from_c);
2871 if (next == NULL)
2872 break;
2873 *next = to_c;
2874 start = next+1;
2877 return result;
2880 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2881 Py_LOCAL(PyStringObject *)
2882 replace_substring_in_place(PyStringObject *self,
2883 const char *from_s, Py_ssize_t from_len,
2884 const char *to_s, Py_ssize_t to_len,
2885 Py_ssize_t maxcount)
2887 char *result_s, *start, *end;
2888 char *self_s;
2889 Py_ssize_t self_len, offset;
2890 PyStringObject *result;
2892 /* The result string will be the same size */
2894 self_s = PyString_AS_STRING(self);
2895 self_len = PyString_GET_SIZE(self);
2897 offset = findstring(self_s, self_len,
2898 from_s, from_len,
2899 0, self_len, FORWARD);
2900 if (offset == -1) {
2901 /* No matches; return the original string */
2902 return return_self(self);
2905 /* Need to make a new string */
2906 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2907 if (result == NULL)
2908 return NULL;
2909 result_s = PyString_AS_STRING(result);
2910 Py_MEMCPY(result_s, self_s, self_len);
2912 /* change everything in-place, starting with this one */
2913 start = result_s + offset;
2914 Py_MEMCPY(start, to_s, from_len);
2915 start += from_len;
2916 end = result_s + self_len;
2918 while ( --maxcount > 0) {
2919 offset = findstring(start, end-start,
2920 from_s, from_len,
2921 0, end-start, FORWARD);
2922 if (offset==-1)
2923 break;
2924 Py_MEMCPY(start+offset, to_s, from_len);
2925 start += offset+from_len;
2928 return result;
2931 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2932 Py_LOCAL(PyStringObject *)
2933 replace_single_character(PyStringObject *self,
2934 char from_c,
2935 const char *to_s, Py_ssize_t to_len,
2936 Py_ssize_t maxcount)
2938 char *self_s, *result_s;
2939 char *start, *next, *end;
2940 Py_ssize_t self_len, result_len;
2941 Py_ssize_t count, product;
2942 PyStringObject *result;
2944 self_s = PyString_AS_STRING(self);
2945 self_len = PyString_GET_SIZE(self);
2947 count = countchar(self_s, self_len, from_c, maxcount);
2948 if (count == 0) {
2949 /* no matches, return unchanged */
2950 return return_self(self);
2953 /* use the difference between current and new, hence the "-1" */
2954 /* result_len = self_len + count * (to_len-1) */
2955 product = count * (to_len-1);
2956 if (product / (to_len-1) != count) {
2957 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2958 return NULL;
2960 result_len = self_len + product;
2961 if (result_len < 0) {
2962 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2963 return NULL;
2966 if ( (result = (PyStringObject *)
2967 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2968 return NULL;
2969 result_s = PyString_AS_STRING(result);
2971 start = self_s;
2972 end = self_s + self_len;
2973 while (count-- > 0) {
2974 next = findchar(start, end-start, from_c);
2975 if (next == NULL)
2976 break;
2978 if (next == start) {
2979 /* replace with the 'to' */
2980 Py_MEMCPY(result_s, to_s, to_len);
2981 result_s += to_len;
2982 start += 1;
2983 } else {
2984 /* copy the unchanged old then the 'to' */
2985 Py_MEMCPY(result_s, start, next-start);
2986 result_s += (next-start);
2987 Py_MEMCPY(result_s, to_s, to_len);
2988 result_s += to_len;
2989 start = next+1;
2992 /* Copy the remainder of the remaining string */
2993 Py_MEMCPY(result_s, start, end-start);
2995 return result;
2998 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2999 Py_LOCAL(PyStringObject *)
3000 replace_substring(PyStringObject *self,
3001 const char *from_s, Py_ssize_t from_len,
3002 const char *to_s, Py_ssize_t to_len,
3003 Py_ssize_t maxcount) {
3004 char *self_s, *result_s;
3005 char *start, *next, *end;
3006 Py_ssize_t self_len, result_len;
3007 Py_ssize_t count, offset, product;
3008 PyStringObject *result;
3010 self_s = PyString_AS_STRING(self);
3011 self_len = PyString_GET_SIZE(self);
3013 count = countstring(self_s, self_len,
3014 from_s, from_len,
3015 0, self_len, FORWARD, maxcount);
3016 if (count == 0) {
3017 /* no matches, return unchanged */
3018 return return_self(self);
3021 /* Check for overflow */
3022 /* result_len = self_len + count * (to_len-from_len) */
3023 product = count * (to_len-from_len);
3024 if (product / (to_len-from_len) != count) {
3025 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3026 return NULL;
3028 result_len = self_len + product;
3029 if (result_len < 0) {
3030 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3031 return NULL;
3034 if ( (result = (PyStringObject *)
3035 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3036 return NULL;
3037 result_s = PyString_AS_STRING(result);
3039 start = self_s;
3040 end = self_s + self_len;
3041 while (count-- > 0) {
3042 offset = findstring(start, end-start,
3043 from_s, from_len,
3044 0, end-start, FORWARD);
3045 if (offset == -1)
3046 break;
3047 next = start+offset;
3048 if (next == start) {
3049 /* replace with the 'to' */
3050 Py_MEMCPY(result_s, to_s, to_len);
3051 result_s += to_len;
3052 start += from_len;
3053 } else {
3054 /* copy the unchanged old then the 'to' */
3055 Py_MEMCPY(result_s, start, next-start);
3056 result_s += (next-start);
3057 Py_MEMCPY(result_s, to_s, to_len);
3058 result_s += to_len;
3059 start = next+from_len;
3062 /* Copy the remainder of the remaining string */
3063 Py_MEMCPY(result_s, start, end-start);
3065 return result;
3069 Py_LOCAL(PyStringObject *)
3070 replace(PyStringObject *self,
3071 const char *from_s, Py_ssize_t from_len,
3072 const char *to_s, Py_ssize_t to_len,
3073 Py_ssize_t maxcount)
3075 if (maxcount < 0) {
3076 maxcount = PY_SSIZE_T_MAX;
3077 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3078 /* nothing to do; return the original string */
3079 return return_self(self);
3082 if (maxcount == 0 ||
3083 (from_len == 0 && to_len == 0)) {
3084 /* nothing to do; return the original string */
3085 return return_self(self);
3088 /* Handle zero-length special cases */
3090 if (from_len == 0) {
3091 /* insert the 'to' string everywhere. */
3092 /* >>> "Python".replace("", ".") */
3093 /* '.P.y.t.h.o.n.' */
3094 return replace_interleave(self, to_s, to_len, maxcount);
3097 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3098 /* point for an empty self string to generate a non-empty string */
3099 /* Special case so the remaining code always gets a non-empty string */
3100 if (PyString_GET_SIZE(self) == 0) {
3101 return return_self(self);
3104 if (to_len == 0) {
3105 /* delete all occurances of 'from' string */
3106 if (from_len == 1) {
3107 return replace_delete_single_character(
3108 self, from_s[0], maxcount);
3109 } else {
3110 return replace_delete_substring(self, from_s, from_len, maxcount);
3114 /* Handle special case where both strings have the same length */
3116 if (from_len == to_len) {
3117 if (from_len == 1) {
3118 return replace_single_character_in_place(
3119 self,
3120 from_s[0],
3121 to_s[0],
3122 maxcount);
3123 } else {
3124 return replace_substring_in_place(
3125 self, from_s, from_len, to_s, to_len, maxcount);
3129 /* Otherwise use the more generic algorithms */
3130 if (from_len == 1) {
3131 return replace_single_character(self, from_s[0],
3132 to_s, to_len, maxcount);
3133 } else {
3134 /* len('from')>=2, len('to')>=1 */
3135 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3139 PyDoc_STRVAR(replace__doc__,
3140 "S.replace (old, new[, count]) -> string\n\
3142 Return a copy of string S with all occurrences of substring\n\
3143 old replaced by new. If the optional argument count is\n\
3144 given, only the first count occurrences are replaced.");
3146 static PyObject *
3147 string_replace(PyStringObject *self, PyObject *args)
3149 Py_ssize_t count = -1;
3150 PyObject *from, *to;
3151 const char *from_s, *to_s;
3152 Py_ssize_t from_len, to_len;
3154 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3155 return NULL;
3157 if (PyString_Check(from)) {
3158 from_s = PyString_AS_STRING(from);
3159 from_len = PyString_GET_SIZE(from);
3161 #ifdef Py_USING_UNICODE
3162 if (PyUnicode_Check(from))
3163 return PyUnicode_Replace((PyObject *)self,
3164 from, to, count);
3165 #endif
3166 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3167 return NULL;
3169 if (PyString_Check(to)) {
3170 to_s = PyString_AS_STRING(to);
3171 to_len = PyString_GET_SIZE(to);
3173 #ifdef Py_USING_UNICODE
3174 else if (PyUnicode_Check(to))
3175 return PyUnicode_Replace((PyObject *)self,
3176 from, to, count);
3177 #endif
3178 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3179 return NULL;
3181 return (PyObject *)replace((PyStringObject *) self,
3182 from_s, from_len,
3183 to_s, to_len, count);
3186 /** End DALKE **/
3188 /* Matches the end (direction >= 0) or start (direction < 0) of self
3189 * against substr, using the start and end arguments. Returns
3190 * -1 on error, 0 if not found and 1 if found.
3192 Py_LOCAL(int)
3193 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3194 Py_ssize_t end, int direction)
3196 Py_ssize_t len = PyString_GET_SIZE(self);
3197 Py_ssize_t slen;
3198 const char* sub;
3199 const char* str;
3201 if (PyString_Check(substr)) {
3202 sub = PyString_AS_STRING(substr);
3203 slen = PyString_GET_SIZE(substr);
3205 #ifdef Py_USING_UNICODE
3206 else if (PyUnicode_Check(substr))
3207 return PyUnicode_Tailmatch((PyObject *)self,
3208 substr, start, end, direction);
3209 #endif
3210 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3211 return -1;
3212 str = PyString_AS_STRING(self);
3214 string_adjust_indices(&start, &end, len);
3216 if (direction < 0) {
3217 /* startswith */
3218 if (start+slen > len)
3219 return 0;
3220 } else {
3221 /* endswith */
3222 if (end-start < slen || start > len)
3223 return 0;
3225 if (end-slen > start)
3226 start = end - slen;
3228 if (end-start >= slen)
3229 return ! memcmp(str+start, sub, slen);
3230 return 0;
3234 PyDoc_STRVAR(startswith__doc__,
3235 "S.startswith(prefix[, start[, end]]) -> bool\n\
3237 Return True if S starts with the specified prefix, False otherwise.\n\
3238 With optional start, test S beginning at that position.\n\
3239 With optional end, stop comparing S at that position.\n\
3240 prefix can also be a tuple of strings to try.");
3242 static PyObject *
3243 string_startswith(PyStringObject *self, PyObject *args)
3245 Py_ssize_t start = 0;
3246 Py_ssize_t end = PY_SSIZE_T_MAX;
3247 PyObject *subobj;
3248 int result;
3250 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3251 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3252 return NULL;
3253 if (PyTuple_Check(subobj)) {
3254 Py_ssize_t i;
3255 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3256 result = _string_tailmatch(self,
3257 PyTuple_GET_ITEM(subobj, i),
3258 start, end, -1);
3259 if (result == -1)
3260 return NULL;
3261 else if (result) {
3262 Py_RETURN_TRUE;
3265 Py_RETURN_FALSE;
3267 result = _string_tailmatch(self, subobj, start, end, -1);
3268 if (result == -1)
3269 return NULL;
3270 else
3271 return PyBool_FromLong(result);
3275 PyDoc_STRVAR(endswith__doc__,
3276 "S.endswith(suffix[, start[, end]]) -> bool\n\
3278 Return True if S ends with the specified suffix, False otherwise.\n\
3279 With optional start, test S beginning at that position.\n\
3280 With optional end, stop comparing S at that position.\n\
3281 suffix can also be a tuple of strings to try.");
3283 static PyObject *
3284 string_endswith(PyStringObject *self, PyObject *args)
3286 Py_ssize_t start = 0;
3287 Py_ssize_t end = PY_SSIZE_T_MAX;
3288 PyObject *subobj;
3289 int result;
3291 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3292 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3293 return NULL;
3294 if (PyTuple_Check(subobj)) {
3295 Py_ssize_t i;
3296 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3297 result = _string_tailmatch(self,
3298 PyTuple_GET_ITEM(subobj, i),
3299 start, end, +1);
3300 if (result == -1)
3301 return NULL;
3302 else if (result) {
3303 Py_RETURN_TRUE;
3306 Py_RETURN_FALSE;
3308 result = _string_tailmatch(self, subobj, start, end, +1);
3309 if (result == -1)
3310 return NULL;
3311 else
3312 return PyBool_FromLong(result);
3316 PyDoc_STRVAR(encode__doc__,
3317 "S.encode([encoding[,errors]]) -> object\n\
3319 Encodes S using the codec registered for encoding. encoding defaults\n\
3320 to the default encoding. errors may be given to set a different error\n\
3321 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3322 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3323 'xmlcharrefreplace' as well as any other name registered with\n\
3324 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3326 static PyObject *
3327 string_encode(PyStringObject *self, PyObject *args)
3329 char *encoding = NULL;
3330 char *errors = NULL;
3331 PyObject *v;
3333 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3334 return NULL;
3335 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3336 if (v == NULL)
3337 goto onError;
3338 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3339 PyErr_Format(PyExc_TypeError,
3340 "encoder did not return a string/unicode object "
3341 "(type=%.400s)",
3342 Py_TYPE(v)->tp_name);
3343 Py_DECREF(v);
3344 return NULL;
3346 return v;
3348 onError:
3349 return NULL;
3353 PyDoc_STRVAR(decode__doc__,
3354 "S.decode([encoding[,errors]]) -> object\n\
3356 Decodes S using the codec registered for encoding. encoding defaults\n\
3357 to the default encoding. errors may be given to set a different error\n\
3358 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3359 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3360 as well as any other name registerd with codecs.register_error that is\n\
3361 able to handle UnicodeDecodeErrors.");
3363 static PyObject *
3364 string_decode(PyStringObject *self, PyObject *args)
3366 char *encoding = NULL;
3367 char *errors = NULL;
3368 PyObject *v;
3370 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3371 return NULL;
3372 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3373 if (v == NULL)
3374 goto onError;
3375 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3376 PyErr_Format(PyExc_TypeError,
3377 "decoder did not return a string/unicode object "
3378 "(type=%.400s)",
3379 Py_TYPE(v)->tp_name);
3380 Py_DECREF(v);
3381 return NULL;
3383 return v;
3385 onError:
3386 return NULL;
3390 PyDoc_STRVAR(expandtabs__doc__,
3391 "S.expandtabs([tabsize]) -> string\n\
3393 Return a copy of S where all tab characters are expanded using spaces.\n\
3394 If tabsize is not given, a tab size of 8 characters is assumed.");
3396 static PyObject*
3397 string_expandtabs(PyStringObject *self, PyObject *args)
3399 const char *e, *p, *qe;
3400 char *q;
3401 Py_ssize_t i, j, incr;
3402 PyObject *u;
3403 int tabsize = 8;
3405 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3406 return NULL;
3408 /* First pass: determine size of output string */
3409 i = 0; /* chars up to and including most recent \n or \r */
3410 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3411 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3412 for (p = PyString_AS_STRING(self); p < e; p++)
3413 if (*p == '\t') {
3414 if (tabsize > 0) {
3415 incr = tabsize - (j % tabsize);
3416 if (j > PY_SSIZE_T_MAX - incr)
3417 goto overflow1;
3418 j += incr;
3421 else {
3422 if (j > PY_SSIZE_T_MAX - 1)
3423 goto overflow1;
3424 j++;
3425 if (*p == '\n' || *p == '\r') {
3426 if (i > PY_SSIZE_T_MAX - j)
3427 goto overflow1;
3428 i += j;
3429 j = 0;
3433 if (i > PY_SSIZE_T_MAX - j)
3434 goto overflow1;
3436 /* Second pass: create output string and fill it */
3437 u = PyString_FromStringAndSize(NULL, i + j);
3438 if (!u)
3439 return NULL;
3441 j = 0; /* same as in first pass */
3442 q = PyString_AS_STRING(u); /* next output char */
3443 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3445 for (p = PyString_AS_STRING(self); p < e; p++)
3446 if (*p == '\t') {
3447 if (tabsize > 0) {
3448 i = tabsize - (j % tabsize);
3449 j += i;
3450 while (i--) {
3451 if (q >= qe)
3452 goto overflow2;
3453 *q++ = ' ';
3457 else {
3458 if (q >= qe)
3459 goto overflow2;
3460 *q++ = *p;
3461 j++;
3462 if (*p == '\n' || *p == '\r')
3463 j = 0;
3466 return u;
3468 overflow2:
3469 Py_DECREF(u);
3470 overflow1:
3471 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3472 return NULL;
3475 Py_LOCAL_INLINE(PyObject *)
3476 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3478 PyObject *u;
3480 if (left < 0)
3481 left = 0;
3482 if (right < 0)
3483 right = 0;
3485 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3486 Py_INCREF(self);
3487 return (PyObject *)self;
3490 u = PyString_FromStringAndSize(NULL,
3491 left + PyString_GET_SIZE(self) + right);
3492 if (u) {
3493 if (left)
3494 memset(PyString_AS_STRING(u), fill, left);
3495 Py_MEMCPY(PyString_AS_STRING(u) + left,
3496 PyString_AS_STRING(self),
3497 PyString_GET_SIZE(self));
3498 if (right)
3499 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3500 fill, right);
3503 return u;
3506 PyDoc_STRVAR(ljust__doc__,
3507 "S.ljust(width[, fillchar]) -> string\n"
3508 "\n"
3509 "Return S left justified in a string of length width. Padding is\n"
3510 "done using the specified fill character (default is a space).");
3512 static PyObject *
3513 string_ljust(PyStringObject *self, PyObject *args)
3515 Py_ssize_t width;
3516 char fillchar = ' ';
3518 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3519 return NULL;
3521 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3522 Py_INCREF(self);
3523 return (PyObject*) self;
3526 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3530 PyDoc_STRVAR(rjust__doc__,
3531 "S.rjust(width[, fillchar]) -> string\n"
3532 "\n"
3533 "Return S right justified in a string of length width. Padding is\n"
3534 "done using the specified fill character (default is a space)");
3536 static PyObject *
3537 string_rjust(PyStringObject *self, PyObject *args)
3539 Py_ssize_t width;
3540 char fillchar = ' ';
3542 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3543 return NULL;
3545 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3546 Py_INCREF(self);
3547 return (PyObject*) self;
3550 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3554 PyDoc_STRVAR(center__doc__,
3555 "S.center(width[, fillchar]) -> string\n"
3556 "\n"
3557 "Return S centered in a string of length width. Padding is\n"
3558 "done using the specified fill character (default is a space)");
3560 static PyObject *
3561 string_center(PyStringObject *self, PyObject *args)
3563 Py_ssize_t marg, left;
3564 Py_ssize_t width;
3565 char fillchar = ' ';
3567 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3568 return NULL;
3570 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3571 Py_INCREF(self);
3572 return (PyObject*) self;
3575 marg = width - PyString_GET_SIZE(self);
3576 left = marg / 2 + (marg & width & 1);
3578 return pad(self, left, marg - left, fillchar);
3581 PyDoc_STRVAR(zfill__doc__,
3582 "S.zfill(width) -> string\n"
3583 "\n"
3584 "Pad a numeric string S with zeros on the left, to fill a field\n"
3585 "of the specified width. The string S is never truncated.");
3587 static PyObject *
3588 string_zfill(PyStringObject *self, PyObject *args)
3590 Py_ssize_t fill;
3591 PyObject *s;
3592 char *p;
3593 Py_ssize_t width;
3595 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3596 return NULL;
3598 if (PyString_GET_SIZE(self) >= width) {
3599 if (PyString_CheckExact(self)) {
3600 Py_INCREF(self);
3601 return (PyObject*) self;
3603 else
3604 return PyString_FromStringAndSize(
3605 PyString_AS_STRING(self),
3606 PyString_GET_SIZE(self)
3610 fill = width - PyString_GET_SIZE(self);
3612 s = pad(self, fill, 0, '0');
3614 if (s == NULL)
3615 return NULL;
3617 p = PyString_AS_STRING(s);
3618 if (p[fill] == '+' || p[fill] == '-') {
3619 /* move sign to beginning of string */
3620 p[0] = p[fill];
3621 p[fill] = '0';
3624 return (PyObject*) s;
3627 PyDoc_STRVAR(isspace__doc__,
3628 "S.isspace() -> bool\n\
3630 Return True if all characters in S are whitespace\n\
3631 and there is at least one character in S, False otherwise.");
3633 static PyObject*
3634 string_isspace(PyStringObject *self)
3636 register const unsigned char *p
3637 = (unsigned char *) PyString_AS_STRING(self);
3638 register const unsigned char *e;
3640 /* Shortcut for single character strings */
3641 if (PyString_GET_SIZE(self) == 1 &&
3642 isspace(*p))
3643 return PyBool_FromLong(1);
3645 /* Special case for empty strings */
3646 if (PyString_GET_SIZE(self) == 0)
3647 return PyBool_FromLong(0);
3649 e = p + PyString_GET_SIZE(self);
3650 for (; p < e; p++) {
3651 if (!isspace(*p))
3652 return PyBool_FromLong(0);
3654 return PyBool_FromLong(1);
3658 PyDoc_STRVAR(isalpha__doc__,
3659 "S.isalpha() -> bool\n\
3661 Return True if all characters in S are alphabetic\n\
3662 and there is at least one character in S, False otherwise.");
3664 static PyObject*
3665 string_isalpha(PyStringObject *self)
3667 register const unsigned char *p
3668 = (unsigned char *) PyString_AS_STRING(self);
3669 register const unsigned char *e;
3671 /* Shortcut for single character strings */
3672 if (PyString_GET_SIZE(self) == 1 &&
3673 isalpha(*p))
3674 return PyBool_FromLong(1);
3676 /* Special case for empty strings */
3677 if (PyString_GET_SIZE(self) == 0)
3678 return PyBool_FromLong(0);
3680 e = p + PyString_GET_SIZE(self);
3681 for (; p < e; p++) {
3682 if (!isalpha(*p))
3683 return PyBool_FromLong(0);
3685 return PyBool_FromLong(1);
3689 PyDoc_STRVAR(isalnum__doc__,
3690 "S.isalnum() -> bool\n\
3692 Return True if all characters in S are alphanumeric\n\
3693 and there is at least one character in S, False otherwise.");
3695 static PyObject*
3696 string_isalnum(PyStringObject *self)
3698 register const unsigned char *p
3699 = (unsigned char *) PyString_AS_STRING(self);
3700 register const unsigned char *e;
3702 /* Shortcut for single character strings */
3703 if (PyString_GET_SIZE(self) == 1 &&
3704 isalnum(*p))
3705 return PyBool_FromLong(1);
3707 /* Special case for empty strings */
3708 if (PyString_GET_SIZE(self) == 0)
3709 return PyBool_FromLong(0);
3711 e = p + PyString_GET_SIZE(self);
3712 for (; p < e; p++) {
3713 if (!isalnum(*p))
3714 return PyBool_FromLong(0);
3716 return PyBool_FromLong(1);
3720 PyDoc_STRVAR(isdigit__doc__,
3721 "S.isdigit() -> bool\n\
3723 Return True if all characters in S are digits\n\
3724 and there is at least one character in S, False otherwise.");
3726 static PyObject*
3727 string_isdigit(PyStringObject *self)
3729 register const unsigned char *p
3730 = (unsigned char *) PyString_AS_STRING(self);
3731 register const unsigned char *e;
3733 /* Shortcut for single character strings */
3734 if (PyString_GET_SIZE(self) == 1 &&
3735 isdigit(*p))
3736 return PyBool_FromLong(1);
3738 /* Special case for empty strings */
3739 if (PyString_GET_SIZE(self) == 0)
3740 return PyBool_FromLong(0);
3742 e = p + PyString_GET_SIZE(self);
3743 for (; p < e; p++) {
3744 if (!isdigit(*p))
3745 return PyBool_FromLong(0);
3747 return PyBool_FromLong(1);
/* Docstring attached to the "islower" entry of string_methods. */
3751 PyDoc_STRVAR(islower__doc__,
3752 "S.islower() -> bool\n\
3754 Return True if all cased characters in S are lowercase and there is\n\
3755 at least one cased character in S, False otherwise.");
3757 static PyObject*
3758 string_islower(PyStringObject *self)
3760 register const unsigned char *p
3761 = (unsigned char *) PyString_AS_STRING(self);
3762 register const unsigned char *e;
3763 int cased;
3765 /* Shortcut for single character strings */
3766 if (PyString_GET_SIZE(self) == 1)
3767 return PyBool_FromLong(islower(*p) != 0);
3769 /* Special case for empty strings */
3770 if (PyString_GET_SIZE(self) == 0)
3771 return PyBool_FromLong(0);
3773 e = p + PyString_GET_SIZE(self);
3774 cased = 0;
3775 for (; p < e; p++) {
3776 if (isupper(*p))
3777 return PyBool_FromLong(0);
3778 else if (!cased && islower(*p))
3779 cased = 1;
3781 return PyBool_FromLong(cased);
/* Docstring attached to the "isupper" entry of string_methods. */
3785 PyDoc_STRVAR(isupper__doc__,
3786 "S.isupper() -> bool\n\
3788 Return True if all cased characters in S are uppercase and there is\n\
3789 at least one cased character in S, False otherwise.");
3791 static PyObject*
3792 string_isupper(PyStringObject *self)
3794 register const unsigned char *p
3795 = (unsigned char *) PyString_AS_STRING(self);
3796 register const unsigned char *e;
3797 int cased;
3799 /* Shortcut for single character strings */
3800 if (PyString_GET_SIZE(self) == 1)
3801 return PyBool_FromLong(isupper(*p) != 0);
3803 /* Special case for empty strings */
3804 if (PyString_GET_SIZE(self) == 0)
3805 return PyBool_FromLong(0);
3807 e = p + PyString_GET_SIZE(self);
3808 cased = 0;
3809 for (; p < e; p++) {
3810 if (islower(*p))
3811 return PyBool_FromLong(0);
3812 else if (!cased && isupper(*p))
3813 cased = 1;
3815 return PyBool_FromLong(cased);
/* Docstring attached to the "istitle" entry of string_methods. */
3819 PyDoc_STRVAR(istitle__doc__,
3820 "S.istitle() -> bool\n\
3822 Return True if S is a titlecased string and there is at least one\n\
3823 character in S, i.e. uppercase characters may only follow uncased\n\
3824 characters and lowercase characters only cased ones. Return False\n\
3825 otherwise.");
3827 static PyObject*
3828 string_istitle(PyStringObject *self, PyObject *uncased)
3830 register const unsigned char *p
3831 = (unsigned char *) PyString_AS_STRING(self);
3832 register const unsigned char *e;
3833 int cased, previous_is_cased;
3835 /* Shortcut for single character strings */
3836 if (PyString_GET_SIZE(self) == 1)
3837 return PyBool_FromLong(isupper(*p) != 0);
3839 /* Special case for empty strings */
3840 if (PyString_GET_SIZE(self) == 0)
3841 return PyBool_FromLong(0);
3843 e = p + PyString_GET_SIZE(self);
3844 cased = 0;
3845 previous_is_cased = 0;
3846 for (; p < e; p++) {
3847 register const unsigned char ch = *p;
3849 if (isupper(ch)) {
3850 if (previous_is_cased)
3851 return PyBool_FromLong(0);
3852 previous_is_cased = 1;
3853 cased = 1;
3855 else if (islower(ch)) {
3856 if (!previous_is_cased)
3857 return PyBool_FromLong(0);
3858 previous_is_cased = 1;
3859 cased = 1;
3861 else
3862 previous_is_cased = 0;
3864 return PyBool_FromLong(cased);
/* Docstring attached to the "splitlines" entry of string_methods. */
3868 PyDoc_STRVAR(splitlines__doc__,
3869 "S.splitlines([keepends]) -> list of strings\n\
3871 Return a list of the lines in S, breaking at line boundaries.\n\
3872 Line breaks are not included in the resulting list unless keepends\n\
3873 is given and true.");
3875 static PyObject*
3876 string_splitlines(PyStringObject *self, PyObject *args)
3878 register Py_ssize_t i;
3879 register Py_ssize_t j;
3880 Py_ssize_t len;
3881 int keepends = 0;
3882 PyObject *list;
3883 PyObject *str;
3884 char *data;
3886 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3887 return NULL;
3889 data = PyString_AS_STRING(self);
3890 len = PyString_GET_SIZE(self);
3892 /* This does not use the preallocated list because splitlines is
3893 usually run with hundreds of newlines. The overhead of
3894 switching between PyList_SET_ITEM and append causes about a
3895 2-3% slowdown for that common case. A smarter implementation
3896 could move the if check out, so the SET_ITEMs are done first
3897 and the appends only done when the prealloc buffer is full.
3898 That's too much work for little gain.*/
3900 list = PyList_New(0);
3901 if (!list)
3902 goto onError;
3904 for (i = j = 0; i < len; ) {
3905 Py_ssize_t eol;
3907 /* Find a line and append it */
3908 while (i < len && data[i] != '\n' && data[i] != '\r')
3909 i++;
3911 /* Skip the line break reading CRLF as one line break */
3912 eol = i;
3913 if (i < len) {
3914 if (data[i] == '\r' && i + 1 < len &&
3915 data[i+1] == '\n')
3916 i += 2;
3917 else
3918 i++;
3919 if (keepends)
3920 eol = i;
3922 SPLIT_APPEND(data, j, eol);
3923 j = i;
3925 if (j < len) {
3926 SPLIT_APPEND(data, j, len);
3929 return list;
3931 onError:
3932 Py_XDECREF(list);
3933 return NULL;
/* Docstring attached to the "__sizeof__" entry of string_methods. */
3936 PyDoc_STRVAR(sizeof__doc__,
3937 "S.__sizeof__() -> size of S in memory, in bytes");
3939 static PyObject *
3940 string_sizeof(PyStringObject *v)
3942 Py_ssize_t res;
3943 res = sizeof(PyStringObject) + v->ob_size * v->ob_type->tp_itemsize;
3944 return PyInt_FromSsize_t(res);
3947 #undef SPLIT_APPEND
3948 #undef SPLIT_ADD
3949 #undef MAX_PREALLOC
3950 #undef PREALLOC_SIZE
3952 static PyObject *
3953 string_getnewargs(PyStringObject *v)
3955 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3959 #include "stringlib/string_format.h"
3961 PyDoc_STRVAR(format__doc__,
3962 "S.format(*args, **kwargs) -> unicode\n\
3966 static PyObject *
3967 string__format__(PyObject* self, PyObject* args)
3969 PyObject *format_spec;
3970 PyObject *result = NULL;
3971 PyObject *tmp = NULL;
3973 /* If 2.x, convert format_spec to the same type as value */
3974 /* This is to allow things like u''.format('') */
3975 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3976 goto done;
3977 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3978 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3979 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3980 goto done;
3982 tmp = PyObject_Str(format_spec);
3983 if (tmp == NULL)
3984 goto done;
3985 format_spec = tmp;
3987 result = _PyBytes_FormatAdvanced(self,
3988 PyString_AS_STRING(format_spec),
3989 PyString_GET_SIZE(format_spec));
3990 done:
3991 Py_XDECREF(tmp);
3992 return result;
3995 PyDoc_STRVAR(p_format__doc__,
3996 "S.__format__(format_spec) -> unicode\n\
/* Method table for str, installed as tp_methods of PyString_Type.  Each
   entry gives the Python-level name, the C implementation, the calling
   convention flags and (where one exists) the docstring; the table ends
   with a {NULL, NULL} sentinel.  The two _formatter_* entries and
   __getnewargs__ carry no docstring. */
4001 static PyMethodDef
4002 string_methods[] = {
4003 /* Counterparts of the obsolete stropmodule functions; except
4004 string.maketrans(). */
4005 {"join", (PyCFunction)string_join, METH_O, join__doc__},
4006 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
4007 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
4008 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
4009 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
4010 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
4011 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
4012 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
4013 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
4014 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
4015 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
4016 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
4017 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
4018 capitalize__doc__},
4019 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
4020 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
4021 endswith__doc__},
4022 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
4023 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
4024 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
4025 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
4026 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
4027 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
4028 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
4029 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
4030 {"rpartition", (PyCFunction)string_rpartition, METH_O,
4031 rpartition__doc__},
4032 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
4033 startswith__doc__},
4034 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
4035 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
4036 swapcase__doc__},
4037 {"translate", (PyCFunction)string_translate, METH_VARARGS,
4038 translate__doc__},
4039 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
4040 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
4041 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
4042 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
4043 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
4044 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
4045 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
4046 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
4047 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
4048 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
4049 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
4050 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
4051 expandtabs__doc__},
4052 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4053 splitlines__doc__},
4054 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
4055 sizeof__doc__},
4056 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
4057 {NULL, NULL} /* sentinel */
4060 static PyObject *
4061 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
4063 static PyObject *
4064 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4066 PyObject *x = NULL;
4067 static char *kwlist[] = {"object", 0};
4069 if (type != &PyString_Type)
4070 return str_subtype_new(type, args, kwds);
4071 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4072 return NULL;
4073 if (x == NULL)
4074 return PyString_FromString("");
4075 return PyObject_Str(x);
4078 static PyObject *
4079 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4081 PyObject *tmp, *pnew;
4082 Py_ssize_t n;
4084 assert(PyType_IsSubtype(type, &PyString_Type));
4085 tmp = string_new(&PyString_Type, args, kwds);
4086 if (tmp == NULL)
4087 return NULL;
4088 assert(PyString_CheckExact(tmp));
4089 n = PyString_GET_SIZE(tmp);
4090 pnew = type->tp_alloc(type, n);
4091 if (pnew != NULL) {
4092 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4093 ((PyStringObject *)pnew)->ob_shash =
4094 ((PyStringObject *)tmp)->ob_shash;
4095 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
4097 Py_DECREF(tmp);
4098 return pnew;
4101 static PyObject *
4102 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4104 PyErr_SetString(PyExc_TypeError,
4105 "The basestring type cannot be instantiated");
4106 return NULL;
4109 static PyObject *
4110 string_mod(PyObject *v, PyObject *w)
4112 if (!PyString_Check(v)) {
4113 Py_INCREF(Py_NotImplemented);
4114 return Py_NotImplemented;
4116 return PyString_Format(v, w);
/* Docstring installed as tp_doc of PyBaseString_Type. */
4119 PyDoc_STRVAR(basestring_doc,
4120 "Type basestring cannot be instantiated; it is the base for str and unicode.");
/* Number-protocol slots for str, referenced from PyString_Type's
   tp_as_number.  Only nb_remainder is filled in here (string_mod), which
   is how `format % args' reaches PyString_Format(). */
4122 static PyNumberMethods string_as_number = {
4123 0, /*nb_add*/
4124 0, /*nb_subtract*/
4125 0, /*nb_multiply*/
4126 0, /*nb_divide*/
4127 string_mod, /*nb_remainder*/
/* Type object for `basestring'.  It derives from PyBaseObject_Type, is
   flagged Py_TPFLAGS_BASETYPE so that it can be subclassed, and its
   tp_new (basestring_new) always raises TypeError.  PyString_Type names
   this type as its tp_base. */
4131 PyTypeObject PyBaseString_Type = {
4132 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4133 "basestring",
4136 0, /* tp_dealloc */
4137 0, /* tp_print */
4138 0, /* tp_getattr */
4139 0, /* tp_setattr */
4140 0, /* tp_compare */
4141 0, /* tp_repr */
4142 0, /* tp_as_number */
4143 0, /* tp_as_sequence */
4144 0, /* tp_as_mapping */
4145 0, /* tp_hash */
4146 0, /* tp_call */
4147 0, /* tp_str */
4148 0, /* tp_getattro */
4149 0, /* tp_setattro */
4150 0, /* tp_as_buffer */
4151 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4152 basestring_doc, /* tp_doc */
4153 0, /* tp_traverse */
4154 0, /* tp_clear */
4155 0, /* tp_richcompare */
4156 0, /* tp_weaklistoffset */
4157 0, /* tp_iter */
4158 0, /* tp_iternext */
4159 0, /* tp_methods */
4160 0, /* tp_members */
4161 0, /* tp_getset */
4162 &PyBaseObject_Type, /* tp_base */
4163 0, /* tp_dict */
4164 0, /* tp_descr_get */
4165 0, /* tp_descr_set */
4166 0, /* tp_dictoffset */
4167 0, /* tp_init */
4168 0, /* tp_alloc */
4169 basestring_new, /* tp_new */
4170 0, /* tp_free */
/* Docstring installed as tp_doc of PyString_Type. */
4173 PyDoc_STRVAR(string_doc,
4174 "str(object) -> string\n\
4176 Return a nice string representation of the object.\n\
4177 If the argument is a string, the return value is the same object.");
/* Type object for `str'.  Instances are variable-sized: the basic size
   is sizeof(PyStringObject) and each item is one char.  The type derives
   from PyBaseString_Type, takes its methods from string_methods, its
   `%' operator from string_as_number, and is created through string_new
   and released through string_dealloc / PyObject_Del. */
4179 PyTypeObject PyString_Type = {
4180 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4181 "str",
4182 sizeof(PyStringObject),
4183 sizeof(char),
4184 string_dealloc, /* tp_dealloc */
4185 (printfunc)string_print, /* tp_print */
4186 0, /* tp_getattr */
4187 0, /* tp_setattr */
4188 0, /* tp_compare */
4189 string_repr, /* tp_repr */
4190 &string_as_number, /* tp_as_number */
4191 &string_as_sequence, /* tp_as_sequence */
4192 &string_as_mapping, /* tp_as_mapping */
4193 (hashfunc)string_hash, /* tp_hash */
4194 0, /* tp_call */
4195 string_str, /* tp_str */
4196 PyObject_GenericGetAttr, /* tp_getattro */
4197 0, /* tp_setattro */
4198 &string_as_buffer, /* tp_as_buffer */
4199 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4200 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
4201 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
4202 string_doc, /* tp_doc */
4203 0, /* tp_traverse */
4204 0, /* tp_clear */
4205 (richcmpfunc)string_richcompare, /* tp_richcompare */
4206 0, /* tp_weaklistoffset */
4207 0, /* tp_iter */
4208 0, /* tp_iternext */
4209 string_methods, /* tp_methods */
4210 0, /* tp_members */
4211 0, /* tp_getset */
4212 &PyBaseString_Type, /* tp_base */
4213 0, /* tp_dict */
4214 0, /* tp_descr_get */
4215 0, /* tp_descr_set */
4216 0, /* tp_dictoffset */
4217 0, /* tp_init */
4218 0, /* tp_alloc */
4219 string_new, /* tp_new */
4220 PyObject_Del, /* tp_free */
4223 void
4224 PyString_Concat(register PyObject **pv, register PyObject *w)
4226 register PyObject *v;
4227 if (*pv == NULL)
4228 return;
4229 if (w == NULL || !PyString_Check(*pv)) {
4230 Py_DECREF(*pv);
4231 *pv = NULL;
4232 return;
4234 v = string_concat((PyStringObject *) *pv, w);
4235 Py_DECREF(*pv);
4236 *pv = v;
4239 void
4240 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
4242 PyString_Concat(pv, w);
4243 Py_XDECREF(w);
4247 /* The following function breaks the notion that strings are immutable:
4248 it changes the size of a string. We get away with this only if there
4249 is only one module referencing the object. You can also think of it
4250 as creating a new string object and destroying the old one, only
4251 more efficiently. In any case, don't use this if the string may
4252 already be known to some other part of the code...
4253 Note that if there's not enough memory to resize the string, the original
4254 string object at *pv is deallocated, *pv is set to NULL, an "out of
4255 memory" exception is set, and -1 is returned. Else (on success) 0 is
4256 returned, and the value in *pv may or may not be the same as on input.
4257 As always, an extra byte is allocated for a trailing \0 byte (newsize
4258 does *not* include that), and a trailing \0 byte is stored.
4262 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
4264 register PyObject *v;
4265 register PyStringObject *sv;
4266 v = *pv;
4267 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4268 PyString_CHECK_INTERNED(v)) {
4269 *pv = 0;
4270 Py_DECREF(v);
4271 PyErr_BadInternalCall();
4272 return -1;
4274 /* XXX UNREF/NEWREF interface should be more symmetrical */
4275 _Py_DEC_REFTOTAL;
4276 _Py_ForgetReference(v);
4277 *pv = (PyObject *)
4278 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
4279 if (*pv == NULL) {
4280 PyObject_Del(v);
4281 PyErr_NoMemory();
4282 return -1;
4284 _Py_NewReference(*pv);
4285 sv = (PyStringObject *) *pv;
4286 Py_SIZE(sv) = newsize;
4287 sv->ob_sval[newsize] = '\0';
4288 sv->ob_shash = -1; /* invalidate cached hash value */
4289 return 0;
4292 /* Helpers for formatstring */
4294 Py_LOCAL_INLINE(PyObject *)
4295 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4297 Py_ssize_t argidx = *p_argidx;
4298 if (argidx < arglen) {
4299 (*p_argidx)++;
4300 if (arglen < 0)
4301 return args;
4302 else
4303 return PyTuple_GetItem(args, argidx);
4305 PyErr_SetString(PyExc_TypeError,
4306 "not enough arguments for format string");
4307 return NULL;
4310 /* Format codes
4311 * F_LJUST '-'
4312 * F_SIGN '+'
4313 * F_BLANK ' '
4314 * F_ALT '#'
4315 * F_ZERO '0'
4317 #define F_LJUST (1<<0)
4318 #define F_SIGN (1<<1)
4319 #define F_BLANK (1<<2)
4320 #define F_ALT (1<<3)
4321 #define F_ZERO (1<<4)
4323 Py_LOCAL_INLINE(int)
4324 formatfloat(char *buf, size_t buflen, int flags,
4325 int prec, int type, PyObject *v)
4327 /* fmt = '%#.' + `prec` + `type`
4328 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4329 char fmt[20];
4330 double x;
4331 x = PyFloat_AsDouble(v);
4332 if (x == -1.0 && PyErr_Occurred()) {
4333 PyErr_Format(PyExc_TypeError, "float argument required, "
4334 "not %.200s", Py_TYPE(v)->tp_name);
4335 return -1;
4337 if (prec < 0)
4338 prec = 6;
4339 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4340 type = 'g';
4341 /* Worst case length calc to ensure no buffer overrun:
4343 'g' formats:
4344 fmt = %#.<prec>g
4345 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4346 for any double rep.)
4347 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4349 'f' formats:
4350 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4351 len = 1 + 50 + 1 + prec = 52 + prec
4353 If prec=0 the effective precision is 1 (the leading digit is
4354 always given), therefore increase the length by one.
4357 if (((type == 'g' || type == 'G') &&
4358 buflen <= (size_t)10 + (size_t)prec) ||
4359 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4360 PyErr_SetString(PyExc_OverflowError,
4361 "formatted float is too long (precision too large?)");
4362 return -1;
4364 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4365 (flags&F_ALT) ? "#" : "",
4366 prec, type);
4367 PyOS_ascii_formatd(buf, buflen, fmt, x);
4368 return (int)strlen(buf);
4371 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4372 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4373 * Python's regular ints.
4374 * Return value: a new PyString*, or NULL if error.
4375 * . *pbuf is set to point into it,
4376 * *plen set to the # of chars following that.
4377 * Caller must decref it when done using pbuf.
4378 * The string starting at *pbuf is of the form
4379 * "-"? ("0x" | "0X")? digit+
4380 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4381 * set in flags. The case of hex digits will be correct,
4382 * There will be at least prec digits, zero-filled on the left if
4383 * necessary to get that many.
4384 * val object to be converted
4385 * flags bitmask of format flags; only F_ALT is looked at
4386 * prec minimum number of digits; 0-fill on left if needed
4387 * type a character in [duoxX]; u acts the same as d
4389 * CAUTION: o, x and X conversions on regular ints can never
4390 * produce a '-' sign, but can for Python's unbounded ints.
4392 PyObject*
4393 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4394 char **pbuf, int *plen)
4396 PyObject *result = NULL;
4397 char *buf;
4398 Py_ssize_t i;
4399 int sign; /* 1 if '-', else 0 */
4400 int len; /* number of characters */
4401 Py_ssize_t llen;
4402 int numdigits; /* len == numnondigits + numdigits */
4403 int numnondigits = 0;
4405 switch (type) {
4406 case 'd':
4407 case 'u':
4408 result = Py_TYPE(val)->tp_str(val);
4409 break;
4410 case 'o':
4411 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4412 break;
4413 case 'x':
4414 case 'X':
4415 numnondigits = 2;
4416 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4417 break;
4418 default:
4419 assert(!"'type' not in [duoxX]");
4421 if (!result)
4422 return NULL;
4424 buf = PyString_AsString(result);
4425 if (!buf) {
4426 Py_DECREF(result);
4427 return NULL;
4430 /* To modify the string in-place, there can only be one reference. */
4431 if (Py_REFCNT(result) != 1) {
4432 PyErr_BadInternalCall();
4433 return NULL;
4435 llen = PyString_Size(result);
4436 if (llen > INT_MAX) {
4437 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4438 return NULL;
4440 len = (int)llen;
4441 if (buf[len-1] == 'L') {
4442 --len;
4443 buf[len] = '\0';
4445 sign = buf[0] == '-';
4446 numnondigits += sign;
4447 numdigits = len - numnondigits;
4448 assert(numdigits > 0);
4450 /* Get rid of base marker unless F_ALT */
4451 if ((flags & F_ALT) == 0) {
4452 /* Need to skip 0x, 0X or 0. */
4453 int skipped = 0;
4454 switch (type) {
4455 case 'o':
4456 assert(buf[sign] == '0');
4457 /* If 0 is only digit, leave it alone. */
4458 if (numdigits > 1) {
4459 skipped = 1;
4460 --numdigits;
4462 break;
4463 case 'x':
4464 case 'X':
4465 assert(buf[sign] == '0');
4466 assert(buf[sign + 1] == 'x');
4467 skipped = 2;
4468 numnondigits -= 2;
4469 break;
4471 if (skipped) {
4472 buf += skipped;
4473 len -= skipped;
4474 if (sign)
4475 buf[0] = '-';
4477 assert(len == numnondigits + numdigits);
4478 assert(numdigits > 0);
4481 /* Fill with leading zeroes to meet minimum width. */
4482 if (prec > numdigits) {
4483 PyObject *r1 = PyString_FromStringAndSize(NULL,
4484 numnondigits + prec);
4485 char *b1;
4486 if (!r1) {
4487 Py_DECREF(result);
4488 return NULL;
4490 b1 = PyString_AS_STRING(r1);
4491 for (i = 0; i < numnondigits; ++i)
4492 *b1++ = *buf++;
4493 for (i = 0; i < prec - numdigits; i++)
4494 *b1++ = '0';
4495 for (i = 0; i < numdigits; i++)
4496 *b1++ = *buf++;
4497 *b1 = '\0';
4498 Py_DECREF(result);
4499 result = r1;
4500 buf = PyString_AS_STRING(result);
4501 len = numnondigits + prec;
4504 /* Fix up case for hex conversions. */
4505 if (type == 'X') {
4506 /* Need to convert all lower case letters to upper case.
4507 and need to convert 0x to 0X (and -0x to -0X). */
4508 for (i = 0; i < len; i++)
4509 if (buf[i] >= 'a' && buf[i] <= 'x')
4510 buf[i] -= 'a'-'A';
4512 *pbuf = buf;
4513 *plen = len;
4514 return result;
4517 Py_LOCAL_INLINE(int)
4518 formatint(char *buf, size_t buflen, int flags,
4519 int prec, int type, PyObject *v)
4521 /* fmt = '%#.' + `prec` + 'l' + `type`
4522 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4523 + 1 + 1 = 24 */
4524 char fmt[64]; /* plenty big enough! */
4525 char *sign;
4526 long x;
4528 x = PyInt_AsLong(v);
4529 if (x == -1 && PyErr_Occurred()) {
4530 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4531 Py_TYPE(v)->tp_name);
4532 return -1;
4534 if (x < 0 && type == 'u') {
4535 type = 'd';
4537 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4538 sign = "-";
4539 else
4540 sign = "";
4541 if (prec < 0)
4542 prec = 1;
4544 if ((flags & F_ALT) &&
4545 (type == 'x' || type == 'X')) {
4546 /* When converting under %#x or %#X, there are a number
4547 * of issues that cause pain:
4548 * - when 0 is being converted, the C standard leaves off
4549 * the '0x' or '0X', which is inconsistent with other
4550 * %#x/%#X conversions and inconsistent with Python's
4551 * hex() function
4552 * - there are platforms that violate the standard and
4553 * convert 0 with the '0x' or '0X'
4554 * (Metrowerks, Compaq Tru64)
4555 * - there are platforms that give '0x' when converting
4556 * under %#X, but convert 0 in accordance with the
4557 * standard (OS/2 EMX)
4559 * We can achieve the desired consistency by inserting our
4560 * own '0x' or '0X' prefix, and substituting %x/%X in place
4561 * of %#x/%#X.
4563 * Note that this is the same approach as used in
4564 * formatint() in unicodeobject.c
4566 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4567 sign, type, prec, type);
4569 else {
4570 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4571 sign, (flags&F_ALT) ? "#" : "",
4572 prec, type);
4575 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4576 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4578 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4579 PyErr_SetString(PyExc_OverflowError,
4580 "formatted integer is too long (precision too large?)");
4581 return -1;
4583 if (sign[0])
4584 PyOS_snprintf(buf, buflen, fmt, -x);
4585 else
4586 PyOS_snprintf(buf, buflen, fmt, x);
4587 return (int)strlen(buf);
4590 Py_LOCAL_INLINE(int)
4591 formatchar(char *buf, size_t buflen, PyObject *v)
4593 /* presume that the buffer is at least 2 characters long */
4594 if (PyString_Check(v)) {
4595 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4596 return -1;
4598 else {
4599 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4600 return -1;
4602 buf[1] = '\0';
4603 return 1;
4606 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4608 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4609 chars are formatted. XXX This is a magic number. Each formatting
4610 routine does bounds checking to ensure no overflow, but a better
4611 solution may be to malloc a buffer of appropriate size for each
4612 format. For now, the current solution is sufficient.
4614 #define FORMATBUFLEN (size_t)120
4616 PyObject *
4617 PyString_Format(PyObject *format, PyObject *args)
4619 char *fmt, *res;
4620 Py_ssize_t arglen, argidx;
4621 Py_ssize_t reslen, rescnt, fmtcnt;
4622 int args_owned = 0;
4623 PyObject *result, *orig_args;
4624 #ifdef Py_USING_UNICODE
4625 PyObject *v, *w;
4626 #endif
4627 PyObject *dict = NULL;
4628 if (format == NULL || !PyString_Check(format) || args == NULL) {
4629 PyErr_BadInternalCall();
4630 return NULL;
4632 orig_args = args;
4633 fmt = PyString_AS_STRING(format);
4634 fmtcnt = PyString_GET_SIZE(format);
4635 reslen = rescnt = fmtcnt + 100;
4636 result = PyString_FromStringAndSize((char *)NULL, reslen);
4637 if (result == NULL)
4638 return NULL;
4639 res = PyString_AsString(result);
4640 if (PyTuple_Check(args)) {
4641 arglen = PyTuple_GET_SIZE(args);
4642 argidx = 0;
4644 else {
4645 arglen = -1;
4646 argidx = -2;
4648 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4649 !PyObject_TypeCheck(args, &PyBaseString_Type))
4650 dict = args;
4651 while (--fmtcnt >= 0) {
4652 if (*fmt != '%') {
4653 if (--rescnt < 0) {
4654 rescnt = fmtcnt + 100;
4655 reslen += rescnt;
4656 if (_PyString_Resize(&result, reslen) < 0)
4657 return NULL;
4658 res = PyString_AS_STRING(result)
4659 + reslen - rescnt;
4660 --rescnt;
4662 *res++ = *fmt++;
4664 else {
4665 /* Got a format specifier */
4666 int flags = 0;
4667 Py_ssize_t width = -1;
4668 int prec = -1;
4669 int c = '\0';
4670 int fill;
4671 int isnumok;
4672 PyObject *v = NULL;
4673 PyObject *temp = NULL;
4674 char *pbuf;
4675 int sign;
4676 Py_ssize_t len;
4677 char formatbuf[FORMATBUFLEN];
4678 /* For format{float,int,char}() */
4679 #ifdef Py_USING_UNICODE
4680 char *fmt_start = fmt;
4681 Py_ssize_t argidx_start = argidx;
4682 #endif
4684 fmt++;
4685 if (*fmt == '(') {
4686 char *keystart;
4687 Py_ssize_t keylen;
4688 PyObject *key;
4689 int pcount = 1;
4691 if (dict == NULL) {
4692 PyErr_SetString(PyExc_TypeError,
4693 "format requires a mapping");
4694 goto error;
4696 ++fmt;
4697 --fmtcnt;
4698 keystart = fmt;
4699 /* Skip over balanced parentheses */
4700 while (pcount > 0 && --fmtcnt >= 0) {
4701 if (*fmt == ')')
4702 --pcount;
4703 else if (*fmt == '(')
4704 ++pcount;
4705 fmt++;
4707 keylen = fmt - keystart - 1;
4708 if (fmtcnt < 0 || pcount > 0) {
4709 PyErr_SetString(PyExc_ValueError,
4710 "incomplete format key");
4711 goto error;
4713 key = PyString_FromStringAndSize(keystart,
4714 keylen);
4715 if (key == NULL)
4716 goto error;
4717 if (args_owned) {
4718 Py_DECREF(args);
4719 args_owned = 0;
4721 args = PyObject_GetItem(dict, key);
4722 Py_DECREF(key);
4723 if (args == NULL) {
4724 goto error;
4726 args_owned = 1;
4727 arglen = -1;
4728 argidx = -2;
4730 while (--fmtcnt >= 0) {
4731 switch (c = *fmt++) {
4732 case '-': flags |= F_LJUST; continue;
4733 case '+': flags |= F_SIGN; continue;
4734 case ' ': flags |= F_BLANK; continue;
4735 case '#': flags |= F_ALT; continue;
4736 case '0': flags |= F_ZERO; continue;
4738 break;
4740 if (c == '*') {
4741 v = getnextarg(args, arglen, &argidx);
4742 if (v == NULL)
4743 goto error;
4744 if (!PyInt_Check(v)) {
4745 PyErr_SetString(PyExc_TypeError,
4746 "* wants int");
4747 goto error;
4749 width = PyInt_AsLong(v);
4750 if (width < 0) {
4751 flags |= F_LJUST;
4752 width = -width;
4754 if (--fmtcnt >= 0)
4755 c = *fmt++;
4757 else if (c >= 0 && isdigit(c)) {
4758 width = c - '0';
4759 while (--fmtcnt >= 0) {
4760 c = Py_CHARMASK(*fmt++);
4761 if (!isdigit(c))
4762 break;
4763 if ((width*10) / 10 != width) {
4764 PyErr_SetString(
4765 PyExc_ValueError,
4766 "width too big");
4767 goto error;
4769 width = width*10 + (c - '0');
4772 if (c == '.') {
4773 prec = 0;
4774 if (--fmtcnt >= 0)
4775 c = *fmt++;
4776 if (c == '*') {
4777 v = getnextarg(args, arglen, &argidx);
4778 if (v == NULL)
4779 goto error;
4780 if (!PyInt_Check(v)) {
4781 PyErr_SetString(
4782 PyExc_TypeError,
4783 "* wants int");
4784 goto error;
4786 prec = PyInt_AsLong(v);
4787 if (prec < 0)
4788 prec = 0;
4789 if (--fmtcnt >= 0)
4790 c = *fmt++;
4792 else if (c >= 0 && isdigit(c)) {
4793 prec = c - '0';
4794 while (--fmtcnt >= 0) {
4795 c = Py_CHARMASK(*fmt++);
4796 if (!isdigit(c))
4797 break;
4798 if ((prec*10) / 10 != prec) {
4799 PyErr_SetString(
4800 PyExc_ValueError,
4801 "prec too big");
4802 goto error;
4804 prec = prec*10 + (c - '0');
4807 } /* prec */
4808 if (fmtcnt >= 0) {
4809 if (c == 'h' || c == 'l' || c == 'L') {
4810 if (--fmtcnt >= 0)
4811 c = *fmt++;
4814 if (fmtcnt < 0) {
4815 PyErr_SetString(PyExc_ValueError,
4816 "incomplete format");
4817 goto error;
4819 if (c != '%') {
4820 v = getnextarg(args, arglen, &argidx);
4821 if (v == NULL)
4822 goto error;
4824 sign = 0;
4825 fill = ' ';
4826 switch (c) {
4827 case '%':
4828 pbuf = "%";
4829 len = 1;
4830 break;
4831 case 's':
4832 #ifdef Py_USING_UNICODE
4833 if (PyUnicode_Check(v)) {
4834 fmt = fmt_start;
4835 argidx = argidx_start;
4836 goto unicode;
4838 #endif
4839 temp = _PyObject_Str(v);
4840 #ifdef Py_USING_UNICODE
4841 if (temp != NULL && PyUnicode_Check(temp)) {
4842 Py_DECREF(temp);
4843 fmt = fmt_start;
4844 argidx = argidx_start;
4845 goto unicode;
4847 #endif
4848 /* Fall through */
4849 case 'r':
4850 if (c == 'r')
4851 temp = PyObject_Repr(v);
4852 if (temp == NULL)
4853 goto error;
4854 if (!PyString_Check(temp)) {
4855 PyErr_SetString(PyExc_TypeError,
4856 "%s argument has non-string str()");
4857 Py_DECREF(temp);
4858 goto error;
4860 pbuf = PyString_AS_STRING(temp);
4861 len = PyString_GET_SIZE(temp);
4862 if (prec >= 0 && len > prec)
4863 len = prec;
4864 break;
4865 case 'i':
4866 case 'd':
4867 case 'u':
4868 case 'o':
4869 case 'x':
4870 case 'X':
4871 if (c == 'i')
4872 c = 'd';
4873 isnumok = 0;
4874 if (PyNumber_Check(v)) {
4875 PyObject *iobj=NULL;
4877 if (PyInt_Check(v) || (PyLong_Check(v))) {
4878 iobj = v;
4879 Py_INCREF(iobj);
4881 else {
4882 iobj = PyNumber_Int(v);
4883 if (iobj==NULL) iobj = PyNumber_Long(v);
4885 if (iobj!=NULL) {
4886 if (PyInt_Check(iobj)) {
4887 isnumok = 1;
4888 pbuf = formatbuf;
4889 len = formatint(pbuf,
4890 sizeof(formatbuf),
4891 flags, prec, c, iobj);
4892 Py_DECREF(iobj);
4893 if (len < 0)
4894 goto error;
4895 sign = 1;
4897 else if (PyLong_Check(iobj)) {
4898 int ilen;
4900 isnumok = 1;
4901 temp = _PyString_FormatLong(iobj, flags,
4902 prec, c, &pbuf, &ilen);
4903 Py_DECREF(iobj);
4904 len = ilen;
4905 if (!temp)
4906 goto error;
4907 sign = 1;
4909 else {
4910 Py_DECREF(iobj);
4914 if (!isnumok) {
4915 PyErr_Format(PyExc_TypeError,
4916 "%%%c format: a number is required, "
4917 "not %.200s", c, Py_TYPE(v)->tp_name);
4918 goto error;
4920 if (flags & F_ZERO)
4921 fill = '0';
4922 break;
4923 case 'e':
4924 case 'E':
4925 case 'f':
4926 case 'F':
4927 case 'g':
4928 case 'G':
4929 if (c == 'F')
4930 c = 'f';
4931 pbuf = formatbuf;
4932 len = formatfloat(pbuf, sizeof(formatbuf),
4933 flags, prec, c, v);
4934 if (len < 0)
4935 goto error;
4936 sign = 1;
4937 if (flags & F_ZERO)
4938 fill = '0';
4939 break;
4940 case 'c':
4941 #ifdef Py_USING_UNICODE
4942 if (PyUnicode_Check(v)) {
4943 fmt = fmt_start;
4944 argidx = argidx_start;
4945 goto unicode;
4947 #endif
4948 pbuf = formatbuf;
4949 len = formatchar(pbuf, sizeof(formatbuf), v);
4950 if (len < 0)
4951 goto error;
4952 break;
4953 default:
4954 PyErr_Format(PyExc_ValueError,
4955 "unsupported format character '%c' (0x%x) "
4956 "at index %zd",
4957 c, c,
4958 (Py_ssize_t)(fmt - 1 -
4959 PyString_AsString(format)));
4960 goto error;
4962 if (sign) {
4963 if (*pbuf == '-' || *pbuf == '+') {
4964 sign = *pbuf++;
4965 len--;
4967 else if (flags & F_SIGN)
4968 sign = '+';
4969 else if (flags & F_BLANK)
4970 sign = ' ';
4971 else
4972 sign = 0;
4974 if (width < len)
4975 width = len;
4976 if (rescnt - (sign != 0) < width) {
4977 reslen -= rescnt;
4978 rescnt = width + fmtcnt + 100;
4979 reslen += rescnt;
4980 if (reslen < 0) {
4981 Py_DECREF(result);
4982 Py_XDECREF(temp);
4983 return PyErr_NoMemory();
4985 if (_PyString_Resize(&result, reslen) < 0) {
4986 Py_XDECREF(temp);
4987 return NULL;
4989 res = PyString_AS_STRING(result)
4990 + reslen - rescnt;
4992 if (sign) {
4993 if (fill != ' ')
4994 *res++ = sign;
4995 rescnt--;
4996 if (width > len)
4997 width--;
4999 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
5000 assert(pbuf[0] == '0');
5001 assert(pbuf[1] == c);
5002 if (fill != ' ') {
5003 *res++ = *pbuf++;
5004 *res++ = *pbuf++;
5006 rescnt -= 2;
5007 width -= 2;
5008 if (width < 0)
5009 width = 0;
5010 len -= 2;
5012 if (width > len && !(flags & F_LJUST)) {
5013 do {
5014 --rescnt;
5015 *res++ = fill;
5016 } while (--width > len);
5018 if (fill == ' ') {
5019 if (sign)
5020 *res++ = sign;
5021 if ((flags & F_ALT) &&
5022 (c == 'x' || c == 'X')) {
5023 assert(pbuf[0] == '0');
5024 assert(pbuf[1] == c);
5025 *res++ = *pbuf++;
5026 *res++ = *pbuf++;
5029 Py_MEMCPY(res, pbuf, len);
5030 res += len;
5031 rescnt -= len;
5032 while (--width >= len) {
5033 --rescnt;
5034 *res++ = ' ';
5036 if (dict && (argidx < arglen) && c != '%') {
5037 PyErr_SetString(PyExc_TypeError,
5038 "not all arguments converted during string formatting");
5039 Py_XDECREF(temp);
5040 goto error;
5042 Py_XDECREF(temp);
5043 } /* '%' */
5044 } /* until end */
5045 if (argidx < arglen && !dict) {
5046 PyErr_SetString(PyExc_TypeError,
5047 "not all arguments converted during string formatting");
5048 goto error;
5050 if (args_owned) {
5051 Py_DECREF(args);
5053 _PyString_Resize(&result, reslen - rescnt);
5054 return result;
5056 #ifdef Py_USING_UNICODE
5057 unicode:
5058 if (args_owned) {
5059 Py_DECREF(args);
5060 args_owned = 0;
5062 /* Fiddle args right (remove the first argidx arguments) */
5063 if (PyTuple_Check(orig_args) && argidx > 0) {
5064 PyObject *v;
5065 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5066 v = PyTuple_New(n);
5067 if (v == NULL)
5068 goto error;
5069 while (--n >= 0) {
5070 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5071 Py_INCREF(w);
5072 PyTuple_SET_ITEM(v, n, w);
5074 args = v;
5075 } else {
5076 Py_INCREF(orig_args);
5077 args = orig_args;
5079 args_owned = 1;
5080 /* Take what we have of the result and let the Unicode formatting
5081 function format the rest of the input. */
5082 rescnt = res - PyString_AS_STRING(result);
5083 if (_PyString_Resize(&result, rescnt))
5084 goto error;
5085 fmtcnt = PyString_GET_SIZE(format) - \
5086 (fmt - PyString_AS_STRING(format));
5087 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5088 if (format == NULL)
5089 goto error;
5090 v = PyUnicode_Format(format, args);
5091 Py_DECREF(format);
5092 if (v == NULL)
5093 goto error;
5094 /* Paste what we have (result) to what the Unicode formatting
5095 function returned (v) and return the result (or error) */
5096 w = PyUnicode_Concat(result, v);
5097 Py_DECREF(result);
5098 Py_DECREF(v);
5099 Py_DECREF(args);
5100 return w;
5101 #endif /* Py_USING_UNICODE */
5103 error:
5104 Py_DECREF(result);
5105 if (args_owned) {
5106 Py_DECREF(args);
5108 return NULL;
5111 void
5112 PyString_InternInPlace(PyObject **p)
5114 register PyStringObject *s = (PyStringObject *)(*p);
5115 PyObject *t;
5116 if (s == NULL || !PyString_Check(s))
5117 Py_FatalError("PyString_InternInPlace: strings only please!");
5118 /* If it's a string subclass, we don't really know what putting
5119 it in the interned dict might do. */
5120 if (!PyString_CheckExact(s))
5121 return;
5122 if (PyString_CHECK_INTERNED(s))
5123 return;
5124 if (interned == NULL) {
5125 interned = PyDict_New();
5126 if (interned == NULL) {
5127 PyErr_Clear(); /* Don't leave an exception */
5128 return;
5131 t = PyDict_GetItem(interned, (PyObject *)s);
5132 if (t) {
5133 Py_INCREF(t);
5134 Py_DECREF(*p);
5135 *p = t;
5136 return;
5139 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5140 PyErr_Clear();
5141 return;
5143 /* The two references in interned are not counted by refcnt.
5144 The string deallocator will take care of this */
5145 Py_REFCNT(s) -= 2;
5146 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
5149 void
5150 PyString_InternImmortal(PyObject **p)
5152 PyString_InternInPlace(p);
5153 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5154 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5155 Py_INCREF(*p);
5160 PyObject *
5161 PyString_InternFromString(const char *cp)
5163 PyObject *s = PyString_FromString(cp);
5164 if (s == NULL)
5165 return NULL;
5166 PyString_InternInPlace(&s);
5167 return s;
5170 void
5171 PyString_Fini(void)
5173 int i;
5174 for (i = 0; i < UCHAR_MAX + 1; i++) {
5175 Py_XDECREF(characters[i]);
5176 characters[i] = NULL;
5178 Py_XDECREF(nullstring);
5179 nullstring = NULL;
5182 void _Py_ReleaseInternedStrings(void)
5184 PyObject *keys;
5185 PyStringObject *s;
5186 Py_ssize_t i, n;
5187 Py_ssize_t immortal_size = 0, mortal_size = 0;
5189 if (interned == NULL || !PyDict_Check(interned))
5190 return;
5191 keys = PyDict_Keys(interned);
5192 if (keys == NULL || !PyList_Check(keys)) {
5193 PyErr_Clear();
5194 return;
5197 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5198 detector, interned strings are not forcibly deallocated; rather, we
5199 give them their stolen references back, and then clear and DECREF
5200 the interned dict. */
5202 n = PyList_GET_SIZE(keys);
5203 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5205 for (i = 0; i < n; i++) {
5206 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5207 switch (s->ob_sstate) {
5208 case SSTATE_NOT_INTERNED:
5209 /* XXX Shouldn't happen */
5210 break;
5211 case SSTATE_INTERNED_IMMORTAL:
5212 Py_REFCNT(s) += 1;
5213 immortal_size += Py_SIZE(s);
5214 break;
5215 case SSTATE_INTERNED_MORTAL:
5216 Py_REFCNT(s) += 2;
5217 mortal_size += Py_SIZE(s);
5218 break;
5219 default:
5220 Py_FatalError("Inconsistent interned string state.");
5222 s->ob_sstate = SSTATE_NOT_INTERNED;
5224 fprintf(stderr, "total size of all interned strings: "
5225 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5226 "mortal/immortal\n", mortal_size, immortal_size);
5227 Py_DECREF(keys);
5228 PyDict_Clear(interned);
5229 Py_DECREF(interned);
5230 interned = NULL;