Fix issue number in comment.
[python.git] / Objects / stringobject.c
blob43ef3fa0b6cee840369c73d81552495ed41dba05
1 /* String (str/bytes) object implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "Python.h"
6 #include <ctype.h>
7 #include <stddef.h>
9 #ifdef COUNT_ALLOCS
10 Py_ssize_t null_strings, one_strings;
11 #endif
13 static PyStringObject *characters[UCHAR_MAX + 1];
14 static PyStringObject *nullstring;
16 /* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
21 Another way to look at this is to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
24 static PyObject *interned;
26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
35 For both PyString_FromString() and PyString_FromStringAndSize(), the
36 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
39 For PyString_FromString(), the parameter `str' points to a null-terminated
40 string containing exactly `size' bytes.
42 For PyString_FromStringAndSize(), the parameter `str' is
43 either NULL or else points to a string containing at least `size' bytes.
44 For PyString_FromStringAndSize(), the string in the `str' parameter does
45 not have to be null-terminated. (Therefore it is safe to construct a
46 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
48 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
50 PyString object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character. It
56 is therefore equal to the `size' parameter (for
57 PyString_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyString_FromString()).
60 PyObject *
61 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
63 register PyStringObject *op;
64 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
66 "Negative size passed to PyString_FromStringAndSize");
67 return NULL;
69 if (size == 0 && (op = nullstring) != NULL) {
70 #ifdef COUNT_ALLOCS
71 null_strings++;
72 #endif
73 Py_INCREF(op);
74 return (PyObject *)op;
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
79 #ifdef COUNT_ALLOCS
80 one_strings++;
81 #endif
82 Py_INCREF(op);
83 return (PyObject *)op;
86 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
87 PyErr_SetString(PyExc_OverflowError, "string is too large");
88 return NULL;
91 /* Inline PyObject_NewVar */
92 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
93 if (op == NULL)
94 return PyErr_NoMemory();
95 PyObject_INIT_VAR(op, &PyString_Type, size);
96 op->ob_shash = -1;
97 op->ob_sstate = SSTATE_NOT_INTERNED;
98 if (str != NULL)
99 Py_MEMCPY(op->ob_sval, str, size);
100 op->ob_sval[size] = '\0';
101 /* share short strings */
102 if (size == 0) {
103 PyObject *t = (PyObject *)op;
104 PyString_InternInPlace(&t);
105 op = (PyStringObject *)t;
106 nullstring = op;
107 Py_INCREF(op);
108 } else if (size == 1 && str != NULL) {
109 PyObject *t = (PyObject *)op;
110 PyString_InternInPlace(&t);
111 op = (PyStringObject *)t;
112 characters[*str & UCHAR_MAX] = op;
113 Py_INCREF(op);
115 return (PyObject *) op;
118 PyObject *
119 PyString_FromString(const char *str)
121 register size_t size;
122 register PyStringObject *op;
124 assert(str != NULL);
125 size = strlen(str);
126 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
127 PyErr_SetString(PyExc_OverflowError,
128 "string is too long for a Python string");
129 return NULL;
131 if (size == 0 && (op = nullstring) != NULL) {
132 #ifdef COUNT_ALLOCS
133 null_strings++;
134 #endif
135 Py_INCREF(op);
136 return (PyObject *)op;
138 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
139 #ifdef COUNT_ALLOCS
140 one_strings++;
141 #endif
142 Py_INCREF(op);
143 return (PyObject *)op;
146 /* Inline PyObject_NewVar */
147 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
148 if (op == NULL)
149 return PyErr_NoMemory();
150 PyObject_INIT_VAR(op, &PyString_Type, size);
151 op->ob_shash = -1;
152 op->ob_sstate = SSTATE_NOT_INTERNED;
153 Py_MEMCPY(op->ob_sval, str, size+1);
154 /* share short strings */
155 if (size == 0) {
156 PyObject *t = (PyObject *)op;
157 PyString_InternInPlace(&t);
158 op = (PyStringObject *)t;
159 nullstring = op;
160 Py_INCREF(op);
161 } else if (size == 1) {
162 PyObject *t = (PyObject *)op;
163 PyString_InternInPlace(&t);
164 op = (PyStringObject *)t;
165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
168 return (PyObject *) op;
171 PyObject *
172 PyString_FromFormatV(const char *format, va_list vargs)
174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
180 #ifdef VA_LIST_IS_ARRAY
181 Py_MEMCPY(count, vargs, sizeof(va_list));
182 #else
183 #ifdef __va_copy
184 __va_copy(count, vargs);
185 #else
186 count = vargs;
187 #endif
188 #endif
189 /* step 1: figure out how large a buffer we need */
190 for (f = format; *f; f++) {
191 if (*f == '%') {
192 #ifdef HAVE_LONG_LONG
193 int longlongflag = 0;
194 #endif
195 const char* p = f;
196 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
199 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
200 * they don't affect the amount of space we reserve.
202 if (*f == 'l') {
203 if (f[1] == 'd' || f[1] == 'u') {
204 ++f;
206 #ifdef HAVE_LONG_LONG
207 else if (f[1] == 'l' &&
208 (f[2] == 'd' || f[2] == 'u')) {
209 longlongflag = 1;
210 f += 2;
212 #endif
214 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
215 ++f;
218 switch (*f) {
219 case 'c':
220 (void)va_arg(count, int);
221 /* fall through... */
222 case '%':
223 n++;
224 break;
225 case 'd': case 'u': case 'i': case 'x':
226 (void) va_arg(count, int);
227 #ifdef HAVE_LONG_LONG
228 /* Need at most
229 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
230 plus 1 for the sign. 53/22 is an upper
231 bound for log10(256). */
232 if (longlongflag)
233 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
234 else
235 #endif
236 /* 20 bytes is enough to hold a 64-bit
237 integer. Decimal takes the most
238 space. This isn't enough for
239 octal. */
240 n += 20;
242 break;
243 case 's':
244 s = va_arg(count, char*);
245 n += strlen(s);
246 break;
247 case 'p':
248 (void) va_arg(count, int);
249 /* maximum 64-bit pointer representation:
250 * 0xffffffffffffffff
251 * so 19 characters is enough.
252 * XXX I count 18 -- what's the extra for?
254 n += 19;
255 break;
256 default:
257 /* if we stumble upon an unknown
258 formatting code, copy the rest of
259 the format string to the output
260 string. (we cannot just skip the
261 code, since there's no way to know
262 what's in the argument list) */
263 n += strlen(p);
264 goto expand;
266 } else
267 n++;
269 expand:
270 /* step 2: fill the buffer */
271 /* Since we've analyzed how much space we need for the worst case,
272 use sprintf directly instead of the slower PyOS_snprintf. */
273 string = PyString_FromStringAndSize(NULL, n);
274 if (!string)
275 return NULL;
277 s = PyString_AsString(string);
279 for (f = format; *f; f++) {
280 if (*f == '%') {
281 const char* p = f++;
282 Py_ssize_t i;
283 int longflag = 0;
284 #ifdef HAVE_LONG_LONG
285 int longlongflag = 0;
286 #endif
287 int size_tflag = 0;
288 /* parse the width.precision part (we're only
289 interested in the precision value, if any) */
290 n = 0;
291 while (isdigit(Py_CHARMASK(*f)))
292 n = (n*10) + *f++ - '0';
293 if (*f == '.') {
294 f++;
295 n = 0;
296 while (isdigit(Py_CHARMASK(*f)))
297 n = (n*10) + *f++ - '0';
299 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
300 f++;
301 /* Handle %ld, %lu, %lld and %llu. */
302 if (*f == 'l') {
303 if (f[1] == 'd' || f[1] == 'u') {
304 longflag = 1;
305 ++f;
307 #ifdef HAVE_LONG_LONG
308 else if (f[1] == 'l' &&
309 (f[2] == 'd' || f[2] == 'u')) {
310 longlongflag = 1;
311 f += 2;
313 #endif
315 /* handle the size_t flag. */
316 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
317 size_tflag = 1;
318 ++f;
321 switch (*f) {
322 case 'c':
323 *s++ = va_arg(vargs, int);
324 break;
325 case 'd':
326 if (longflag)
327 sprintf(s, "%ld", va_arg(vargs, long));
328 #ifdef HAVE_LONG_LONG
329 else if (longlongflag)
330 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
331 va_arg(vargs, PY_LONG_LONG));
332 #endif
333 else if (size_tflag)
334 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
335 va_arg(vargs, Py_ssize_t));
336 else
337 sprintf(s, "%d", va_arg(vargs, int));
338 s += strlen(s);
339 break;
340 case 'u':
341 if (longflag)
342 sprintf(s, "%lu",
343 va_arg(vargs, unsigned long));
344 #ifdef HAVE_LONG_LONG
345 else if (longlongflag)
346 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
347 va_arg(vargs, PY_LONG_LONG));
348 #endif
349 else if (size_tflag)
350 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
351 va_arg(vargs, size_t));
352 else
353 sprintf(s, "%u",
354 va_arg(vargs, unsigned int));
355 s += strlen(s);
356 break;
357 case 'i':
358 sprintf(s, "%i", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 'x':
362 sprintf(s, "%x", va_arg(vargs, int));
363 s += strlen(s);
364 break;
365 case 's':
366 p = va_arg(vargs, char*);
367 i = strlen(p);
368 if (n > 0 && i > n)
369 i = n;
370 Py_MEMCPY(s, p, i);
371 s += i;
372 break;
373 case 'p':
374 sprintf(s, "%p", va_arg(vargs, void*));
375 /* %p is ill-defined: ensure leading 0x. */
376 if (s[1] == 'X')
377 s[1] = 'x';
378 else if (s[1] != 'x') {
379 memmove(s+2, s, strlen(s)+1);
380 s[0] = '0';
381 s[1] = 'x';
383 s += strlen(s);
384 break;
385 case '%':
386 *s++ = '%';
387 break;
388 default:
389 strcpy(s, p);
390 s += strlen(s);
391 goto end;
393 } else
394 *s++ = *f;
397 end:
398 _PyString_Resize(&string, s - PyString_AS_STRING(string));
399 return string;
402 PyObject *
403 PyString_FromFormat(const char *format, ...)
405 PyObject* ret;
406 va_list vargs;
408 #ifdef HAVE_STDARG_PROTOTYPES
409 va_start(vargs, format);
410 #else
411 va_start(vargs);
412 #endif
413 ret = PyString_FromFormatV(format, vargs);
414 va_end(vargs);
415 return ret;
419 PyObject *PyString_Decode(const char *s,
420 Py_ssize_t size,
421 const char *encoding,
422 const char *errors)
424 PyObject *v, *str;
426 str = PyString_FromStringAndSize(s, size);
427 if (str == NULL)
428 return NULL;
429 v = PyString_AsDecodedString(str, encoding, errors);
430 Py_DECREF(str);
431 return v;
434 PyObject *PyString_AsDecodedObject(PyObject *str,
435 const char *encoding,
436 const char *errors)
438 PyObject *v;
440 if (!PyString_Check(str)) {
441 PyErr_BadArgument();
442 goto onError;
445 if (encoding == NULL) {
446 #ifdef Py_USING_UNICODE
447 encoding = PyUnicode_GetDefaultEncoding();
448 #else
449 PyErr_SetString(PyExc_ValueError, "no encoding specified");
450 goto onError;
451 #endif
454 /* Decode via the codec registry */
455 v = PyCodec_Decode(str, encoding, errors);
456 if (v == NULL)
457 goto onError;
459 return v;
461 onError:
462 return NULL;
465 PyObject *PyString_AsDecodedString(PyObject *str,
466 const char *encoding,
467 const char *errors)
469 PyObject *v;
471 v = PyString_AsDecodedObject(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
475 #ifdef Py_USING_UNICODE
476 /* Convert Unicode to a string using the default encoding */
477 if (PyUnicode_Check(v)) {
478 PyObject *temp = v;
479 v = PyUnicode_AsEncodedString(v, NULL, NULL);
480 Py_DECREF(temp);
481 if (v == NULL)
482 goto onError;
484 #endif
485 if (!PyString_Check(v)) {
486 PyErr_Format(PyExc_TypeError,
487 "decoder did not return a string object (type=%.400s)",
488 Py_TYPE(v)->tp_name);
489 Py_DECREF(v);
490 goto onError;
493 return v;
495 onError:
496 return NULL;
499 PyObject *PyString_Encode(const char *s,
500 Py_ssize_t size,
501 const char *encoding,
502 const char *errors)
504 PyObject *v, *str;
506 str = PyString_FromStringAndSize(s, size);
507 if (str == NULL)
508 return NULL;
509 v = PyString_AsEncodedString(str, encoding, errors);
510 Py_DECREF(str);
511 return v;
514 PyObject *PyString_AsEncodedObject(PyObject *str,
515 const char *encoding,
516 const char *errors)
518 PyObject *v;
520 if (!PyString_Check(str)) {
521 PyErr_BadArgument();
522 goto onError;
525 if (encoding == NULL) {
526 #ifdef Py_USING_UNICODE
527 encoding = PyUnicode_GetDefaultEncoding();
528 #else
529 PyErr_SetString(PyExc_ValueError, "no encoding specified");
530 goto onError;
531 #endif
534 /* Encode via the codec registry */
535 v = PyCodec_Encode(str, encoding, errors);
536 if (v == NULL)
537 goto onError;
539 return v;
541 onError:
542 return NULL;
545 PyObject *PyString_AsEncodedString(PyObject *str,
546 const char *encoding,
547 const char *errors)
549 PyObject *v;
551 v = PyString_AsEncodedObject(str, encoding, errors);
552 if (v == NULL)
553 goto onError;
555 #ifdef Py_USING_UNICODE
556 /* Convert Unicode to a string using the default encoding */
557 if (PyUnicode_Check(v)) {
558 PyObject *temp = v;
559 v = PyUnicode_AsEncodedString(v, NULL, NULL);
560 Py_DECREF(temp);
561 if (v == NULL)
562 goto onError;
564 #endif
565 if (!PyString_Check(v)) {
566 PyErr_Format(PyExc_TypeError,
567 "encoder did not return a string object (type=%.400s)",
568 Py_TYPE(v)->tp_name);
569 Py_DECREF(v);
570 goto onError;
573 return v;
575 onError:
576 return NULL;
579 static void
580 string_dealloc(PyObject *op)
582 switch (PyString_CHECK_INTERNED(op)) {
583 case SSTATE_NOT_INTERNED:
584 break;
586 case SSTATE_INTERNED_MORTAL:
587 /* revive dead object temporarily for DelItem */
588 Py_REFCNT(op) = 3;
589 if (PyDict_DelItem(interned, op) != 0)
590 Py_FatalError(
591 "deletion of interned string failed");
592 break;
594 case SSTATE_INTERNED_IMMORTAL:
595 Py_FatalError("Immortal interned string died.");
597 default:
598 Py_FatalError("Inconsistent interned string state.");
600 Py_TYPE(op)->tp_free(op);
603 /* Unescape a backslash-escaped string. If unicode is non-zero,
604 the string is a u-literal. If recode_encoding is non-zero,
605 the string is UTF-8 encoded and should be re-encoded in the
606 specified encoding. */
608 PyObject *PyString_DecodeEscape(const char *s,
609 Py_ssize_t len,
610 const char *errors,
611 Py_ssize_t unicode,
612 const char *recode_encoding)
614 int c;
615 char *p, *buf;
616 const char *end;
617 PyObject *v;
618 Py_ssize_t newlen = recode_encoding ? 4*len:len;
619 v = PyString_FromStringAndSize((char *)NULL, newlen);
620 if (v == NULL)
621 return NULL;
622 p = buf = PyString_AsString(v);
623 end = s + len;
624 while (s < end) {
625 if (*s != '\\') {
626 non_esc:
627 #ifdef Py_USING_UNICODE
628 if (recode_encoding && (*s & 0x80)) {
629 PyObject *u, *w;
630 char *r;
631 const char* t;
632 Py_ssize_t rn;
633 t = s;
634 /* Decode non-ASCII bytes as UTF-8. */
635 while (t < end && (*t & 0x80)) t++;
636 u = PyUnicode_DecodeUTF8(s, t - s, errors);
637 if(!u) goto failed;
639 /* Recode them in target encoding. */
640 w = PyUnicode_AsEncodedString(
641 u, recode_encoding, errors);
642 Py_DECREF(u);
643 if (!w) goto failed;
645 /* Append bytes to output buffer. */
646 assert(PyString_Check(w));
647 r = PyString_AS_STRING(w);
648 rn = PyString_GET_SIZE(w);
649 Py_MEMCPY(p, r, rn);
650 p += rn;
651 Py_DECREF(w);
652 s = t;
653 } else {
654 *p++ = *s++;
656 #else
657 *p++ = *s++;
658 #endif
659 continue;
661 s++;
662 if (s==end) {
663 PyErr_SetString(PyExc_ValueError,
664 "Trailing \\ in string");
665 goto failed;
667 switch (*s++) {
668 /* XXX This assumes ASCII! */
669 case '\n': break;
670 case '\\': *p++ = '\\'; break;
671 case '\'': *p++ = '\''; break;
672 case '\"': *p++ = '\"'; break;
673 case 'b': *p++ = '\b'; break;
674 case 'f': *p++ = '\014'; break; /* FF */
675 case 't': *p++ = '\t'; break;
676 case 'n': *p++ = '\n'; break;
677 case 'r': *p++ = '\r'; break;
678 case 'v': *p++ = '\013'; break; /* VT */
679 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
680 case '0': case '1': case '2': case '3':
681 case '4': case '5': case '6': case '7':
682 c = s[-1] - '0';
683 if (s < end && '0' <= *s && *s <= '7') {
684 c = (c<<3) + *s++ - '0';
685 if (s < end && '0' <= *s && *s <= '7')
686 c = (c<<3) + *s++ - '0';
688 *p++ = c;
689 break;
690 case 'x':
691 if (s+1 < end &&
692 isxdigit(Py_CHARMASK(s[0])) &&
693 isxdigit(Py_CHARMASK(s[1])))
695 unsigned int x = 0;
696 c = Py_CHARMASK(*s);
697 s++;
698 if (isdigit(c))
699 x = c - '0';
700 else if (islower(c))
701 x = 10 + c - 'a';
702 else
703 x = 10 + c - 'A';
704 x = x << 4;
705 c = Py_CHARMASK(*s);
706 s++;
707 if (isdigit(c))
708 x += c - '0';
709 else if (islower(c))
710 x += 10 + c - 'a';
711 else
712 x += 10 + c - 'A';
713 *p++ = x;
714 break;
716 if (!errors || strcmp(errors, "strict") == 0) {
717 PyErr_SetString(PyExc_ValueError,
718 "invalid \\x escape");
719 goto failed;
721 if (strcmp(errors, "replace") == 0) {
722 *p++ = '?';
723 } else if (strcmp(errors, "ignore") == 0)
724 /* do nothing */;
725 else {
726 PyErr_Format(PyExc_ValueError,
727 "decoding error; "
728 "unknown error handling code: %.400s",
729 errors);
730 goto failed;
732 #ifndef Py_USING_UNICODE
733 case 'u':
734 case 'U':
735 case 'N':
736 if (unicode) {
737 PyErr_SetString(PyExc_ValueError,
738 "Unicode escapes not legal "
739 "when Unicode disabled");
740 goto failed;
742 #endif
743 default:
744 *p++ = '\\';
745 s--;
746 goto non_esc; /* an arbitry number of unescaped
747 UTF-8 bytes may follow. */
750 if (p-buf < newlen)
751 _PyString_Resize(&v, p - buf);
752 return v;
753 failed:
754 Py_DECREF(v);
755 return NULL;
758 /* -------------------------------------------------------------------- */
759 /* object api */
761 static Py_ssize_t
762 string_getsize(register PyObject *op)
764 char *s;
765 Py_ssize_t len;
766 if (PyString_AsStringAndSize(op, &s, &len))
767 return -1;
768 return len;
771 static /*const*/ char *
772 string_getbuffer(register PyObject *op)
774 char *s;
775 Py_ssize_t len;
776 if (PyString_AsStringAndSize(op, &s, &len))
777 return NULL;
778 return s;
781 Py_ssize_t
782 PyString_Size(register PyObject *op)
784 if (!PyString_Check(op))
785 return string_getsize(op);
786 return Py_SIZE(op);
789 /*const*/ char *
790 PyString_AsString(register PyObject *op)
792 if (!PyString_Check(op))
793 return string_getbuffer(op);
794 return ((PyStringObject *)op) -> ob_sval;
798 PyString_AsStringAndSize(register PyObject *obj,
799 register char **s,
800 register Py_ssize_t *len)
802 if (s == NULL) {
803 PyErr_BadInternalCall();
804 return -1;
807 if (!PyString_Check(obj)) {
808 #ifdef Py_USING_UNICODE
809 if (PyUnicode_Check(obj)) {
810 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
811 if (obj == NULL)
812 return -1;
814 else
815 #endif
817 PyErr_Format(PyExc_TypeError,
818 "expected string or Unicode object, "
819 "%.200s found", Py_TYPE(obj)->tp_name);
820 return -1;
824 *s = PyString_AS_STRING(obj);
825 if (len != NULL)
826 *len = PyString_GET_SIZE(obj);
827 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
828 PyErr_SetString(PyExc_TypeError,
829 "expected string without null bytes");
830 return -1;
832 return 0;
835 /* -------------------------------------------------------------------- */
836 /* Methods */
838 #include "stringlib/stringdefs.h"
839 #include "stringlib/fastsearch.h"
841 #include "stringlib/count.h"
842 #include "stringlib/find.h"
843 #include "stringlib/partition.h"
844 #include "stringlib/split.h"
846 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
847 #include "stringlib/localeutil.h"
851 static int
852 string_print(PyStringObject *op, FILE *fp, int flags)
854 Py_ssize_t i, str_len;
855 char c;
856 int quote;
858 /* XXX Ought to check for interrupts when writing long strings */
859 if (! PyString_CheckExact(op)) {
860 int ret;
861 /* A str subclass may have its own __str__ method. */
862 op = (PyStringObject *) PyObject_Str((PyObject *)op);
863 if (op == NULL)
864 return -1;
865 ret = string_print(op, fp, flags);
866 Py_DECREF(op);
867 return ret;
869 if (flags & Py_PRINT_RAW) {
870 char *data = op->ob_sval;
871 Py_ssize_t size = Py_SIZE(op);
872 Py_BEGIN_ALLOW_THREADS
873 while (size > INT_MAX) {
874 /* Very long strings cannot be written atomically.
875 * But don't write exactly INT_MAX bytes at a time
876 * to avoid memory aligment issues.
878 const int chunk_size = INT_MAX & ~0x3FFF;
879 fwrite(data, 1, chunk_size, fp);
880 data += chunk_size;
881 size -= chunk_size;
883 #ifdef __VMS
884 if (size) fwrite(data, (int)size, 1, fp);
885 #else
886 fwrite(data, 1, (int)size, fp);
887 #endif
888 Py_END_ALLOW_THREADS
889 return 0;
892 /* figure out which quote to use; single is preferred */
893 quote = '\'';
894 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
895 !memchr(op->ob_sval, '"', Py_SIZE(op)))
896 quote = '"';
898 str_len = Py_SIZE(op);
899 Py_BEGIN_ALLOW_THREADS
900 fputc(quote, fp);
901 for (i = 0; i < str_len; i++) {
902 /* Since strings are immutable and the caller should have a
903 reference, accessing the interal buffer should not be an issue
904 with the GIL released. */
905 c = op->ob_sval[i];
906 if (c == quote || c == '\\')
907 fprintf(fp, "\\%c", c);
908 else if (c == '\t')
909 fprintf(fp, "\\t");
910 else if (c == '\n')
911 fprintf(fp, "\\n");
912 else if (c == '\r')
913 fprintf(fp, "\\r");
914 else if (c < ' ' || c >= 0x7f)
915 fprintf(fp, "\\x%02x", c & 0xff);
916 else
917 fputc(c, fp);
919 fputc(quote, fp);
920 Py_END_ALLOW_THREADS
921 return 0;
924 PyObject *
925 PyString_Repr(PyObject *obj, int smartquotes)
927 register PyStringObject* op = (PyStringObject*) obj;
928 size_t newsize = 2 + 4 * Py_SIZE(op);
929 PyObject *v;
930 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
931 PyErr_SetString(PyExc_OverflowError,
932 "string is too large to make repr");
933 return NULL;
935 v = PyString_FromStringAndSize((char *)NULL, newsize);
936 if (v == NULL) {
937 return NULL;
939 else {
940 register Py_ssize_t i;
941 register char c;
942 register char *p;
943 int quote;
945 /* figure out which quote to use; single is preferred */
946 quote = '\'';
947 if (smartquotes &&
948 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
949 !memchr(op->ob_sval, '"', Py_SIZE(op)))
950 quote = '"';
952 p = PyString_AS_STRING(v);
953 *p++ = quote;
954 for (i = 0; i < Py_SIZE(op); i++) {
955 /* There's at least enough room for a hex escape
956 and a closing quote. */
957 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
958 c = op->ob_sval[i];
959 if (c == quote || c == '\\')
960 *p++ = '\\', *p++ = c;
961 else if (c == '\t')
962 *p++ = '\\', *p++ = 't';
963 else if (c == '\n')
964 *p++ = '\\', *p++ = 'n';
965 else if (c == '\r')
966 *p++ = '\\', *p++ = 'r';
967 else if (c < ' ' || c >= 0x7f) {
968 /* For performance, we don't want to call
969 PyOS_snprintf here (extra layers of
970 function call). */
971 sprintf(p, "\\x%02x", c & 0xff);
972 p += 4;
974 else
975 *p++ = c;
977 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
978 *p++ = quote;
979 *p = '\0';
980 _PyString_Resize(
981 &v, (p - PyString_AS_STRING(v)));
982 return v;
986 static PyObject *
987 string_repr(PyObject *op)
989 return PyString_Repr(op, 1);
992 static PyObject *
993 string_str(PyObject *s)
995 assert(PyString_Check(s));
996 if (PyString_CheckExact(s)) {
997 Py_INCREF(s);
998 return s;
1000 else {
1001 /* Subtype -- return genuine string with the same value. */
1002 PyStringObject *t = (PyStringObject *) s;
1003 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1007 static Py_ssize_t
1008 string_length(PyStringObject *a)
1010 return Py_SIZE(a);
1013 static PyObject *
1014 string_concat(register PyStringObject *a, register PyObject *bb)
1016 register Py_ssize_t size;
1017 register PyStringObject *op;
1018 if (!PyString_Check(bb)) {
1019 #ifdef Py_USING_UNICODE
1020 if (PyUnicode_Check(bb))
1021 return PyUnicode_Concat((PyObject *)a, bb);
1022 #endif
1023 if (PyByteArray_Check(bb))
1024 return PyByteArray_Concat((PyObject *)a, bb);
1025 PyErr_Format(PyExc_TypeError,
1026 "cannot concatenate 'str' and '%.200s' objects",
1027 Py_TYPE(bb)->tp_name);
1028 return NULL;
1030 #define b ((PyStringObject *)bb)
1031 /* Optimize cases with empty left or right operand */
1032 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1033 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1034 if (Py_SIZE(a) == 0) {
1035 Py_INCREF(bb);
1036 return bb;
1038 Py_INCREF(a);
1039 return (PyObject *)a;
1041 size = Py_SIZE(a) + Py_SIZE(b);
1042 /* Check that string sizes are not negative, to prevent an
1043 overflow in cases where we are passed incorrectly-created
1044 strings with negative lengths (due to a bug in other code).
1046 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1047 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1048 PyErr_SetString(PyExc_OverflowError,
1049 "strings are too large to concat");
1050 return NULL;
1053 /* Inline PyObject_NewVar */
1054 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1055 PyErr_SetString(PyExc_OverflowError,
1056 "strings are too large to concat");
1057 return NULL;
1059 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1060 if (op == NULL)
1061 return PyErr_NoMemory();
1062 PyObject_INIT_VAR(op, &PyString_Type, size);
1063 op->ob_shash = -1;
1064 op->ob_sstate = SSTATE_NOT_INTERNED;
1065 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1066 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1067 op->ob_sval[size] = '\0';
1068 return (PyObject *) op;
1069 #undef b
1072 static PyObject *
1073 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1075 register Py_ssize_t i;
1076 register Py_ssize_t j;
1077 register Py_ssize_t size;
1078 register PyStringObject *op;
1079 size_t nbytes;
1080 if (n < 0)
1081 n = 0;
1082 /* watch out for overflows: the size can overflow int,
1083 * and the # of bytes needed can overflow size_t
1085 size = Py_SIZE(a) * n;
1086 if (n && size / n != Py_SIZE(a)) {
1087 PyErr_SetString(PyExc_OverflowError,
1088 "repeated string is too long");
1089 return NULL;
1091 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1092 Py_INCREF(a);
1093 return (PyObject *)a;
1095 nbytes = (size_t)size;
1096 if (nbytes + PyStringObject_SIZE <= nbytes) {
1097 PyErr_SetString(PyExc_OverflowError,
1098 "repeated string is too long");
1099 return NULL;
1101 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1102 if (op == NULL)
1103 return PyErr_NoMemory();
1104 PyObject_INIT_VAR(op, &PyString_Type, size);
1105 op->ob_shash = -1;
1106 op->ob_sstate = SSTATE_NOT_INTERNED;
1107 op->ob_sval[size] = '\0';
1108 if (Py_SIZE(a) == 1 && n > 0) {
1109 memset(op->ob_sval, a->ob_sval[0] , n);
1110 return (PyObject *) op;
1112 i = 0;
1113 if (i < size) {
1114 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1115 i = Py_SIZE(a);
1117 while (i < size) {
1118 j = (i <= size-i) ? i : size-i;
1119 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1120 i += j;
1122 return (PyObject *) op;
1125 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1127 static PyObject *
1128 string_slice(register PyStringObject *a, register Py_ssize_t i,
1129 register Py_ssize_t j)
1130 /* j -- may be negative! */
1132 if (i < 0)
1133 i = 0;
1134 if (j < 0)
1135 j = 0; /* Avoid signed/unsigned bug in next line */
1136 if (j > Py_SIZE(a))
1137 j = Py_SIZE(a);
1138 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1139 /* It's the same as a */
1140 Py_INCREF(a);
1141 return (PyObject *)a;
1143 if (j < i)
1144 j = i;
1145 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1148 static int
1149 string_contains(PyObject *str_obj, PyObject *sub_obj)
1151 if (!PyString_CheckExact(sub_obj)) {
1152 #ifdef Py_USING_UNICODE
1153 if (PyUnicode_Check(sub_obj))
1154 return PyUnicode_Contains(str_obj, sub_obj);
1155 #endif
1156 if (!PyString_Check(sub_obj)) {
1157 PyErr_Format(PyExc_TypeError,
1158 "'in <string>' requires string as left operand, "
1159 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1160 return -1;
1164 return stringlib_contains_obj(str_obj, sub_obj);
1167 static PyObject *
1168 string_item(PyStringObject *a, register Py_ssize_t i)
1170 char pchar;
1171 PyObject *v;
1172 if (i < 0 || i >= Py_SIZE(a)) {
1173 PyErr_SetString(PyExc_IndexError, "string index out of range");
1174 return NULL;
1176 pchar = a->ob_sval[i];
1177 v = (PyObject *)characters[pchar & UCHAR_MAX];
1178 if (v == NULL)
1179 v = PyString_FromStringAndSize(&pchar, 1);
1180 else {
1181 #ifdef COUNT_ALLOCS
1182 one_strings++;
1183 #endif
1184 Py_INCREF(v);
1186 return v;
1189 static PyObject*
1190 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1192 int c;
1193 Py_ssize_t len_a, len_b;
1194 Py_ssize_t min_len;
1195 PyObject *result;
1197 /* Make sure both arguments are strings. */
1198 if (!(PyString_Check(a) && PyString_Check(b))) {
1199 result = Py_NotImplemented;
1200 goto out;
1202 if (a == b) {
1203 switch (op) {
1204 case Py_EQ:case Py_LE:case Py_GE:
1205 result = Py_True;
1206 goto out;
1207 case Py_NE:case Py_LT:case Py_GT:
1208 result = Py_False;
1209 goto out;
1212 if (op == Py_EQ) {
1213 /* Supporting Py_NE here as well does not save
1214 much time, since Py_NE is rarely used. */
1215 if (Py_SIZE(a) == Py_SIZE(b)
1216 && (a->ob_sval[0] == b->ob_sval[0]
1217 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1218 result = Py_True;
1219 } else {
1220 result = Py_False;
1222 goto out;
1224 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1225 min_len = (len_a < len_b) ? len_a : len_b;
1226 if (min_len > 0) {
1227 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1228 if (c==0)
1229 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1230 } else
1231 c = 0;
1232 if (c == 0)
1233 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1234 switch (op) {
1235 case Py_LT: c = c < 0; break;
1236 case Py_LE: c = c <= 0; break;
1237 case Py_EQ: assert(0); break; /* unreachable */
1238 case Py_NE: c = c != 0; break;
1239 case Py_GT: c = c > 0; break;
1240 case Py_GE: c = c >= 0; break;
1241 default:
1242 result = Py_NotImplemented;
1243 goto out;
1245 result = c ? Py_True : Py_False;
1246 out:
1247 Py_INCREF(result);
1248 return result;
1252 _PyString_Eq(PyObject *o1, PyObject *o2)
1254 PyStringObject *a = (PyStringObject*) o1;
1255 PyStringObject *b = (PyStringObject*) o2;
1256 return Py_SIZE(a) == Py_SIZE(b)
1257 && *a->ob_sval == *b->ob_sval
1258 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1261 static long
1262 string_hash(PyStringObject *a)
1264 register Py_ssize_t len;
1265 register unsigned char *p;
1266 register long x;
1268 if (a->ob_shash != -1)
1269 return a->ob_shash;
1270 len = Py_SIZE(a);
1271 p = (unsigned char *) a->ob_sval;
1272 x = *p << 7;
1273 while (--len >= 0)
1274 x = (1000003*x) ^ *p++;
1275 x ^= Py_SIZE(a);
1276 if (x == -1)
1277 x = -2;
1278 a->ob_shash = x;
1279 return x;
1282 static PyObject*
1283 string_subscript(PyStringObject* self, PyObject* item)
1285 if (PyIndex_Check(item)) {
1286 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1287 if (i == -1 && PyErr_Occurred())
1288 return NULL;
1289 if (i < 0)
1290 i += PyString_GET_SIZE(self);
1291 return string_item(self, i);
1293 else if (PySlice_Check(item)) {
1294 Py_ssize_t start, stop, step, slicelength, cur, i;
1295 char* source_buf;
1296 char* result_buf;
1297 PyObject* result;
1299 if (PySlice_GetIndicesEx((PySliceObject*)item,
1300 PyString_GET_SIZE(self),
1301 &start, &stop, &step, &slicelength) < 0) {
1302 return NULL;
1305 if (slicelength <= 0) {
1306 return PyString_FromStringAndSize("", 0);
1308 else if (start == 0 && step == 1 &&
1309 slicelength == PyString_GET_SIZE(self) &&
1310 PyString_CheckExact(self)) {
1311 Py_INCREF(self);
1312 return (PyObject *)self;
1314 else if (step == 1) {
1315 return PyString_FromStringAndSize(
1316 PyString_AS_STRING(self) + start,
1317 slicelength);
1319 else {
1320 source_buf = PyString_AsString((PyObject*)self);
1321 result_buf = (char *)PyMem_Malloc(slicelength);
1322 if (result_buf == NULL)
1323 return PyErr_NoMemory();
1325 for (cur = start, i = 0; i < slicelength;
1326 cur += step, i++) {
1327 result_buf[i] = source_buf[cur];
1330 result = PyString_FromStringAndSize(result_buf,
1331 slicelength);
1332 PyMem_Free(result_buf);
1333 return result;
1336 else {
1337 PyErr_Format(PyExc_TypeError,
1338 "string indices must be integers, not %.200s",
1339 Py_TYPE(item)->tp_name);
1340 return NULL;
1344 static Py_ssize_t
1345 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1347 if ( index != 0 ) {
1348 PyErr_SetString(PyExc_SystemError,
1349 "accessing non-existent string segment");
1350 return -1;
1352 *ptr = (void *)self->ob_sval;
1353 return Py_SIZE(self);
1356 static Py_ssize_t
1357 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1359 PyErr_SetString(PyExc_TypeError,
1360 "Cannot use string as modifiable buffer");
1361 return -1;
1364 static Py_ssize_t
1365 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1367 if ( lenp )
1368 *lenp = Py_SIZE(self);
1369 return 1;
1372 static Py_ssize_t
1373 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1375 if ( index != 0 ) {
1376 PyErr_SetString(PyExc_SystemError,
1377 "accessing non-existent string segment");
1378 return -1;
1380 *ptr = self->ob_sval;
1381 return Py_SIZE(self);
1384 static int
1385 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1387 return PyBuffer_FillInfo(view, (PyObject*)self,
1388 (void *)self->ob_sval, Py_SIZE(self),
1389 1, flags);
1392 static PySequenceMethods string_as_sequence = {
1393 (lenfunc)string_length, /*sq_length*/
1394 (binaryfunc)string_concat, /*sq_concat*/
1395 (ssizeargfunc)string_repeat, /*sq_repeat*/
1396 (ssizeargfunc)string_item, /*sq_item*/
1397 (ssizessizeargfunc)string_slice, /*sq_slice*/
1398 0, /*sq_ass_item*/
1399 0, /*sq_ass_slice*/
1400 (objobjproc)string_contains /*sq_contains*/
1403 static PyMappingMethods string_as_mapping = {
1404 (lenfunc)string_length,
1405 (binaryfunc)string_subscript,
1409 static PyBufferProcs string_as_buffer = {
1410 (readbufferproc)string_buffer_getreadbuf,
1411 (writebufferproc)string_buffer_getwritebuf,
1412 (segcountproc)string_buffer_getsegcount,
1413 (charbufferproc)string_buffer_getcharbuf,
1414 (getbufferproc)string_buffer_getbuffer,
1415 0, /* XXX */
/* Which end(s) of the string a strip operation works on. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for a strip type: the format string with its leading
   "|O:" (three characters) skipped. */
#define STRIPNAME(i) (stripformat[(i)] + 3)
1429 PyDoc_STRVAR(split__doc__,
1430 "S.split([sep [,maxsplit]]) -> list of strings\n\
1432 Return a list of the words in the string S, using sep as the\n\
1433 delimiter string. If maxsplit is given, at most maxsplit\n\
1434 splits are done. If sep is not specified or is None, any\n\
1435 whitespace string is a separator and empty strings are removed\n\
1436 from the result.");
1438 static PyObject *
1439 string_split(PyStringObject *self, PyObject *args)
1441 Py_ssize_t len = PyString_GET_SIZE(self), n;
1442 Py_ssize_t maxsplit = -1;
1443 const char *s = PyString_AS_STRING(self), *sub;
1444 PyObject *subobj = Py_None;
1446 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1447 return NULL;
1448 if (maxsplit < 0)
1449 maxsplit = PY_SSIZE_T_MAX;
1450 if (subobj == Py_None)
1451 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1452 if (PyString_Check(subobj)) {
1453 sub = PyString_AS_STRING(subobj);
1454 n = PyString_GET_SIZE(subobj);
1456 #ifdef Py_USING_UNICODE
1457 else if (PyUnicode_Check(subobj))
1458 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1459 #endif
1460 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1461 return NULL;
1463 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1466 PyDoc_STRVAR(partition__doc__,
1467 "S.partition(sep) -> (head, sep, tail)\n\
1469 Search for the separator sep in S, and return the part before it,\n\
1470 the separator itself, and the part after it. If the separator is not\n\
1471 found, return S and two empty strings.");
1473 static PyObject *
1474 string_partition(PyStringObject *self, PyObject *sep_obj)
1476 const char *sep;
1477 Py_ssize_t sep_len;
1479 if (PyString_Check(sep_obj)) {
1480 sep = PyString_AS_STRING(sep_obj);
1481 sep_len = PyString_GET_SIZE(sep_obj);
1483 #ifdef Py_USING_UNICODE
1484 else if (PyUnicode_Check(sep_obj))
1485 return PyUnicode_Partition((PyObject *) self, sep_obj);
1486 #endif
1487 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1488 return NULL;
1490 return stringlib_partition(
1491 (PyObject*) self,
1492 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1493 sep_obj, sep, sep_len
1497 PyDoc_STRVAR(rpartition__doc__,
1498 "S.rpartition(sep) -> (tail, sep, head)\n\
1500 Search for the separator sep in S, starting at the end of S, and return\n\
1501 the part before it, the separator itself, and the part after it. If the\n\
1502 separator is not found, return two empty strings and S.");
1504 static PyObject *
1505 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1507 const char *sep;
1508 Py_ssize_t sep_len;
1510 if (PyString_Check(sep_obj)) {
1511 sep = PyString_AS_STRING(sep_obj);
1512 sep_len = PyString_GET_SIZE(sep_obj);
1514 #ifdef Py_USING_UNICODE
1515 else if (PyUnicode_Check(sep_obj))
1516 return PyUnicode_RPartition((PyObject *) self, sep_obj);
1517 #endif
1518 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1519 return NULL;
1521 return stringlib_rpartition(
1522 (PyObject*) self,
1523 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1524 sep_obj, sep, sep_len
1528 PyDoc_STRVAR(rsplit__doc__,
1529 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1531 Return a list of the words in the string S, using sep as the\n\
1532 delimiter string, starting at the end of the string and working\n\
1533 to the front. If maxsplit is given, at most maxsplit splits are\n\
1534 done. If sep is not specified or is None, any whitespace string\n\
1535 is a separator.");
1537 static PyObject *
1538 string_rsplit(PyStringObject *self, PyObject *args)
1540 Py_ssize_t len = PyString_GET_SIZE(self), n;
1541 Py_ssize_t maxsplit = -1;
1542 const char *s = PyString_AS_STRING(self), *sub;
1543 PyObject *subobj = Py_None;
1545 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1546 return NULL;
1547 if (maxsplit < 0)
1548 maxsplit = PY_SSIZE_T_MAX;
1549 if (subobj == Py_None)
1550 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1551 if (PyString_Check(subobj)) {
1552 sub = PyString_AS_STRING(subobj);
1553 n = PyString_GET_SIZE(subobj);
1555 #ifdef Py_USING_UNICODE
1556 else if (PyUnicode_Check(subobj))
1557 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1558 #endif
1559 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1560 return NULL;
1562 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1566 PyDoc_STRVAR(join__doc__,
1567 "S.join(iterable) -> string\n\
1569 Return a string which is the concatenation of the strings in the\n\
1570 iterable. The separator between elements is S.");
1572 static PyObject *
1573 string_join(PyStringObject *self, PyObject *orig)
1575 char *sep = PyString_AS_STRING(self);
1576 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1577 PyObject *res = NULL;
1578 char *p;
1579 Py_ssize_t seqlen = 0;
1580 size_t sz = 0;
1581 Py_ssize_t i;
1582 PyObject *seq, *item;
1584 seq = PySequence_Fast(orig, "");
1585 if (seq == NULL) {
1586 return NULL;
1589 seqlen = PySequence_Size(seq);
1590 if (seqlen == 0) {
1591 Py_DECREF(seq);
1592 return PyString_FromString("");
1594 if (seqlen == 1) {
1595 item = PySequence_Fast_GET_ITEM(seq, 0);
1596 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1597 Py_INCREF(item);
1598 Py_DECREF(seq);
1599 return item;
1603 /* There are at least two things to join, or else we have a subclass
1604 * of the builtin types in the sequence.
1605 * Do a pre-pass to figure out the total amount of space we'll
1606 * need (sz), see whether any argument is absurd, and defer to
1607 * the Unicode join if appropriate.
1609 for (i = 0; i < seqlen; i++) {
1610 const size_t old_sz = sz;
1611 item = PySequence_Fast_GET_ITEM(seq, i);
1612 if (!PyString_Check(item)){
1613 #ifdef Py_USING_UNICODE
1614 if (PyUnicode_Check(item)) {
1615 /* Defer to Unicode join.
1616 * CAUTION: There's no gurantee that the
1617 * original sequence can be iterated over
1618 * again, so we must pass seq here.
1620 PyObject *result;
1621 result = PyUnicode_Join((PyObject *)self, seq);
1622 Py_DECREF(seq);
1623 return result;
1625 #endif
1626 PyErr_Format(PyExc_TypeError,
1627 "sequence item %zd: expected string,"
1628 " %.80s found",
1629 i, Py_TYPE(item)->tp_name);
1630 Py_DECREF(seq);
1631 return NULL;
1633 sz += PyString_GET_SIZE(item);
1634 if (i != 0)
1635 sz += seplen;
1636 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1637 PyErr_SetString(PyExc_OverflowError,
1638 "join() result is too long for a Python string");
1639 Py_DECREF(seq);
1640 return NULL;
1644 /* Allocate result space. */
1645 res = PyString_FromStringAndSize((char*)NULL, sz);
1646 if (res == NULL) {
1647 Py_DECREF(seq);
1648 return NULL;
1651 /* Catenate everything. */
1652 p = PyString_AS_STRING(res);
1653 for (i = 0; i < seqlen; ++i) {
1654 size_t n;
1655 item = PySequence_Fast_GET_ITEM(seq, i);
1656 n = PyString_GET_SIZE(item);
1657 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1658 p += n;
1659 if (i < seqlen - 1) {
1660 Py_MEMCPY(p, sep, seplen);
1661 p += seplen;
1665 Py_DECREF(seq);
1666 return res;
1669 PyObject *
1670 _PyString_Join(PyObject *sep, PyObject *x)
1672 assert(sep != NULL && PyString_Check(sep));
1673 assert(x != NULL);
1674 return string_join((PyStringObject *)sep, x);
/* helper macro to fixup start/end slice values: `end` is clamped to
   `len`, negative values are taken relative to `len` and then clamped
   to 0.  `start` and `end` must be modifiable lvalues.  Wrapped in
   do { } while (0) so that it behaves as a single statement. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
1692 Py_LOCAL_INLINE(Py_ssize_t)
1693 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1695 PyObject *subobj;
1696 const char *sub;
1697 Py_ssize_t sub_len;
1698 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1699 PyObject *obj_start=Py_None, *obj_end=Py_None;
1701 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1702 &obj_start, &obj_end))
1703 return -2;
1704 /* To support None in "start" and "end" arguments, meaning
1705 the same as if they were not passed.
1707 if (obj_start != Py_None)
1708 if (!_PyEval_SliceIndex(obj_start, &start))
1709 return -2;
1710 if (obj_end != Py_None)
1711 if (!_PyEval_SliceIndex(obj_end, &end))
1712 return -2;
1714 if (PyString_Check(subobj)) {
1715 sub = PyString_AS_STRING(subobj);
1716 sub_len = PyString_GET_SIZE(subobj);
1718 #ifdef Py_USING_UNICODE
1719 else if (PyUnicode_Check(subobj))
1720 return PyUnicode_Find(
1721 (PyObject *)self, subobj, start, end, dir);
1722 #endif
1723 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1724 /* XXX - the "expected a character buffer object" is pretty
1725 confusing for a non-expert. remap to something else ? */
1726 return -2;
1728 if (dir > 0)
1729 return stringlib_find_slice(
1730 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1731 sub, sub_len, start, end);
1732 else
1733 return stringlib_rfind_slice(
1734 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1735 sub, sub_len, start, end);
1739 PyDoc_STRVAR(find__doc__,
1740 "S.find(sub [,start [,end]]) -> int\n\
1742 Return the lowest index in S where substring sub is found,\n\
1743 such that sub is contained within s[start:end]. Optional\n\
1744 arguments start and end are interpreted as in slice notation.\n\
1746 Return -1 on failure.");
1748 static PyObject *
1749 string_find(PyStringObject *self, PyObject *args)
1751 Py_ssize_t result = string_find_internal(self, args, +1);
1752 if (result == -2)
1753 return NULL;
1754 return PyInt_FromSsize_t(result);
1758 PyDoc_STRVAR(index__doc__,
1759 "S.index(sub [,start [,end]]) -> int\n\
1761 Like S.find() but raise ValueError when the substring is not found.");
1763 static PyObject *
1764 string_index(PyStringObject *self, PyObject *args)
1766 Py_ssize_t result = string_find_internal(self, args, +1);
1767 if (result == -2)
1768 return NULL;
1769 if (result == -1) {
1770 PyErr_SetString(PyExc_ValueError,
1771 "substring not found");
1772 return NULL;
1774 return PyInt_FromSsize_t(result);
1778 PyDoc_STRVAR(rfind__doc__,
1779 "S.rfind(sub [,start [,end]]) -> int\n\
1781 Return the highest index in S where substring sub is found,\n\
1782 such that sub is contained within s[start:end]. Optional\n\
1783 arguments start and end are interpreted as in slice notation.\n\
1785 Return -1 on failure.");
1787 static PyObject *
1788 string_rfind(PyStringObject *self, PyObject *args)
1790 Py_ssize_t result = string_find_internal(self, args, -1);
1791 if (result == -2)
1792 return NULL;
1793 return PyInt_FromSsize_t(result);
1797 PyDoc_STRVAR(rindex__doc__,
1798 "S.rindex(sub [,start [,end]]) -> int\n\
1800 Like S.rfind() but raise ValueError when the substring is not found.");
1802 static PyObject *
1803 string_rindex(PyStringObject *self, PyObject *args)
1805 Py_ssize_t result = string_find_internal(self, args, -1);
1806 if (result == -2)
1807 return NULL;
1808 if (result == -1) {
1809 PyErr_SetString(PyExc_ValueError,
1810 "substring not found");
1811 return NULL;
1813 return PyInt_FromSsize_t(result);
1817 Py_LOCAL_INLINE(PyObject *)
1818 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1820 char *s = PyString_AS_STRING(self);
1821 Py_ssize_t len = PyString_GET_SIZE(self);
1822 char *sep = PyString_AS_STRING(sepobj);
1823 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1824 Py_ssize_t i, j;
1826 i = 0;
1827 if (striptype != RIGHTSTRIP) {
1828 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1829 i++;
1833 j = len;
1834 if (striptype != LEFTSTRIP) {
1835 do {
1836 j--;
1837 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1838 j++;
1841 if (i == 0 && j == len && PyString_CheckExact(self)) {
1842 Py_INCREF(self);
1843 return (PyObject*)self;
1845 else
1846 return PyString_FromStringAndSize(s+i, j-i);
1850 Py_LOCAL_INLINE(PyObject *)
1851 do_strip(PyStringObject *self, int striptype)
1853 char *s = PyString_AS_STRING(self);
1854 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1856 i = 0;
1857 if (striptype != RIGHTSTRIP) {
1858 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1859 i++;
1863 j = len;
1864 if (striptype != LEFTSTRIP) {
1865 do {
1866 j--;
1867 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1868 j++;
1871 if (i == 0 && j == len && PyString_CheckExact(self)) {
1872 Py_INCREF(self);
1873 return (PyObject*)self;
1875 else
1876 return PyString_FromStringAndSize(s+i, j-i);
1880 Py_LOCAL_INLINE(PyObject *)
1881 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1883 PyObject *sep = NULL;
1885 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1886 return NULL;
1888 if (sep != NULL && sep != Py_None) {
1889 if (PyString_Check(sep))
1890 return do_xstrip(self, striptype, sep);
1891 #ifdef Py_USING_UNICODE
1892 else if (PyUnicode_Check(sep)) {
1893 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1894 PyObject *res;
1895 if (uniself==NULL)
1896 return NULL;
1897 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1898 striptype, sep);
1899 Py_DECREF(uniself);
1900 return res;
1902 #endif
1903 PyErr_Format(PyExc_TypeError,
1904 #ifdef Py_USING_UNICODE
1905 "%s arg must be None, str or unicode",
1906 #else
1907 "%s arg must be None or str",
1908 #endif
1909 STRIPNAME(striptype));
1910 return NULL;
1913 return do_strip(self, striptype);
1917 PyDoc_STRVAR(strip__doc__,
1918 "S.strip([chars]) -> string or unicode\n\
1920 Return a copy of the string S with leading and trailing\n\
1921 whitespace removed.\n\
1922 If chars is given and not None, remove characters in chars instead.\n\
1923 If chars is unicode, S will be converted to unicode before stripping");
1925 static PyObject *
1926 string_strip(PyStringObject *self, PyObject *args)
1928 if (PyTuple_GET_SIZE(args) == 0)
1929 return do_strip(self, BOTHSTRIP); /* Common case */
1930 else
1931 return do_argstrip(self, BOTHSTRIP, args);
1935 PyDoc_STRVAR(lstrip__doc__,
1936 "S.lstrip([chars]) -> string or unicode\n\
1938 Return a copy of the string S with leading whitespace removed.\n\
1939 If chars is given and not None, remove characters in chars instead.\n\
1940 If chars is unicode, S will be converted to unicode before stripping");
1942 static PyObject *
1943 string_lstrip(PyStringObject *self, PyObject *args)
1945 if (PyTuple_GET_SIZE(args) == 0)
1946 return do_strip(self, LEFTSTRIP); /* Common case */
1947 else
1948 return do_argstrip(self, LEFTSTRIP, args);
1952 PyDoc_STRVAR(rstrip__doc__,
1953 "S.rstrip([chars]) -> string or unicode\n\
1955 Return a copy of the string S with trailing whitespace removed.\n\
1956 If chars is given and not None, remove characters in chars instead.\n\
1957 If chars is unicode, S will be converted to unicode before stripping");
1959 static PyObject *
1960 string_rstrip(PyStringObject *self, PyObject *args)
1962 if (PyTuple_GET_SIZE(args) == 0)
1963 return do_strip(self, RIGHTSTRIP); /* Common case */
1964 else
1965 return do_argstrip(self, RIGHTSTRIP, args);
1969 PyDoc_STRVAR(lower__doc__,
1970 "S.lower() -> string\n\
1972 Return a copy of the string S converted to lowercase.");
1974 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1975 #ifndef _tolower
1976 #define _tolower tolower
1977 #endif
1979 static PyObject *
1980 string_lower(PyStringObject *self)
1982 char *s;
1983 Py_ssize_t i, n = PyString_GET_SIZE(self);
1984 PyObject *newobj;
1986 newobj = PyString_FromStringAndSize(NULL, n);
1987 if (!newobj)
1988 return NULL;
1990 s = PyString_AS_STRING(newobj);
1992 Py_MEMCPY(s, PyString_AS_STRING(self), n);
1994 for (i = 0; i < n; i++) {
1995 int c = Py_CHARMASK(s[i]);
1996 if (isupper(c))
1997 s[i] = _tolower(c);
2000 return newobj;
2003 PyDoc_STRVAR(upper__doc__,
2004 "S.upper() -> string\n\
2006 Return a copy of the string S converted to uppercase.");
2008 #ifndef _toupper
2009 #define _toupper toupper
2010 #endif
2012 static PyObject *
2013 string_upper(PyStringObject *self)
2015 char *s;
2016 Py_ssize_t i, n = PyString_GET_SIZE(self);
2017 PyObject *newobj;
2019 newobj = PyString_FromStringAndSize(NULL, n);
2020 if (!newobj)
2021 return NULL;
2023 s = PyString_AS_STRING(newobj);
2025 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2027 for (i = 0; i < n; i++) {
2028 int c = Py_CHARMASK(s[i]);
2029 if (islower(c))
2030 s[i] = _toupper(c);
2033 return newobj;
2036 PyDoc_STRVAR(title__doc__,
2037 "S.title() -> string\n\
2039 Return a titlecased version of S, i.e. words start with uppercase\n\
2040 characters, all remaining cased characters have lowercase.");
2042 static PyObject*
2043 string_title(PyStringObject *self)
2045 char *s = PyString_AS_STRING(self), *s_new;
2046 Py_ssize_t i, n = PyString_GET_SIZE(self);
2047 int previous_is_cased = 0;
2048 PyObject *newobj;
2050 newobj = PyString_FromStringAndSize(NULL, n);
2051 if (newobj == NULL)
2052 return NULL;
2053 s_new = PyString_AsString(newobj);
2054 for (i = 0; i < n; i++) {
2055 int c = Py_CHARMASK(*s++);
2056 if (islower(c)) {
2057 if (!previous_is_cased)
2058 c = toupper(c);
2059 previous_is_cased = 1;
2060 } else if (isupper(c)) {
2061 if (previous_is_cased)
2062 c = tolower(c);
2063 previous_is_cased = 1;
2064 } else
2065 previous_is_cased = 0;
2066 *s_new++ = c;
2068 return newobj;
2071 PyDoc_STRVAR(capitalize__doc__,
2072 "S.capitalize() -> string\n\
2074 Return a copy of the string S with only its first character\n\
2075 capitalized.");
2077 static PyObject *
2078 string_capitalize(PyStringObject *self)
2080 char *s = PyString_AS_STRING(self), *s_new;
2081 Py_ssize_t i, n = PyString_GET_SIZE(self);
2082 PyObject *newobj;
2084 newobj = PyString_FromStringAndSize(NULL, n);
2085 if (newobj == NULL)
2086 return NULL;
2087 s_new = PyString_AsString(newobj);
2088 if (0 < n) {
2089 int c = Py_CHARMASK(*s++);
2090 if (islower(c))
2091 *s_new = toupper(c);
2092 else
2093 *s_new = c;
2094 s_new++;
2096 for (i = 1; i < n; i++) {
2097 int c = Py_CHARMASK(*s++);
2098 if (isupper(c))
2099 *s_new = tolower(c);
2100 else
2101 *s_new = c;
2102 s_new++;
2104 return newobj;
2108 PyDoc_STRVAR(count__doc__,
2109 "S.count(sub[, start[, end]]) -> int\n\
2111 Return the number of non-overlapping occurrences of substring sub in\n\
2112 string S[start:end]. Optional arguments start and end are interpreted\n\
2113 as in slice notation.");
2115 static PyObject *
2116 string_count(PyStringObject *self, PyObject *args)
2118 PyObject *sub_obj;
2119 const char *str = PyString_AS_STRING(self), *sub;
2120 Py_ssize_t sub_len;
2121 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2123 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2124 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2125 return NULL;
2127 if (PyString_Check(sub_obj)) {
2128 sub = PyString_AS_STRING(sub_obj);
2129 sub_len = PyString_GET_SIZE(sub_obj);
2131 #ifdef Py_USING_UNICODE
2132 else if (PyUnicode_Check(sub_obj)) {
2133 Py_ssize_t count;
2134 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2135 if (count == -1)
2136 return NULL;
2137 else
2138 return PyInt_FromSsize_t(count);
2140 #endif
2141 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2142 return NULL;
2144 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2146 return PyInt_FromSsize_t(
2147 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2151 PyDoc_STRVAR(swapcase__doc__,
2152 "S.swapcase() -> string\n\
2154 Return a copy of the string S with uppercase characters\n\
2155 converted to lowercase and vice versa.");
2157 static PyObject *
2158 string_swapcase(PyStringObject *self)
2160 char *s = PyString_AS_STRING(self), *s_new;
2161 Py_ssize_t i, n = PyString_GET_SIZE(self);
2162 PyObject *newobj;
2164 newobj = PyString_FromStringAndSize(NULL, n);
2165 if (newobj == NULL)
2166 return NULL;
2167 s_new = PyString_AsString(newobj);
2168 for (i = 0; i < n; i++) {
2169 int c = Py_CHARMASK(*s++);
2170 if (islower(c)) {
2171 *s_new = toupper(c);
2173 else if (isupper(c)) {
2174 *s_new = tolower(c);
2176 else
2177 *s_new = c;
2178 s_new++;
2180 return newobj;
2184 PyDoc_STRVAR(translate__doc__,
2185 "S.translate(table [,deletechars]) -> string\n\
2187 Return a copy of the string S, where all characters occurring\n\
2188 in the optional argument deletechars are removed, and the\n\
2189 remaining characters have been mapped through the given\n\
2190 translation table, which must be a string of length 256.");
2192 static PyObject *
2193 string_translate(PyStringObject *self, PyObject *args)
2195 register char *input, *output;
2196 const char *table;
2197 register Py_ssize_t i, c, changed = 0;
2198 PyObject *input_obj = (PyObject*)self;
2199 const char *output_start, *del_table=NULL;
2200 Py_ssize_t inlen, tablen, dellen = 0;
2201 PyObject *result;
2202 int trans_table[256];
2203 PyObject *tableobj, *delobj = NULL;
2205 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2206 &tableobj, &delobj))
2207 return NULL;
2209 if (PyString_Check(tableobj)) {
2210 table = PyString_AS_STRING(tableobj);
2211 tablen = PyString_GET_SIZE(tableobj);
2213 else if (tableobj == Py_None) {
2214 table = NULL;
2215 tablen = 256;
2217 #ifdef Py_USING_UNICODE
2218 else if (PyUnicode_Check(tableobj)) {
2219 /* Unicode .translate() does not support the deletechars
2220 parameter; instead a mapping to None will cause characters
2221 to be deleted. */
2222 if (delobj != NULL) {
2223 PyErr_SetString(PyExc_TypeError,
2224 "deletions are implemented differently for unicode");
2225 return NULL;
2227 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2229 #endif
2230 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2231 return NULL;
2233 if (tablen != 256) {
2234 PyErr_SetString(PyExc_ValueError,
2235 "translation table must be 256 characters long");
2236 return NULL;
2239 if (delobj != NULL) {
2240 if (PyString_Check(delobj)) {
2241 del_table = PyString_AS_STRING(delobj);
2242 dellen = PyString_GET_SIZE(delobj);
2244 #ifdef Py_USING_UNICODE
2245 else if (PyUnicode_Check(delobj)) {
2246 PyErr_SetString(PyExc_TypeError,
2247 "deletions are implemented differently for unicode");
2248 return NULL;
2250 #endif
2251 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2252 return NULL;
2254 else {
2255 del_table = NULL;
2256 dellen = 0;
2259 inlen = PyString_GET_SIZE(input_obj);
2260 result = PyString_FromStringAndSize((char *)NULL, inlen);
2261 if (result == NULL)
2262 return NULL;
2263 output_start = output = PyString_AsString(result);
2264 input = PyString_AS_STRING(input_obj);
2266 if (dellen == 0 && table != NULL) {
2267 /* If no deletions are required, use faster code */
2268 for (i = inlen; --i >= 0; ) {
2269 c = Py_CHARMASK(*input++);
2270 if (Py_CHARMASK((*output++ = table[c])) != c)
2271 changed = 1;
2273 if (changed || !PyString_CheckExact(input_obj))
2274 return result;
2275 Py_DECREF(result);
2276 Py_INCREF(input_obj);
2277 return input_obj;
2280 if (table == NULL) {
2281 for (i = 0; i < 256; i++)
2282 trans_table[i] = Py_CHARMASK(i);
2283 } else {
2284 for (i = 0; i < 256; i++)
2285 trans_table[i] = Py_CHARMASK(table[i]);
2288 for (i = 0; i < dellen; i++)
2289 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2291 for (i = inlen; --i >= 0; ) {
2292 c = Py_CHARMASK(*input++);
2293 if (trans_table[c] != -1)
2294 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2295 continue;
2296 changed = 1;
2298 if (!changed && PyString_CheckExact(input_obj)) {
2299 Py_DECREF(result);
2300 Py_INCREF(input_obj);
2301 return input_obj;
2303 /* Fix the size of the resulting string */
2304 if (inlen > 0)
2305 _PyString_Resize(&result, output - output_start);
2306 return result;
/* find and count characters and substrings */

/* Locate byte `c` in the first `target_len` bytes of `target`;
   evaluates to a char * to the first match or NULL. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2315 /* String ops must return a string. */
2316 /* If the object is subclass of string, create a copy */
2317 Py_LOCAL(PyStringObject *)
2318 return_self(PyStringObject *self)
2320 if (PyString_CheckExact(self)) {
2321 Py_INCREF(self);
2322 return self;
2324 return (PyStringObject *)PyString_FromStringAndSize(
2325 PyString_AS_STRING(self),
2326 PyString_GET_SIZE(self));
2329 Py_LOCAL_INLINE(Py_ssize_t)
2330 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2332 Py_ssize_t count=0;
2333 const char *start=target;
2334 const char *end=target+target_len;
2336 while ( (start=findchar(start, end-start, c)) != NULL ) {
2337 count++;
2338 if (count >= maxcount)
2339 break;
2340 start += 1;
2342 return count;
2346 /* Algorithms for different cases of string replacement */
2348 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2349 Py_LOCAL(PyStringObject *)
2350 replace_interleave(PyStringObject *self,
2351 const char *to_s, Py_ssize_t to_len,
2352 Py_ssize_t maxcount)
2354 char *self_s, *result_s;
2355 Py_ssize_t self_len, result_len;
2356 Py_ssize_t count, i, product;
2357 PyStringObject *result;
2359 self_len = PyString_GET_SIZE(self);
2361 /* 1 at the end plus 1 after every character */
2362 count = self_len+1;
2363 if (maxcount < count)
2364 count = maxcount;
2366 /* Check for overflow */
2367 /* result_len = count * to_len + self_len; */
2368 product = count * to_len;
2369 if (product / to_len != count) {
2370 PyErr_SetString(PyExc_OverflowError,
2371 "replace string is too long");
2372 return NULL;
2374 result_len = product + self_len;
2375 if (result_len < 0) {
2376 PyErr_SetString(PyExc_OverflowError,
2377 "replace string is too long");
2378 return NULL;
2381 if (! (result = (PyStringObject *)
2382 PyString_FromStringAndSize(NULL, result_len)) )
2383 return NULL;
2385 self_s = PyString_AS_STRING(self);
2386 result_s = PyString_AS_STRING(result);
2388 /* TODO: special case single character, which doesn't need memcpy */
2390 /* Lay the first one down (guaranteed this will occur) */
2391 Py_MEMCPY(result_s, to_s, to_len);
2392 result_s += to_len;
2393 count -= 1;
2395 for (i=0; i<count; i++) {
2396 *result_s++ = *self_s++;
2397 Py_MEMCPY(result_s, to_s, to_len);
2398 result_s += to_len;
2401 /* Copy the rest of the original string */
2402 Py_MEMCPY(result_s, self_s, self_len-i);
2404 return result;
2407 /* Special case for deleting a single character */
2408 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2409 Py_LOCAL(PyStringObject *)
2410 replace_delete_single_character(PyStringObject *self,
2411 char from_c, Py_ssize_t maxcount)
2413 char *self_s, *result_s;
2414 char *start, *next, *end;
2415 Py_ssize_t self_len, result_len;
2416 Py_ssize_t count;
2417 PyStringObject *result;
2419 self_len = PyString_GET_SIZE(self);
2420 self_s = PyString_AS_STRING(self);
2422 count = countchar(self_s, self_len, from_c, maxcount);
2423 if (count == 0) {
2424 return return_self(self);
2427 result_len = self_len - count; /* from_len == 1 */
2428 assert(result_len>=0);
2430 if ( (result = (PyStringObject *)
2431 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2432 return NULL;
2433 result_s = PyString_AS_STRING(result);
2435 start = self_s;
2436 end = self_s + self_len;
2437 while (count-- > 0) {
2438 next = findchar(start, end-start, from_c);
2439 if (next == NULL)
2440 break;
2441 Py_MEMCPY(result_s, start, next-start);
2442 result_s += (next-start);
2443 start = next+1;
2445 Py_MEMCPY(result_s, start, end-start);
2447 return result;
2450 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2452 Py_LOCAL(PyStringObject *)
2453 replace_delete_substring(PyStringObject *self,
2454 const char *from_s, Py_ssize_t from_len,
2455 Py_ssize_t maxcount) {
2456 char *self_s, *result_s;
2457 char *start, *next, *end;
2458 Py_ssize_t self_len, result_len;
2459 Py_ssize_t count, offset;
2460 PyStringObject *result;
2462 self_len = PyString_GET_SIZE(self);
2463 self_s = PyString_AS_STRING(self);
2465 count = stringlib_count(self_s, self_len,
2466 from_s, from_len,
2467 maxcount);
2469 if (count == 0) {
2470 /* no matches */
2471 return return_self(self);
2474 result_len = self_len - (count * from_len);
2475 assert (result_len>=0);
2477 if ( (result = (PyStringObject *)
2478 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2479 return NULL;
2481 result_s = PyString_AS_STRING(result);
2483 start = self_s;
2484 end = self_s + self_len;
2485 while (count-- > 0) {
2486 offset = stringlib_find(start, end-start,
2487 from_s, from_len,
2489 if (offset == -1)
2490 break;
2491 next = start + offset;
2493 Py_MEMCPY(result_s, start, next-start);
2495 result_s += (next-start);
2496 start = next+from_len;
2498 Py_MEMCPY(result_s, start, end-start);
2499 return result;
2502 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2503 Py_LOCAL(PyStringObject *)
2504 replace_single_character_in_place(PyStringObject *self,
2505 char from_c, char to_c,
2506 Py_ssize_t maxcount)
2508 char *self_s, *result_s, *start, *end, *next;
2509 Py_ssize_t self_len;
2510 PyStringObject *result;
2512 /* The result string will be the same size */
2513 self_s = PyString_AS_STRING(self);
2514 self_len = PyString_GET_SIZE(self);
2516 next = findchar(self_s, self_len, from_c);
2518 if (next == NULL) {
2519 /* No matches; return the original string */
2520 return return_self(self);
2523 /* Need to make a new string */
2524 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2525 if (result == NULL)
2526 return NULL;
2527 result_s = PyString_AS_STRING(result);
2528 Py_MEMCPY(result_s, self_s, self_len);
2530 /* change everything in-place, starting with this one */
2531 start = result_s + (next-self_s);
2532 *start = to_c;
2533 start++;
2534 end = result_s + self_len;
2536 while (--maxcount > 0) {
2537 next = findchar(start, end-start, from_c);
2538 if (next == NULL)
2539 break;
2540 *next = to_c;
2541 start = next+1;
2544 return result;
2547 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2548 Py_LOCAL(PyStringObject *)
2549 replace_substring_in_place(PyStringObject *self,
2550 const char *from_s, Py_ssize_t from_len,
2551 const char *to_s, Py_ssize_t to_len,
2552 Py_ssize_t maxcount)
2554 char *result_s, *start, *end;
2555 char *self_s;
2556 Py_ssize_t self_len, offset;
2557 PyStringObject *result;
2559 /* The result string will be the same size */
2561 self_s = PyString_AS_STRING(self);
2562 self_len = PyString_GET_SIZE(self);
2564 offset = stringlib_find(self_s, self_len,
2565 from_s, from_len,
2567 if (offset == -1) {
2568 /* No matches; return the original string */
2569 return return_self(self);
2572 /* Need to make a new string */
2573 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2574 if (result == NULL)
2575 return NULL;
2576 result_s = PyString_AS_STRING(result);
2577 Py_MEMCPY(result_s, self_s, self_len);
2579 /* change everything in-place, starting with this one */
2580 start = result_s + offset;
2581 Py_MEMCPY(start, to_s, from_len);
2582 start += from_len;
2583 end = result_s + self_len;
2585 while ( --maxcount > 0) {
2586 offset = stringlib_find(start, end-start,
2587 from_s, from_len,
2589 if (offset==-1)
2590 break;
2591 Py_MEMCPY(start+offset, to_s, from_len);
2592 start += offset+from_len;
2595 return result;
2598 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2599 Py_LOCAL(PyStringObject *)
2600 replace_single_character(PyStringObject *self,
2601 char from_c,
2602 const char *to_s, Py_ssize_t to_len,
2603 Py_ssize_t maxcount)
2605 char *self_s, *result_s;
2606 char *start, *next, *end;
2607 Py_ssize_t self_len, result_len;
2608 Py_ssize_t count, product;
2609 PyStringObject *result;
2611 self_s = PyString_AS_STRING(self);
2612 self_len = PyString_GET_SIZE(self);
2614 count = countchar(self_s, self_len, from_c, maxcount);
2615 if (count == 0) {
2616 /* no matches, return unchanged */
2617 return return_self(self);
2620 /* use the difference between current and new, hence the "-1" */
2621 /* result_len = self_len + count * (to_len-1) */
2622 product = count * (to_len-1);
2623 if (product / (to_len-1) != count) {
2624 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2625 return NULL;
2627 result_len = self_len + product;
2628 if (result_len < 0) {
2629 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2630 return NULL;
2633 if ( (result = (PyStringObject *)
2634 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2635 return NULL;
2636 result_s = PyString_AS_STRING(result);
2638 start = self_s;
2639 end = self_s + self_len;
2640 while (count-- > 0) {
2641 next = findchar(start, end-start, from_c);
2642 if (next == NULL)
2643 break;
2645 if (next == start) {
2646 /* replace with the 'to' */
2647 Py_MEMCPY(result_s, to_s, to_len);
2648 result_s += to_len;
2649 start += 1;
2650 } else {
2651 /* copy the unchanged old then the 'to' */
2652 Py_MEMCPY(result_s, start, next-start);
2653 result_s += (next-start);
2654 Py_MEMCPY(result_s, to_s, to_len);
2655 result_s += to_len;
2656 start = next+1;
2659 /* Copy the remainder of the remaining string */
2660 Py_MEMCPY(result_s, start, end-start);
2662 return result;
2665 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2666 Py_LOCAL(PyStringObject *)
2667 replace_substring(PyStringObject *self,
2668 const char *from_s, Py_ssize_t from_len,
2669 const char *to_s, Py_ssize_t to_len,
2670 Py_ssize_t maxcount) {
2671 char *self_s, *result_s;
2672 char *start, *next, *end;
2673 Py_ssize_t self_len, result_len;
2674 Py_ssize_t count, offset, product;
2675 PyStringObject *result;
2677 self_s = PyString_AS_STRING(self);
2678 self_len = PyString_GET_SIZE(self);
2680 count = stringlib_count(self_s, self_len,
2681 from_s, from_len,
2682 maxcount);
2684 if (count == 0) {
2685 /* no matches, return unchanged */
2686 return return_self(self);
2689 /* Check for overflow */
2690 /* result_len = self_len + count * (to_len-from_len) */
2691 product = count * (to_len-from_len);
2692 if (product / (to_len-from_len) != count) {
2693 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2694 return NULL;
2696 result_len = self_len + product;
2697 if (result_len < 0) {
2698 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2699 return NULL;
2702 if ( (result = (PyStringObject *)
2703 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2704 return NULL;
2705 result_s = PyString_AS_STRING(result);
2707 start = self_s;
2708 end = self_s + self_len;
2709 while (count-- > 0) {
2710 offset = stringlib_find(start, end-start,
2711 from_s, from_len,
2713 if (offset == -1)
2714 break;
2715 next = start+offset;
2716 if (next == start) {
2717 /* replace with the 'to' */
2718 Py_MEMCPY(result_s, to_s, to_len);
2719 result_s += to_len;
2720 start += from_len;
2721 } else {
2722 /* copy the unchanged old then the 'to' */
2723 Py_MEMCPY(result_s, start, next-start);
2724 result_s += (next-start);
2725 Py_MEMCPY(result_s, to_s, to_len);
2726 result_s += to_len;
2727 start = next+from_len;
2730 /* Copy the remainder of the remaining string */
2731 Py_MEMCPY(result_s, start, end-start);
2733 return result;
2737 Py_LOCAL(PyStringObject *)
2738 replace(PyStringObject *self,
2739 const char *from_s, Py_ssize_t from_len,
2740 const char *to_s, Py_ssize_t to_len,
2741 Py_ssize_t maxcount)
2743 if (maxcount < 0) {
2744 maxcount = PY_SSIZE_T_MAX;
2745 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2746 /* nothing to do; return the original string */
2747 return return_self(self);
2750 if (maxcount == 0 ||
2751 (from_len == 0 && to_len == 0)) {
2752 /* nothing to do; return the original string */
2753 return return_self(self);
2756 /* Handle zero-length special cases */
2758 if (from_len == 0) {
2759 /* insert the 'to' string everywhere. */
2760 /* >>> "Python".replace("", ".") */
2761 /* '.P.y.t.h.o.n.' */
2762 return replace_interleave(self, to_s, to_len, maxcount);
2765 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2766 /* point for an empty self string to generate a non-empty string */
2767 /* Special case so the remaining code always gets a non-empty string */
2768 if (PyString_GET_SIZE(self) == 0) {
2769 return return_self(self);
2772 if (to_len == 0) {
2773 /* delete all occurances of 'from' string */
2774 if (from_len == 1) {
2775 return replace_delete_single_character(
2776 self, from_s[0], maxcount);
2777 } else {
2778 return replace_delete_substring(self, from_s, from_len, maxcount);
2782 /* Handle special case where both strings have the same length */
2784 if (from_len == to_len) {
2785 if (from_len == 1) {
2786 return replace_single_character_in_place(
2787 self,
2788 from_s[0],
2789 to_s[0],
2790 maxcount);
2791 } else {
2792 return replace_substring_in_place(
2793 self, from_s, from_len, to_s, to_len, maxcount);
2797 /* Otherwise use the more generic algorithms */
2798 if (from_len == 1) {
2799 return replace_single_character(self, from_s[0],
2800 to_s, to_len, maxcount);
2801 } else {
2802 /* len('from')>=2, len('to')>=1 */
2803 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2807 PyDoc_STRVAR(replace__doc__,
2808 "S.replace (old, new[, count]) -> string\n\
2810 Return a copy of string S with all occurrences of substring\n\
2811 old replaced by new. If the optional argument count is\n\
2812 given, only the first count occurrences are replaced.");
2814 static PyObject *
2815 string_replace(PyStringObject *self, PyObject *args)
2817 Py_ssize_t count = -1;
2818 PyObject *from, *to;
2819 const char *from_s, *to_s;
2820 Py_ssize_t from_len, to_len;
2822 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2823 return NULL;
2825 if (PyString_Check(from)) {
2826 from_s = PyString_AS_STRING(from);
2827 from_len = PyString_GET_SIZE(from);
2829 #ifdef Py_USING_UNICODE
2830 if (PyUnicode_Check(from))
2831 return PyUnicode_Replace((PyObject *)self,
2832 from, to, count);
2833 #endif
2834 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2835 return NULL;
2837 if (PyString_Check(to)) {
2838 to_s = PyString_AS_STRING(to);
2839 to_len = PyString_GET_SIZE(to);
2841 #ifdef Py_USING_UNICODE
2842 else if (PyUnicode_Check(to))
2843 return PyUnicode_Replace((PyObject *)self,
2844 from, to, count);
2845 #endif
2846 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2847 return NULL;
2849 return (PyObject *)replace((PyStringObject *) self,
2850 from_s, from_len,
2851 to_s, to_len, count);
2854 /** End DALKE **/
2856 /* Matches the end (direction >= 0) or start (direction < 0) of self
2857 * against substr, using the start and end arguments. Returns
2858 * -1 on error, 0 if not found and 1 if found.
2860 Py_LOCAL(int)
2861 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2862 Py_ssize_t end, int direction)
2864 Py_ssize_t len = PyString_GET_SIZE(self);
2865 Py_ssize_t slen;
2866 const char* sub;
2867 const char* str;
2869 if (PyString_Check(substr)) {
2870 sub = PyString_AS_STRING(substr);
2871 slen = PyString_GET_SIZE(substr);
2873 #ifdef Py_USING_UNICODE
2874 else if (PyUnicode_Check(substr))
2875 return PyUnicode_Tailmatch((PyObject *)self,
2876 substr, start, end, direction);
2877 #endif
2878 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2879 return -1;
2880 str = PyString_AS_STRING(self);
2882 ADJUST_INDICES(start, end, len);
2884 if (direction < 0) {
2885 /* startswith */
2886 if (start+slen > len)
2887 return 0;
2888 } else {
2889 /* endswith */
2890 if (end-start < slen || start > len)
2891 return 0;
2893 if (end-slen > start)
2894 start = end - slen;
2896 if (end-start >= slen)
2897 return ! memcmp(str+start, sub, slen);
2898 return 0;
2902 PyDoc_STRVAR(startswith__doc__,
2903 "S.startswith(prefix[, start[, end]]) -> bool\n\
2905 Return True if S starts with the specified prefix, False otherwise.\n\
2906 With optional start, test S beginning at that position.\n\
2907 With optional end, stop comparing S at that position.\n\
2908 prefix can also be a tuple of strings to try.");
2910 static PyObject *
2911 string_startswith(PyStringObject *self, PyObject *args)
2913 Py_ssize_t start = 0;
2914 Py_ssize_t end = PY_SSIZE_T_MAX;
2915 PyObject *subobj;
2916 int result;
2918 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2919 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2920 return NULL;
2921 if (PyTuple_Check(subobj)) {
2922 Py_ssize_t i;
2923 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2924 result = _string_tailmatch(self,
2925 PyTuple_GET_ITEM(subobj, i),
2926 start, end, -1);
2927 if (result == -1)
2928 return NULL;
2929 else if (result) {
2930 Py_RETURN_TRUE;
2933 Py_RETURN_FALSE;
2935 result = _string_tailmatch(self, subobj, start, end, -1);
2936 if (result == -1)
2937 return NULL;
2938 else
2939 return PyBool_FromLong(result);
2943 PyDoc_STRVAR(endswith__doc__,
2944 "S.endswith(suffix[, start[, end]]) -> bool\n\
2946 Return True if S ends with the specified suffix, False otherwise.\n\
2947 With optional start, test S beginning at that position.\n\
2948 With optional end, stop comparing S at that position.\n\
2949 suffix can also be a tuple of strings to try.");
2951 static PyObject *
2952 string_endswith(PyStringObject *self, PyObject *args)
2954 Py_ssize_t start = 0;
2955 Py_ssize_t end = PY_SSIZE_T_MAX;
2956 PyObject *subobj;
2957 int result;
2959 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2960 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2961 return NULL;
2962 if (PyTuple_Check(subobj)) {
2963 Py_ssize_t i;
2964 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2965 result = _string_tailmatch(self,
2966 PyTuple_GET_ITEM(subobj, i),
2967 start, end, +1);
2968 if (result == -1)
2969 return NULL;
2970 else if (result) {
2971 Py_RETURN_TRUE;
2974 Py_RETURN_FALSE;
2976 result = _string_tailmatch(self, subobj, start, end, +1);
2977 if (result == -1)
2978 return NULL;
2979 else
2980 return PyBool_FromLong(result);
2984 PyDoc_STRVAR(encode__doc__,
2985 "S.encode([encoding[,errors]]) -> object\n\
2987 Encodes S using the codec registered for encoding. encoding defaults\n\
2988 to the default encoding. errors may be given to set a different error\n\
2989 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2990 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2991 'xmlcharrefreplace' as well as any other name registered with\n\
2992 codecs.register_error that is able to handle UnicodeEncodeErrors.");
2994 static PyObject *
2995 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
2997 static char *kwlist[] = {"encoding", "errors", 0};
2998 char *encoding = NULL;
2999 char *errors = NULL;
3000 PyObject *v;
3002 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3003 kwlist, &encoding, &errors))
3004 return NULL;
3005 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3006 if (v == NULL)
3007 goto onError;
3008 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3009 PyErr_Format(PyExc_TypeError,
3010 "encoder did not return a string/unicode object "
3011 "(type=%.400s)",
3012 Py_TYPE(v)->tp_name);
3013 Py_DECREF(v);
3014 return NULL;
3016 return v;
3018 onError:
3019 return NULL;
3023 PyDoc_STRVAR(decode__doc__,
3024 "S.decode([encoding[,errors]]) -> object\n\
3026 Decodes S using the codec registered for encoding. encoding defaults\n\
3027 to the default encoding. errors may be given to set a different error\n\
3028 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3029 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3030 as well as any other name registered with codecs.register_error that is\n\
3031 able to handle UnicodeDecodeErrors.");
3033 static PyObject *
3034 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3036 static char *kwlist[] = {"encoding", "errors", 0};
3037 char *encoding = NULL;
3038 char *errors = NULL;
3039 PyObject *v;
3041 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3042 kwlist, &encoding, &errors))
3043 return NULL;
3044 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3045 if (v == NULL)
3046 goto onError;
3047 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3048 PyErr_Format(PyExc_TypeError,
3049 "decoder did not return a string/unicode object "
3050 "(type=%.400s)",
3051 Py_TYPE(v)->tp_name);
3052 Py_DECREF(v);
3053 return NULL;
3055 return v;
3057 onError:
3058 return NULL;
3062 PyDoc_STRVAR(expandtabs__doc__,
3063 "S.expandtabs([tabsize]) -> string\n\
3065 Return a copy of S where all tab characters are expanded using spaces.\n\
3066 If tabsize is not given, a tab size of 8 characters is assumed.");
3068 static PyObject*
3069 string_expandtabs(PyStringObject *self, PyObject *args)
3071 const char *e, *p, *qe;
3072 char *q;
3073 Py_ssize_t i, j, incr;
3074 PyObject *u;
3075 int tabsize = 8;
3077 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3078 return NULL;
3080 /* First pass: determine size of output string */
3081 i = 0; /* chars up to and including most recent \n or \r */
3082 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3083 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3084 for (p = PyString_AS_STRING(self); p < e; p++)
3085 if (*p == '\t') {
3086 if (tabsize > 0) {
3087 incr = tabsize - (j % tabsize);
3088 if (j > PY_SSIZE_T_MAX - incr)
3089 goto overflow1;
3090 j += incr;
3093 else {
3094 if (j > PY_SSIZE_T_MAX - 1)
3095 goto overflow1;
3096 j++;
3097 if (*p == '\n' || *p == '\r') {
3098 if (i > PY_SSIZE_T_MAX - j)
3099 goto overflow1;
3100 i += j;
3101 j = 0;
3105 if (i > PY_SSIZE_T_MAX - j)
3106 goto overflow1;
3108 /* Second pass: create output string and fill it */
3109 u = PyString_FromStringAndSize(NULL, i + j);
3110 if (!u)
3111 return NULL;
3113 j = 0; /* same as in first pass */
3114 q = PyString_AS_STRING(u); /* next output char */
3115 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3117 for (p = PyString_AS_STRING(self); p < e; p++)
3118 if (*p == '\t') {
3119 if (tabsize > 0) {
3120 i = tabsize - (j % tabsize);
3121 j += i;
3122 while (i--) {
3123 if (q >= qe)
3124 goto overflow2;
3125 *q++ = ' ';
3129 else {
3130 if (q >= qe)
3131 goto overflow2;
3132 *q++ = *p;
3133 j++;
3134 if (*p == '\n' || *p == '\r')
3135 j = 0;
3138 return u;
3140 overflow2:
3141 Py_DECREF(u);
3142 overflow1:
3143 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3144 return NULL;
3147 Py_LOCAL_INLINE(PyObject *)
3148 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3150 PyObject *u;
3152 if (left < 0)
3153 left = 0;
3154 if (right < 0)
3155 right = 0;
3157 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3158 Py_INCREF(self);
3159 return (PyObject *)self;
3162 u = PyString_FromStringAndSize(NULL,
3163 left + PyString_GET_SIZE(self) + right);
3164 if (u) {
3165 if (left)
3166 memset(PyString_AS_STRING(u), fill, left);
3167 Py_MEMCPY(PyString_AS_STRING(u) + left,
3168 PyString_AS_STRING(self),
3169 PyString_GET_SIZE(self));
3170 if (right)
3171 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3172 fill, right);
3175 return u;
3178 PyDoc_STRVAR(ljust__doc__,
3179 "S.ljust(width[, fillchar]) -> string\n"
3180 "\n"
3181 "Return S left-justified in a string of length width. Padding is\n"
3182 "done using the specified fill character (default is a space).");
3184 static PyObject *
3185 string_ljust(PyStringObject *self, PyObject *args)
3187 Py_ssize_t width;
3188 char fillchar = ' ';
3190 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3191 return NULL;
3193 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3194 Py_INCREF(self);
3195 return (PyObject*) self;
3198 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3202 PyDoc_STRVAR(rjust__doc__,
3203 "S.rjust(width[, fillchar]) -> string\n"
3204 "\n"
3205 "Return S right-justified in a string of length width. Padding is\n"
3206 "done using the specified fill character (default is a space)");
3208 static PyObject *
3209 string_rjust(PyStringObject *self, PyObject *args)
3211 Py_ssize_t width;
3212 char fillchar = ' ';
3214 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3215 return NULL;
3217 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3218 Py_INCREF(self);
3219 return (PyObject*) self;
3222 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3226 PyDoc_STRVAR(center__doc__,
3227 "S.center(width[, fillchar]) -> string\n"
3228 "\n"
3229 "Return S centered in a string of length width. Padding is\n"
3230 "done using the specified fill character (default is a space)");
3232 static PyObject *
3233 string_center(PyStringObject *self, PyObject *args)
3235 Py_ssize_t marg, left;
3236 Py_ssize_t width;
3237 char fillchar = ' ';
3239 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3240 return NULL;
3242 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3243 Py_INCREF(self);
3244 return (PyObject*) self;
3247 marg = width - PyString_GET_SIZE(self);
3248 left = marg / 2 + (marg & width & 1);
3250 return pad(self, left, marg - left, fillchar);
3253 PyDoc_STRVAR(zfill__doc__,
3254 "S.zfill(width) -> string\n"
3255 "\n"
3256 "Pad a numeric string S with zeros on the left, to fill a field\n"
3257 "of the specified width. The string S is never truncated.");
3259 static PyObject *
3260 string_zfill(PyStringObject *self, PyObject *args)
3262 Py_ssize_t fill;
3263 PyObject *s;
3264 char *p;
3265 Py_ssize_t width;
3267 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3268 return NULL;
3270 if (PyString_GET_SIZE(self) >= width) {
3271 if (PyString_CheckExact(self)) {
3272 Py_INCREF(self);
3273 return (PyObject*) self;
3275 else
3276 return PyString_FromStringAndSize(
3277 PyString_AS_STRING(self),
3278 PyString_GET_SIZE(self)
3282 fill = width - PyString_GET_SIZE(self);
3284 s = pad(self, fill, 0, '0');
3286 if (s == NULL)
3287 return NULL;
3289 p = PyString_AS_STRING(s);
3290 if (p[fill] == '+' || p[fill] == '-') {
3291 /* move sign to beginning of string */
3292 p[0] = p[fill];
3293 p[fill] = '0';
3296 return (PyObject*) s;
3299 PyDoc_STRVAR(isspace__doc__,
3300 "S.isspace() -> bool\n\
3302 Return True if all characters in S are whitespace\n\
3303 and there is at least one character in S, False otherwise.");
3305 static PyObject*
3306 string_isspace(PyStringObject *self)
3308 register const unsigned char *p
3309 = (unsigned char *) PyString_AS_STRING(self);
3310 register const unsigned char *e;
3312 /* Shortcut for single character strings */
3313 if (PyString_GET_SIZE(self) == 1 &&
3314 isspace(*p))
3315 return PyBool_FromLong(1);
3317 /* Special case for empty strings */
3318 if (PyString_GET_SIZE(self) == 0)
3319 return PyBool_FromLong(0);
3321 e = p + PyString_GET_SIZE(self);
3322 for (; p < e; p++) {
3323 if (!isspace(*p))
3324 return PyBool_FromLong(0);
3326 return PyBool_FromLong(1);
3330 PyDoc_STRVAR(isalpha__doc__,
3331 "S.isalpha() -> bool\n\
3333 Return True if all characters in S are alphabetic\n\
3334 and there is at least one character in S, False otherwise.");
3336 static PyObject*
3337 string_isalpha(PyStringObject *self)
3339 register const unsigned char *p
3340 = (unsigned char *) PyString_AS_STRING(self);
3341 register const unsigned char *e;
3343 /* Shortcut for single character strings */
3344 if (PyString_GET_SIZE(self) == 1 &&
3345 isalpha(*p))
3346 return PyBool_FromLong(1);
3348 /* Special case for empty strings */
3349 if (PyString_GET_SIZE(self) == 0)
3350 return PyBool_FromLong(0);
3352 e = p + PyString_GET_SIZE(self);
3353 for (; p < e; p++) {
3354 if (!isalpha(*p))
3355 return PyBool_FromLong(0);
3357 return PyBool_FromLong(1);
3361 PyDoc_STRVAR(isalnum__doc__,
3362 "S.isalnum() -> bool\n\
3364 Return True if all characters in S are alphanumeric\n\
3365 and there is at least one character in S, False otherwise.");
3367 static PyObject*
3368 string_isalnum(PyStringObject *self)
3370 register const unsigned char *p
3371 = (unsigned char *) PyString_AS_STRING(self);
3372 register const unsigned char *e;
3374 /* Shortcut for single character strings */
3375 if (PyString_GET_SIZE(self) == 1 &&
3376 isalnum(*p))
3377 return PyBool_FromLong(1);
3379 /* Special case for empty strings */
3380 if (PyString_GET_SIZE(self) == 0)
3381 return PyBool_FromLong(0);
3383 e = p + PyString_GET_SIZE(self);
3384 for (; p < e; p++) {
3385 if (!isalnum(*p))
3386 return PyBool_FromLong(0);
3388 return PyBool_FromLong(1);
3392 PyDoc_STRVAR(isdigit__doc__,
3393 "S.isdigit() -> bool\n\
3395 Return True if all characters in S are digits\n\
3396 and there is at least one character in S, False otherwise.");
3398 static PyObject*
3399 string_isdigit(PyStringObject *self)
3401 register const unsigned char *p
3402 = (unsigned char *) PyString_AS_STRING(self);
3403 register const unsigned char *e;
3405 /* Shortcut for single character strings */
3406 if (PyString_GET_SIZE(self) == 1 &&
3407 isdigit(*p))
3408 return PyBool_FromLong(1);
3410 /* Special case for empty strings */
3411 if (PyString_GET_SIZE(self) == 0)
3412 return PyBool_FromLong(0);
3414 e = p + PyString_GET_SIZE(self);
3415 for (; p < e; p++) {
3416 if (!isdigit(*p))
3417 return PyBool_FromLong(0);
3419 return PyBool_FromLong(1);
3423 PyDoc_STRVAR(islower__doc__,
3424 "S.islower() -> bool\n\
3426 Return True if all cased characters in S are lowercase and there is\n\
3427 at least one cased character in S, False otherwise.");
3429 static PyObject*
3430 string_islower(PyStringObject *self)
3432 register const unsigned char *p
3433 = (unsigned char *) PyString_AS_STRING(self);
3434 register const unsigned char *e;
3435 int cased;
3437 /* Shortcut for single character strings */
3438 if (PyString_GET_SIZE(self) == 1)
3439 return PyBool_FromLong(islower(*p) != 0);
3441 /* Special case for empty strings */
3442 if (PyString_GET_SIZE(self) == 0)
3443 return PyBool_FromLong(0);
3445 e = p + PyString_GET_SIZE(self);
3446 cased = 0;
3447 for (; p < e; p++) {
3448 if (isupper(*p))
3449 return PyBool_FromLong(0);
3450 else if (!cased && islower(*p))
3451 cased = 1;
3453 return PyBool_FromLong(cased);
3457 PyDoc_STRVAR(isupper__doc__,
3458 "S.isupper() -> bool\n\
3460 Return True if all cased characters in S are uppercase and there is\n\
3461 at least one cased character in S, False otherwise.");
3463 static PyObject*
3464 string_isupper(PyStringObject *self)
3466 register const unsigned char *p
3467 = (unsigned char *) PyString_AS_STRING(self);
3468 register const unsigned char *e;
3469 int cased;
3471 /* Shortcut for single character strings */
3472 if (PyString_GET_SIZE(self) == 1)
3473 return PyBool_FromLong(isupper(*p) != 0);
3475 /* Special case for empty strings */
3476 if (PyString_GET_SIZE(self) == 0)
3477 return PyBool_FromLong(0);
3479 e = p + PyString_GET_SIZE(self);
3480 cased = 0;
3481 for (; p < e; p++) {
3482 if (islower(*p))
3483 return PyBool_FromLong(0);
3484 else if (!cased && isupper(*p))
3485 cased = 1;
3487 return PyBool_FromLong(cased);
3491 PyDoc_STRVAR(istitle__doc__,
3492 "S.istitle() -> bool\n\
3494 Return True if S is a titlecased string and there is at least one\n\
3495 character in S, i.e. uppercase characters may only follow uncased\n\
3496 characters and lowercase characters only cased ones. Return False\n\
3497 otherwise.");
3499 static PyObject*
3500 string_istitle(PyStringObject *self, PyObject *uncased)
3502 register const unsigned char *p
3503 = (unsigned char *) PyString_AS_STRING(self);
3504 register const unsigned char *e;
3505 int cased, previous_is_cased;
3507 /* Shortcut for single character strings */
3508 if (PyString_GET_SIZE(self) == 1)
3509 return PyBool_FromLong(isupper(*p) != 0);
3511 /* Special case for empty strings */
3512 if (PyString_GET_SIZE(self) == 0)
3513 return PyBool_FromLong(0);
3515 e = p + PyString_GET_SIZE(self);
3516 cased = 0;
3517 previous_is_cased = 0;
3518 for (; p < e; p++) {
3519 register const unsigned char ch = *p;
3521 if (isupper(ch)) {
3522 if (previous_is_cased)
3523 return PyBool_FromLong(0);
3524 previous_is_cased = 1;
3525 cased = 1;
3527 else if (islower(ch)) {
3528 if (!previous_is_cased)
3529 return PyBool_FromLong(0);
3530 previous_is_cased = 1;
3531 cased = 1;
3533 else
3534 previous_is_cased = 0;
3536 return PyBool_FromLong(cased);
3540 PyDoc_STRVAR(splitlines__doc__,
3541 "S.splitlines([keepends]) -> list of strings\n\
3543 Return a list of the lines in S, breaking at line boundaries.\n\
3544 Line breaks are not included in the resulting list unless keepends\n\
3545 is given and true.");
3547 static PyObject*
3548 string_splitlines(PyStringObject *self, PyObject *args)
3550 int keepends = 0;
3552 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3553 return NULL;
3555 return stringlib_splitlines(
3556 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3557 keepends
3561 PyDoc_STRVAR(sizeof__doc__,
3562 "S.__sizeof__() -> size of S in memory, in bytes");
3564 static PyObject *
3565 string_sizeof(PyStringObject *v)
3567 Py_ssize_t res;
3568 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3569 return PyInt_FromSsize_t(res);
3572 static PyObject *
3573 string_getnewargs(PyStringObject *v)
3575 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3579 #include "stringlib/string_format.h"
3581 PyDoc_STRVAR(format__doc__,
3582 "S.format(*args, **kwargs) -> unicode\n\
3586 static PyObject *
3587 string__format__(PyObject* self, PyObject* args)
3589 PyObject *format_spec;
3590 PyObject *result = NULL;
3591 PyObject *tmp = NULL;
3593 /* If 2.x, convert format_spec to the same type as value */
3594 /* This is to allow things like u''.format('') */
3595 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3596 goto done;
3597 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3598 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3599 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3600 goto done;
3602 tmp = PyObject_Str(format_spec);
3603 if (tmp == NULL)
3604 goto done;
3605 format_spec = tmp;
3607 result = _PyBytes_FormatAdvanced(self,
3608 PyString_AS_STRING(format_spec),
3609 PyString_GET_SIZE(format_spec));
3610 done:
3611 Py_XDECREF(tmp);
3612 return result;
3615 PyDoc_STRVAR(p_format__doc__,
3616 "S.__format__(format_spec) -> unicode\n\
3621 static PyMethodDef
3622 string_methods[] = {
3623 /* Counterparts of the obsolete stropmodule functions; except
3624 string.maketrans(). */
3625 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3626 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3627 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3628 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3629 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3630 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3631 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3632 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3633 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3634 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3635 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3636 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3637 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3638 capitalize__doc__},
3639 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3640 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3641 endswith__doc__},
3642 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3643 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3644 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3645 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3646 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3647 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3648 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3649 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3650 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3651 rpartition__doc__},
3652 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3653 startswith__doc__},
3654 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3655 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3656 swapcase__doc__},
3657 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3658 translate__doc__},
3659 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3660 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3661 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3662 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3663 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3664 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3665 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3666 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3667 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3668 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3669 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3670 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3671 expandtabs__doc__},
3672 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3673 splitlines__doc__},
3674 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3675 sizeof__doc__},
3676 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3677 {NULL, NULL} /* sentinel */
3680 static PyObject *
3681 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3683 static PyObject *
3684 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3686 PyObject *x = NULL;
3687 static char *kwlist[] = {"object", 0};
3689 if (type != &PyString_Type)
3690 return str_subtype_new(type, args, kwds);
3691 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3692 return NULL;
3693 if (x == NULL)
3694 return PyString_FromString("");
3695 return PyObject_Str(x);
3698 static PyObject *
3699 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3701 PyObject *tmp, *pnew;
3702 Py_ssize_t n;
3704 assert(PyType_IsSubtype(type, &PyString_Type));
3705 tmp = string_new(&PyString_Type, args, kwds);
3706 if (tmp == NULL)
3707 return NULL;
3708 assert(PyString_CheckExact(tmp));
3709 n = PyString_GET_SIZE(tmp);
3710 pnew = type->tp_alloc(type, n);
3711 if (pnew != NULL) {
3712 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3713 ((PyStringObject *)pnew)->ob_shash =
3714 ((PyStringObject *)tmp)->ob_shash;
3715 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3717 Py_DECREF(tmp);
3718 return pnew;
3721 static PyObject *
3722 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3724 PyErr_SetString(PyExc_TypeError,
3725 "The basestring type cannot be instantiated");
3726 return NULL;
3729 static PyObject *
3730 string_mod(PyObject *v, PyObject *w)
3732 if (!PyString_Check(v)) {
3733 Py_INCREF(Py_NotImplemented);
3734 return Py_NotImplemented;
3736 return PyString_Format(v, w);
/* Docstring installed as tp_doc of PyBaseString_Type below. */
3739 PyDoc_STRVAR(basestring_doc,
3740 "Type basestring cannot be instantiated; it is the base for str and unicode.");
/* Number-protocol table referenced by PyString_Type.tp_as_number.
   The only populated slot is nb_remainder (string_mod), i.e. the `%`
   formatting operator; every other slot shown here is left empty. */
3742 static PyNumberMethods string_as_number = {
3743 0, /*nb_add*/
3744 0, /*nb_subtract*/
3745 0, /*nb_multiply*/
3746 0, /*nb_divide*/
3747 string_mod, /*nb_remainder*/
/* Type object for "basestring".
   It derives from PyBaseObject_Type, is subclassable (Py_TPFLAGS_BASETYPE)
   and installs basestring_new as tp_new, which always raises TypeError, so
   the type itself cannot be instantiated.  No behavioural slots are set. */
3751 PyTypeObject PyBaseString_Type = {
3752 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3753 "basestring",
3756 0, /* tp_dealloc */
3757 0, /* tp_print */
3758 0, /* tp_getattr */
3759 0, /* tp_setattr */
3760 0, /* tp_compare */
3761 0, /* tp_repr */
3762 0, /* tp_as_number */
3763 0, /* tp_as_sequence */
3764 0, /* tp_as_mapping */
3765 0, /* tp_hash */
3766 0, /* tp_call */
3767 0, /* tp_str */
3768 0, /* tp_getattro */
3769 0, /* tp_setattro */
3770 0, /* tp_as_buffer */
3771 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3772 basestring_doc, /* tp_doc */
3773 0, /* tp_traverse */
3774 0, /* tp_clear */
3775 0, /* tp_richcompare */
3776 0, /* tp_weaklistoffset */
3777 0, /* tp_iter */
3778 0, /* tp_iternext */
3779 0, /* tp_methods */
3780 0, /* tp_members */
3781 0, /* tp_getset */
3782 &PyBaseObject_Type, /* tp_base */
3783 0, /* tp_dict */
3784 0, /* tp_descr_get */
3785 0, /* tp_descr_set */
3786 0, /* tp_dictoffset */
3787 0, /* tp_init */
3788 0, /* tp_alloc */
3789 basestring_new, /* tp_new */
3790 0, /* tp_free */
/* Docstring installed as tp_doc of PyString_Type below. */
3793 PyDoc_STRVAR(string_doc,
3794 "str(object) -> string\n\
3796 Return a nice string representation of the object.\n\
3797 If the argument is a string, the return value is the same object.");
/* Type object for "str".
   Instances are variable-sized: the basic size is PyStringObject_SIZE and
   each item is one char.  The type derives from PyBaseString_Type, is
   subclassable, and wires in the number (% formatting), sequence, mapping
   and buffer protocol tables together with string_methods.  Construction
   goes through string_new; memory is released with PyObject_Del. */
3799 PyTypeObject PyString_Type = {
3800 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3801 "str",
3802 PyStringObject_SIZE,
3803 sizeof(char),
3804 string_dealloc, /* tp_dealloc */
3805 (printfunc)string_print, /* tp_print */
3806 0, /* tp_getattr */
3807 0, /* tp_setattr */
3808 0, /* tp_compare */
3809 string_repr, /* tp_repr */
3810 &string_as_number, /* tp_as_number */
3811 &string_as_sequence, /* tp_as_sequence */
3812 &string_as_mapping, /* tp_as_mapping */
3813 (hashfunc)string_hash, /* tp_hash */
3814 0, /* tp_call */
3815 string_str, /* tp_str */
3816 PyObject_GenericGetAttr, /* tp_getattro */
3817 0, /* tp_setattro */
3818 &string_as_buffer, /* tp_as_buffer */
3819 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3820 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3821 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3822 string_doc, /* tp_doc */
3823 0, /* tp_traverse */
3824 0, /* tp_clear */
3825 (richcmpfunc)string_richcompare, /* tp_richcompare */
3826 0, /* tp_weaklistoffset */
3827 0, /* tp_iter */
3828 0, /* tp_iternext */
3829 string_methods, /* tp_methods */
3830 0, /* tp_members */
3831 0, /* tp_getset */
3832 &PyBaseString_Type, /* tp_base */
3833 0, /* tp_dict */
3834 0, /* tp_descr_get */
3835 0, /* tp_descr_set */
3836 0, /* tp_dictoffset */
3837 0, /* tp_init */
3838 0, /* tp_alloc */
3839 string_new, /* tp_new */
3840 PyObject_Del, /* tp_free */
3843 void
3844 PyString_Concat(register PyObject **pv, register PyObject *w)
3846 register PyObject *v;
3847 if (*pv == NULL)
3848 return;
3849 if (w == NULL || !PyString_Check(*pv)) {
3850 Py_DECREF(*pv);
3851 *pv = NULL;
3852 return;
3854 v = string_concat((PyStringObject *) *pv, w);
3855 Py_DECREF(*pv);
3856 *pv = v;
3859 void
3860 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3862 PyString_Concat(pv, w);
3863 Py_XDECREF(w);
3867 /* The following function breaks the notion that strings are immutable:
3868 it changes the size of a string. We get away with this only if there
3869 is only one module referencing the object. You can also think of it
3870 as creating a new string object and destroying the old one, only
3871 more efficiently. In any case, don't use this if the string may
3872 already be known to some other part of the code...
3873 Note that if there's not enough memory to resize the string, the original
3874 string object at *pv is deallocated, *pv is set to NULL, an "out of
3875 memory" exception is set, and -1 is returned. Else (on success) 0 is
3876 returned, and the value in *pv may or may not be the same as on input.
3877 As always, an extra byte is allocated for a trailing \0 byte (newsize
3878 does *not* include that), and a trailing \0 byte is stored.
3882 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
3884 register PyObject *v;
3885 register PyStringObject *sv;
3886 v = *pv;
3887 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3888 PyString_CHECK_INTERNED(v)) {
3889 *pv = 0;
3890 Py_DECREF(v);
3891 PyErr_BadInternalCall();
3892 return -1;
3894 /* XXX UNREF/NEWREF interface should be more symmetrical */
3895 _Py_DEC_REFTOTAL;
3896 _Py_ForgetReference(v);
3897 *pv = (PyObject *)
3898 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3899 if (*pv == NULL) {
3900 PyObject_Del(v);
3901 PyErr_NoMemory();
3902 return -1;
3904 _Py_NewReference(*pv);
3905 sv = (PyStringObject *) *pv;
3906 Py_SIZE(sv) = newsize;
3907 sv->ob_sval[newsize] = '\0';
3908 sv->ob_shash = -1; /* invalidate cached hash value */
3909 return 0;
3912 /* Helpers for formatstring */
3914 Py_LOCAL_INLINE(PyObject *)
3915 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3917 Py_ssize_t argidx = *p_argidx;
3918 if (argidx < arglen) {
3919 (*p_argidx)++;
3920 if (arglen < 0)
3921 return args;
3922 else
3923 return PyTuple_GetItem(args, argidx);
3925 PyErr_SetString(PyExc_TypeError,
3926 "not enough arguments for format string");
3927 return NULL;
3930 /* Format codes
3931 * F_LJUST '-'
3932 * F_SIGN '+'
3933 * F_BLANK ' '
3934 * F_ALT '#'
3935 * F_ZERO '0'
3937 #define F_LJUST (1<<0)
3938 #define F_SIGN (1<<1)
3939 #define F_BLANK (1<<2)
3940 #define F_ALT (1<<3)
3941 #define F_ZERO (1<<4)
3943 /* Returns a new reference to a PyString object, or NULL on failure. */
3945 static PyObject *
3946 formatfloat(PyObject *v, int flags, int prec, int type)
3948 char *p;
3949 PyObject *result;
3950 double x;
3952 x = PyFloat_AsDouble(v);
3953 if (x == -1.0 && PyErr_Occurred()) {
3954 PyErr_Format(PyExc_TypeError, "float argument required, "
3955 "not %.200s", Py_TYPE(v)->tp_name);
3956 return NULL;
3959 if (prec < 0)
3960 prec = 6;
3962 p = PyOS_double_to_string(x, type, prec,
3963 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3965 if (p == NULL)
3966 return NULL;
3967 result = PyString_FromStringAndSize(p, strlen(p));
3968 PyMem_Free(p);
3969 return result;
3972 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3973 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3974 * Python's regular ints.
3975 * Return value: a new PyString*, or NULL if error.
3976 * . *pbuf is set to point into it,
3977 * *plen set to the # of chars following that.
3978 * Caller must decref it when done using pbuf.
3979 * The string starting at *pbuf is of the form
3980 * "-"? ("0x" | "0X")? digit+
3981 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3982 * set in flags. The case of hex digits will be correct,
3983 * There will be at least prec digits, zero-filled on the left if
3984 * necessary to get that many.
3985 * val object to be converted
3986 * flags bitmask of format flags; only F_ALT is looked at
3987 * prec minimum number of digits; 0-fill on left if needed
3988 * type a character in [duoxX]; u acts the same as d
3990 * CAUTION: o, x and X conversions on regular ints can never
3991 * produce a '-' sign, but can for Python's unbounded ints.
3993 PyObject*
3994 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3995 char **pbuf, int *plen)
3997 PyObject *result = NULL;
3998 char *buf;
3999 Py_ssize_t i;
4000 int sign; /* 1 if '-', else 0 */
4001 int len; /* number of characters */
4002 Py_ssize_t llen;
4003 int numdigits; /* len == numnondigits + numdigits */
4004 int numnondigits = 0;
4006 switch (type) {
4007 case 'd':
4008 case 'u':
4009 result = Py_TYPE(val)->tp_str(val);
4010 break;
4011 case 'o':
4012 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4013 break;
4014 case 'x':
4015 case 'X':
4016 numnondigits = 2;
4017 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4018 break;
4019 default:
4020 assert(!"'type' not in [duoxX]");
4022 if (!result)
4023 return NULL;
4025 buf = PyString_AsString(result);
4026 if (!buf) {
4027 Py_DECREF(result);
4028 return NULL;
4031 /* To modify the string in-place, there can only be one reference. */
4032 if (Py_REFCNT(result) != 1) {
4033 PyErr_BadInternalCall();
4034 return NULL;
4036 llen = PyString_Size(result);
4037 if (llen > INT_MAX) {
4038 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4039 return NULL;
4041 len = (int)llen;
4042 if (buf[len-1] == 'L') {
4043 --len;
4044 buf[len] = '\0';
4046 sign = buf[0] == '-';
4047 numnondigits += sign;
4048 numdigits = len - numnondigits;
4049 assert(numdigits > 0);
4051 /* Get rid of base marker unless F_ALT */
4052 if ((flags & F_ALT) == 0) {
4053 /* Need to skip 0x, 0X or 0. */
4054 int skipped = 0;
4055 switch (type) {
4056 case 'o':
4057 assert(buf[sign] == '0');
4058 /* If 0 is only digit, leave it alone. */
4059 if (numdigits > 1) {
4060 skipped = 1;
4061 --numdigits;
4063 break;
4064 case 'x':
4065 case 'X':
4066 assert(buf[sign] == '0');
4067 assert(buf[sign + 1] == 'x');
4068 skipped = 2;
4069 numnondigits -= 2;
4070 break;
4072 if (skipped) {
4073 buf += skipped;
4074 len -= skipped;
4075 if (sign)
4076 buf[0] = '-';
4078 assert(len == numnondigits + numdigits);
4079 assert(numdigits > 0);
4082 /* Fill with leading zeroes to meet minimum width. */
4083 if (prec > numdigits) {
4084 PyObject *r1 = PyString_FromStringAndSize(NULL,
4085 numnondigits + prec);
4086 char *b1;
4087 if (!r1) {
4088 Py_DECREF(result);
4089 return NULL;
4091 b1 = PyString_AS_STRING(r1);
4092 for (i = 0; i < numnondigits; ++i)
4093 *b1++ = *buf++;
4094 for (i = 0; i < prec - numdigits; i++)
4095 *b1++ = '0';
4096 for (i = 0; i < numdigits; i++)
4097 *b1++ = *buf++;
4098 *b1 = '\0';
4099 Py_DECREF(result);
4100 result = r1;
4101 buf = PyString_AS_STRING(result);
4102 len = numnondigits + prec;
4105 /* Fix up case for hex conversions. */
4106 if (type == 'X') {
4107 /* Need to convert all lower case letters to upper case.
4108 and need to convert 0x to 0X (and -0x to -0X). */
4109 for (i = 0; i < len; i++)
4110 if (buf[i] >= 'a' && buf[i] <= 'x')
4111 buf[i] -= 'a'-'A';
4113 *pbuf = buf;
4114 *plen = len;
4115 return result;
4118 Py_LOCAL_INLINE(int)
4119 formatint(char *buf, size_t buflen, int flags,
4120 int prec, int type, PyObject *v)
4122 /* fmt = '%#.' + `prec` + 'l' + `type`
4123 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4124 + 1 + 1 = 24 */
4125 char fmt[64]; /* plenty big enough! */
4126 char *sign;
4127 long x;
4129 x = PyInt_AsLong(v);
4130 if (x == -1 && PyErr_Occurred()) {
4131 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4132 Py_TYPE(v)->tp_name);
4133 return -1;
4135 if (x < 0 && type == 'u') {
4136 type = 'd';
4138 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4139 sign = "-";
4140 else
4141 sign = "";
4142 if (prec < 0)
4143 prec = 1;
4145 if ((flags & F_ALT) &&
4146 (type == 'x' || type == 'X')) {
4147 /* When converting under %#x or %#X, there are a number
4148 * of issues that cause pain:
4149 * - when 0 is being converted, the C standard leaves off
4150 * the '0x' or '0X', which is inconsistent with other
4151 * %#x/%#X conversions and inconsistent with Python's
4152 * hex() function
4153 * - there are platforms that violate the standard and
4154 * convert 0 with the '0x' or '0X'
4155 * (Metrowerks, Compaq Tru64)
4156 * - there are platforms that give '0x' when converting
4157 * under %#X, but convert 0 in accordance with the
4158 * standard (OS/2 EMX)
4160 * We can achieve the desired consistency by inserting our
4161 * own '0x' or '0X' prefix, and substituting %x/%X in place
4162 * of %#x/%#X.
4164 * Note that this is the same approach as used in
4165 * formatint() in unicodeobject.c
4167 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4168 sign, type, prec, type);
4170 else {
4171 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4172 sign, (flags&F_ALT) ? "#" : "",
4173 prec, type);
4176 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4177 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4179 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4180 PyErr_SetString(PyExc_OverflowError,
4181 "formatted integer is too long (precision too large?)");
4182 return -1;
4184 if (sign[0])
4185 PyOS_snprintf(buf, buflen, fmt, -x);
4186 else
4187 PyOS_snprintf(buf, buflen, fmt, x);
4188 return (int)strlen(buf);
4191 Py_LOCAL_INLINE(int)
4192 formatchar(char *buf, size_t buflen, PyObject *v)
4194 /* presume that the buffer is at least 2 characters long */
4195 if (PyString_Check(v)) {
4196 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4197 return -1;
4199 else {
4200 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4201 return -1;
4203 buf[1] = '\0';
4204 return 1;
4207 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4209 FORMATBUFLEN is the length of the buffer in which the ints &
4210 chars are formatted. XXX This is a magic number. Each formatting
4211 routine does bounds checking to ensure no overflow, but a better
4212 solution may be to malloc a buffer of appropriate size for each
4213 format. For now, the current solution is sufficient.
4215 #define FORMATBUFLEN (size_t)120
4217 PyObject *
4218 PyString_Format(PyObject *format, PyObject *args)
4220 char *fmt, *res;
4221 Py_ssize_t arglen, argidx;
4222 Py_ssize_t reslen, rescnt, fmtcnt;
4223 int args_owned = 0;
4224 PyObject *result, *orig_args;
4225 #ifdef Py_USING_UNICODE
4226 PyObject *v, *w;
4227 #endif
4228 PyObject *dict = NULL;
4229 if (format == NULL || !PyString_Check(format) || args == NULL) {
4230 PyErr_BadInternalCall();
4231 return NULL;
4233 orig_args = args;
4234 fmt = PyString_AS_STRING(format);
4235 fmtcnt = PyString_GET_SIZE(format);
4236 reslen = rescnt = fmtcnt + 100;
4237 result = PyString_FromStringAndSize((char *)NULL, reslen);
4238 if (result == NULL)
4239 return NULL;
4240 res = PyString_AsString(result);
4241 if (PyTuple_Check(args)) {
4242 arglen = PyTuple_GET_SIZE(args);
4243 argidx = 0;
4245 else {
4246 arglen = -1;
4247 argidx = -2;
4249 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4250 !PyObject_TypeCheck(args, &PyBaseString_Type))
4251 dict = args;
4252 while (--fmtcnt >= 0) {
4253 if (*fmt != '%') {
4254 if (--rescnt < 0) {
4255 rescnt = fmtcnt + 100;
4256 reslen += rescnt;
4257 if (_PyString_Resize(&result, reslen) < 0)
4258 return NULL;
4259 res = PyString_AS_STRING(result)
4260 + reslen - rescnt;
4261 --rescnt;
4263 *res++ = *fmt++;
4265 else {
4266 /* Got a format specifier */
4267 int flags = 0;
4268 Py_ssize_t width = -1;
4269 int prec = -1;
4270 int c = '\0';
4271 int fill;
4272 int isnumok;
4273 PyObject *v = NULL;
4274 PyObject *temp = NULL;
4275 char *pbuf;
4276 int sign;
4277 Py_ssize_t len;
4278 char formatbuf[FORMATBUFLEN];
4279 /* For format{int,char}() */
4280 #ifdef Py_USING_UNICODE
4281 char *fmt_start = fmt;
4282 Py_ssize_t argidx_start = argidx;
4283 #endif
4285 fmt++;
4286 if (*fmt == '(') {
4287 char *keystart;
4288 Py_ssize_t keylen;
4289 PyObject *key;
4290 int pcount = 1;
4292 if (dict == NULL) {
4293 PyErr_SetString(PyExc_TypeError,
4294 "format requires a mapping");
4295 goto error;
4297 ++fmt;
4298 --fmtcnt;
4299 keystart = fmt;
4300 /* Skip over balanced parentheses */
4301 while (pcount > 0 && --fmtcnt >= 0) {
4302 if (*fmt == ')')
4303 --pcount;
4304 else if (*fmt == '(')
4305 ++pcount;
4306 fmt++;
4308 keylen = fmt - keystart - 1;
4309 if (fmtcnt < 0 || pcount > 0) {
4310 PyErr_SetString(PyExc_ValueError,
4311 "incomplete format key");
4312 goto error;
4314 key = PyString_FromStringAndSize(keystart,
4315 keylen);
4316 if (key == NULL)
4317 goto error;
4318 if (args_owned) {
4319 Py_DECREF(args);
4320 args_owned = 0;
4322 args = PyObject_GetItem(dict, key);
4323 Py_DECREF(key);
4324 if (args == NULL) {
4325 goto error;
4327 args_owned = 1;
4328 arglen = -1;
4329 argidx = -2;
4331 while (--fmtcnt >= 0) {
4332 switch (c = *fmt++) {
4333 case '-': flags |= F_LJUST; continue;
4334 case '+': flags |= F_SIGN; continue;
4335 case ' ': flags |= F_BLANK; continue;
4336 case '#': flags |= F_ALT; continue;
4337 case '0': flags |= F_ZERO; continue;
4339 break;
4341 if (c == '*') {
4342 v = getnextarg(args, arglen, &argidx);
4343 if (v == NULL)
4344 goto error;
4345 if (!PyInt_Check(v)) {
4346 PyErr_SetString(PyExc_TypeError,
4347 "* wants int");
4348 goto error;
4350 width = PyInt_AsLong(v);
4351 if (width < 0) {
4352 flags |= F_LJUST;
4353 width = -width;
4355 if (--fmtcnt >= 0)
4356 c = *fmt++;
4358 else if (c >= 0 && isdigit(c)) {
4359 width = c - '0';
4360 while (--fmtcnt >= 0) {
4361 c = Py_CHARMASK(*fmt++);
4362 if (!isdigit(c))
4363 break;
4364 if ((width*10) / 10 != width) {
4365 PyErr_SetString(
4366 PyExc_ValueError,
4367 "width too big");
4368 goto error;
4370 width = width*10 + (c - '0');
4373 if (c == '.') {
4374 prec = 0;
4375 if (--fmtcnt >= 0)
4376 c = *fmt++;
4377 if (c == '*') {
4378 v = getnextarg(args, arglen, &argidx);
4379 if (v == NULL)
4380 goto error;
4381 if (!PyInt_Check(v)) {
4382 PyErr_SetString(
4383 PyExc_TypeError,
4384 "* wants int");
4385 goto error;
4387 prec = PyInt_AsLong(v);
4388 if (prec < 0)
4389 prec = 0;
4390 if (--fmtcnt >= 0)
4391 c = *fmt++;
4393 else if (c >= 0 && isdigit(c)) {
4394 prec = c - '0';
4395 while (--fmtcnt >= 0) {
4396 c = Py_CHARMASK(*fmt++);
4397 if (!isdigit(c))
4398 break;
4399 if ((prec*10) / 10 != prec) {
4400 PyErr_SetString(
4401 PyExc_ValueError,
4402 "prec too big");
4403 goto error;
4405 prec = prec*10 + (c - '0');
4408 } /* prec */
4409 if (fmtcnt >= 0) {
4410 if (c == 'h' || c == 'l' || c == 'L') {
4411 if (--fmtcnt >= 0)
4412 c = *fmt++;
4415 if (fmtcnt < 0) {
4416 PyErr_SetString(PyExc_ValueError,
4417 "incomplete format");
4418 goto error;
4420 if (c != '%') {
4421 v = getnextarg(args, arglen, &argidx);
4422 if (v == NULL)
4423 goto error;
4425 sign = 0;
4426 fill = ' ';
4427 switch (c) {
4428 case '%':
4429 pbuf = "%";
4430 len = 1;
4431 break;
4432 case 's':
4433 #ifdef Py_USING_UNICODE
4434 if (PyUnicode_Check(v)) {
4435 fmt = fmt_start;
4436 argidx = argidx_start;
4437 goto unicode;
4439 #endif
4440 temp = _PyObject_Str(v);
4441 #ifdef Py_USING_UNICODE
4442 if (temp != NULL && PyUnicode_Check(temp)) {
4443 Py_DECREF(temp);
4444 fmt = fmt_start;
4445 argidx = argidx_start;
4446 goto unicode;
4448 #endif
4449 /* Fall through */
4450 case 'r':
4451 if (c == 'r')
4452 temp = PyObject_Repr(v);
4453 if (temp == NULL)
4454 goto error;
4455 if (!PyString_Check(temp)) {
4456 PyErr_SetString(PyExc_TypeError,
4457 "%s argument has non-string str()");
4458 Py_DECREF(temp);
4459 goto error;
4461 pbuf = PyString_AS_STRING(temp);
4462 len = PyString_GET_SIZE(temp);
4463 if (prec >= 0 && len > prec)
4464 len = prec;
4465 break;
4466 case 'i':
4467 case 'd':
4468 case 'u':
4469 case 'o':
4470 case 'x':
4471 case 'X':
4472 if (c == 'i')
4473 c = 'd';
4474 isnumok = 0;
4475 if (PyNumber_Check(v)) {
4476 PyObject *iobj=NULL;
4478 if (PyInt_Check(v) || (PyLong_Check(v))) {
4479 iobj = v;
4480 Py_INCREF(iobj);
4482 else {
4483 iobj = PyNumber_Int(v);
4484 if (iobj==NULL) iobj = PyNumber_Long(v);
4486 if (iobj!=NULL) {
4487 if (PyInt_Check(iobj)) {
4488 isnumok = 1;
4489 pbuf = formatbuf;
4490 len = formatint(pbuf,
4491 sizeof(formatbuf),
4492 flags, prec, c, iobj);
4493 Py_DECREF(iobj);
4494 if (len < 0)
4495 goto error;
4496 sign = 1;
4498 else if (PyLong_Check(iobj)) {
4499 int ilen;
4501 isnumok = 1;
4502 temp = _PyString_FormatLong(iobj, flags,
4503 prec, c, &pbuf, &ilen);
4504 Py_DECREF(iobj);
4505 len = ilen;
4506 if (!temp)
4507 goto error;
4508 sign = 1;
4510 else {
4511 Py_DECREF(iobj);
4515 if (!isnumok) {
4516 PyErr_Format(PyExc_TypeError,
4517 "%%%c format: a number is required, "
4518 "not %.200s", c, Py_TYPE(v)->tp_name);
4519 goto error;
4521 if (flags & F_ZERO)
4522 fill = '0';
4523 break;
4524 case 'e':
4525 case 'E':
4526 case 'f':
4527 case 'F':
4528 case 'g':
4529 case 'G':
4530 temp = formatfloat(v, flags, prec, c);
4531 if (temp == NULL)
4532 goto error;
4533 pbuf = PyString_AS_STRING(temp);
4534 len = PyString_GET_SIZE(temp);
4535 sign = 1;
4536 if (flags & F_ZERO)
4537 fill = '0';
4538 break;
4539 case 'c':
4540 #ifdef Py_USING_UNICODE
4541 if (PyUnicode_Check(v)) {
4542 fmt = fmt_start;
4543 argidx = argidx_start;
4544 goto unicode;
4546 #endif
4547 pbuf = formatbuf;
4548 len = formatchar(pbuf, sizeof(formatbuf), v);
4549 if (len < 0)
4550 goto error;
4551 break;
4552 default:
4553 PyErr_Format(PyExc_ValueError,
4554 "unsupported format character '%c' (0x%x) "
4555 "at index %zd",
4556 c, c,
4557 (Py_ssize_t)(fmt - 1 -
4558 PyString_AsString(format)));
4559 goto error;
4561 if (sign) {
4562 if (*pbuf == '-' || *pbuf == '+') {
4563 sign = *pbuf++;
4564 len--;
4566 else if (flags & F_SIGN)
4567 sign = '+';
4568 else if (flags & F_BLANK)
4569 sign = ' ';
4570 else
4571 sign = 0;
4573 if (width < len)
4574 width = len;
4575 if (rescnt - (sign != 0) < width) {
4576 reslen -= rescnt;
4577 rescnt = width + fmtcnt + 100;
4578 reslen += rescnt;
4579 if (reslen < 0) {
4580 Py_DECREF(result);
4581 Py_XDECREF(temp);
4582 return PyErr_NoMemory();
4584 if (_PyString_Resize(&result, reslen) < 0) {
4585 Py_XDECREF(temp);
4586 return NULL;
4588 res = PyString_AS_STRING(result)
4589 + reslen - rescnt;
4591 if (sign) {
4592 if (fill != ' ')
4593 *res++ = sign;
4594 rescnt--;
4595 if (width > len)
4596 width--;
4598 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4599 assert(pbuf[0] == '0');
4600 assert(pbuf[1] == c);
4601 if (fill != ' ') {
4602 *res++ = *pbuf++;
4603 *res++ = *pbuf++;
4605 rescnt -= 2;
4606 width -= 2;
4607 if (width < 0)
4608 width = 0;
4609 len -= 2;
4611 if (width > len && !(flags & F_LJUST)) {
4612 do {
4613 --rescnt;
4614 *res++ = fill;
4615 } while (--width > len);
4617 if (fill == ' ') {
4618 if (sign)
4619 *res++ = sign;
4620 if ((flags & F_ALT) &&
4621 (c == 'x' || c == 'X')) {
4622 assert(pbuf[0] == '0');
4623 assert(pbuf[1] == c);
4624 *res++ = *pbuf++;
4625 *res++ = *pbuf++;
4628 Py_MEMCPY(res, pbuf, len);
4629 res += len;
4630 rescnt -= len;
4631 while (--width >= len) {
4632 --rescnt;
4633 *res++ = ' ';
4635 if (dict && (argidx < arglen) && c != '%') {
4636 PyErr_SetString(PyExc_TypeError,
4637 "not all arguments converted during string formatting");
4638 Py_XDECREF(temp);
4639 goto error;
4641 Py_XDECREF(temp);
4642 } /* '%' */
4643 } /* until end */
4644 if (argidx < arglen && !dict) {
4645 PyErr_SetString(PyExc_TypeError,
4646 "not all arguments converted during string formatting");
4647 goto error;
4649 if (args_owned) {
4650 Py_DECREF(args);
4652 _PyString_Resize(&result, reslen - rescnt);
4653 return result;
4655 #ifdef Py_USING_UNICODE
4656 unicode:
4657 if (args_owned) {
4658 Py_DECREF(args);
4659 args_owned = 0;
4661 /* Fiddle args right (remove the first argidx arguments) */
4662 if (PyTuple_Check(orig_args) && argidx > 0) {
4663 PyObject *v;
4664 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4665 v = PyTuple_New(n);
4666 if (v == NULL)
4667 goto error;
4668 while (--n >= 0) {
4669 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4670 Py_INCREF(w);
4671 PyTuple_SET_ITEM(v, n, w);
4673 args = v;
4674 } else {
4675 Py_INCREF(orig_args);
4676 args = orig_args;
4678 args_owned = 1;
4679 /* Take what we have of the result and let the Unicode formatting
4680 function format the rest of the input. */
4681 rescnt = res - PyString_AS_STRING(result);
4682 if (_PyString_Resize(&result, rescnt))
4683 goto error;
4684 fmtcnt = PyString_GET_SIZE(format) - \
4685 (fmt - PyString_AS_STRING(format));
4686 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4687 if (format == NULL)
4688 goto error;
4689 v = PyUnicode_Format(format, args);
4690 Py_DECREF(format);
4691 if (v == NULL)
4692 goto error;
4693 /* Paste what we have (result) to what the Unicode formatting
4694 function returned (v) and return the result (or error) */
4695 w = PyUnicode_Concat(result, v);
4696 Py_DECREF(result);
4697 Py_DECREF(v);
4698 Py_DECREF(args);
4699 return w;
4700 #endif /* Py_USING_UNICODE */
4702 error:
4703 Py_DECREF(result);
4704 if (args_owned) {
4705 Py_DECREF(args);
4707 return NULL;
4710 void
4711 PyString_InternInPlace(PyObject **p)
4713 register PyStringObject *s = (PyStringObject *)(*p);
4714 PyObject *t;
4715 if (s == NULL || !PyString_Check(s))
4716 Py_FatalError("PyString_InternInPlace: strings only please!");
4717 /* If it's a string subclass, we don't really know what putting
4718 it in the interned dict might do. */
4719 if (!PyString_CheckExact(s))
4720 return;
4721 if (PyString_CHECK_INTERNED(s))
4722 return;
4723 if (interned == NULL) {
4724 interned = PyDict_New();
4725 if (interned == NULL) {
4726 PyErr_Clear(); /* Don't leave an exception */
4727 return;
4730 t = PyDict_GetItem(interned, (PyObject *)s);
4731 if (t) {
4732 Py_INCREF(t);
4733 Py_DECREF(*p);
4734 *p = t;
4735 return;
4738 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4739 PyErr_Clear();
4740 return;
4742 /* The two references in interned are not counted by refcnt.
4743 The string deallocator will take care of this */
4744 Py_REFCNT(s) -= 2;
4745 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4748 void
4749 PyString_InternImmortal(PyObject **p)
4751 PyString_InternInPlace(p);
4752 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4753 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4754 Py_INCREF(*p);
4759 PyObject *
4760 PyString_InternFromString(const char *cp)
4762 PyObject *s = PyString_FromString(cp);
4763 if (s == NULL)
4764 return NULL;
4765 PyString_InternInPlace(&s);
4766 return s;
4769 void
4770 PyString_Fini(void)
4772 int i;
4773 for (i = 0; i < UCHAR_MAX + 1; i++) {
4774 Py_XDECREF(characters[i]);
4775 characters[i] = NULL;
4777 Py_XDECREF(nullstring);
4778 nullstring = NULL;
4781 void _Py_ReleaseInternedStrings(void)
4783 PyObject *keys;
4784 PyStringObject *s;
4785 Py_ssize_t i, n;
4786 Py_ssize_t immortal_size = 0, mortal_size = 0;
4788 if (interned == NULL || !PyDict_Check(interned))
4789 return;
4790 keys = PyDict_Keys(interned);
4791 if (keys == NULL || !PyList_Check(keys)) {
4792 PyErr_Clear();
4793 return;
4796 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4797 detector, interned strings are not forcibly deallocated; rather, we
4798 give them their stolen references back, and then clear and DECREF
4799 the interned dict. */
4801 n = PyList_GET_SIZE(keys);
4802 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4804 for (i = 0; i < n; i++) {
4805 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4806 switch (s->ob_sstate) {
4807 case SSTATE_NOT_INTERNED:
4808 /* XXX Shouldn't happen */
4809 break;
4810 case SSTATE_INTERNED_IMMORTAL:
4811 Py_REFCNT(s) += 1;
4812 immortal_size += Py_SIZE(s);
4813 break;
4814 case SSTATE_INTERNED_MORTAL:
4815 Py_REFCNT(s) += 2;
4816 mortal_size += Py_SIZE(s);
4817 break;
4818 default:
4819 Py_FatalError("Inconsistent interned string state.");
4821 s->ob_sstate = SSTATE_NOT_INTERNED;
4823 fprintf(stderr, "total size of all interned strings: "
4824 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4825 "mortal/immortal\n", mortal_size, immortal_size);
4826 Py_DECREF(keys);
4827 PyDict_Clear(interned);
4828 Py_DECREF(interned);
4829 interned = NULL;