Objects/stringobject.c

   1 /* String (str/bytes) object implementation */
   2
   3 #define PY_SSIZE_T_CLEAN
   4
   5 #include "Python.h"
   6 #include <ctype.h>
   7 #include <stddef.h>
   8
   9 #ifdef COUNT_ALLOCS
     /* Statistics counters: bumped each time the cached empty string
        (null_strings) or a cached one-character string (one_strings) is
        handed out by the constructors in this file instead of a freshly
        allocated object. */
  10 Py_ssize_t null_strings, one_strings;
  11 #endif
  12
     /* Cache of one-character strings, indexed by the character's byte
        value.  Slots start out NULL and are filled in the first time the
        corresponding one-character string is created. */
  13 static PyStringObject *characters[UCHAR_MAX + 1];
     /* The cached empty string, or NULL until one has been created. */
  14 static PyStringObject *nullstring;
  15
  16 /* This dictionary holds all interned strings.  Note that references to
  17    strings in this dictionary are *not* counted in the string's ob_refcnt.
  18    When the interned string reaches a refcnt of 0 the string deallocation
  19    function will delete the reference from this dictionary.
  20
  21    Another way to look at this is to say that the actual reference
  22    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
  23 */
  24 static PyObject *interned;
  25
  26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
  27    for a string of length n should request PyStringObject_SIZE + n bytes.
  28
  29    Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
  30    3 bytes per string allocation on a typical system.
  31 */
  32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
  33
  34 /*
  35    For both PyString_FromString() and PyString_FromStringAndSize(), the
  36    parameter `size' denotes number of characters to allocate, not counting any
  37    null terminating character.
  38
  39    For PyString_FromString(), the parameter `str' points to a null-terminated
  40    string containing exactly `size' bytes.
  41
  42    For PyString_FromStringAndSize(), the parameter `str' is
  43    either NULL or else points to a string containing at least `size' bytes.
  44    For PyString_FromStringAndSize(), the string in the `str' parameter does
  45    not have to be null-terminated.  (Therefore it is safe to construct a
  46    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
  47    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
  48    bytes (setting the last byte to the null terminating character) and you can
  49    fill in the data yourself.  If `str' is non-NULL then the resulting
  50    PyString object must be treated as immutable and you must not fill in nor
  51    alter the data yourself, since the strings may be shared.
  52
  53    The PyObject member `op->ob_size', which denotes the number of "extra
  54    items" in a variable-size object, will contain the number of bytes
  55    allocated for string data, not counting the null terminating character.  It
  56    is therefore equal to the `size' parameter (for
  57    PyString_FromStringAndSize()) or the length of the string in the `str'
  58    parameter (for PyString_FromString()).
  59 */
  60 PyObject *
  61 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
  62 {
  63         register PyStringObject *op;
  64         if (size < 0) {
  65                 PyErr_SetString(PyExc_SystemError,
  66                     "Negative size passed to PyString_FromStringAndSize");
  67                 return NULL;
  68         }
  69         if (size == 0 && (op = nullstring) != NULL) {
  70 #ifdef COUNT_ALLOCS
  71                 null_strings++;
  72 #endif
  73                 Py_INCREF(op);
  74                 return (PyObject *)op;
  75         }
  76         if (size == 1 && str != NULL &&
  77             (op = characters[*str & UCHAR_MAX]) != NULL)
  78         {
  79 #ifdef COUNT_ALLOCS
  80                 one_strings++;
  81 #endif
  82                 Py_INCREF(op);
  83                 return (PyObject *)op;
  84         }
  85
  86         if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
  87                 PyErr_SetString(PyExc_OverflowError, "string is too large");
  88                 return NULL;
  89         }
  90
  91         /* Inline PyObject_NewVar */
  92         op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
  93         if (op == NULL)
  94                 return PyErr_NoMemory();
  95         PyObject_INIT_VAR(op, &PyString_Type, size);
  96         op->ob_shash = -1;
  97         op->ob_sstate = SSTATE_NOT_INTERNED;
  98         if (str != NULL)
  99                 Py_MEMCPY(op->ob_sval, str, size);
 100         op->ob_sval[size] = '\0';
 101         /* share short strings */
 102         if (size == 0) {
 103                 PyObject *t = (PyObject *)op;
 104                 PyString_InternInPlace(&t);
 105                 op = (PyStringObject *)t;
 106                 nullstring = op;
 107                 Py_INCREF(op);
 108         } else if (size == 1 && str != NULL) {
 109                 PyObject *t = (PyObject *)op;
 110                 PyString_InternInPlace(&t);
 111                 op = (PyStringObject *)t;
 112                 characters[*str & UCHAR_MAX] = op;
 113                 Py_INCREF(op);
 114         }
 115         return (PyObject *) op;
 116 }
 117
 118 PyObject *
 119 PyString_FromString(const char *str)
 120 {
 121         register size_t size;
 122         register PyStringObject *op;
 123
 124         assert(str != NULL);
 125         size = strlen(str);
 126         if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
 127                 PyErr_SetString(PyExc_OverflowError,
 128                         "string is too long for a Python string");
 129                 return NULL;
 130         }
 131         if (size == 0 && (op = nullstring) != NULL) {
 132 #ifdef COUNT_ALLOCS
 133                 null_strings++;
 134 #endif
 135                 Py_INCREF(op);
 136                 return (PyObject *)op;
 137         }
 138         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 139 #ifdef COUNT_ALLOCS
 140                 one_strings++;
 141 #endif
 142                 Py_INCREF(op);
 143                 return (PyObject *)op;
 144         }
 145
 146         /* Inline PyObject_NewVar */
 147         op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
 148         if (op == NULL)
 149                 return PyErr_NoMemory();
 150         PyObject_INIT_VAR(op, &PyString_Type, size);
 151         op->ob_shash = -1;
 152         op->ob_sstate = SSTATE_NOT_INTERNED;
 153         Py_MEMCPY(op->ob_sval, str, size+1);
 154         /* share short strings */
 155         if (size == 0) {
 156                 PyObject *t = (PyObject *)op;
 157                 PyString_InternInPlace(&t);
 158                 op = (PyStringObject *)t;
 159                 nullstring = op;
 160                 Py_INCREF(op);
 161         } else if (size == 1) {
 162                 PyObject *t = (PyObject *)op;
 163                 PyString_InternInPlace(&t);
 164                 op = (PyStringObject *)t;
 165                 characters[*str & UCHAR_MAX] = op;
 166                 Py_INCREF(op);
 167         }
 168         return (PyObject *) op;
 169 }
 170
 171 PyObject *
 172 PyString_FromFormatV(const char *format, va_list vargs)
 173 {
 174         va_list count;
 175         Py_ssize_t n = 0;
 176         const char* f;
 177         char *s;
 178         PyObject* string;
 179
 180 #ifdef VA_LIST_IS_ARRAY
 181         Py_MEMCPY(count, vargs, sizeof(va_list));
 182 #else
 183 #ifdef  __va_copy
 184         __va_copy(count, vargs);
 185 #else
 186         count = vargs;
 187 #endif
 188 #endif
 189         /* step 1: figure out how large a buffer we need */
 190         for (f = format; *f; f++) {
 191                 if (*f == '%') {
 192 #ifdef HAVE_LONG_LONG
 193                         int longlongflag = 0;
 194 #endif
 195                         const char* p = f;
 196                         while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 197                                 ;
 198
 199                         /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
 200                          * they don't affect the amount of space we reserve.
 201                          */
 202                         if (*f == 'l') {
 203                                 if (f[1] == 'd' || f[1] == 'u') {
 204                                         ++f;
 205                                 }
 206 #ifdef HAVE_LONG_LONG
 207                                 else if (f[1] == 'l' &&
 208                                          (f[2] == 'd' || f[2] == 'u')) {
 209                                         longlongflag = 1;
 210                                         f += 2;
 211                                 }
 212 #endif
 213                         }
 214                         else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
 215                                 ++f;
 216                         }
 217
 218                         switch (*f) {
 219                         case 'c':
 220                                 (void)va_arg(count, int);
 221                                 /* fall through... */
 222                         case '%':
 223                                 n++;
 224                                 break;
 225                         case 'd': case 'u': case 'i': case 'x':
 226                                 (void) va_arg(count, int);
 227 #ifdef HAVE_LONG_LONG
 228                                 /* Need at most
 229                                    ceil(log10(256)*SIZEOF_LONG_LONG) digits,
 230                                    plus 1 for the sign.  53/22 is an upper
 231                                    bound for log10(256). */
 232                                 if (longlongflag)
 233                                         n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
 234                                 else
 235 #endif
 236                                         /* 20 bytes is enough to hold a 64-bit
 237                                            integer.  Decimal takes the most
 238                                            space.  This isn't enough for
 239                                            octal. */
 240                                         n += 20;
 241
 242                                 break;
 243                         case 's':
 244                                 s = va_arg(count, char*);
 245                                 n += strlen(s);
 246                                 break;
 247                         case 'p':
 248                                 (void) va_arg(count, int);
 249                                 /* maximum 64-bit pointer representation:
 250                                  * 0xffffffffffffffff
 251                                  * so 19 characters is enough.
 252                                  * XXX I count 18 -- what's the extra for?
 253                                  */
 254                                 n += 19;
 255                                 break;
 256                         default:
 257                                 /* if we stumble upon an unknown
 258                                    formatting code, copy the rest of
 259                                    the format string to the output
 260                                    string. (we cannot just skip the
 261                                    code, since there's no way to know
 262                                    what's in the argument list) */
 263                                 n += strlen(p);
 264                                 goto expand;
 265                         }
 266                 } else
 267                         n++;
 268         }
 269  expand:
 270         /* step 2: fill the buffer */
 271         /* Since we've analyzed how much space we need for the worst case,
 272            use sprintf directly instead of the slower PyOS_snprintf. */
 273         string = PyString_FromStringAndSize(NULL, n);
 274         if (!string)
 275                 return NULL;
 276
 277         s = PyString_AsString(string);
 278
 279         for (f = format; *f; f++) {
 280                 if (*f == '%') {
 281                         const char* p = f++;
 282                         Py_ssize_t i;
 283                         int longflag = 0;
 284 #ifdef HAVE_LONG_LONG
 285                         int longlongflag = 0;
 286 #endif
 287                         int size_tflag = 0;
 288                         /* parse the width.precision part (we're only
 289                            interested in the precision value, if any) */
 290                         n = 0;
 291                         while (isdigit(Py_CHARMASK(*f)))
 292                                 n = (n*10) + *f++ - '0';
 293                         if (*f == '.') {
 294                                 f++;
 295                                 n = 0;
 296                                 while (isdigit(Py_CHARMASK(*f)))
 297                                         n = (n*10) + *f++ - '0';
 298                         }
 299                         while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 300                                 f++;
 301                         /* Handle %ld, %lu, %lld and %llu. */
 302                         if (*f == 'l') {
 303                                 if (f[1] == 'd' || f[1] == 'u') {
 304                                         longflag = 1;
 305                                         ++f;
 306                                 }
 307 #ifdef HAVE_LONG_LONG
 308                                 else if (f[1] == 'l' &&
 309                                          (f[2] == 'd' || f[2] == 'u')) {
 310                                         longlongflag = 1;
 311                                         f += 2;
 312                                 }
 313 #endif
 314                         }
 315                         /* handle the size_t flag. */
 316                         else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
 317                                 size_tflag = 1;
 318                                 ++f;
 319                         }
 320
 321                         switch (*f) {
 322                         case 'c':
 323                                 *s++ = va_arg(vargs, int);
 324                                 break;
 325                         case 'd':
 326                                 if (longflag)
 327                                         sprintf(s, "%ld", va_arg(vargs, long));
 328 #ifdef HAVE_LONG_LONG
 329                                 else if (longlongflag)
 330                                         sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
 331                                                 va_arg(vargs, PY_LONG_LONG));
 332 #endif
 333                                 else if (size_tflag)
 334                                         sprintf(s, "%" PY_FORMAT_SIZE_T "d",
 335                                                 va_arg(vargs, Py_ssize_t));
 336                                 else
 337                                         sprintf(s, "%d", va_arg(vargs, int));
 338                                 s += strlen(s);
 339                                 break;
 340                         case 'u':
 341                                 if (longflag)
 342                                         sprintf(s, "%lu",
 343                                                 va_arg(vargs, unsigned long));
 344 #ifdef HAVE_LONG_LONG
 345                                 else if (longlongflag)
 346                                         sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
 347                                                 va_arg(vargs, PY_LONG_LONG));
 348 #endif
 349                                 else if (size_tflag)
 350                                         sprintf(s, "%" PY_FORMAT_SIZE_T "u",
 351                                                 va_arg(vargs, size_t));
 352                                 else
 353                                         sprintf(s, "%u",
 354                                                 va_arg(vargs, unsigned int));
 355                                 s += strlen(s);
 356                                 break;
 357                         case 'i':
 358                                 sprintf(s, "%i", va_arg(vargs, int));
 359                                 s += strlen(s);
 360                                 break;
 361                         case 'x':
 362                                 sprintf(s, "%x", va_arg(vargs, int));
 363                                 s += strlen(s);
 364                                 break;
 365                         case 's':
 366                                 p = va_arg(vargs, char*);
 367                                 i = strlen(p);
 368                                 if (n > 0 && i > n)
 369                                         i = n;
 370                                 Py_MEMCPY(s, p, i);
 371                                 s += i;
 372                                 break;
 373                         case 'p':
 374                                 sprintf(s, "%p", va_arg(vargs, void*));
 375                                 /* %p is ill-defined:  ensure leading 0x. */
 376                                 if (s[1] == 'X')
 377                                         s[1] = 'x';
 378                                 else if (s[1] != 'x') {
 379                                         memmove(s+2, s, strlen(s)+1);
 380                                         s[0] = '0';
 381                                         s[1] = 'x';
 382                                 }
 383                                 s += strlen(s);
 384                                 break;
 385                         case '%':
 386                                 *s++ = '%';
 387                                 break;
 388                         default:
 389                                 strcpy(s, p);
 390                                 s += strlen(s);
 391                                 goto end;
 392                         }
 393                 } else
 394                         *s++ = *f;
 395         }
 396
 397  end:
 398         _PyString_Resize(&string, s - PyString_AS_STRING(string));
 399         return string;
 400 }
 401
 402 PyObject *
 403 PyString_FromFormat(const char *format, ...)
 404 {
 405         PyObject* ret;
 406         va_list vargs;
 407
 408 #ifdef HAVE_STDARG_PROTOTYPES
 409         va_start(vargs, format);
 410 #else
 411         va_start(vargs);
 412 #endif
 413         ret = PyString_FromFormatV(format, vargs);
 414         va_end(vargs);
 415         return ret;
 416 }
 417
 418
 419 PyObject *PyString_Decode(const char *s,
 420                           Py_ssize_t size,
 421                           const char *encoding,
 422                           const char *errors)
 423 {
 424     PyObject *v, *str;
 425
 426     str = PyString_FromStringAndSize(s, size);
 427     if (str == NULL)
 428         return NULL;
 429     v = PyString_AsDecodedString(str, encoding, errors);
 430     Py_DECREF(str);
 431     return v;
 432 }
 433
 434 PyObject *PyString_AsDecodedObject(PyObject *str,
 435                                    const char *encoding,
 436                                    const char *errors)
 437 {
 438     PyObject *v;
 439
 440     if (!PyString_Check(str)) {
 441         PyErr_BadArgument();
 442         goto onError;
 443     }
 444
 445     if (encoding == NULL) {
 446 #ifdef Py_USING_UNICODE
 447         encoding = PyUnicode_GetDefaultEncoding();
 448 #else
 449         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 450         goto onError;
 451 #endif
 452     }
 453
 454     /* Decode via the codec registry */
 455     v = PyCodec_Decode(str, encoding, errors);
 456     if (v == NULL)
 457         goto onError;
 458
 459     return v;
 460
 461  onError:
 462     return NULL;
 463 }
 464
 465 PyObject *PyString_AsDecodedString(PyObject *str,
 466                                    const char *encoding,
 467                                    const char *errors)
 468 {
 469     PyObject *v;
 470
 471     v = PyString_AsDecodedObject(str, encoding, errors);
 472     if (v == NULL)
 473         goto onError;
 474
 475 #ifdef Py_USING_UNICODE
 476     /* Convert Unicode to a string using the default encoding */
 477     if (PyUnicode_Check(v)) {
 478         PyObject *temp = v;
 479         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 480         Py_DECREF(temp);
 481         if (v == NULL)
 482             goto onError;
 483     }
 484 #endif
 485     if (!PyString_Check(v)) {
 486         PyErr_Format(PyExc_TypeError,
 487                      "decoder did not return a string object (type=%.400s)",
 488                      Py_TYPE(v)->tp_name);
 489         Py_DECREF(v);
 490         goto onError;
 491     }
 492
 493     return v;
 494
 495  onError:
 496     return NULL;
 497 }
 498
 499 PyObject *PyString_Encode(const char *s,
 500                           Py_ssize_t size,
 501                           const char *encoding,
 502                           const char *errors)
 503 {
 504     PyObject *v, *str;
 505
 506     str = PyString_FromStringAndSize(s, size);
 507     if (str == NULL)
 508         return NULL;
 509     v = PyString_AsEncodedString(str, encoding, errors);
 510     Py_DECREF(str);
 511     return v;
 512 }
 513
 514 PyObject *PyString_AsEncodedObject(PyObject *str,
 515                                    const char *encoding,
 516                                    const char *errors)
 517 {
 518     PyObject *v;
 519
 520     if (!PyString_Check(str)) {
 521         PyErr_BadArgument();
 522         goto onError;
 523     }
 524
 525     if (encoding == NULL) {
 526 #ifdef Py_USING_UNICODE
 527         encoding = PyUnicode_GetDefaultEncoding();
 528 #else
 529         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 530         goto onError;
 531 #endif
 532     }
 533
 534     /* Encode via the codec registry */
 535     v = PyCodec_Encode(str, encoding, errors);
 536     if (v == NULL)
 537         goto onError;
 538
 539     return v;
 540
 541  onError:
 542     return NULL;
 543 }
 544
 545 PyObject *PyString_AsEncodedString(PyObject *str,
 546                                    const char *encoding,
 547                                    const char *errors)
 548 {
 549     PyObject *v;
 550
 551     v = PyString_AsEncodedObject(str, encoding, errors);
 552     if (v == NULL)
 553         goto onError;
 554
 555 #ifdef Py_USING_UNICODE
 556     /* Convert Unicode to a string using the default encoding */
 557     if (PyUnicode_Check(v)) {
 558         PyObject *temp = v;
 559         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 560         Py_DECREF(temp);
 561         if (v == NULL)
 562             goto onError;
 563     }
 564 #endif
 565     if (!PyString_Check(v)) {
 566         PyErr_Format(PyExc_TypeError,
 567                      "encoder did not return a string object (type=%.400s)",
 568                      Py_TYPE(v)->tp_name);
 569         Py_DECREF(v);
 570         goto onError;
 571     }
 572
 573     return v;
 574
 575  onError:
 576     return NULL;
 577 }
 578
 579 static void
 580 string_dealloc(PyObject *op)
 581 {
 582         switch (PyString_CHECK_INTERNED(op)) {
 583                 case SSTATE_NOT_INTERNED:
 584                         break;
 585
 586                 case SSTATE_INTERNED_MORTAL:
 587                         /* revive dead object temporarily for DelItem */
 588                         Py_REFCNT(op) = 3;
 589                         if (PyDict_DelItem(interned, op) != 0)
 590                                 Py_FatalError(
 591                                         "deletion of interned string failed");
 592                         break;
 593
 594                 case SSTATE_INTERNED_IMMORTAL:
 595                         Py_FatalError("Immortal interned string died.");
 596
 597                 default:
 598                         Py_FatalError("Inconsistent interned string state.");
 599         }
 600         Py_TYPE(op)->tp_free(op);
 601 }
 602
 603 /* Unescape a backslash-escaped string. If unicode is non-zero,
 604    the string is a u-literal. If recode_encoding is non-zero,
 605    the string is UTF-8 encoded and should be re-encoded in the
 606    specified encoding.  */
 607
 608 PyObject *PyString_DecodeEscape(const char *s,
 609                                 Py_ssize_t len,
 610                                 const char *errors,
 611                                 Py_ssize_t unicode,
 612                                 const char *recode_encoding)
 613 {
 614         int c;
 615         char *p, *buf;
 616         const char *end;
 617         PyObject *v;
 618         Py_ssize_t newlen = recode_encoding ? 4*len:len;
 619         v = PyString_FromStringAndSize((char *)NULL, newlen);
 620         if (v == NULL)
 621                 return NULL;
 622         p = buf = PyString_AsString(v);
 623         end = s + len;
 624         while (s < end) {
 625                 if (*s != '\\') {
 626                   non_esc:
 627 #ifdef Py_USING_UNICODE
 628                         if (recode_encoding && (*s & 0x80)) {
 629                                 PyObject *u, *w;
 630                                 char *r;
 631                                 const char* t;
 632                                 Py_ssize_t rn;
 633                                 t = s;
 634                                 /* Decode non-ASCII bytes as UTF-8. */
 635                                 while (t < end && (*t & 0x80)) t++;
 636                                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
 637                                 if(!u) goto failed;
 638
 639                                 /* Recode them in target encoding. */
 640                                 w = PyUnicode_AsEncodedString(
 641                                         u, recode_encoding, errors);
 642                                 Py_DECREF(u);
 643                                 if (!w) goto failed;
 644
 645                                 /* Append bytes to output buffer. */
 646                                 assert(PyString_Check(w));
 647                                 r = PyString_AS_STRING(w);
 648                                 rn = PyString_GET_SIZE(w);
 649                                 Py_MEMCPY(p, r, rn);
 650                                 p += rn;
 651                                 Py_DECREF(w);
 652                                 s = t;
 653                         } else {
 654                                 *p++ = *s++;
 655                         }
 656 #else
 657                         *p++ = *s++;
 658 #endif
 659                         continue;
 660                 }
 661                 s++;
 662                 if (s==end) {
 663                         PyErr_SetString(PyExc_ValueError,
 664                                         "Trailing \\ in string");
 665                         goto failed;
 666                 }
 667                 switch (*s++) {
 668                 /* XXX This assumes ASCII! */
 669                 case '\n': break;
 670                 case '\\': *p++ = '\\'; break;
 671                 case '\'': *p++ = '\''; break;
 672                 case '\"': *p++ = '\"'; break;
 673                 case 'b': *p++ = '\b'; break;
 674                 case 'f': *p++ = '\014'; break; /* FF */
 675                 case 't': *p++ = '\t'; break;
 676                 case 'n': *p++ = '\n'; break;
 677                 case 'r': *p++ = '\r'; break;
 678                 case 'v': *p++ = '\013'; break; /* VT */
 679                 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
 680                 case '0': case '1': case '2': case '3':
 681                 case '4': case '5': case '6': case '7':
 682                         c = s[-1] - '0';
 683                         if (s < end && '0' <= *s && *s <= '7') {
 684                                 c = (c<<3) + *s++ - '0';
 685                                 if (s < end && '0' <= *s && *s <= '7')
 686                                         c = (c<<3) + *s++ - '0';
 687                         }
 688                         *p++ = c;
 689                         break;
 690                 case 'x':
 691                         if (s+1 < end &&
 692                             isxdigit(Py_CHARMASK(s[0])) &&
 693                             isxdigit(Py_CHARMASK(s[1])))
 694                         {
 695                                 unsigned int x = 0;
 696                                 c = Py_CHARMASK(*s);
 697                                 s++;
 698                                 if (isdigit(c))
 699                                         x = c - '0';
 700                                 else if (islower(c))
 701                                         x = 10 + c - 'a';
 702                                 else
 703                                         x = 10 + c - 'A';
 704                                 x = x << 4;
 705                                 c = Py_CHARMASK(*s);
 706                                 s++;
 707                                 if (isdigit(c))
 708                                         x += c - '0';
 709                                 else if (islower(c))
 710                                         x += 10 + c - 'a';
 711                                 else
 712                                         x += 10 + c - 'A';
 713                                 *p++ = x;
 714                                 break;
 715                         }
 716                         if (!errors || strcmp(errors, "strict") == 0) {
 717                                 PyErr_SetString(PyExc_ValueError,
 718                                                 "invalid \\x escape");
 719                                 goto failed;
 720                         }
 721                         if (strcmp(errors, "replace") == 0) {
 722                                 *p++ = '?';
 723                         } else if (strcmp(errors, "ignore") == 0)
 724                                 /* do nothing */;
 725                         else {
 726                                 PyErr_Format(PyExc_ValueError,
 727                                              "decoding error; "
 728                                              "unknown error handling code: %.400s",
 729                                              errors);
 730                                 goto failed;
 731                         }
 732 #ifndef Py_USING_UNICODE
 733                 case 'u':
 734                 case 'U':
 735                 case 'N':
 736                         if (unicode) {
 737                                 PyErr_SetString(PyExc_ValueError,
 738                                           "Unicode escapes not legal "
 739                                           "when Unicode disabled");
 740                                 goto failed;
 741                         }
 742 #endif
 743                 default:
 744                         *p++ = '\\';
 745                         s--;
 746                         goto non_esc; /* an arbitry number of unescaped
 747                                          UTF-8 bytes may follow. */
 748                 }
 749         }
 750         if (p-buf < newlen)
 751                 _PyString_Resize(&v, p - buf);
 752         return v;
 753   failed:
 754         Py_DECREF(v);
 755         return NULL;
 756 }
 757
 758 /* -------------------------------------------------------------------- */
 759 /* object api */
 760
 761 static Py_ssize_t
 762 string_getsize(register PyObject *op)
 763 {
 764         char *s;
 765         Py_ssize_t len;
 766         if (PyString_AsStringAndSize(op, &s, &len))
 767                 return -1;
 768         return len;
 769 }
 770
 771 static /*const*/ char *
 772 string_getbuffer(register PyObject *op)
 773 {
 774         char *s;
 775         Py_ssize_t len;
 776         if (PyString_AsStringAndSize(op, &s, &len))
 777                 return NULL;
 778         return s;
 779 }
 780
 781 Py_ssize_t
 782 PyString_Size(register PyObject *op)
 783 {
 784         if (!PyString_Check(op))
 785                 return string_getsize(op);
 786         return Py_SIZE(op);
 787 }
 788
 789 /*const*/ char *
 790 PyString_AsString(register PyObject *op)
 791 {
 792         if (!PyString_Check(op))
 793                 return string_getbuffer(op);
 794         return ((PyStringObject *)op) -> ob_sval;
 795 }
 796
 797 int
 798 PyString_AsStringAndSize(register PyObject *obj,
 799                          register char **s,
 800                          register Py_ssize_t *len)
 801 {
 802         if (s == NULL) {
 803                 PyErr_BadInternalCall();
 804                 return -1;
 805         }
 806
 807         if (!PyString_Check(obj)) {
 808 #ifdef Py_USING_UNICODE
 809                 if (PyUnicode_Check(obj)) {
 810                         obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
 811                         if (obj == NULL)
 812                                 return -1;
 813                 }
 814                 else
 815 #endif
 816                 {
 817                         PyErr_Format(PyExc_TypeError,
 818                                      "expected string or Unicode object, "
 819                                      "%.200s found", Py_TYPE(obj)->tp_name);
 820                         return -1;
 821                 }
 822         }
 823
 824         *s = PyString_AS_STRING(obj);
 825         if (len != NULL)
 826                 *len = PyString_GET_SIZE(obj);
 827         else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
 828                 PyErr_SetString(PyExc_TypeError,
 829                                 "expected string without null bytes");
 830                 return -1;
 831         }
 832         return 0;
 833 }
 834
 835 /* -------------------------------------------------------------------- */
 836 /* Methods */
 837
 838 #include "stringlib/stringdefs.h"
 839 #include "stringlib/fastsearch.h"
 840
 841 #include "stringlib/count.h"
 842 #include "stringlib/find.h"
 843 #include "stringlib/partition.h"
 844 #include "stringlib/split.h"
 845
 846 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
 847 #include "stringlib/localeutil.h"
 848
 849
 850
 851 static int
 852 string_print(PyStringObject *op, FILE *fp, int flags)
 853 {
 854         Py_ssize_t i, str_len;
 855         char c;
 856         int quote;
 857
 858         /* XXX Ought to check for interrupts when writing long strings */
 859         if (! PyString_CheckExact(op)) {
 860                 int ret;
 861                 /* A str subclass may have its own __str__ method. */
 862                 op = (PyStringObject *) PyObject_Str((PyObject *)op);
 863                 if (op == NULL)
 864                         return -1;
 865                 ret = string_print(op, fp, flags);
 866                 Py_DECREF(op);
 867                 return ret;
 868         }
 869         if (flags & Py_PRINT_RAW) {
 870                 char *data = op->ob_sval;
 871                 Py_ssize_t size = Py_SIZE(op);
 872                 Py_BEGIN_ALLOW_THREADS
 873                 while (size > INT_MAX) {
 874                         /* Very long strings cannot be written atomically.
 875                          * But don't write exactly INT_MAX bytes at a time
 876                          * to avoid memory aligment issues.
 877                          */
 878                         const int chunk_size = INT_MAX & ~0x3FFF;
 879                         fwrite(data, 1, chunk_size, fp);
 880                         data += chunk_size;
 881                         size -= chunk_size;
 882                 }
 883 #ifdef __VMS
 884                 if (size) fwrite(data, (int)size, 1, fp);
 885 #else
 886                 fwrite(data, 1, (int)size, fp);
 887 #endif
 888                 Py_END_ALLOW_THREADS
 889                 return 0;
 890         }
 891
 892         /* figure out which quote to use; single is preferred */
 893         quote = '\'';
 894         if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
 895             !memchr(op->ob_sval, '"', Py_SIZE(op)))
 896                 quote = '"';
 897
 898         str_len = Py_SIZE(op);
 899         Py_BEGIN_ALLOW_THREADS
 900         fputc(quote, fp);
 901         for (i = 0; i < str_len; i++) {
 902                 /* Since strings are immutable and the caller should have a
 903                 reference, accessing the interal buffer should not be an issue
 904                 with the GIL released. */
 905                 c = op->ob_sval[i];
 906                 if (c == quote || c == '\\')
 907                         fprintf(fp, "\\%c", c);
 908                 else if (c == '\t')
 909                         fprintf(fp, "\\t");
 910                 else if (c == '\n')
 911                         fprintf(fp, "\\n");
 912                 else if (c == '\r')
 913                         fprintf(fp, "\\r");
 914                 else if (c < ' ' || c >= 0x7f)
 915                         fprintf(fp, "\\x%02x", c & 0xff);
 916                 else
 917                         fputc(c, fp);
 918         }
 919         fputc(quote, fp);
 920         Py_END_ALLOW_THREADS
 921         return 0;
 922 }
 923
 924 PyObject *
 925 PyString_Repr(PyObject *obj, int smartquotes)
 926 {
 927         register PyStringObject* op = (PyStringObject*) obj;
 928         size_t newsize = 2 + 4 * Py_SIZE(op);
 929         PyObject *v;
 930         if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
 931                 PyErr_SetString(PyExc_OverflowError,
 932                         "string is too large to make repr");
 933                 return NULL;
 934         }
 935         v = PyString_FromStringAndSize((char *)NULL, newsize);
 936         if (v == NULL) {
 937                 return NULL;
 938         }
 939         else {
 940                 register Py_ssize_t i;
 941                 register char c;
 942                 register char *p;
 943                 int quote;
 944
 945                 /* figure out which quote to use; single is preferred */
 946                 quote = '\'';
 947                 if (smartquotes &&
 948                     memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
 949                     !memchr(op->ob_sval, '"', Py_SIZE(op)))
 950                         quote = '"';
 951
 952                 p = PyString_AS_STRING(v);
 953                 *p++ = quote;
 954                 for (i = 0; i < Py_SIZE(op); i++) {
 955                         /* There's at least enough room for a hex escape
 956                            and a closing quote. */
 957                         assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
 958                         c = op->ob_sval[i];
 959                         if (c == quote || c == '\\')
 960                                 *p++ = '\\', *p++ = c;
 961                         else if (c == '\t')
 962                                 *p++ = '\\', *p++ = 't';
 963                         else if (c == '\n')
 964                                 *p++ = '\\', *p++ = 'n';
 965                         else if (c == '\r')
 966                                 *p++ = '\\', *p++ = 'r';
 967                         else if (c < ' ' || c >= 0x7f) {
 968                                 /* For performance, we don't want to call
 969                                    PyOS_snprintf here (extra layers of
 970                                    function call). */
 971                                 sprintf(p, "\\x%02x", c & 0xff);
 972                                 p += 4;
 973                         }
 974                         else
 975                                 *p++ = c;
 976                 }
 977                 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
 978                 *p++ = quote;
 979                 *p = '\0';
 980                 _PyString_Resize(
 981                         &v, (p - PyString_AS_STRING(v)));
 982                 return v;
 983         }
 984 }
 985
 986 static PyObject *
 987 string_repr(PyObject *op)
 988 {
 989         return PyString_Repr(op, 1);
 990 }
 991
 992 static PyObject *
 993 string_str(PyObject *s)
 994 {
 995         assert(PyString_Check(s));
 996         if (PyString_CheckExact(s)) {
 997                 Py_INCREF(s);
 998                 return s;
 999         }
1000         else {
1001                 /* Subtype -- return genuine string with the same value. */
1002                 PyStringObject *t = (PyStringObject *) s;
1003                 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1004         }
1005 }
1006
1007 static Py_ssize_t
1008 string_length(PyStringObject *a)
1009 {
1010         return Py_SIZE(a);
1011 }
1012
1013 static PyObject *
1014 string_concat(register PyStringObject *a, register PyObject *bb)
1015 {
1016         register Py_ssize_t size;
1017         register PyStringObject *op;
1018         if (!PyString_Check(bb)) {
1019 #ifdef Py_USING_UNICODE
1020                 if (PyUnicode_Check(bb))
1021                     return PyUnicode_Concat((PyObject *)a, bb);
1022 #endif
1023                 if (PyByteArray_Check(bb))
1024                     return PyByteArray_Concat((PyObject *)a, bb);
1025                 PyErr_Format(PyExc_TypeError,
1026                              "cannot concatenate 'str' and '%.200s' objects",
1027                              Py_TYPE(bb)->tp_name);
1028                 return NULL;
1029         }
1030 #define b ((PyStringObject *)bb)
1031         /* Optimize cases with empty left or right operand */
1032         if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1033             PyString_CheckExact(a) && PyString_CheckExact(b)) {
1034                 if (Py_SIZE(a) == 0) {
1035                         Py_INCREF(bb);
1036                         return bb;
1037                 }
1038                 Py_INCREF(a);
1039                 return (PyObject *)a;
1040         }
1041         size = Py_SIZE(a) + Py_SIZE(b);
1042         /* Check that string sizes are not negative, to prevent an
1043            overflow in cases where we are passed incorrectly-created
1044            strings with negative lengths (due to a bug in other code).
1045         */
1046         if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1047             Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1048                 PyErr_SetString(PyExc_OverflowError,
1049                                 "strings are too large to concat");
1050                 return NULL;
1051         }
1052
1053         /* Inline PyObject_NewVar */
1054         if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1055                 PyErr_SetString(PyExc_OverflowError,
1056                                 "strings are too large to concat");
1057                 return NULL;
1058         }
1059         op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1060         if (op == NULL)
1061                 return PyErr_NoMemory();
1062         PyObject_INIT_VAR(op, &PyString_Type, size);
1063         op->ob_shash = -1;
1064         op->ob_sstate = SSTATE_NOT_INTERNED;
1065         Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1066         Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1067         op->ob_sval[size] = '\0';
1068         return (PyObject *) op;
1069 #undef b
1070 }
1071
1072 static PyObject *
1073 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1074 {
1075         register Py_ssize_t i;
1076         register Py_ssize_t j;
1077         register Py_ssize_t size;
1078         register PyStringObject *op;
1079         size_t nbytes;
1080         if (n < 0)
1081                 n = 0;
1082         /* watch out for overflows:  the size can overflow int,
1083          * and the # of bytes needed can overflow size_t
1084          */
1085         size = Py_SIZE(a) * n;
1086         if (n && size / n != Py_SIZE(a)) {
1087                 PyErr_SetString(PyExc_OverflowError,
1088                         "repeated string is too long");
1089                 return NULL;
1090         }
1091         if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1092                 Py_INCREF(a);
1093                 return (PyObject *)a;
1094         }
1095         nbytes = (size_t)size;
1096         if (nbytes + PyStringObject_SIZE <= nbytes) {
1097                 PyErr_SetString(PyExc_OverflowError,
1098                         "repeated string is too long");
1099                 return NULL;
1100         }
1101         op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1102         if (op == NULL)
1103                 return PyErr_NoMemory();
1104         PyObject_INIT_VAR(op, &PyString_Type, size);
1105         op->ob_shash = -1;
1106         op->ob_sstate = SSTATE_NOT_INTERNED;
1107         op->ob_sval[size] = '\0';
1108         if (Py_SIZE(a) == 1 && n > 0) {
1109                 memset(op->ob_sval, a->ob_sval[0] , n);
1110                 return (PyObject *) op;
1111         }
1112         i = 0;
1113         if (i < size) {
1114                 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1115                 i = Py_SIZE(a);
1116         }
1117         while (i < size) {
1118                 j = (i <= size-i)  ?  i  :  size-i;
1119                 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1120                 i += j;
1121         }
1122         return (PyObject *) op;
1123 }
1124
1125 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1126
1127 static PyObject *
1128 string_slice(register PyStringObject *a, register Py_ssize_t i,
1129              register Py_ssize_t j)
1130      /* j -- may be negative! */
1131 {
1132         if (i < 0)
1133                 i = 0;
1134         if (j < 0)
1135                 j = 0; /* Avoid signed/unsigned bug in next line */
1136         if (j > Py_SIZE(a))
1137                 j = Py_SIZE(a);
1138         if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1139                 /* It's the same as a */
1140                 Py_INCREF(a);
1141                 return (PyObject *)a;
1142         }
1143         if (j < i)
1144                 j = i;
1145         return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1146 }
1147
1148 static int
1149 string_contains(PyObject *str_obj, PyObject *sub_obj)
1150 {
1151         if (!PyString_CheckExact(sub_obj)) {
1152 #ifdef Py_USING_UNICODE
1153                 if (PyUnicode_Check(sub_obj))
1154                         return PyUnicode_Contains(str_obj, sub_obj);
1155 #endif
1156                 if (!PyString_Check(sub_obj)) {
1157                         PyErr_Format(PyExc_TypeError,
1158                             "'in <string>' requires string as left operand, "
1159                             "not %.200s", Py_TYPE(sub_obj)->tp_name);
1160                         return -1;
1161                 }
1162         }
1163
1164         return stringlib_contains_obj(str_obj, sub_obj);
1165 }
1166
1167 static PyObject *
1168 string_item(PyStringObject *a, register Py_ssize_t i)
1169 {
1170         char pchar;
1171         PyObject *v;
1172         if (i < 0 || i >= Py_SIZE(a)) {
1173                 PyErr_SetString(PyExc_IndexError, "string index out of range");
1174                 return NULL;
1175         }
1176         pchar = a->ob_sval[i];
1177         v = (PyObject *)characters[pchar & UCHAR_MAX];
1178         if (v == NULL)
1179                 v = PyString_FromStringAndSize(&pchar, 1);
1180         else {
1181 #ifdef COUNT_ALLOCS
1182                 one_strings++;
1183 #endif
1184                 Py_INCREF(v);
1185         }
1186         return v;
1187 }
1188
1189 static PyObject*
1190 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1191 {
1192         int c;
1193         Py_ssize_t len_a, len_b;
1194         Py_ssize_t min_len;
1195         PyObject *result;
1196
1197         /* Make sure both arguments are strings. */
1198         if (!(PyString_Check(a) && PyString_Check(b))) {
1199                 result = Py_NotImplemented;
1200                 goto out;
1201         }
1202         if (a == b) {
1203                 switch (op) {
1204                 case Py_EQ:case Py_LE:case Py_GE:
1205                         result = Py_True;
1206                         goto out;
1207                 case Py_NE:case Py_LT:case Py_GT:
1208                         result = Py_False;
1209                         goto out;
1210                 }
1211         }
1212         if (op == Py_EQ) {
1213                 /* Supporting Py_NE here as well does not save
1214                    much time, since Py_NE is rarely used.  */
1215                 if (Py_SIZE(a) == Py_SIZE(b)
1216                     && (a->ob_sval[0] == b->ob_sval[0]
1217                         && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1218                         result = Py_True;
1219                 } else {
1220                         result = Py_False;
1221                 }
1222                 goto out;
1223         }
1224         len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1225         min_len = (len_a < len_b) ? len_a : len_b;
1226         if (min_len > 0) {
1227                 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1228                 if (c==0)
1229                         c = memcmp(a->ob_sval, b->ob_sval, min_len);
1230         } else
1231                 c = 0;
1232         if (c == 0)
1233                 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1234         switch (op) {
1235         case Py_LT: c = c <  0; break;
1236         case Py_LE: c = c <= 0; break;
1237         case Py_EQ: assert(0);  break; /* unreachable */
1238         case Py_NE: c = c != 0; break;
1239         case Py_GT: c = c >  0; break;
1240         case Py_GE: c = c >= 0; break;
1241         default:
1242                 result = Py_NotImplemented;
1243                 goto out;
1244         }
1245         result = c ? Py_True : Py_False;
1246   out:
1247         Py_INCREF(result);
1248         return result;
1249 }
1250
1251 int
1252 _PyString_Eq(PyObject *o1, PyObject *o2)
1253 {
1254         PyStringObject *a = (PyStringObject*) o1;
1255         PyStringObject *b = (PyStringObject*) o2;
1256         return Py_SIZE(a) == Py_SIZE(b)
1257           && *a->ob_sval == *b->ob_sval
1258           && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1259 }
1260
1261 static long
1262 string_hash(PyStringObject *a)
1263 {
1264         register Py_ssize_t len;
1265         register unsigned char *p;
1266         register long x;
1267
1268         if (a->ob_shash != -1)
1269                 return a->ob_shash;
1270         len = Py_SIZE(a);
1271         p = (unsigned char *) a->ob_sval;
1272         x = *p << 7;
1273         while (--len >= 0)
1274                 x = (1000003*x) ^ *p++;
1275         x ^= Py_SIZE(a);
1276         if (x == -1)
1277                 x = -2;
1278         a->ob_shash = x;
1279         return x;
1280 }
1281
1282 static PyObject*
1283 string_subscript(PyStringObject* self, PyObject* item)
1284 {
1285         if (PyIndex_Check(item)) {
1286                 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1287                 if (i == -1 && PyErr_Occurred())
1288                         return NULL;
1289                 if (i < 0)
1290                         i += PyString_GET_SIZE(self);
1291                 return string_item(self, i);
1292         }
1293         else if (PySlice_Check(item)) {
1294                 Py_ssize_t start, stop, step, slicelength, cur, i;
1295                 char* source_buf;
1296                 char* result_buf;
1297                 PyObject* result;
1298
1299                 if (PySlice_GetIndicesEx((PySliceObject*)item,
1300                                  PyString_GET_SIZE(self),
1301                                  &start, &stop, &step, &slicelength) < 0) {
1302                         return NULL;
1303                 }
1304
1305                 if (slicelength <= 0) {
1306                         return PyString_FromStringAndSize("", 0);
1307                 }
1308                 else if (start == 0 && step == 1 &&
1309                          slicelength == PyString_GET_SIZE(self) &&
1310                          PyString_CheckExact(self)) {
1311                         Py_INCREF(self);
1312                         return (PyObject *)self;
1313                 }
1314                 else if (step == 1) {
1315                         return PyString_FromStringAndSize(
1316                                 PyString_AS_STRING(self) + start,
1317                                 slicelength);
1318                 }
1319                 else {
1320                         source_buf = PyString_AsString((PyObject*)self);
1321                         result_buf = (char *)PyMem_Malloc(slicelength);
1322                         if (result_buf == NULL)
1323                                 return PyErr_NoMemory();
1324
1325                         for (cur = start, i = 0; i < slicelength;
1326                              cur += step, i++) {
1327                                 result_buf[i] = source_buf[cur];
1328                         }
1329
1330                         result = PyString_FromStringAndSize(result_buf,
1331                                                             slicelength);
1332                         PyMem_Free(result_buf);
1333                         return result;
1334                 }
1335         }
1336         else {
1337                 PyErr_Format(PyExc_TypeError,
1338                              "string indices must be integers, not %.200s",
1339                              Py_TYPE(item)->tp_name);
1340                 return NULL;
1341         }
1342 }
1343
1344 static Py_ssize_t
1345 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1346 {
1347         if ( index != 0 ) {
1348                 PyErr_SetString(PyExc_SystemError,
1349                                 "accessing non-existent string segment");
1350                 return -1;
1351         }
1352         *ptr = (void *)self->ob_sval;
1353         return Py_SIZE(self);
1354 }
1355
1356 static Py_ssize_t
1357 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1358 {
1359         PyErr_SetString(PyExc_TypeError,
1360                         "Cannot use string as modifiable buffer");
1361         return -1;
1362 }
1363
1364 static Py_ssize_t
1365 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1366 {
1367         if ( lenp )
1368                 *lenp = Py_SIZE(self);
1369         return 1;
1370 }
1371
1372 static Py_ssize_t
1373 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1374 {
1375         if ( index != 0 ) {
1376                 PyErr_SetString(PyExc_SystemError,
1377                                 "accessing non-existent string segment");
1378                 return -1;
1379         }
1380         *ptr = self->ob_sval;
1381         return Py_SIZE(self);
1382 }
1383
1384 static int
1385 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1386 {
1387         return PyBuffer_FillInfo(view, (PyObject*)self,
1388                                  (void *)self->ob_sval, Py_SIZE(self),
1389                                  1, flags);
1390 }
1391
/* Sequence slot table for str.  The two assignment slots are 0, so the
   table offers no item or slice assignment. */
static PySequenceMethods string_as_sequence = {
        (lenfunc)string_length, /*sq_length*/
        (binaryfunc)string_concat, /*sq_concat*/
        (ssizeargfunc)string_repeat, /*sq_repeat*/
        (ssizeargfunc)string_item, /*sq_item*/
        (ssizessizeargfunc)string_slice, /*sq_slice*/
        0,              /*sq_ass_item*/
        0,              /*sq_ass_slice*/
        (objobjproc)string_contains /*sq_contains*/
};
1402
/* Mapping slot table for str: length and subscription only; the third
   (assignment) slot is 0. */
static PyMappingMethods string_as_mapping = {
        (lenfunc)string_length, /*mp_length*/
        (binaryfunc)string_subscript, /*mp_subscript*/
        0, /*mp_ass_subscript*/
};
1408
/* Buffer slot table for str.  The write-buffer entry is a function that
   always fails, and the last slot is left as 0. */
static PyBufferProcs string_as_buffer = {
        (readbufferproc)string_buffer_getreadbuf, /*bf_getreadbuffer*/
        (writebufferproc)string_buffer_getwritebuf, /*bf_getwritebuffer*/
        (segcountproc)string_buffer_getsegcount, /*bf_getsegcount*/
        (charbufferproc)string_buffer_getcharbuf, /*bf_getcharbuffer*/
        (getbufferproc)string_buffer_getbuffer, /*bf_getbuffer*/
        0, /* XXX */ /*bf_releasebuffer*/
};
1417
1418
1419
/* Selectors for the three strip variants.  Each value is also the index
   of the matching entry in stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* NOTE(review): the entries look like argument-parsing format strings
   ("|O:" followed by the method name) -- confirm against the callers. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Skip the first three characters ("|O:") of a stripformat[] entry,
   leaving just the method name. */
#define STRIPNAME(i) (stripformat[i]+3)
1428
1429 PyDoc_STRVAR(split__doc__,
1430 "S.split([sep [,maxsplit]]) -> list of strings\n\
1431 \n\
1432 Return a list of the words in the string S, using sep as the\n\
1433 delimiter string.  If maxsplit is given, at most maxsplit\n\
1434 splits are done. If sep is not specified or is None, any\n\
1435 whitespace string is a separator and empty strings are removed\n\
1436 from the result.");
1437
1438 static PyObject *
1439 string_split(PyStringObject *self, PyObject *args)
1440 {
1441         Py_ssize_t len = PyString_GET_SIZE(self), n;
1442         Py_ssize_t maxsplit = -1;
1443         const char *s = PyString_AS_STRING(self), *sub;
1444         PyObject *subobj = Py_None;
1445
1446         if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1447                 return NULL;
1448         if (maxsplit < 0)
1449                 maxsplit = PY_SSIZE_T_MAX;
1450         if (subobj == Py_None)
1451                 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1452         if (PyString_Check(subobj)) {
1453                 sub = PyString_AS_STRING(subobj);
1454                 n = PyString_GET_SIZE(subobj);
1455         }
1456 #ifdef Py_USING_UNICODE
1457         else if (PyUnicode_Check(subobj))
1458                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1459 #endif
1460         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1461                 return NULL;
1462
1463         return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1464 }
1465
1466 PyDoc_STRVAR(partition__doc__,
1467 "S.partition(sep) -> (head, sep, tail)\n\
1468 \n\
1469 Search for the separator sep in S, and return the part before it,\n\
1470 the separator itself, and the part after it.  If the separator is not\n\
1471 found, return S and two empty strings.");
1472
1473 static PyObject *
1474 string_partition(PyStringObject *self, PyObject *sep_obj)
1475 {
1476         const char *sep;
1477         Py_ssize_t sep_len;
1478
1479         if (PyString_Check(sep_obj)) {
1480                 sep = PyString_AS_STRING(sep_obj);
1481                 sep_len = PyString_GET_SIZE(sep_obj);
1482         }
1483 #ifdef Py_USING_UNICODE
1484         else if (PyUnicode_Check(sep_obj))
1485                 return PyUnicode_Partition((PyObject *) self, sep_obj);
1486 #endif
1487         else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1488                 return NULL;
1489
1490         return stringlib_partition(
1491                 (PyObject*) self,
1492                 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1493                 sep_obj, sep, sep_len
1494                 );
1495 }
1496
1497 PyDoc_STRVAR(rpartition__doc__,
1498 "S.rpartition(sep) -> (tail, sep, head)\n\
1499 \n\
1500 Search for the separator sep in S, starting at the end of S, and return\n\
1501 the part before it, the separator itself, and the part after it.  If the\n\
1502 separator is not found, return two empty strings and S.");
1503
1504 static PyObject *
1505 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1506 {
1507         const char *sep;
1508         Py_ssize_t sep_len;
1509
1510         if (PyString_Check(sep_obj)) {
1511                 sep = PyString_AS_STRING(sep_obj);
1512                 sep_len = PyString_GET_SIZE(sep_obj);
1513         }
1514 #ifdef Py_USING_UNICODE
1515         else if (PyUnicode_Check(sep_obj))
1516                 return PyUnicode_RPartition((PyObject *) self, sep_obj);
1517 #endif
1518         else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1519                 return NULL;
1520
1521         return stringlib_rpartition(
1522                 (PyObject*) self,
1523                 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1524                 sep_obj, sep, sep_len
1525                 );
1526 }
1527
1528 PyDoc_STRVAR(rsplit__doc__,
1529 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1530 \n\
1531 Return a list of the words in the string S, using sep as the\n\
1532 delimiter string, starting at the end of the string and working\n\
1533 to the front.  If maxsplit is given, at most maxsplit splits are\n\
1534 done. If sep is not specified or is None, any whitespace string\n\
1535 is a separator.");
1536
1537 static PyObject *
1538 string_rsplit(PyStringObject *self, PyObject *args)
1539 {
1540         Py_ssize_t len = PyString_GET_SIZE(self), n;
1541         Py_ssize_t maxsplit = -1;
1542         const char *s = PyString_AS_STRING(self), *sub;
1543         PyObject *subobj = Py_None;
1544
1545         if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1546                 return NULL;
1547         if (maxsplit < 0)
1548                 maxsplit = PY_SSIZE_T_MAX;
1549         if (subobj == Py_None)
1550                 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1551         if (PyString_Check(subobj)) {
1552                 sub = PyString_AS_STRING(subobj);
1553                 n = PyString_GET_SIZE(subobj);
1554         }
1555 #ifdef Py_USING_UNICODE
1556         else if (PyUnicode_Check(subobj))
1557                 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1558 #endif
1559         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1560                 return NULL;
1561
1562         return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1563 }
1564
1565
1566 PyDoc_STRVAR(join__doc__,
1567 "S.join(iterable) -> string\n\
1568 \n\
1569 Return a string which is the concatenation of the strings in the\n\
1570 iterable.  The separator between elements is S.");
1571
1572 static PyObject *
1573 string_join(PyStringObject *self, PyObject *orig)
1574 {
1575         char *sep = PyString_AS_STRING(self);
1576         const Py_ssize_t seplen = PyString_GET_SIZE(self);
1577         PyObject *res = NULL;
1578         char *p;
1579         Py_ssize_t seqlen = 0;
1580         size_t sz = 0;
1581         Py_ssize_t i;
1582         PyObject *seq, *item;
1583
1584         seq = PySequence_Fast(orig, "");
1585         if (seq == NULL) {
1586                 return NULL;
1587         }
1588
1589         seqlen = PySequence_Size(seq);
1590         if (seqlen == 0) {
1591                 Py_DECREF(seq);
1592                 return PyString_FromString("");
1593         }
1594         if (seqlen == 1) {
1595                 item = PySequence_Fast_GET_ITEM(seq, 0);
1596                 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1597                         Py_INCREF(item);
1598                         Py_DECREF(seq);
1599                         return item;
1600                 }
1601         }
1602
1603         /* There are at least two things to join, or else we have a subclass
1604          * of the builtin types in the sequence.
1605          * Do a pre-pass to figure out the total amount of space we'll
1606          * need (sz), see whether any argument is absurd, and defer to
1607          * the Unicode join if appropriate.
1608          */
1609         for (i = 0; i < seqlen; i++) {
1610                 const size_t old_sz = sz;
1611                 item = PySequence_Fast_GET_ITEM(seq, i);
1612                 if (!PyString_Check(item)){
1613 #ifdef Py_USING_UNICODE
1614                         if (PyUnicode_Check(item)) {
1615                                 /* Defer to Unicode join.
1616                                  * CAUTION:  There's no gurantee that the
1617                                  * original sequence can be iterated over
1618                                  * again, so we must pass seq here.
1619                                  */
1620                                 PyObject *result;
1621                                 result = PyUnicode_Join((PyObject *)self, seq);
1622                                 Py_DECREF(seq);
1623                                 return result;
1624                         }
1625 #endif
1626                         PyErr_Format(PyExc_TypeError,
1627                                      "sequence item %zd: expected string,"
1628                                      " %.80s found",
1629                                      i, Py_TYPE(item)->tp_name);
1630                         Py_DECREF(seq);
1631                         return NULL;
1632                 }
1633                 sz += PyString_GET_SIZE(item);
1634                 if (i != 0)
1635                         sz += seplen;
1636                 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1637                         PyErr_SetString(PyExc_OverflowError,
1638                                 "join() result is too long for a Python string");
1639                         Py_DECREF(seq);
1640                         return NULL;
1641                 }
1642         }
1643
1644         /* Allocate result space. */
1645         res = PyString_FromStringAndSize((char*)NULL, sz);
1646         if (res == NULL) {
1647                 Py_DECREF(seq);
1648                 return NULL;
1649         }
1650
1651         /* Catenate everything. */
1652         p = PyString_AS_STRING(res);
1653         for (i = 0; i < seqlen; ++i) {
1654                 size_t n;
1655                 item = PySequence_Fast_GET_ITEM(seq, i);
1656                 n = PyString_GET_SIZE(item);
1657                 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1658                 p += n;
1659                 if (i < seqlen - 1) {
1660                         Py_MEMCPY(p, sep, seplen);
1661                         p += seplen;
1662                 }
1663         }
1664
1665         Py_DECREF(seq);
1666         return res;
1667 }
1668
1669 PyObject *
1670 _PyString_Join(PyObject *sep, PyObject *x)
1671 {
1672         assert(sep != NULL && PyString_Check(sep));
1673         assert(x != NULL);
1674         return string_join((PyStringObject *)sep, x);
1675 }
1676
/* Helper macro to fix up start/end slice values.
 *
 * `start' and `end' must be modifiable lvalues; `len' is the length of the
 * sequence being sliced.  After expansion:
 *   - end is clamped to len if it was larger; a negative end has len added
 *     and is then clamped to 0 if still negative;
 *   - a negative start has len added and is then clamped to 0.
 * start is NOT clamped against len or end.
 *
 * Every argument is parenthesized so that expression arguments (e.g. a
 * conditional expression for `len') are not re-associated by the
 * surrounding operators.  Arguments are evaluated more than once, so they
 * must be free of side effects.  The expansion is two consecutive `if'
 * statements, so do not use it as the unbraced body of another `if'.
 */
#define ADJUST_INDICES(start, end, len)         \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }
1691
1692 Py_LOCAL_INLINE(Py_ssize_t)
1693 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1694 {
1695         PyObject *subobj;
1696         const char *sub;
1697         Py_ssize_t sub_len;
1698         Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1699         PyObject *obj_start=Py_None, *obj_end=Py_None;
1700
1701         if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1702                 &obj_start, &obj_end))
1703                 return -2;
1704         /* To support None in "start" and "end" arguments, meaning
1705            the same as if they were not passed.
1706         */
1707         if (obj_start != Py_None)
1708                 if (!_PyEval_SliceIndex(obj_start, &start))
1709                 return -2;
1710         if (obj_end != Py_None)
1711                 if (!_PyEval_SliceIndex(obj_end, &end))
1712                 return -2;
1713
1714         if (PyString_Check(subobj)) {
1715                 sub = PyString_AS_STRING(subobj);
1716                 sub_len = PyString_GET_SIZE(subobj);
1717         }
1718 #ifdef Py_USING_UNICODE
1719         else if (PyUnicode_Check(subobj))
1720                 return PyUnicode_Find(
1721                         (PyObject *)self, subobj, start, end, dir);
1722 #endif
1723         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1724                 /* XXX - the "expected a character buffer object" is pretty
1725                    confusing for a non-expert.  remap to something else ? */
1726                 return -2;
1727
1728         if (dir > 0)
1729                 return stringlib_find_slice(
1730                         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1731                         sub, sub_len, start, end);
1732         else
1733                 return stringlib_rfind_slice(
1734                         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1735                         sub, sub_len, start, end);
1736 }
1737
1738
1739 PyDoc_STRVAR(find__doc__,
1740 "S.find(sub [,start [,end]]) -> int\n\
1741 \n\
1742 Return the lowest index in S where substring sub is found,\n\
1743 such that sub is contained within s[start:end].  Optional\n\
1744 arguments start and end are interpreted as in slice notation.\n\
1745 \n\
1746 Return -1 on failure.");
1747
1748 static PyObject *
1749 string_find(PyStringObject *self, PyObject *args)
1750 {
1751         Py_ssize_t result = string_find_internal(self, args, +1);
1752         if (result == -2)
1753                 return NULL;
1754         return PyInt_FromSsize_t(result);
1755 }
1756
1757
1758 PyDoc_STRVAR(index__doc__,
1759 "S.index(sub [,start [,end]]) -> int\n\
1760 \n\
1761 Like S.find() but raise ValueError when the substring is not found.");
1762
1763 static PyObject *
1764 string_index(PyStringObject *self, PyObject *args)
1765 {
1766         Py_ssize_t result = string_find_internal(self, args, +1);
1767         if (result == -2)
1768                 return NULL;
1769         if (result == -1) {
1770                 PyErr_SetString(PyExc_ValueError,
1771                                 "substring not found");
1772                 return NULL;
1773         }
1774         return PyInt_FromSsize_t(result);
1775 }
1776
1777
1778 PyDoc_STRVAR(rfind__doc__,
1779 "S.rfind(sub [,start [,end]]) -> int\n\
1780 \n\
1781 Return the highest index in S where substring sub is found,\n\
1782 such that sub is contained within s[start:end].  Optional\n\
1783 arguments start and end are interpreted as in slice notation.\n\
1784 \n\
1785 Return -1 on failure.");
1786
1787 static PyObject *
1788 string_rfind(PyStringObject *self, PyObject *args)
1789 {
1790         Py_ssize_t result = string_find_internal(self, args, -1);
1791         if (result == -2)
1792                 return NULL;
1793         return PyInt_FromSsize_t(result);
1794 }
1795
1796
1797 PyDoc_STRVAR(rindex__doc__,
1798 "S.rindex(sub [,start [,end]]) -> int\n\
1799 \n\
1800 Like S.rfind() but raise ValueError when the substring is not found.");
1801
1802 static PyObject *
1803 string_rindex(PyStringObject *self, PyObject *args)
1804 {
1805         Py_ssize_t result = string_find_internal(self, args, -1);
1806         if (result == -2)
1807                 return NULL;
1808         if (result == -1) {
1809                 PyErr_SetString(PyExc_ValueError,
1810                                 "substring not found");
1811                 return NULL;
1812         }
1813         return PyInt_FromSsize_t(result);
1814 }
1815
1816
1817 Py_LOCAL_INLINE(PyObject *)
1818 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1819 {
1820         char *s = PyString_AS_STRING(self);
1821         Py_ssize_t len = PyString_GET_SIZE(self);
1822         char *sep = PyString_AS_STRING(sepobj);
1823         Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1824         Py_ssize_t i, j;
1825
1826         i = 0;
1827         if (striptype != RIGHTSTRIP) {
1828                 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1829                         i++;
1830                 }
1831         }
1832
1833         j = len;
1834         if (striptype != LEFTSTRIP) {
1835                 do {
1836                         j--;
1837                 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1838                 j++;
1839         }
1840
1841         if (i == 0 && j == len && PyString_CheckExact(self)) {
1842                 Py_INCREF(self);
1843                 return (PyObject*)self;
1844         }
1845         else
1846                 return PyString_FromStringAndSize(s+i, j-i);
1847 }
1848
1849
1850 Py_LOCAL_INLINE(PyObject *)
1851 do_strip(PyStringObject *self, int striptype)
1852 {
1853         char *s = PyString_AS_STRING(self);
1854         Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1855
1856         i = 0;
1857         if (striptype != RIGHTSTRIP) {
1858                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1859                         i++;
1860                 }
1861         }
1862
1863         j = len;
1864         if (striptype != LEFTSTRIP) {
1865                 do {
1866                         j--;
1867                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1868                 j++;
1869         }
1870
1871         if (i == 0 && j == len && PyString_CheckExact(self)) {
1872                 Py_INCREF(self);
1873                 return (PyObject*)self;
1874         }
1875         else
1876                 return PyString_FromStringAndSize(s+i, j-i);
1877 }
1878
1879
1880 Py_LOCAL_INLINE(PyObject *)
1881 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1882 {
1883         PyObject *sep = NULL;
1884
1885         if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1886                 return NULL;
1887
1888         if (sep != NULL && sep != Py_None) {
1889                 if (PyString_Check(sep))
1890                         return do_xstrip(self, striptype, sep);
1891 #ifdef Py_USING_UNICODE
1892                 else if (PyUnicode_Check(sep)) {
1893                         PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1894                         PyObject *res;
1895                         if (uniself==NULL)
1896                                 return NULL;
1897                         res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1898                                 striptype, sep);
1899                         Py_DECREF(uniself);
1900                         return res;
1901                 }
1902 #endif
1903                 PyErr_Format(PyExc_TypeError,
1904 #ifdef Py_USING_UNICODE
1905                              "%s arg must be None, str or unicode",
1906 #else
1907                              "%s arg must be None or str",
1908 #endif
1909                              STRIPNAME(striptype));
1910                 return NULL;
1911         }
1912
1913         return do_strip(self, striptype);
1914 }
1915
1916
1917 PyDoc_STRVAR(strip__doc__,
1918 "S.strip([chars]) -> string or unicode\n\
1919 \n\
1920 Return a copy of the string S with leading and trailing\n\
1921 whitespace removed.\n\
1922 If chars is given and not None, remove characters in chars instead.\n\
1923 If chars is unicode, S will be converted to unicode before stripping");
1924
1925 static PyObject *
1926 string_strip(PyStringObject *self, PyObject *args)
1927 {
1928         if (PyTuple_GET_SIZE(args) == 0)
1929                 return do_strip(self, BOTHSTRIP); /* Common case */
1930         else
1931                 return do_argstrip(self, BOTHSTRIP, args);
1932 }
1933
1934
1935 PyDoc_STRVAR(lstrip__doc__,
1936 "S.lstrip([chars]) -> string or unicode\n\
1937 \n\
1938 Return a copy of the string S with leading whitespace removed.\n\
1939 If chars is given and not None, remove characters in chars instead.\n\
1940 If chars is unicode, S will be converted to unicode before stripping");
1941
1942 static PyObject *
1943 string_lstrip(PyStringObject *self, PyObject *args)
1944 {
1945         if (PyTuple_GET_SIZE(args) == 0)
1946                 return do_strip(self, LEFTSTRIP); /* Common case */
1947         else
1948                 return do_argstrip(self, LEFTSTRIP, args);
1949 }
1950
1951
1952 PyDoc_STRVAR(rstrip__doc__,
1953 "S.rstrip([chars]) -> string or unicode\n\
1954 \n\
1955 Return a copy of the string S with trailing whitespace removed.\n\
1956 If chars is given and not None, remove characters in chars instead.\n\
1957 If chars is unicode, S will be converted to unicode before stripping");
1958
1959 static PyObject *
1960 string_rstrip(PyStringObject *self, PyObject *args)
1961 {
1962         if (PyTuple_GET_SIZE(args) == 0)
1963                 return do_strip(self, RIGHTSTRIP); /* Common case */
1964         else
1965                 return do_argstrip(self, RIGHTSTRIP, args);
1966 }
1967
1968
1969 PyDoc_STRVAR(lower__doc__,
1970 "S.lower() -> string\n\
1971 \n\
1972 Return a copy of the string S converted to lowercase.");
1973
1974 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1975 #ifndef _tolower
1976 #define _tolower tolower
1977 #endif
1978
1979 static PyObject *
1980 string_lower(PyStringObject *self)
1981 {
1982         char *s;
1983         Py_ssize_t i, n = PyString_GET_SIZE(self);
1984         PyObject *newobj;
1985
1986         newobj = PyString_FromStringAndSize(NULL, n);
1987         if (!newobj)
1988                 return NULL;
1989
1990         s = PyString_AS_STRING(newobj);
1991
1992         Py_MEMCPY(s, PyString_AS_STRING(self), n);
1993
1994         for (i = 0; i < n; i++) {
1995                 int c = Py_CHARMASK(s[i]);
1996                 if (isupper(c))
1997                         s[i] = _tolower(c);
1998         }
1999
2000         return newobj;
2001 }
2002
2003 PyDoc_STRVAR(upper__doc__,
2004 "S.upper() -> string\n\
2005 \n\
2006 Return a copy of the string S converted to uppercase.");
2007
2008 #ifndef _toupper
2009 #define _toupper toupper
2010 #endif
2011
2012 static PyObject *
2013 string_upper(PyStringObject *self)
2014 {
2015         char *s;
2016         Py_ssize_t i, n = PyString_GET_SIZE(self);
2017         PyObject *newobj;
2018
2019         newobj = PyString_FromStringAndSize(NULL, n);
2020         if (!newobj)
2021                 return NULL;
2022
2023         s = PyString_AS_STRING(newobj);
2024
2025         Py_MEMCPY(s, PyString_AS_STRING(self), n);
2026
2027         for (i = 0; i < n; i++) {
2028                 int c = Py_CHARMASK(s[i]);
2029                 if (islower(c))
2030                         s[i] = _toupper(c);
2031         }
2032
2033         return newobj;
2034 }
2035
2036 PyDoc_STRVAR(title__doc__,
2037 "S.title() -> string\n\
2038 \n\
2039 Return a titlecased version of S, i.e. words start with uppercase\n\
2040 characters, all remaining cased characters have lowercase.");
2041
2042 static PyObject*
2043 string_title(PyStringObject *self)
2044 {
2045         char *s = PyString_AS_STRING(self), *s_new;
2046         Py_ssize_t i, n = PyString_GET_SIZE(self);
2047         int previous_is_cased = 0;
2048         PyObject *newobj;
2049
2050         newobj = PyString_FromStringAndSize(NULL, n);
2051         if (newobj == NULL)
2052                 return NULL;
2053         s_new = PyString_AsString(newobj);
2054         for (i = 0; i < n; i++) {
2055                 int c = Py_CHARMASK(*s++);
2056                 if (islower(c)) {
2057                         if (!previous_is_cased)
2058                             c = toupper(c);
2059                         previous_is_cased = 1;
2060                 } else if (isupper(c)) {
2061                         if (previous_is_cased)
2062                             c = tolower(c);
2063                         previous_is_cased = 1;
2064                 } else
2065                         previous_is_cased = 0;
2066                 *s_new++ = c;
2067         }
2068         return newobj;
2069 }
2070
2071 PyDoc_STRVAR(capitalize__doc__,
2072 "S.capitalize() -> string\n\
2073 \n\
2074 Return a copy of the string S with only its first character\n\
2075 capitalized.");
2076
2077 static PyObject *
2078 string_capitalize(PyStringObject *self)
2079 {
2080         char *s = PyString_AS_STRING(self), *s_new;
2081         Py_ssize_t i, n = PyString_GET_SIZE(self);
2082         PyObject *newobj;
2083
2084         newobj = PyString_FromStringAndSize(NULL, n);
2085         if (newobj == NULL)
2086                 return NULL;
2087         s_new = PyString_AsString(newobj);
2088         if (0 < n) {
2089                 int c = Py_CHARMASK(*s++);
2090                 if (islower(c))
2091                         *s_new = toupper(c);
2092                 else
2093                         *s_new = c;
2094                 s_new++;
2095         }
2096         for (i = 1; i < n; i++) {
2097                 int c = Py_CHARMASK(*s++);
2098                 if (isupper(c))
2099                         *s_new = tolower(c);
2100                 else
2101                         *s_new = c;
2102                 s_new++;
2103         }
2104         return newobj;
2105 }
2106
2107
2108 PyDoc_STRVAR(count__doc__,
2109 "S.count(sub[, start[, end]]) -> int\n\
2110 \n\
2111 Return the number of non-overlapping occurrences of substring sub in\n\
2112 string S[start:end].  Optional arguments start and end are interpreted\n\
2113 as in slice notation.");
2114
2115 static PyObject *
2116 string_count(PyStringObject *self, PyObject *args)
2117 {
2118         PyObject *sub_obj;
2119         const char *str = PyString_AS_STRING(self), *sub;
2120         Py_ssize_t sub_len;
2121         Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2122
2123         if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2124                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2125                 return NULL;
2126
2127         if (PyString_Check(sub_obj)) {
2128                 sub = PyString_AS_STRING(sub_obj);
2129                 sub_len = PyString_GET_SIZE(sub_obj);
2130         }
2131 #ifdef Py_USING_UNICODE
2132         else if (PyUnicode_Check(sub_obj)) {
2133                 Py_ssize_t count;
2134                 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2135                 if (count == -1)
2136                         return NULL;
2137                 else
2138                         return PyInt_FromSsize_t(count);
2139         }
2140 #endif
2141         else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2142                 return NULL;
2143
2144         ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2145
2146         return PyInt_FromSsize_t(
2147                 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2148                 );
2149 }
2150
2151 PyDoc_STRVAR(swapcase__doc__,
2152 "S.swapcase() -> string\n\
2153 \n\
2154 Return a copy of the string S with uppercase characters\n\
2155 converted to lowercase and vice versa.");
2156
2157 static PyObject *
2158 string_swapcase(PyStringObject *self)
2159 {
2160         char *s = PyString_AS_STRING(self), *s_new;
2161         Py_ssize_t i, n = PyString_GET_SIZE(self);
2162         PyObject *newobj;
2163
2164         newobj = PyString_FromStringAndSize(NULL, n);
2165         if (newobj == NULL)
2166                 return NULL;
2167         s_new = PyString_AsString(newobj);
2168         for (i = 0; i < n; i++) {
2169                 int c = Py_CHARMASK(*s++);
2170                 if (islower(c)) {
2171                         *s_new = toupper(c);
2172                 }
2173                 else if (isupper(c)) {
2174                         *s_new = tolower(c);
2175                 }
2176                 else
2177                         *s_new = c;
2178                 s_new++;
2179         }
2180         return newobj;
2181 }
2182
2183
2184 PyDoc_STRVAR(translate__doc__,
2185 "S.translate(table [,deletechars]) -> string\n\
2186 \n\
2187 Return a copy of the string S, where all characters occurring\n\
2188 in the optional argument deletechars are removed, and the\n\
2189 remaining characters have been mapped through the given\n\
2190 translation table, which must be a string of length 256.");
2191
2192 static PyObject *
2193 string_translate(PyStringObject *self, PyObject *args)
2194 {
2195         register char *input, *output;
2196         const char *table;
2197         register Py_ssize_t i, c, changed = 0;
2198         PyObject *input_obj = (PyObject*)self;
2199         const char *output_start, *del_table=NULL;
2200         Py_ssize_t inlen, tablen, dellen = 0;
2201         PyObject *result;
2202         int trans_table[256];
2203         PyObject *tableobj, *delobj = NULL;
2204
2205         if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2206                               &tableobj, &delobj))
2207                 return NULL;
2208
2209         if (PyString_Check(tableobj)) {
2210                 table = PyString_AS_STRING(tableobj);
2211                 tablen = PyString_GET_SIZE(tableobj);
2212         }
2213         else if (tableobj == Py_None) {
2214                 table = NULL;
2215                 tablen = 256;
2216         }
2217 #ifdef Py_USING_UNICODE
2218         else if (PyUnicode_Check(tableobj)) {
2219                 /* Unicode .translate() does not support the deletechars
2220                    parameter; instead a mapping to None will cause characters
2221                    to be deleted. */
2222                 if (delobj != NULL) {
2223                         PyErr_SetString(PyExc_TypeError,
2224                         "deletions are implemented differently for unicode");
2225                         return NULL;
2226                 }
2227                 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2228         }
2229 #endif
2230         else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2231                 return NULL;
2232
2233         if (tablen != 256) {
2234                 PyErr_SetString(PyExc_ValueError,
2235                   "translation table must be 256 characters long");
2236                 return NULL;
2237         }
2238
2239         if (delobj != NULL) {
2240                 if (PyString_Check(delobj)) {
2241                         del_table = PyString_AS_STRING(delobj);
2242                         dellen = PyString_GET_SIZE(delobj);
2243                 }
2244 #ifdef Py_USING_UNICODE
2245                 else if (PyUnicode_Check(delobj)) {
2246                         PyErr_SetString(PyExc_TypeError,
2247                         "deletions are implemented differently for unicode");
2248                         return NULL;
2249                 }
2250 #endif
2251                 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2252                         return NULL;
2253         }
2254         else {
2255                 del_table = NULL;
2256                 dellen = 0;
2257         }
2258
2259         inlen = PyString_GET_SIZE(input_obj);
2260         result = PyString_FromStringAndSize((char *)NULL, inlen);
2261         if (result == NULL)
2262                 return NULL;
2263         output_start = output = PyString_AsString(result);
2264         input = PyString_AS_STRING(input_obj);
2265
2266         if (dellen == 0 && table != NULL) {
2267                 /* If no deletions are required, use faster code */
2268                 for (i = inlen; --i >= 0; ) {
2269                         c = Py_CHARMASK(*input++);
2270                         if (Py_CHARMASK((*output++ = table[c])) != c)
2271                                 changed = 1;
2272                 }
2273                 if (changed || !PyString_CheckExact(input_obj))
2274                         return result;
2275                 Py_DECREF(result);
2276                 Py_INCREF(input_obj);
2277                 return input_obj;
2278         }
2279
2280         if (table == NULL) {
2281                 for (i = 0; i < 256; i++)
2282                         trans_table[i] = Py_CHARMASK(i);
2283         } else {
2284                 for (i = 0; i < 256; i++)
2285                         trans_table[i] = Py_CHARMASK(table[i]);
2286         }
2287
2288         for (i = 0; i < dellen; i++)
2289                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2290
2291         for (i = inlen; --i >= 0; ) {
2292                 c = Py_CHARMASK(*input++);
2293                 if (trans_table[c] != -1)
2294                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2295                                 continue;
2296                 changed = 1;
2297         }
2298         if (!changed && PyString_CheckExact(input_obj)) {
2299                 Py_DECREF(result);
2300                 Py_INCREF(input_obj);
2301                 return input_obj;
2302         }
2303         /* Fix the size of the resulting string */
2304         if (inlen > 0)
2305                 _PyString_Resize(&result, output - output_start);
2306         return result;
2307 }
2308
2309
2310 /* find and count characters and substrings */
2311
/* memchr() wrapper: pointer (as char *) to the first byte equal to c within
 * the first target_len bytes of target, or NULL if there is none.
 */
#define findchar(target, target_len, c) \
        ((char *)memchr((const void *)(target), (c), (target_len)))
2314
2315 /* String ops must return a string.  */
2316 /* If the object is subclass of string, create a copy */
2317 Py_LOCAL(PyStringObject *)
2318 return_self(PyStringObject *self)
2319 {
2320         if (PyString_CheckExact(self)) {
2321                 Py_INCREF(self);
2322                 return self;
2323         }
2324         return (PyStringObject *)PyString_FromStringAndSize(
2325                 PyString_AS_STRING(self),
2326                 PyString_GET_SIZE(self));
2327 }
2328
2329 Py_LOCAL_INLINE(Py_ssize_t)
2330 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2331 {
2332         Py_ssize_t count=0;
2333         const char *start=target;
2334         const char *end=target+target_len;
2335
2336         while ( (start=findchar(start, end-start, c)) != NULL ) {
2337                 count++;
2338                 if (count >= maxcount)
2339                         break;
2340                 start += 1;
2341         }
2342         return count;
2343 }
2344
2345
2346 /* Algorithms for different cases of string replacement */
2347
2348 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2349 Py_LOCAL(PyStringObject *)
2350 replace_interleave(PyStringObject *self,
2351                    const char *to_s, Py_ssize_t to_len,
2352                    Py_ssize_t maxcount)
2353 {
2354         char *self_s, *result_s;
2355         Py_ssize_t self_len, result_len;
2356         Py_ssize_t count, i, product;
2357         PyStringObject *result;
2358
2359         self_len = PyString_GET_SIZE(self);
2360
2361         /* 1 at the end plus 1 after every character */
2362         count = self_len+1;
2363         if (maxcount < count)
2364                 count = maxcount;
2365
2366         /* Check for overflow */
2367         /*   result_len = count * to_len + self_len; */
2368         product = count * to_len;
2369         if (product / to_len != count) {
2370                 PyErr_SetString(PyExc_OverflowError,
2371                                 "replace string is too long");
2372                 return NULL;
2373         }
2374         result_len = product + self_len;
2375         if (result_len < 0) {
2376                 PyErr_SetString(PyExc_OverflowError,
2377                                 "replace string is too long");
2378                 return NULL;
2379         }
2380
2381         if (! (result = (PyStringObject *)
2382                          PyString_FromStringAndSize(NULL, result_len)) )
2383                 return NULL;
2384
2385         self_s = PyString_AS_STRING(self);
2386         result_s = PyString_AS_STRING(result);
2387
2388         /* TODO: special case single character, which doesn't need memcpy */
2389
2390         /* Lay the first one down (guaranteed this will occur) */
2391         Py_MEMCPY(result_s, to_s, to_len);
2392         result_s += to_len;
2393         count -= 1;
2394
2395         for (i=0; i<count; i++) {
2396                 *result_s++ = *self_s++;
2397                 Py_MEMCPY(result_s, to_s, to_len);
2398                 result_s += to_len;
2399         }
2400
2401         /* Copy the rest of the original string */
2402         Py_MEMCPY(result_s, self_s, self_len-i);
2403
2404         return result;
2405 }
2406
2407 /* Special case for deleting a single character */
2408 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2409 Py_LOCAL(PyStringObject *)
2410 replace_delete_single_character(PyStringObject *self,
2411                                 char from_c, Py_ssize_t maxcount)
2412 {
2413         char *self_s, *result_s;
2414         char *start, *next, *end;
2415         Py_ssize_t self_len, result_len;
2416         Py_ssize_t count;
2417         PyStringObject *result;
2418
2419         self_len = PyString_GET_SIZE(self);
2420         self_s = PyString_AS_STRING(self);
2421
2422         count = countchar(self_s, self_len, from_c, maxcount);
2423         if (count == 0) {
2424                 return return_self(self);
2425         }
2426
2427         result_len = self_len - count;  /* from_len == 1 */
2428         assert(result_len>=0);
2429
2430         if ( (result = (PyStringObject *)
2431                         PyString_FromStringAndSize(NULL, result_len)) == NULL)
2432                 return NULL;
2433         result_s = PyString_AS_STRING(result);
2434
2435         start = self_s;
2436         end = self_s + self_len;
2437         while (count-- > 0) {
2438                 next = findchar(start, end-start, from_c);
2439                 if (next == NULL)
2440                         break;
2441                 Py_MEMCPY(result_s, start, next-start);
2442                 result_s += (next-start);
2443                 start = next+1;
2444         }
2445         Py_MEMCPY(result_s, start, end-start);
2446
2447         return result;
2448 }
2449
2450 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2451
2452 Py_LOCAL(PyStringObject *)
2453 replace_delete_substring(PyStringObject *self,
2454                          const char *from_s, Py_ssize_t from_len,
2455                          Py_ssize_t maxcount) {
2456         char *self_s, *result_s;
2457         char *start, *next, *end;
2458         Py_ssize_t self_len, result_len;
2459         Py_ssize_t count, offset;
2460         PyStringObject *result;
2461
2462         self_len = PyString_GET_SIZE(self);
2463         self_s = PyString_AS_STRING(self);
2464
2465         count = stringlib_count(self_s, self_len,
2466                                 from_s, from_len,
2467                                 maxcount);
2468
2469         if (count == 0) {
2470                 /* no matches */
2471                 return return_self(self);
2472         }
2473
2474         result_len = self_len - (count * from_len);
2475         assert (result_len>=0);
2476
2477         if ( (result = (PyStringObject *)
2478               PyString_FromStringAndSize(NULL, result_len)) == NULL )
2479                 return NULL;
2480
2481         result_s = PyString_AS_STRING(result);
2482
2483         start = self_s;
2484         end = self_s + self_len;
2485         while (count-- > 0) {
2486                 offset = stringlib_find(start, end-start,
2487                                         from_s, from_len,
2488                                         0);
2489                 if (offset == -1)
2490                         break;
2491                 next = start + offset;
2492
2493                 Py_MEMCPY(result_s, start, next-start);
2494
2495                 result_s += (next-start);
2496                 start = next+from_len;
2497         }
2498         Py_MEMCPY(result_s, start, end-start);
2499         return result;
2500 }
2501
2502 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2503 Py_LOCAL(PyStringObject *)
2504 replace_single_character_in_place(PyStringObject *self,
2505                                   char from_c, char to_c,
2506                                   Py_ssize_t maxcount)
2507 {
2508         char *self_s, *result_s, *start, *end, *next;
2509         Py_ssize_t self_len;
2510         PyStringObject *result;
2511
2512         /* The result string will be the same size */
2513         self_s = PyString_AS_STRING(self);
2514         self_len = PyString_GET_SIZE(self);
2515
2516         next = findchar(self_s, self_len, from_c);
2517
2518         if (next == NULL) {
2519                 /* No matches; return the original string */
2520                 return return_self(self);
2521         }
2522
2523         /* Need to make a new string */
2524         result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2525         if (result == NULL)
2526                 return NULL;
2527         result_s = PyString_AS_STRING(result);
2528         Py_MEMCPY(result_s, self_s, self_len);
2529
2530         /* change everything in-place, starting with this one */
2531         start =  result_s + (next-self_s);
2532         *start = to_c;
2533         start++;
2534         end = result_s + self_len;
2535
2536         while (--maxcount > 0) {
2537                 next = findchar(start, end-start, from_c);
2538                 if (next == NULL)
2539                         break;
2540                 *next = to_c;
2541                 start = next+1;
2542         }
2543
2544         return result;
2545 }
2546
2547 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2548 Py_LOCAL(PyStringObject *)
2549 replace_substring_in_place(PyStringObject *self,
2550                            const char *from_s, Py_ssize_t from_len,
2551                            const char *to_s, Py_ssize_t to_len,
2552                            Py_ssize_t maxcount)
2553 {
2554         char *result_s, *start, *end;
2555         char *self_s;
2556         Py_ssize_t self_len, offset;
2557         PyStringObject *result;
2558
2559         /* The result string will be the same size */
2560
2561         self_s = PyString_AS_STRING(self);
2562         self_len = PyString_GET_SIZE(self);
2563
2564         offset = stringlib_find(self_s, self_len,
2565                                 from_s, from_len,
2566                                 0);
2567         if (offset == -1) {
2568                 /* No matches; return the original string */
2569                 return return_self(self);
2570         }
2571
2572         /* Need to make a new string */
2573         result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2574         if (result == NULL)
2575                 return NULL;
2576         result_s = PyString_AS_STRING(result);
2577         Py_MEMCPY(result_s, self_s, self_len);
2578
2579         /* change everything in-place, starting with this one */
2580         start =  result_s + offset;
2581         Py_MEMCPY(start, to_s, from_len);
2582         start += from_len;
2583         end = result_s + self_len;
2584
2585         while ( --maxcount > 0) {
2586                 offset = stringlib_find(start, end-start,
2587                                         from_s, from_len,
2588                                         0);
2589                 if (offset==-1)
2590                         break;
2591                 Py_MEMCPY(start+offset, to_s, from_len);
2592                 start += offset+from_len;
2593         }
2594
2595         return result;
2596 }
2597
2598 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2599 Py_LOCAL(PyStringObject *)
2600 replace_single_character(PyStringObject *self,
2601                          char from_c,
2602                          const char *to_s, Py_ssize_t to_len,
2603                          Py_ssize_t maxcount)
2604 {
2605         char *self_s, *result_s;
2606         char *start, *next, *end;
2607         Py_ssize_t self_len, result_len;
2608         Py_ssize_t count, product;
2609         PyStringObject *result;
2610
2611         self_s = PyString_AS_STRING(self);
2612         self_len = PyString_GET_SIZE(self);
2613
2614         count = countchar(self_s, self_len, from_c, maxcount);
2615         if (count == 0) {
2616                 /* no matches, return unchanged */
2617                 return return_self(self);
2618         }
2619
2620         /* use the difference between current and new, hence the "-1" */
2621         /*   result_len = self_len + count * (to_len-1)  */
2622         product = count * (to_len-1);
2623         if (product / (to_len-1) != count) {
2624                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2625                 return NULL;
2626         }
2627         result_len = self_len + product;
2628         if (result_len < 0) {
2629                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2630                 return NULL;
2631         }
2632
2633         if ( (result = (PyStringObject *)
2634               PyString_FromStringAndSize(NULL, result_len)) == NULL)
2635                 return NULL;
2636         result_s = PyString_AS_STRING(result);
2637
2638         start = self_s;
2639         end = self_s + self_len;
2640         while (count-- > 0) {
2641                 next = findchar(start, end-start, from_c);
2642                 if (next == NULL)
2643                         break;
2644
2645                 if (next == start) {
2646                         /* replace with the 'to' */
2647                         Py_MEMCPY(result_s, to_s, to_len);
2648                         result_s += to_len;
2649                         start += 1;
2650                 } else {
2651                         /* copy the unchanged old then the 'to' */
2652                         Py_MEMCPY(result_s, start, next-start);
2653                         result_s += (next-start);
2654                         Py_MEMCPY(result_s, to_s, to_len);
2655                         result_s += to_len;
2656                         start = next+1;
2657                 }
2658         }
2659         /* Copy the remainder of the remaining string */
2660         Py_MEMCPY(result_s, start, end-start);
2661
2662         return result;
2663 }
2664
2665 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2666 Py_LOCAL(PyStringObject *)
2667 replace_substring(PyStringObject *self,
2668                   const char *from_s, Py_ssize_t from_len,
2669                   const char *to_s, Py_ssize_t to_len,
2670                   Py_ssize_t maxcount) {
2671         char *self_s, *result_s;
2672         char *start, *next, *end;
2673         Py_ssize_t self_len, result_len;
2674         Py_ssize_t count, offset, product;
2675         PyStringObject *result;
2676
2677         self_s = PyString_AS_STRING(self);
2678         self_len = PyString_GET_SIZE(self);
2679
2680         count = stringlib_count(self_s, self_len,
2681                                 from_s, from_len,
2682                                 maxcount);
2683
2684         if (count == 0) {
2685                 /* no matches, return unchanged */
2686                 return return_self(self);
2687         }
2688
2689         /* Check for overflow */
2690         /*    result_len = self_len + count * (to_len-from_len) */
2691         product = count * (to_len-from_len);
2692         if (product / (to_len-from_len) != count) {
2693                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2694                 return NULL;
2695         }
2696         result_len = self_len + product;
2697         if (result_len < 0) {
2698                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2699                 return NULL;
2700         }
2701
2702         if ( (result = (PyStringObject *)
2703               PyString_FromStringAndSize(NULL, result_len)) == NULL)
2704                 return NULL;
2705         result_s = PyString_AS_STRING(result);
2706
2707         start = self_s;
2708         end = self_s + self_len;
2709         while (count-- > 0) {
2710                 offset = stringlib_find(start, end-start,
2711                                         from_s, from_len,
2712                                         0);
2713                 if (offset == -1)
2714                         break;
2715                 next = start+offset;
2716                 if (next == start) {
2717                         /* replace with the 'to' */
2718                         Py_MEMCPY(result_s, to_s, to_len);
2719                         result_s += to_len;
2720                         start += from_len;
2721                 } else {
2722                         /* copy the unchanged old then the 'to' */
2723                         Py_MEMCPY(result_s, start, next-start);
2724                         result_s += (next-start);
2725                         Py_MEMCPY(result_s, to_s, to_len);
2726                         result_s += to_len;
2727                         start = next+from_len;
2728                 }
2729         }
2730         /* Copy the remainder of the remaining string */
2731         Py_MEMCPY(result_s, start, end-start);
2732
2733         return result;
2734 }
2735
2736
2737 Py_LOCAL(PyStringObject *)
2738 replace(PyStringObject *self,
2739         const char *from_s, Py_ssize_t from_len,
2740         const char *to_s, Py_ssize_t to_len,
2741         Py_ssize_t maxcount)
2742 {
2743         if (maxcount < 0) {
2744                 maxcount = PY_SSIZE_T_MAX;
2745         } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2746                 /* nothing to do; return the original string */
2747                 return return_self(self);
2748         }
2749
2750         if (maxcount == 0 ||
2751             (from_len == 0 && to_len == 0)) {
2752                 /* nothing to do; return the original string */
2753                 return return_self(self);
2754         }
2755
2756         /* Handle zero-length special cases */
2757
2758         if (from_len == 0) {
2759                 /* insert the 'to' string everywhere.   */
2760                 /*    >>> "Python".replace("", ".")     */
2761                 /*    '.P.y.t.h.o.n.'                   */
2762                 return replace_interleave(self, to_s, to_len, maxcount);
2763         }
2764
2765         /* Except for "".replace("", "A") == "A" there is no way beyond this */
2766         /* point for an empty self string to generate a non-empty string */
2767         /* Special case so the remaining code always gets a non-empty string */
2768         if (PyString_GET_SIZE(self) == 0) {
2769                 return return_self(self);
2770         }
2771
2772         if (to_len == 0) {
2773                 /* delete all occurances of 'from' string */
2774                 if (from_len == 1) {
2775                         return replace_delete_single_character(
2776                                 self, from_s[0], maxcount);
2777                 } else {
2778                         return replace_delete_substring(self, from_s, from_len, maxcount);
2779                 }
2780         }
2781
2782         /* Handle special case where both strings have the same length */
2783
2784         if (from_len == to_len) {
2785                 if (from_len == 1) {
2786                         return replace_single_character_in_place(
2787                                 self,
2788                                 from_s[0],
2789                                 to_s[0],
2790                                 maxcount);
2791                 } else {
2792                         return replace_substring_in_place(
2793                                 self, from_s, from_len, to_s, to_len, maxcount);
2794                 }
2795         }
2796
2797         /* Otherwise use the more generic algorithms */
2798         if (from_len == 1) {
2799                 return replace_single_character(self, from_s[0],
2800                                                 to_s, to_len, maxcount);
2801         } else {
2802                 /* len('from')>=2, len('to')>=1 */
2803                 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2804         }
2805 }
2806
2807 PyDoc_STRVAR(replace__doc__,
2808 "S.replace (old, new[, count]) -> string\n\
2809 \n\
2810 Return a copy of string S with all occurrences of substring\n\
2811 old replaced by new.  If the optional argument count is\n\
2812 given, only the first count occurrences are replaced.");
2813
2814 static PyObject *
2815 string_replace(PyStringObject *self, PyObject *args)
2816 {
2817         Py_ssize_t count = -1;
2818         PyObject *from, *to;
2819         const char *from_s, *to_s;
2820         Py_ssize_t from_len, to_len;
2821
2822         if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2823                 return NULL;
2824
2825         if (PyString_Check(from)) {
2826                 from_s = PyString_AS_STRING(from);
2827                 from_len = PyString_GET_SIZE(from);
2828         }
2829 #ifdef Py_USING_UNICODE
2830         if (PyUnicode_Check(from))
2831                 return PyUnicode_Replace((PyObject *)self,
2832                                          from, to, count);
2833 #endif
2834         else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2835                 return NULL;
2836
2837         if (PyString_Check(to)) {
2838                 to_s = PyString_AS_STRING(to);
2839                 to_len = PyString_GET_SIZE(to);
2840         }
2841 #ifdef Py_USING_UNICODE
2842         else if (PyUnicode_Check(to))
2843                 return PyUnicode_Replace((PyObject *)self,
2844                                          from, to, count);
2845 #endif
2846         else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2847                 return NULL;
2848
2849         return (PyObject *)replace((PyStringObject *) self,
2850                                    from_s, from_len,
2851                                    to_s, to_len, count);
2852 }
2853
2854 /** End DALKE **/
2855
2856 /* Matches the end (direction >= 0) or start (direction < 0) of self
2857  * against substr, using the start and end arguments. Returns
2858  * -1 on error, 0 if not found and 1 if found.
2859  */
2860 Py_LOCAL(int)
2861 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2862                   Py_ssize_t end, int direction)
2863 {
2864         Py_ssize_t len = PyString_GET_SIZE(self);
2865         Py_ssize_t slen;
2866         const char* sub;
2867         const char* str;
2868
2869         if (PyString_Check(substr)) {
2870                 sub = PyString_AS_STRING(substr);
2871                 slen = PyString_GET_SIZE(substr);
2872         }
2873 #ifdef Py_USING_UNICODE
2874         else if (PyUnicode_Check(substr))
2875                 return PyUnicode_Tailmatch((PyObject *)self,
2876                                            substr, start, end, direction);
2877 #endif
2878         else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2879                 return -1;
2880         str = PyString_AS_STRING(self);
2881
2882         ADJUST_INDICES(start, end, len);
2883
2884         if (direction < 0) {
2885                 /* startswith */
2886                 if (start+slen > len)
2887                         return 0;
2888         } else {
2889                 /* endswith */
2890                 if (end-start < slen || start > len)
2891                         return 0;
2892
2893                 if (end-slen > start)
2894                         start = end - slen;
2895         }
2896         if (end-start >= slen)
2897                 return ! memcmp(str+start, sub, slen);
2898         return 0;
2899 }
2900
2901
2902 PyDoc_STRVAR(startswith__doc__,
2903 "S.startswith(prefix[, start[, end]]) -> bool\n\
2904 \n\
2905 Return True if S starts with the specified prefix, False otherwise.\n\
2906 With optional start, test S beginning at that position.\n\
2907 With optional end, stop comparing S at that position.\n\
2908 prefix can also be a tuple of strings to try.");
2909
2910 static PyObject *
2911 string_startswith(PyStringObject *self, PyObject *args)
2912 {
2913         Py_ssize_t start = 0;
2914         Py_ssize_t end = PY_SSIZE_T_MAX;
2915         PyObject *subobj;
2916         int result;
2917
2918         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2919                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2920                 return NULL;
2921         if (PyTuple_Check(subobj)) {
2922                 Py_ssize_t i;
2923                 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2924                         result = _string_tailmatch(self,
2925                                         PyTuple_GET_ITEM(subobj, i),
2926                                         start, end, -1);
2927                         if (result == -1)
2928                                 return NULL;
2929                         else if (result) {
2930                                 Py_RETURN_TRUE;
2931                         }
2932                 }
2933                 Py_RETURN_FALSE;
2934         }
2935         result = _string_tailmatch(self, subobj, start, end, -1);
2936         if (result == -1)
2937                 return NULL;
2938         else
2939                 return PyBool_FromLong(result);
2940 }
2941
2942
2943 PyDoc_STRVAR(endswith__doc__,
2944 "S.endswith(suffix[, start[, end]]) -> bool\n\
2945 \n\
2946 Return True if S ends with the specified suffix, False otherwise.\n\
2947 With optional start, test S beginning at that position.\n\
2948 With optional end, stop comparing S at that position.\n\
2949 suffix can also be a tuple of strings to try.");
2950
2951 static PyObject *
2952 string_endswith(PyStringObject *self, PyObject *args)
2953 {
2954         Py_ssize_t start = 0;
2955         Py_ssize_t end = PY_SSIZE_T_MAX;
2956         PyObject *subobj;
2957         int result;
2958
2959         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2960                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2961                 return NULL;
2962         if (PyTuple_Check(subobj)) {
2963                 Py_ssize_t i;
2964                 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2965                         result = _string_tailmatch(self,
2966                                         PyTuple_GET_ITEM(subobj, i),
2967                                         start, end, +1);
2968                         if (result == -1)
2969                                 return NULL;
2970                         else if (result) {
2971                                 Py_RETURN_TRUE;
2972                         }
2973                 }
2974                 Py_RETURN_FALSE;
2975         }
2976         result = _string_tailmatch(self, subobj, start, end, +1);
2977         if (result == -1)
2978                 return NULL;
2979         else
2980                 return PyBool_FromLong(result);
2981 }
2982
2983
2984 PyDoc_STRVAR(encode__doc__,
2985 "S.encode([encoding[,errors]]) -> object\n\
2986 \n\
2987 Encodes S using the codec registered for encoding. encoding defaults\n\
2988 to the default encoding. errors may be given to set a different error\n\
2989 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2990 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2991 'xmlcharrefreplace' as well as any other name registered with\n\
2992 codecs.register_error that is able to handle UnicodeEncodeErrors.");
2993
2994 static PyObject *
2995 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
2996 {
2997     static char *kwlist[] = {"encoding", "errors", 0};
2998     char *encoding = NULL;
2999     char *errors = NULL;
3000     PyObject *v;
3001
3002     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3003                                      kwlist, &encoding, &errors))
3004         return NULL;
3005     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3006     if (v == NULL)
3007         goto onError;
3008     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3009         PyErr_Format(PyExc_TypeError,
3010                      "encoder did not return a string/unicode object "
3011                      "(type=%.400s)",
3012                      Py_TYPE(v)->tp_name);
3013         Py_DECREF(v);
3014         return NULL;
3015     }
3016     return v;
3017
3018  onError:
3019     return NULL;
3020 }
3021
3022
3023 PyDoc_STRVAR(decode__doc__,
3024 "S.decode([encoding[,errors]]) -> object\n\
3025 \n\
3026 Decodes S using the codec registered for encoding. encoding defaults\n\
3027 to the default encoding. errors may be given to set a different error\n\
3028 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3029 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3030 as well as any other name registered with codecs.register_error that is\n\
3031 able to handle UnicodeDecodeErrors.");
3032
3033 static PyObject *
3034 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3035 {
3036     static char *kwlist[] = {"encoding", "errors", 0};
3037     char *encoding = NULL;
3038     char *errors = NULL;
3039     PyObject *v;
3040
3041     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3042                                      kwlist, &encoding, &errors))
3043         return NULL;
3044     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3045     if (v == NULL)
3046         goto onError;
3047     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3048         PyErr_Format(PyExc_TypeError,
3049                      "decoder did not return a string/unicode object "
3050                      "(type=%.400s)",
3051                      Py_TYPE(v)->tp_name);
3052         Py_DECREF(v);
3053         return NULL;
3054     }
3055     return v;
3056
3057  onError:
3058     return NULL;
3059 }
3060
3061
3062 PyDoc_STRVAR(expandtabs__doc__,
3063 "S.expandtabs([tabsize]) -> string\n\
3064 \n\
3065 Return a copy of S where all tab characters are expanded using spaces.\n\
3066 If tabsize is not given, a tab size of 8 characters is assumed.");
3067
3068 static PyObject*
3069 string_expandtabs(PyStringObject *self, PyObject *args)
3070 {
3071     const char *e, *p, *qe;
3072     char *q;
3073     Py_ssize_t i, j, incr;
3074     PyObject *u;
3075     int tabsize = 8;
3076
3077     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3078         return NULL;
3079
3080     /* First pass: determine size of output string */
3081     i = 0; /* chars up to and including most recent \n or \r */
3082     j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3083     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3084     for (p = PyString_AS_STRING(self); p < e; p++)
3085         if (*p == '\t') {
3086             if (tabsize > 0) {
3087                 incr = tabsize - (j % tabsize);
3088                 if (j > PY_SSIZE_T_MAX - incr)
3089                     goto overflow1;
3090                 j += incr;
3091             }
3092         }
3093         else {
3094             if (j > PY_SSIZE_T_MAX - 1)
3095                 goto overflow1;
3096             j++;
3097             if (*p == '\n' || *p == '\r') {
3098                 if (i > PY_SSIZE_T_MAX - j)
3099                     goto overflow1;
3100                 i += j;
3101                 j = 0;
3102             }
3103         }
3104
3105     if (i > PY_SSIZE_T_MAX - j)
3106         goto overflow1;
3107
3108     /* Second pass: create output string and fill it */
3109     u = PyString_FromStringAndSize(NULL, i + j);
3110     if (!u)
3111         return NULL;
3112
3113     j = 0; /* same as in first pass */
3114     q = PyString_AS_STRING(u); /* next output char */
3115     qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3116
3117     for (p = PyString_AS_STRING(self); p < e; p++)
3118         if (*p == '\t') {
3119             if (tabsize > 0) {
3120                 i = tabsize - (j % tabsize);
3121                 j += i;
3122                 while (i--) {
3123                     if (q >= qe)
3124                         goto overflow2;
3125                     *q++ = ' ';
3126                 }
3127             }
3128         }
3129         else {
3130             if (q >= qe)
3131                 goto overflow2;
3132             *q++ = *p;
3133             j++;
3134             if (*p == '\n' || *p == '\r')
3135                 j = 0;
3136         }
3137
3138     return u;
3139
3140   overflow2:
3141     Py_DECREF(u);
3142   overflow1:
3143     PyErr_SetString(PyExc_OverflowError, "new string is too long");
3144     return NULL;
3145 }
3146
3147 Py_LOCAL_INLINE(PyObject *)
3148 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3149 {
3150     PyObject *u;
3151
3152     if (left < 0)
3153         left = 0;
3154     if (right < 0)
3155         right = 0;
3156
3157     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3158         Py_INCREF(self);
3159         return (PyObject *)self;
3160     }
3161
3162     u = PyString_FromStringAndSize(NULL,
3163                                    left + PyString_GET_SIZE(self) + right);
3164     if (u) {
3165         if (left)
3166             memset(PyString_AS_STRING(u), fill, left);
3167         Py_MEMCPY(PyString_AS_STRING(u) + left,
3168                PyString_AS_STRING(self),
3169                PyString_GET_SIZE(self));
3170         if (right)
3171             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3172                    fill, right);
3173     }
3174
3175     return u;
3176 }
3177
3178 PyDoc_STRVAR(ljust__doc__,
3179 "S.ljust(width[, fillchar]) -> string\n"
3180 "\n"
3181 "Return S left-justified in a string of length width. Padding is\n"
3182 "done using the specified fill character (default is a space).");
3183
3184 static PyObject *
3185 string_ljust(PyStringObject *self, PyObject *args)
3186 {
3187     Py_ssize_t width;
3188     char fillchar = ' ';
3189
3190     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3191         return NULL;
3192
3193     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3194         Py_INCREF(self);
3195         return (PyObject*) self;
3196     }
3197
3198     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3199 }
3200
3201
3202 PyDoc_STRVAR(rjust__doc__,
3203 "S.rjust(width[, fillchar]) -> string\n"
3204 "\n"
3205 "Return S right-justified in a string of length width. Padding is\n"
3206 "done using the specified fill character (default is a space)");
3207
3208 static PyObject *
3209 string_rjust(PyStringObject *self, PyObject *args)
3210 {
3211     Py_ssize_t width;
3212     char fillchar = ' ';
3213
3214     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3215         return NULL;
3216
3217     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3218         Py_INCREF(self);
3219         return (PyObject*) self;
3220     }
3221
3222     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3223 }
3224
3225
3226 PyDoc_STRVAR(center__doc__,
3227 "S.center(width[, fillchar]) -> string\n"
3228 "\n"
3229 "Return S centered in a string of length width. Padding is\n"
3230 "done using the specified fill character (default is a space)");
3231
3232 static PyObject *
3233 string_center(PyStringObject *self, PyObject *args)
3234 {
3235     Py_ssize_t marg, left;
3236     Py_ssize_t width;
3237     char fillchar = ' ';
3238
3239     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3240         return NULL;
3241
3242     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3243         Py_INCREF(self);
3244         return (PyObject*) self;
3245     }
3246
3247     marg = width - PyString_GET_SIZE(self);
3248     left = marg / 2 + (marg & width & 1);
3249
3250     return pad(self, left, marg - left, fillchar);
3251 }
3252
3253 PyDoc_STRVAR(zfill__doc__,
3254 "S.zfill(width) -> string\n"
3255 "\n"
3256 "Pad a numeric string S with zeros on the left, to fill a field\n"
3257 "of the specified width.  The string S is never truncated.");
3258
3259 static PyObject *
3260 string_zfill(PyStringObject *self, PyObject *args)
3261 {
3262     Py_ssize_t fill;
3263     PyObject *s;
3264     char *p;
3265     Py_ssize_t width;
3266
3267     if (!PyArg_ParseTuple(args, "n:zfill", &width))
3268         return NULL;
3269
3270     if (PyString_GET_SIZE(self) >= width) {
3271         if (PyString_CheckExact(self)) {
3272             Py_INCREF(self);
3273             return (PyObject*) self;
3274         }
3275         else
3276             return PyString_FromStringAndSize(
3277                 PyString_AS_STRING(self),
3278                 PyString_GET_SIZE(self)
3279             );
3280     }
3281
3282     fill = width - PyString_GET_SIZE(self);
3283
3284     s = pad(self, fill, 0, '0');
3285
3286     if (s == NULL)
3287         return NULL;
3288
3289     p = PyString_AS_STRING(s);
3290     if (p[fill] == '+' || p[fill] == '-') {
3291         /* move sign to beginning of string */
3292         p[0] = p[fill];
3293         p[fill] = '0';
3294     }
3295
3296     return (PyObject*) s;
3297 }
3298
3299 PyDoc_STRVAR(isspace__doc__,
3300 "S.isspace() -> bool\n\
3301 \n\
3302 Return True if all characters in S are whitespace\n\
3303 and there is at least one character in S, False otherwise.");
3304
3305 static PyObject*
3306 string_isspace(PyStringObject *self)
3307 {
3308     register const unsigned char *p
3309         = (unsigned char *) PyString_AS_STRING(self);
3310     register const unsigned char *e;
3311
3312     /* Shortcut for single character strings */
3313     if (PyString_GET_SIZE(self) == 1 &&
3314         isspace(*p))
3315         return PyBool_FromLong(1);
3316
3317     /* Special case for empty strings */
3318     if (PyString_GET_SIZE(self) == 0)
3319         return PyBool_FromLong(0);
3320
3321     e = p + PyString_GET_SIZE(self);
3322     for (; p < e; p++) {
3323         if (!isspace(*p))
3324             return PyBool_FromLong(0);
3325     }
3326     return PyBool_FromLong(1);
3327 }
3328
3329
3330 PyDoc_STRVAR(isalpha__doc__,
3331 "S.isalpha() -> bool\n\
3332 \n\
3333 Return True if all characters in S are alphabetic\n\
3334 and there is at least one character in S, False otherwise.");
3335
3336 static PyObject*
3337 string_isalpha(PyStringObject *self)
3338 {
3339     register const unsigned char *p
3340         = (unsigned char *) PyString_AS_STRING(self);
3341     register const unsigned char *e;
3342
3343     /* Shortcut for single character strings */
3344     if (PyString_GET_SIZE(self) == 1 &&
3345         isalpha(*p))
3346         return PyBool_FromLong(1);
3347
3348     /* Special case for empty strings */
3349     if (PyString_GET_SIZE(self) == 0)
3350         return PyBool_FromLong(0);
3351
3352     e = p + PyString_GET_SIZE(self);
3353     for (; p < e; p++) {
3354         if (!isalpha(*p))
3355             return PyBool_FromLong(0);
3356     }
3357     return PyBool_FromLong(1);
3358 }
3359
3360
3361 PyDoc_STRVAR(isalnum__doc__,
3362 "S.isalnum() -> bool\n\
3363 \n\
3364 Return True if all characters in S are alphanumeric\n\
3365 and there is at least one character in S, False otherwise.");
3366
3367 static PyObject*
3368 string_isalnum(PyStringObject *self)
3369 {
3370     register const unsigned char *p
3371         = (unsigned char *) PyString_AS_STRING(self);
3372     register const unsigned char *e;
3373
3374     /* Shortcut for single character strings */
3375     if (PyString_GET_SIZE(self) == 1 &&
3376         isalnum(*p))
3377         return PyBool_FromLong(1);
3378
3379     /* Special case for empty strings */
3380     if (PyString_GET_SIZE(self) == 0)
3381         return PyBool_FromLong(0);
3382
3383     e = p + PyString_GET_SIZE(self);
3384     for (; p < e; p++) {
3385         if (!isalnum(*p))
3386             return PyBool_FromLong(0);
3387     }
3388     return PyBool_FromLong(1);
3389 }
3390
3391
3392 PyDoc_STRVAR(isdigit__doc__,
3393 "S.isdigit() -> bool\n\
3394 \n\
3395 Return True if all characters in S are digits\n\
3396 and there is at least one character in S, False otherwise.");
3397
3398 static PyObject*
3399 string_isdigit(PyStringObject *self)
3400 {
3401     register const unsigned char *p
3402         = (unsigned char *) PyString_AS_STRING(self);
3403     register const unsigned char *e;
3404
3405     /* Shortcut for single character strings */
3406     if (PyString_GET_SIZE(self) == 1 &&
3407         isdigit(*p))
3408         return PyBool_FromLong(1);
3409
3410     /* Special case for empty strings */
3411     if (PyString_GET_SIZE(self) == 0)
3412         return PyBool_FromLong(0);
3413
3414     e = p + PyString_GET_SIZE(self);
3415     for (; p < e; p++) {
3416         if (!isdigit(*p))
3417             return PyBool_FromLong(0);
3418     }
3419     return PyBool_FromLong(1);
3420 }
3421
3422
3423 PyDoc_STRVAR(islower__doc__,
3424 "S.islower() -> bool\n\
3425 \n\
3426 Return True if all cased characters in S are lowercase and there is\n\
3427 at least one cased character in S, False otherwise.");
3428
3429 static PyObject*
3430 string_islower(PyStringObject *self)
3431 {
3432     register const unsigned char *p
3433         = (unsigned char *) PyString_AS_STRING(self);
3434     register const unsigned char *e;
3435     int cased;
3436
3437     /* Shortcut for single character strings */
3438     if (PyString_GET_SIZE(self) == 1)
3439         return PyBool_FromLong(islower(*p) != 0);
3440
3441     /* Special case for empty strings */
3442     if (PyString_GET_SIZE(self) == 0)
3443         return PyBool_FromLong(0);
3444
3445     e = p + PyString_GET_SIZE(self);
3446     cased = 0;
3447     for (; p < e; p++) {
3448         if (isupper(*p))
3449             return PyBool_FromLong(0);
3450         else if (!cased && islower(*p))
3451             cased = 1;
3452     }
3453     return PyBool_FromLong(cased);
3454 }
3455
3456
3457 PyDoc_STRVAR(isupper__doc__,
3458 "S.isupper() -> bool\n\
3459 \n\
3460 Return True if all cased characters in S are uppercase and there is\n\
3461 at least one cased character in S, False otherwise.");
3462
3463 static PyObject*
3464 string_isupper(PyStringObject *self)
3465 {
3466     register const unsigned char *p
3467         = (unsigned char *) PyString_AS_STRING(self);
3468     register const unsigned char *e;
3469     int cased;
3470
3471     /* Shortcut for single character strings */
3472     if (PyString_GET_SIZE(self) == 1)
3473         return PyBool_FromLong(isupper(*p) != 0);
3474
3475     /* Special case for empty strings */
3476     if (PyString_GET_SIZE(self) == 0)
3477         return PyBool_FromLong(0);
3478
3479     e = p + PyString_GET_SIZE(self);
3480     cased = 0;
3481     for (; p < e; p++) {
3482         if (islower(*p))
3483             return PyBool_FromLong(0);
3484         else if (!cased && isupper(*p))
3485             cased = 1;
3486     }
3487     return PyBool_FromLong(cased);
3488 }
3489
3490
3491 PyDoc_STRVAR(istitle__doc__,
3492 "S.istitle() -> bool\n\
3493 \n\
3494 Return True if S is a titlecased string and there is at least one\n\
3495 character in S, i.e. uppercase characters may only follow uncased\n\
3496 characters and lowercase characters only cased ones. Return False\n\
3497 otherwise.");
3498
3499 static PyObject*
3500 string_istitle(PyStringObject *self, PyObject *uncased)
3501 {
3502     register const unsigned char *p
3503         = (unsigned char *) PyString_AS_STRING(self);
3504     register const unsigned char *e;
3505     int cased, previous_is_cased;
3506
3507     /* Shortcut for single character strings */
3508     if (PyString_GET_SIZE(self) == 1)
3509         return PyBool_FromLong(isupper(*p) != 0);
3510
3511     /* Special case for empty strings */
3512     if (PyString_GET_SIZE(self) == 0)
3513         return PyBool_FromLong(0);
3514
3515     e = p + PyString_GET_SIZE(self);
3516     cased = 0;
3517     previous_is_cased = 0;
3518     for (; p < e; p++) {
3519         register const unsigned char ch = *p;
3520
3521         if (isupper(ch)) {
3522             if (previous_is_cased)
3523                 return PyBool_FromLong(0);
3524             previous_is_cased = 1;
3525             cased = 1;
3526         }
3527         else if (islower(ch)) {
3528             if (!previous_is_cased)
3529                 return PyBool_FromLong(0);
3530             previous_is_cased = 1;
3531             cased = 1;
3532         }
3533         else
3534             previous_is_cased = 0;
3535     }
3536     return PyBool_FromLong(cased);
3537 }
3538
3539
3540 PyDoc_STRVAR(splitlines__doc__,
3541 "S.splitlines([keepends]) -> list of strings\n\
3542 \n\
3543 Return a list of the lines in S, breaking at line boundaries.\n\
3544 Line breaks are not included in the resulting list unless keepends\n\
3545 is given and true.");
3546
3547 static PyObject*
3548 string_splitlines(PyStringObject *self, PyObject *args)
3549 {
3550     int keepends = 0;
3551
3552     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3553         return NULL;
3554
3555     return stringlib_splitlines(
3556         (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3557         keepends
3558         );
3559 }
3560
3561 PyDoc_STRVAR(sizeof__doc__,
3562 "S.__sizeof__() -> size of S in memory, in bytes");
3563
3564 static PyObject *
3565 string_sizeof(PyStringObject *v)
3566 {
3567         Py_ssize_t res;
3568         res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3569         return PyInt_FromSsize_t(res);
3570 }
3571
3572 static PyObject *
3573 string_getnewargs(PyStringObject *v)
3574 {
3575         return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3576 }
3577
3578
3579 #include "stringlib/string_format.h"
3580
3581 PyDoc_STRVAR(format__doc__,
3582 "S.format(*args, **kwargs) -> unicode\n\
3583 \n\
3584 ");
3585
3586 static PyObject *
3587 string__format__(PyObject* self, PyObject* args)
3588 {
3589     PyObject *format_spec;
3590     PyObject *result = NULL;
3591     PyObject *tmp = NULL;
3592
3593     /* If 2.x, convert format_spec to the same type as value */
3594     /* This is to allow things like u''.format('') */
3595     if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3596         goto done;
3597     if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3598         PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3599                      "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3600         goto done;
3601     }
3602     tmp = PyObject_Str(format_spec);
3603     if (tmp == NULL)
3604         goto done;
3605     format_spec = tmp;
3606
3607     result = _PyBytes_FormatAdvanced(self,
3608                                      PyString_AS_STRING(format_spec),
3609                                      PyString_GET_SIZE(format_spec));
3610 done:
3611     Py_XDECREF(tmp);
3612     return result;
3613 }
3614
3615 PyDoc_STRVAR(p_format__doc__,
3616 "S.__format__(format_spec) -> unicode\n\
3617 \n\
3618 ");
3619
3620
/* Method table for the str type; referenced from the tp_methods slot of
   PyString_Type.  Each entry gives the Python-visible name, the C
   implementation, the calling-convention flags and (optionally) the
   docstring.  Entries with only three fields have no docstring.  The
   table is terminated by the all-NULL sentinel entry. */
static PyMethodDef
string_methods[] = {
        /* Counterparts of the obsolete stropmodule functions; except
           string.maketrans(). */
        {"join", (PyCFunction)string_join, METH_O, join__doc__},
        {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
        {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
        {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
        {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
        {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
        {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
        {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
        {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
        {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
        {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
        {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
        {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
         capitalize__doc__},
        {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
        {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
         endswith__doc__},
        {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
        {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
        {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
        {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
        {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
        {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
        {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
        {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
        {"rpartition", (PyCFunction)string_rpartition, METH_O,
         rpartition__doc__},
        {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
         startswith__doc__},
        {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
        {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
         swapcase__doc__},
        {"translate", (PyCFunction)string_translate, METH_VARARGS,
         translate__doc__},
        {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
        {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
        {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
        {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
        {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
        /* New-style ("{}") formatting support. */
        {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
        {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
        /* Underscore-prefixed formatter helpers; registered without
           docstrings. */
        {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
        {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
        {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
        {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
        {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
         expandtabs__doc__},
        {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
         splitlines__doc__},
        {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
         sizeof__doc__},
        {"__getnewargs__",      (PyCFunction)string_getnewargs, METH_NOARGS},
        {NULL,     NULL}                     /* sentinel */
};
3679
3680 static PyObject *
3681 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3682
3683 static PyObject *
3684 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3685 {
3686         PyObject *x = NULL;
3687         static char *kwlist[] = {"object", 0};
3688
3689         if (type != &PyString_Type)
3690                 return str_subtype_new(type, args, kwds);
3691         if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3692                 return NULL;
3693         if (x == NULL)
3694                 return PyString_FromString("");
3695         return PyObject_Str(x);
3696 }
3697
3698 static PyObject *
3699 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3700 {
3701         PyObject *tmp, *pnew;
3702         Py_ssize_t n;
3703
3704         assert(PyType_IsSubtype(type, &PyString_Type));
3705         tmp = string_new(&PyString_Type, args, kwds);
3706         if (tmp == NULL)
3707                 return NULL;
3708         assert(PyString_CheckExact(tmp));
3709         n = PyString_GET_SIZE(tmp);
3710         pnew = type->tp_alloc(type, n);
3711         if (pnew != NULL) {
3712                 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3713                 ((PyStringObject *)pnew)->ob_shash =
3714                         ((PyStringObject *)tmp)->ob_shash;
3715                 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3716         }
3717         Py_DECREF(tmp);
3718         return pnew;
3719 }
3720
3721 static PyObject *
3722 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3723 {
3724         PyErr_SetString(PyExc_TypeError,
3725                         "The basestring type cannot be instantiated");
3726         return NULL;
3727 }
3728
3729 static PyObject *
3730 string_mod(PyObject *v, PyObject *w)
3731 {
3732         if (!PyString_Check(v)) {
3733                 Py_INCREF(Py_NotImplemented);
3734                 return Py_NotImplemented;
3735         }
3736         return PyString_Format(v, w);
3737 }
3738
3739 PyDoc_STRVAR(basestring_doc,
3740 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3741
/* Number-protocol table for str (the tp_as_number slot of PyString_Type).
   Only nb_remainder is filled in, so that `string % args' is routed to
   string_mod(); the slots after it are left implicitly zero. */
static PyNumberMethods string_as_number = {
        0,                      /*nb_add*/
        0,                      /*nb_subtract*/
        0,                      /*nb_multiply*/
        0,                      /*nb_divide*/
        string_mod,             /*nb_remainder*/
};
3749
3750
/* Type object for `basestring'.  It is used as tp_base of PyString_Type
   and cannot be instantiated: its tp_new (basestring_new) always raises
   TypeError.  Nearly every other slot is left empty. */
PyTypeObject PyBaseString_Type = {
        PyVarObject_HEAD_INIT(&PyType_Type, 0)
        "basestring",                           /* tp_name */
        0,                                      /* tp_basicsize */
        0,                                      /* tp_itemsize */
        0,                                      /* tp_dealloc */
        0,                                      /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        0,                                      /* tp_repr */
        0,                                      /* tp_as_number */
        0,                                      /* tp_as_sequence */
        0,                                      /* tp_as_mapping */
        0,                                      /* tp_hash */
        0,                                      /* tp_call */
        0,                                      /* tp_str */
        0,                                      /* tp_getattro */
        0,                                      /* tp_setattro */
        0,                                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
        basestring_doc,                         /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        0,                                      /* tp_richcompare */
        0,                                      /* tp_weaklistoffset */
        0,                                      /* tp_iter */
        0,                                      /* tp_iternext */
        0,                                      /* tp_methods */
        0,                                      /* tp_members */
        0,                                      /* tp_getset */
        &PyBaseObject_Type,                     /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        0,                                      /* tp_init */
        0,                                      /* tp_alloc */
        basestring_new,                         /* tp_new */
        0,                                      /* tp_free */
};
3792
3793 PyDoc_STRVAR(string_doc,
3794 "str(object) -> string\n\
3795 \n\
3796 Return a nice string representation of the object.\n\
3797 If the argument is a string, the return value is the same object.");
3798
/* Type object for `str'.  Instances are variable-sized: the basic size is
   PyStringObject_SIZE and each item is one char.  The type derives from
   PyBaseString_Type, may itself be subclassed (Py_TPFLAGS_BASETYPE), and
   wires in the number, sequence, mapping and buffer tables plus
   string_methods. */
PyTypeObject PyString_Type = {
        PyVarObject_HEAD_INIT(&PyType_Type, 0)
        "str",                                  /* tp_name */
        PyStringObject_SIZE,                    /* tp_basicsize */
        sizeof(char),                           /* tp_itemsize */
        string_dealloc,                         /* tp_dealloc */
        (printfunc)string_print,                /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        string_repr,                            /* tp_repr */
        &string_as_number,                      /* tp_as_number */
        &string_as_sequence,                    /* tp_as_sequence */
        &string_as_mapping,                     /* tp_as_mapping */
        (hashfunc)string_hash,                  /* tp_hash */
        0,                                      /* tp_call */
        string_str,                             /* tp_str */
        PyObject_GenericGetAttr,                /* tp_getattro */
        0,                                      /* tp_setattro */
        &string_as_buffer,                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
                Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
                Py_TPFLAGS_HAVE_NEWBUFFER,      /* tp_flags */
        string_doc,                             /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        (richcmpfunc)string_richcompare,        /* tp_richcompare */
        0,                                      /* tp_weaklistoffset */
        0,                                      /* tp_iter */
        0,                                      /* tp_iternext */
        string_methods,                         /* tp_methods */
        0,                                      /* tp_members */
        0,                                      /* tp_getset */
        &PyBaseString_Type,                     /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        0,                                      /* tp_init */
        0,                                      /* tp_alloc */
        string_new,                             /* tp_new */
        PyObject_Del,                           /* tp_free */
};
3842
3843 void
3844 PyString_Concat(register PyObject **pv, register PyObject *w)
3845 {
3846         register PyObject *v;
3847         if (*pv == NULL)
3848                 return;
3849         if (w == NULL || !PyString_Check(*pv)) {
3850                 Py_DECREF(*pv);
3851                 *pv = NULL;
3852                 return;
3853         }
3854         v = string_concat((PyStringObject *) *pv, w);
3855         Py_DECREF(*pv);
3856         *pv = v;
3857 }
3858
3859 void
3860 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3861 {
3862         PyString_Concat(pv, w);
3863         Py_XDECREF(w);
3864 }
3865
3866
3867 /* The following function breaks the notion that strings are immutable:
3868    it changes the size of a string.  We get away with this only if there
3869    is only one module referencing the object.  You can also think of it
3870    as creating a new string object and destroying the old one, only
3871    more efficiently.  In any case, don't use this if the string may
3872    already be known to some other part of the code...
3873    Note that if there's not enough memory to resize the string, the original
3874    string object at *pv is deallocated, *pv is set to NULL, an "out of
3875    memory" exception is set, and -1 is returned.  Else (on success) 0 is
3876    returned, and the value in *pv may or may not be the same as on input.
3877    As always, an extra byte is allocated for a trailing \0 byte (newsize
3878    does *not* include that), and a trailing \0 byte is stored.
3879 */
3880
/* Resize the string object at *pv in place to hold `newsize' characters.
 *
 * pv       address of the (only) reference to the string to resize
 * newsize  new length, not counting the trailing \0 byte
 *
 * Returns 0 on success; *pv then points at the (possibly moved) object.
 * Returns -1 with an exception set on failure; *pv is then NULL and the
 * original object has been released.
 */
int
_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
{
        register PyObject *v;
        register PyStringObject *sv;
        v = *pv;
        /* Refuse anything that is not a string, is referenced from more
           than one place, is interned, or is asked to take a negative
           size. */
        if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
            PyString_CHECK_INTERNED(v)) {
                *pv = 0;
                Py_DECREF(v);
                PyErr_BadInternalCall();
                return -1;
        }
        /* XXX UNREF/NEWREF interface should be more symmetrical */
        /* Unregister the object before the block may move; it is
           registered again below with _Py_NewReference(). */
        _Py_DEC_REFTOTAL;
        _Py_ForgetReference(v);
        *pv = (PyObject *)
                PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
        if (*pv == NULL) {
                /* Reallocation failed: free the old block and report it. */
                PyObject_Del(v);
                PyErr_NoMemory();
                return -1;
        }
        _Py_NewReference(*pv);
        sv = (PyStringObject *) *pv;
        Py_SIZE(sv) = newsize;
        sv->ob_sval[newsize] = '\0';    /* keep the buffer NUL-terminated */
        sv->ob_shash = -1;      /* invalidate cached hash value */
        return 0;
}
3911
3912 /* Helpers for formatstring */
3913
3914 Py_LOCAL_INLINE(PyObject *)
3915 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3916 {
3917         Py_ssize_t argidx = *p_argidx;
3918         if (argidx < arglen) {
3919                 (*p_argidx)++;
3920                 if (arglen < 0)
3921                         return args;
3922                 else
3923                         return PyTuple_GetItem(args, argidx);
3924         }
3925         PyErr_SetString(PyExc_TypeError,
3926                         "not enough arguments for format string");
3927         return NULL;
3928 }
3929
/* Format codes
 *
 * Bit flags, one per flag character; being distinct bits they can be
 * OR-ed together into a single `flags' value and tested with `&'.
 *
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
3942
3943 /* Returns a new reference to a PyString object, or NULL on failure. */
3944
3945 static PyObject *
3946 formatfloat(PyObject *v, int flags, int prec, int type)
3947 {
3948         char *p;
3949         PyObject *result;
3950         double x;
3951
3952         x = PyFloat_AsDouble(v);
3953         if (x == -1.0 && PyErr_Occurred()) {
3954                 PyErr_Format(PyExc_TypeError, "float argument required, "
3955                              "not %.200s", Py_TYPE(v)->tp_name);
3956                 return NULL;
3957         }
3958
3959         if (prec < 0)
3960                 prec = 6;
3961
3962         p = PyOS_double_to_string(x, type, prec,
3963                                   (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3964
3965         if (p == NULL)
3966                 return NULL;
3967         result = PyString_FromStringAndSize(p, strlen(p));
3968         PyMem_Free(p);
3969         return result;
3970 }
3971
3972 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3973  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
3974  * Python's regular ints.
3975  * Return value:  a new PyString*, or NULL if error.
3976  *  .  *pbuf is set to point into it,
3977  *     *plen set to the # of chars following that.
3978  *     Caller must decref it when done using pbuf.
3979  *     The string starting at *pbuf is of the form
3980  *         "-"? ("0x" | "0X")? digit+
3981  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
3982  *         set in flags.  The case of hex digits will be correct,
3983  *     There will be at least prec digits, zero-filled on the left if
3984  *         necessary to get that many.
3985  * val          object to be converted
3986  * flags        bitmask of format flags; only F_ALT is looked at
3987  * prec         minimum number of digits; 0-fill on left if needed
3988  * type         a character in [duoxX]; u acts the same as d
3989  *
3990  * CAUTION:  o, x and X conversions on regular ints can never
3991  * produce a '-' sign, but can for Python's unbounded ints.
3992  */
3993 PyObject*
3994 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3995                      char **pbuf, int *plen)
3996 {
3997         PyObject *result = NULL;
3998         char *buf;
3999         Py_ssize_t i;
4000         int sign;       /* 1 if '-', else 0 */
4001         int len;        /* number of characters */
4002         Py_ssize_t llen;
4003         int numdigits;  /* len == numnondigits + numdigits */
4004         int numnondigits = 0;
4005
4006         switch (type) {
4007         case 'd':
4008         case 'u':
4009                 result = Py_TYPE(val)->tp_str(val);
4010                 break;
4011         case 'o':
4012                 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4013                 break;
4014         case 'x':
4015         case 'X':
4016                 numnondigits = 2;
4017                 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4018                 break;
4019         default:
4020                 assert(!"'type' not in [duoxX]");
4021         }
4022         if (!result)
4023                 return NULL;
4024
4025         buf = PyString_AsString(result);
4026         if (!buf) {
4027                 Py_DECREF(result);
4028                 return NULL;
4029         }
4030
4031         /* To modify the string in-place, there can only be one reference. */
4032         if (Py_REFCNT(result) != 1) {
4033                 PyErr_BadInternalCall();
4034                 return NULL;
4035         }
4036         llen = PyString_Size(result);
4037         if (llen > INT_MAX) {
4038                 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4039                 return NULL;
4040         }
4041         len = (int)llen;
4042         if (buf[len-1] == 'L') {
4043                 --len;
4044                 buf[len] = '\0';
4045         }
4046         sign = buf[0] == '-';
4047         numnondigits += sign;
4048         numdigits = len - numnondigits;
4049         assert(numdigits > 0);
4050
4051         /* Get rid of base marker unless F_ALT */
4052         if ((flags & F_ALT) == 0) {
4053                 /* Need to skip 0x, 0X or 0. */
4054                 int skipped = 0;
4055                 switch (type) {
4056                 case 'o':
4057                         assert(buf[sign] == '0');
4058                         /* If 0 is only digit, leave it alone. */
4059                         if (numdigits > 1) {
4060                                 skipped = 1;
4061                                 --numdigits;
4062                         }
4063                         break;
4064                 case 'x':
4065                 case 'X':
4066                         assert(buf[sign] == '0');
4067                         assert(buf[sign + 1] == 'x');
4068                         skipped = 2;
4069                         numnondigits -= 2;
4070                         break;
4071                 }
4072                 if (skipped) {
4073                         buf += skipped;
4074                         len -= skipped;
4075                         if (sign)
4076                                 buf[0] = '-';
4077                 }
4078                 assert(len == numnondigits + numdigits);
4079                 assert(numdigits > 0);
4080         }
4081
4082         /* Fill with leading zeroes to meet minimum width. */
4083         if (prec > numdigits) {
4084                 PyObject *r1 = PyString_FromStringAndSize(NULL,
4085                                         numnondigits + prec);
4086                 char *b1;
4087                 if (!r1) {
4088                         Py_DECREF(result);
4089                         return NULL;
4090                 }
4091                 b1 = PyString_AS_STRING(r1);
4092                 for (i = 0; i < numnondigits; ++i)
4093                         *b1++ = *buf++;
4094                 for (i = 0; i < prec - numdigits; i++)
4095                         *b1++ = '0';
4096                 for (i = 0; i < numdigits; i++)
4097                         *b1++ = *buf++;
4098                 *b1 = '\0';
4099                 Py_DECREF(result);
4100                 result = r1;
4101                 buf = PyString_AS_STRING(result);
4102                 len = numnondigits + prec;
4103         }
4104
4105         /* Fix up case for hex conversions. */
4106         if (type == 'X') {
4107                 /* Need to convert all lower case letters to upper case.
4108                    and need to convert 0x to 0X (and -0x to -0X). */
4109                 for (i = 0; i < len; i++)
4110                         if (buf[i] >= 'a' && buf[i] <= 'x')
4111                                 buf[i] -= 'a'-'A';
4112         }
4113         *pbuf = buf;
4114         *plen = len;
4115         return result;
4116 }
4117
4118 Py_LOCAL_INLINE(int)
4119 formatint(char *buf, size_t buflen, int flags,
4120           int prec, int type, PyObject *v)
4121 {
4122         /* fmt = '%#.' + `prec` + 'l' + `type`
4123            worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4124            + 1 + 1 = 24 */
4125         char fmt[64];   /* plenty big enough! */
4126         char *sign;
4127         long x;
4128
4129         x = PyInt_AsLong(v);
4130         if (x == -1 && PyErr_Occurred()) {
4131                 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4132                              Py_TYPE(v)->tp_name);
4133                 return -1;
4134         }
4135         if (x < 0 && type == 'u') {
4136                 type = 'd';
4137         }
4138         if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4139                 sign = "-";
4140         else
4141                 sign = "";
4142         if (prec < 0)
4143                 prec = 1;
4144
4145         if ((flags & F_ALT) &&
4146             (type == 'x' || type == 'X')) {
4147                 /* When converting under %#x or %#X, there are a number
4148                  * of issues that cause pain:
4149                  * - when 0 is being converted, the C standard leaves off
4150                  *   the '0x' or '0X', which is inconsistent with other
4151                  *   %#x/%#X conversions and inconsistent with Python's
4152                  *   hex() function
4153                  * - there are platforms that violate the standard and
4154                  *   convert 0 with the '0x' or '0X'
4155                  *   (Metrowerks, Compaq Tru64)
4156                  * - there are platforms that give '0x' when converting
4157                  *   under %#X, but convert 0 in accordance with the
4158                  *   standard (OS/2 EMX)
4159                  *
4160                  * We can achieve the desired consistency by inserting our
4161                  * own '0x' or '0X' prefix, and substituting %x/%X in place
4162                  * of %#x/%#X.
4163                  *
4164                  * Note that this is the same approach as used in
4165                  * formatint() in unicodeobject.c
4166                  */
4167                 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4168                               sign, type, prec, type);
4169         }
4170         else {
4171                 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4172                               sign, (flags&F_ALT) ? "#" : "",
4173                               prec, type);
4174         }
4175
4176         /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4177          * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4178          */
4179         if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4180                 PyErr_SetString(PyExc_OverflowError,
4181                     "formatted integer is too long (precision too large?)");
4182                 return -1;
4183         }
4184         if (sign[0])
4185                 PyOS_snprintf(buf, buflen, fmt, -x);
4186         else
4187                 PyOS_snprintf(buf, buflen, fmt, x);
4188         return (int)strlen(buf);
4189 }
4190
4191 Py_LOCAL_INLINE(int)
4192 formatchar(char *buf, size_t buflen, PyObject *v)
4193 {
4194         /* presume that the buffer is at least 2 characters long */
4195         if (PyString_Check(v)) {
4196                 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4197                         return -1;
4198         }
4199         else {
4200                 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4201                         return -1;
4202         }
4203         buf[1] = '\0';
4204         return 1;
4205 }
4206
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size, in bytes, of the scratch buffer in which
   ints and chars are formatted (see formatint() and formatchar()).
   XXX This is a magic number.  formatint() rejects precisions that do
   not fit the buffer it is given and formatchar() only needs two bytes,
   but a better solution may be to malloc a buffer of appropriate size
   for each format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
4216
4217 PyObject *
4218 PyString_Format(PyObject *format, PyObject *args)
4219 {
4220         char *fmt, *res;
4221         Py_ssize_t arglen, argidx;
4222         Py_ssize_t reslen, rescnt, fmtcnt;
4223         int args_owned = 0;
4224         PyObject *result, *orig_args;
4225 #ifdef Py_USING_UNICODE
4226         PyObject *v, *w;
4227 #endif
4228         PyObject *dict = NULL;
4229         if (format == NULL || !PyString_Check(format) || args == NULL) {
4230                 PyErr_BadInternalCall();
4231                 return NULL;
4232         }
4233         orig_args = args;
4234         fmt = PyString_AS_STRING(format);
4235         fmtcnt = PyString_GET_SIZE(format);
4236         reslen = rescnt = fmtcnt + 100;
4237         result = PyString_FromStringAndSize((char *)NULL, reslen);
4238         if (result == NULL)
4239                 return NULL;
4240         res = PyString_AsString(result);
4241         if (PyTuple_Check(args)) {
4242                 arglen = PyTuple_GET_SIZE(args);
4243                 argidx = 0;
4244         }
4245         else {
4246                 arglen = -1;
4247                 argidx = -2;
4248         }
4249         if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4250             !PyObject_TypeCheck(args, &PyBaseString_Type))
4251                 dict = args;
4252         while (--fmtcnt >= 0) {
4253                 if (*fmt != '%') {
4254                         if (--rescnt < 0) {
4255                                 rescnt = fmtcnt + 100;
4256                                 reslen += rescnt;
4257                                 if (_PyString_Resize(&result, reslen) < 0)
4258                                         return NULL;
4259                                 res = PyString_AS_STRING(result)
4260                                         + reslen - rescnt;
4261                                 --rescnt;
4262                         }
4263                         *res++ = *fmt++;
4264                 }
4265                 else {
4266                         /* Got a format specifier */
4267                         int flags = 0;
4268                         Py_ssize_t width = -1;
4269                         int prec = -1;
4270                         int c = '\0';
4271                         int fill;
4272                         int isnumok;
4273                         PyObject *v = NULL;
4274                         PyObject *temp = NULL;
4275                         char *pbuf;
4276                         int sign;
4277                         Py_ssize_t len;
4278                         char formatbuf[FORMATBUFLEN];
4279                              /* For format{int,char}() */
4280 #ifdef Py_USING_UNICODE
4281                         char *fmt_start = fmt;
4282                         Py_ssize_t argidx_start = argidx;
4283 #endif
4284
4285                         fmt++;
4286                         if (*fmt == '(') {
4287                                 char *keystart;
4288                                 Py_ssize_t keylen;
4289                                 PyObject *key;
4290                                 int pcount = 1;
4291
4292                                 if (dict == NULL) {
4293                                         PyErr_SetString(PyExc_TypeError,
4294                                                  "format requires a mapping");
4295                                         goto error;
4296                                 }
4297                                 ++fmt;
4298                                 --fmtcnt;
4299                                 keystart = fmt;
4300                                 /* Skip over balanced parentheses */
4301                                 while (pcount > 0 && --fmtcnt >= 0) {
4302                                         if (*fmt == ')')
4303                                                 --pcount;
4304                                         else if (*fmt == '(')
4305                                                 ++pcount;
4306                                         fmt++;
4307                                 }
4308                                 keylen = fmt - keystart - 1;
4309                                 if (fmtcnt < 0 || pcount > 0) {
4310                                         PyErr_SetString(PyExc_ValueError,
4311                                                    "incomplete format key");
4312                                         goto error;
4313                                 }
4314                                 key = PyString_FromStringAndSize(keystart,
4315                                                                  keylen);
4316                                 if (key == NULL)
4317                                         goto error;
4318                                 if (args_owned) {
4319                                         Py_DECREF(args);
4320                                         args_owned = 0;
4321                                 }
4322                                 args = PyObject_GetItem(dict, key);
4323                                 Py_DECREF(key);
4324                                 if (args == NULL) {
4325                                         goto error;
4326                                 }
4327                                 args_owned = 1;
4328                                 arglen = -1;
4329                                 argidx = -2;
4330                         }
4331                         while (--fmtcnt >= 0) {
4332                                 switch (c = *fmt++) {
4333                                 case '-': flags |= F_LJUST; continue;
4334                                 case '+': flags |= F_SIGN; continue;
4335                                 case ' ': flags |= F_BLANK; continue;
4336                                 case '#': flags |= F_ALT; continue;
4337                                 case '0': flags |= F_ZERO; continue;
4338                                 }
4339                                 break;
4340                         }
4341                         if (c == '*') {
4342                                 v = getnextarg(args, arglen, &argidx);
4343                                 if (v == NULL)
4344                                         goto error;
4345                                 if (!PyInt_Check(v)) {
4346                                         PyErr_SetString(PyExc_TypeError,
4347                                                         "* wants int");
4348                                         goto error;
4349                                 }
4350                                 width = PyInt_AsLong(v);
4351                                 if (width < 0) {
4352                                         flags |= F_LJUST;
4353                                         width = -width;
4354                                 }
4355                                 if (--fmtcnt >= 0)
4356                                         c = *fmt++;
4357                         }
4358                         else if (c >= 0 && isdigit(c)) {
4359                                 width = c - '0';
4360                                 while (--fmtcnt >= 0) {
4361                                         c = Py_CHARMASK(*fmt++);
4362                                         if (!isdigit(c))
4363                                                 break;
4364                                         if ((width*10) / 10 != width) {
4365                                                 PyErr_SetString(
4366                                                         PyExc_ValueError,
4367                                                         "width too big");
4368                                                 goto error;
4369                                         }
4370                                         width = width*10 + (c - '0');
4371                                 }
4372                         }
4373                         if (c == '.') {
4374                                 prec = 0;
4375                                 if (--fmtcnt >= 0)
4376                                         c = *fmt++;
4377                                 if (c == '*') {
4378                                         v = getnextarg(args, arglen, &argidx);
4379                                         if (v == NULL)
4380                                                 goto error;
4381                                         if (!PyInt_Check(v)) {
4382                                                 PyErr_SetString(
4383                                                         PyExc_TypeError,
4384                                                         "* wants int");
4385                                                 goto error;
4386                                         }
4387                                         prec = PyInt_AsLong(v);
4388                                         if (prec < 0)
4389                                                 prec = 0;
4390                                         if (--fmtcnt >= 0)
4391                                                 c = *fmt++;
4392                                 }
4393                                 else if (c >= 0 && isdigit(c)) {
4394                                         prec = c - '0';
4395                                         while (--fmtcnt >= 0) {
4396                                                 c = Py_CHARMASK(*fmt++);
4397                                                 if (!isdigit(c))
4398                                                         break;
4399                                                 if ((prec*10) / 10 != prec) {
4400                                                         PyErr_SetString(
4401                                                             PyExc_ValueError,
4402                                                             "prec too big");
4403                                                         goto error;
4404                                                 }
4405                                                 prec = prec*10 + (c - '0');
4406                                         }
4407                                 }
4408                         } /* prec */
4409                         if (fmtcnt >= 0) {
4410                                 if (c == 'h' || c == 'l' || c == 'L') {
4411                                         if (--fmtcnt >= 0)
4412                                                 c = *fmt++;
4413                                 }
4414                         }
4415                         if (fmtcnt < 0) {
4416                                 PyErr_SetString(PyExc_ValueError,
4417                                                 "incomplete format");
4418                                 goto error;
4419                         }
4420                         if (c != '%') {
4421                                 v = getnextarg(args, arglen, &argidx);
4422                                 if (v == NULL)
4423                                         goto error;
4424                         }
4425                         sign = 0;
4426                         fill = ' ';
4427                         switch (c) {
4428                         case '%':
4429                                 pbuf = "%";
4430                                 len = 1;
4431                                 break;
4432                         case 's':
4433 #ifdef Py_USING_UNICODE
4434                                 if (PyUnicode_Check(v)) {
4435                                         fmt = fmt_start;
4436                                         argidx = argidx_start;
4437                                         goto unicode;
4438                                 }
4439 #endif
4440                                 temp = _PyObject_Str(v);
4441 #ifdef Py_USING_UNICODE
4442                                 if (temp != NULL && PyUnicode_Check(temp)) {
4443                                         Py_DECREF(temp);
4444                                         fmt = fmt_start;
4445                                         argidx = argidx_start;
4446                                         goto unicode;
4447                                 }
4448 #endif
4449                                 /* Fall through */
4450                         case 'r':
4451                                 if (c == 'r')
4452                                         temp = PyObject_Repr(v);
4453                                 if (temp == NULL)
4454                                         goto error;
4455                                 if (!PyString_Check(temp)) {
4456                                         PyErr_SetString(PyExc_TypeError,
4457                                           "%s argument has non-string str()");
4458                                         Py_DECREF(temp);
4459                                         goto error;
4460                                 }
4461                                 pbuf = PyString_AS_STRING(temp);
4462                                 len = PyString_GET_SIZE(temp);
4463                                 if (prec >= 0 && len > prec)
4464                                         len = prec;
4465                                 break;
4466                         case 'i':
4467                         case 'd':
4468                         case 'u':
4469                         case 'o':
4470                         case 'x':
4471                         case 'X':
4472                                 if (c == 'i')
4473                                         c = 'd';
4474                                 isnumok = 0;
4475                                 if (PyNumber_Check(v)) {
4476                                         PyObject *iobj=NULL;
4477
4478                                         if (PyInt_Check(v) || (PyLong_Check(v))) {
4479                                                 iobj = v;
4480                                                 Py_INCREF(iobj);
4481                                         }
4482                                         else {
4483                                                 iobj = PyNumber_Int(v);
4484                                                 if (iobj==NULL) iobj = PyNumber_Long(v);
4485                                         }
4486                                         if (iobj!=NULL) {
4487                                                 if (PyInt_Check(iobj)) {
4488                                                         isnumok = 1;
4489                                                         pbuf = formatbuf;
4490                                                         len = formatint(pbuf,
4491                                                                         sizeof(formatbuf),
4492                                                                         flags, prec, c, iobj);
4493                                                         Py_DECREF(iobj);
4494                                                         if (len < 0)
4495                                                                 goto error;
4496                                                         sign = 1;
4497                                                 }
4498                                                 else if (PyLong_Check(iobj)) {
4499                                                         int ilen;
4500
4501                                                         isnumok = 1;
4502                                                         temp = _PyString_FormatLong(iobj, flags,
4503                                                                 prec, c, &pbuf, &ilen);
4504                                                         Py_DECREF(iobj);
4505                                                         len = ilen;
4506                                                         if (!temp)
4507                                                                 goto error;
4508                                                         sign = 1;
4509                                                 }
4510                                                 else {
4511                                                         Py_DECREF(iobj);
4512                                                 }
4513                                         }
4514                                 }
4515                                 if (!isnumok) {
4516                                         PyErr_Format(PyExc_TypeError,
4517                                             "%%%c format: a number is required, "
4518                                             "not %.200s", c, Py_TYPE(v)->tp_name);
4519                                         goto error;
4520                                 }
4521                                 if (flags & F_ZERO)
4522                                         fill = '0';
4523                                 break;
4524                         case 'e':
4525                         case 'E':
4526                         case 'f':
4527                         case 'F':
4528                         case 'g':
4529                         case 'G':
4530                                 temp = formatfloat(v, flags, prec, c);
4531                                 if (temp == NULL)
4532                                         goto error;
4533                                 pbuf = PyString_AS_STRING(temp);
4534                                 len = PyString_GET_SIZE(temp);
4535                                 sign = 1;
4536                                 if (flags & F_ZERO)
4537                                         fill = '0';
4538                                 break;
4539                         case 'c':
4540 #ifdef Py_USING_UNICODE
4541                                 if (PyUnicode_Check(v)) {
4542                                         fmt = fmt_start;
4543                                         argidx = argidx_start;
4544                                         goto unicode;
4545                                 }
4546 #endif
4547                                 pbuf = formatbuf;
4548                                 len = formatchar(pbuf, sizeof(formatbuf), v);
4549                                 if (len < 0)
4550                                         goto error;
4551                                 break;
4552                         default:
4553                                 PyErr_Format(PyExc_ValueError,
4554                                   "unsupported format character '%c' (0x%x) "
4555                                   "at index %zd",
4556                                   c, c,
4557                                   (Py_ssize_t)(fmt - 1 -
4558                                                PyString_AsString(format)));
4559                                 goto error;
4560                         }
4561                         if (sign) {
4562                                 if (*pbuf == '-' || *pbuf == '+') {
4563                                         sign = *pbuf++;
4564                                         len--;
4565                                 }
4566                                 else if (flags & F_SIGN)
4567                                         sign = '+';
4568                                 else if (flags & F_BLANK)
4569                                         sign = ' ';
4570                                 else
4571                                         sign = 0;
4572                         }
4573                         if (width < len)
4574                                 width = len;
4575                         if (rescnt - (sign != 0) < width) {
4576                                 reslen -= rescnt;
4577                                 rescnt = width + fmtcnt + 100;
4578                                 reslen += rescnt;
4579                                 if (reslen < 0) {
4580                                         Py_DECREF(result);
4581                                         Py_XDECREF(temp);
4582                                         return PyErr_NoMemory();
4583                                 }
4584                                 if (_PyString_Resize(&result, reslen) < 0) {
4585                                         Py_XDECREF(temp);
4586                                         return NULL;
4587                                 }
4588                                 res = PyString_AS_STRING(result)
4589                                         + reslen - rescnt;
4590                         }
4591                         if (sign) {
4592                                 if (fill != ' ')
4593                                         *res++ = sign;
4594                                 rescnt--;
4595                                 if (width > len)
4596                                         width--;
4597                         }
4598                         if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4599                                 assert(pbuf[0] == '0');
4600                                 assert(pbuf[1] == c);
4601                                 if (fill != ' ') {
4602                                         *res++ = *pbuf++;
4603                                         *res++ = *pbuf++;
4604                                 }
4605                                 rescnt -= 2;
4606                                 width -= 2;
4607                                 if (width < 0)
4608                                         width = 0;
4609                                 len -= 2;
4610                         }
4611                         if (width > len && !(flags & F_LJUST)) {
4612                                 do {
4613                                         --rescnt;
4614                                         *res++ = fill;
4615                                 } while (--width > len);
4616                         }
4617                         if (fill == ' ') {
4618                                 if (sign)
4619                                         *res++ = sign;
4620                                 if ((flags & F_ALT) &&
4621                                     (c == 'x' || c == 'X')) {
4622                                         assert(pbuf[0] == '0');
4623                                         assert(pbuf[1] == c);
4624                                         *res++ = *pbuf++;
4625                                         *res++ = *pbuf++;
4626                                 }
4627                         }
4628                         Py_MEMCPY(res, pbuf, len);
4629                         res += len;
4630                         rescnt -= len;
4631                         while (--width >= len) {
4632                                 --rescnt;
4633                                 *res++ = ' ';
4634                         }
4635                         if (dict && (argidx < arglen) && c != '%') {
4636                                 PyErr_SetString(PyExc_TypeError,
4637                                            "not all arguments converted during string formatting");
4638                                 Py_XDECREF(temp);
4639                                 goto error;
4640                         }
4641                         Py_XDECREF(temp);
4642                 } /* '%' */
4643         } /* until end */
4644         if (argidx < arglen && !dict) {
4645                 PyErr_SetString(PyExc_TypeError,
4646                                 "not all arguments converted during string formatting");
4647                 goto error;
4648         }
4649         if (args_owned) {
4650                 Py_DECREF(args);
4651         }
4652         _PyString_Resize(&result, reslen - rescnt);
4653         return result;
4654
4655 #ifdef Py_USING_UNICODE
4656  unicode:
4657         if (args_owned) {
4658                 Py_DECREF(args);
4659                 args_owned = 0;
4660         }
4661         /* Fiddle args right (remove the first argidx arguments) */
4662         if (PyTuple_Check(orig_args) && argidx > 0) {
4663                 PyObject *v;
4664                 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4665                 v = PyTuple_New(n);
4666                 if (v == NULL)
4667                         goto error;
4668                 while (--n >= 0) {
4669                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4670                         Py_INCREF(w);
4671                         PyTuple_SET_ITEM(v, n, w);
4672                 }
4673                 args = v;
4674         } else {
4675                 Py_INCREF(orig_args);
4676                 args = orig_args;
4677         }
4678         args_owned = 1;
4679         /* Take what we have of the result and let the Unicode formatting
4680            function format the rest of the input. */
4681         rescnt = res - PyString_AS_STRING(result);
4682         if (_PyString_Resize(&result, rescnt))
4683                 goto error;
4684         fmtcnt = PyString_GET_SIZE(format) - \
4685                  (fmt - PyString_AS_STRING(format));
4686         format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4687         if (format == NULL)
4688                 goto error;
4689         v = PyUnicode_Format(format, args);
4690         Py_DECREF(format);
4691         if (v == NULL)
4692                 goto error;
4693         /* Paste what we have (result) to what the Unicode formatting
4694            function returned (v) and return the result (or error) */
4695         w = PyUnicode_Concat(result, v);
4696         Py_DECREF(result);
4697         Py_DECREF(v);
4698         Py_DECREF(args);
4699         return w;
4700 #endif /* Py_USING_UNICODE */
4701
4702  error:
4703         Py_DECREF(result);
4704         if (args_owned) {
4705                 Py_DECREF(args);
4706         }
4707         return NULL;
4708 }
4709
4710 void
4711 PyString_InternInPlace(PyObject **p)
4712 {
4713         register PyStringObject *s = (PyStringObject *)(*p);
4714         PyObject *t;
4715         if (s == NULL || !PyString_Check(s))
4716                 Py_FatalError("PyString_InternInPlace: strings only please!");
4717         /* If it's a string subclass, we don't really know what putting
4718            it in the interned dict might do. */
4719         if (!PyString_CheckExact(s))
4720                 return;
4721         if (PyString_CHECK_INTERNED(s))
4722                 return;
4723         if (interned == NULL) {
4724                 interned = PyDict_New();
4725                 if (interned == NULL) {
4726                         PyErr_Clear(); /* Don't leave an exception */
4727                         return;
4728                 }
4729         }
4730         t = PyDict_GetItem(interned, (PyObject *)s);
4731         if (t) {
4732                 Py_INCREF(t);
4733                 Py_DECREF(*p);
4734                 *p = t;
4735                 return;
4736         }
4737
4738         if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4739                 PyErr_Clear();
4740                 return;
4741         }
4742         /* The two references in interned are not counted by refcnt.
4743            The string deallocator will take care of this */
4744         Py_REFCNT(s) -= 2;
4745         PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4746 }
4747
4748 void
4749 PyString_InternImmortal(PyObject **p)
4750 {
4751         PyString_InternInPlace(p);
4752         if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4753                 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4754                 Py_INCREF(*p);
4755         }
4756 }
4757
4758
4759 PyObject *
4760 PyString_InternFromString(const char *cp)
4761 {
4762         PyObject *s = PyString_FromString(cp);
4763         if (s == NULL)
4764                 return NULL;
4765         PyString_InternInPlace(&s);
4766         return s;
4767 }
4768
4769 void
4770 PyString_Fini(void)
4771 {
4772         int i;
4773         for (i = 0; i < UCHAR_MAX + 1; i++) {
4774                 Py_XDECREF(characters[i]);
4775                 characters[i] = NULL;
4776         }
4777         Py_XDECREF(nullstring);
4778         nullstring = NULL;
4779 }
4780
4781 void _Py_ReleaseInternedStrings(void)
4782 {
4783         PyObject *keys;
4784         PyStringObject *s;
4785         Py_ssize_t i, n;
4786         Py_ssize_t immortal_size = 0, mortal_size = 0;
4787
4788         if (interned == NULL || !PyDict_Check(interned))
4789                 return;
4790         keys = PyDict_Keys(interned);
4791         if (keys == NULL || !PyList_Check(keys)) {
4792                 PyErr_Clear();
4793                 return;
4794         }
4795
4796         /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4797            detector, interned strings are not forcibly deallocated; rather, we
4798            give them their stolen references back, and then clear and DECREF
4799            the interned dict. */
4800
4801         n = PyList_GET_SIZE(keys);
4802         fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4803                 n);
4804         for (i = 0; i < n; i++) {
4805                 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4806                 switch (s->ob_sstate) {
4807                 case SSTATE_NOT_INTERNED:
4808                         /* XXX Shouldn't happen */
4809                         break;
4810                 case SSTATE_INTERNED_IMMORTAL:
4811                         Py_REFCNT(s) += 1;
4812                         immortal_size += Py_SIZE(s);
4813                         break;
4814                 case SSTATE_INTERNED_MORTAL:
4815                         Py_REFCNT(s) += 2;
4816                         mortal_size += Py_SIZE(s);
4817                         break;
4818                 default:
4819                         Py_FatalError("Inconsistent interned string state.");
4820                 }
4821                 s->ob_sstate = SSTATE_NOT_INTERNED;
4822         }
4823         fprintf(stderr, "total size of all interned strings: "
4824                         "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4825                         "mortal/immortal\n", mortal_size, immortal_size);
4826         Py_DECREF(keys);
4827         PyDict_Clear(interned);
4828         Py_DECREF(interned);
4829         interned = NULL;
4830 }