Objects/stringobject.c

   1 /* String object implementation */
   2
   3 #define PY_SSIZE_T_CLEAN
   4
   5 #include "Python.h"
   6
   7 #include <ctype.h>
   8
   9 #ifdef COUNT_ALLOCS
  10 int null_strings, one_strings;  /* bumped whenever a cached empty / one-byte string is handed out */
  11 #endif
  12 /* Caches consulted by the constructors below: one slot per byte value, plus the empty string. */
  13 static PyStringObject *characters[UCHAR_MAX + 1];
  14 static PyStringObject *nullstring;
  15
  16 /* This dictionary holds all interned strings.  Note that references to
  17    strings in this dictionary are *not* counted in the string's ob_refcnt.
  18    When the interned string reaches a refcnt of 0 the string deallocation
  19    function will delete the reference from this dictionary.
  20
  21    Another way to look at this is to say that the actual reference
  22    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
  23 */
  24 static PyObject *interned;
  25
  26 /*
  27    For both PyString_FromString() and PyString_FromStringAndSize(), `size'
  28    denotes the number of characters to allocate, not counting any null
  29    terminating character.  For PyString_FromString() it is strlen(str).
  30
  31    For PyString_FromStringAndSize(), `str' is either NULL or points to at
  32    least `size' bytes, which need not be null-terminated.  (It is therefore
  33    safe to construct a substring by calling
  34    `PyString_FromStringAndSize(origstring, substrlen)'.)  If `str' is NULL,
  35    `size+1' bytes are allocated (the last one set to the null terminating
  36    character) and you can fill in the data yourself.  If `str' is non-NULL
  37    the result must be treated as immutable and you must not fill in nor
  38    alter the data yourself, since the strings may be shared.
  39
  40    `op->ob_size' will contain the number of bytes allocated for string data,
  41    not counting the null terminating character, i.e. `size'.
  42
  43    PyString_FromStringAndSize() returns a new reference, or NULL with an
  44    exception set: SystemError for a negative `size', OverflowError when the
  45    object header plus `size' would overflow, or the PyErr_NoMemory() error.
  46 */
  47 PyObject *
  48 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
  49 {
  50         register PyStringObject *op;
  51         if (size < 0) {
  52                 PyErr_SetString(PyExc_SystemError,
  53                     "Negative size passed to PyString_FromStringAndSize");
  54                 return NULL;
  55         }
  56         if (size == 0 && (op = nullstring) != NULL) {
  57 #ifdef COUNT_ALLOCS
  58                 null_strings++;
  59 #endif
  60                 Py_INCREF(op);
  61                 return (PyObject *)op;
  62         }
  63         if (size == 1 && str != NULL &&
  64             (op = characters[*str & UCHAR_MAX]) != NULL) {
  65 #ifdef COUNT_ALLOCS
  66                 one_strings++;
  67 #endif
  68                 Py_INCREF(op);
  69                 return (PyObject *)op;
  70         }
  71
  72         /* the header size plus `size' must not wrap around in the malloc */
  73         if ((size_t)size > (size_t)PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
  74                 PyErr_SetString(PyExc_OverflowError, "string is too large");
  75                 return NULL;
  76         }
  77         /* Inline PyObject_NewVar */
  78         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
  79         if (op == NULL)
  80                 return PyErr_NoMemory();
  81         PyObject_INIT_VAR(op, &PyString_Type, size);
  82         op->ob_shash = -1;
  83         op->ob_sstate = SSTATE_NOT_INTERNED;
  84         if (str != NULL)
  85                 Py_MEMCPY(op->ob_sval, str, size);
  86         op->ob_sval[size] = '\0';
  87         /* share short strings: intern, then remember in the matching cache */
  88         if (size == 0 || (size == 1 && str != NULL)) {
  89                 PyObject *t = (PyObject *)op;
  90                 PyString_InternInPlace(&t);
  91                 op = (PyStringObject *)t;
  92                 if (size == 0)
  93                         nullstring = op;
  94                 else
  95                         characters[*str & UCHAR_MAX] = op;
  96                 Py_INCREF(op);
  97         }
  98         return (PyObject *) op;
  99 }
 100
 101 /* Return a new reference to a string copied from the null-terminated
 102    `str' (which must not be NULL), or NULL with an exception set.  Cached
 103    empty and one-byte strings are reused when present. */
 104 PyObject *
 105 PyString_FromString(const char *str)
 106 {
 107         register size_t size;
 108         register PyStringObject *op;
 109
 110         assert(str != NULL);
 111         size = strlen(str);
 112         /* leave room for the object header added below, so the sum cannot wrap */
 113         if (size > (size_t)PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
 114                 PyErr_SetString(PyExc_OverflowError,
 115                         "string is too long for a Python string");
 116                 return NULL;
 117         }
 118         if (size == 0 && (op = nullstring) != NULL) {
 119 #ifdef COUNT_ALLOCS
 120                 null_strings++;
 121 #endif
 122                 Py_INCREF(op);
 123                 return (PyObject *)op;
 124         }
 125         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 126 #ifdef COUNT_ALLOCS
 127                 one_strings++;
 128 #endif
 129                 Py_INCREF(op);
 130                 return (PyObject *)op;
 131         }
 132         /* Inline PyObject_NewVar */
 133         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
 134         if (op == NULL)
 135                 return PyErr_NoMemory();
 136         PyObject_INIT_VAR(op, &PyString_Type, size);
 137         op->ob_shash = -1;
 138         op->ob_sstate = SSTATE_NOT_INTERNED;
 139         Py_MEMCPY(op->ob_sval, str, size+1);  /* +1 copies the terminator */
 140         /* share short strings: intern, then remember in the matching cache */
 141         if (size <= 1) {
 142                 PyObject *t = (PyObject *)op;
 143                 PyString_InternInPlace(&t);
 144                 op = (PyStringObject *)t;
 145                 if (size == 0)
 146                         nullstring = op;
 147                 else
 148                         characters[*str & UCHAR_MAX] = op;
 149                 Py_INCREF(op);
 150         }
 151         return (PyObject *) op;
 152 }
 153
 154 PyObject *
 155 PyString_FromFormatV(const char *format, va_list vargs)
 156 {
 157         va_list count;
 158         Py_ssize_t n = 0;
 159         const char* f;
 160         char *s;
 161         PyObject* string;
 162         /* Format `format' with the arguments in `vargs' into a new string: pass 1 sizes the buffer, pass 2 fills it. */
 163 #ifdef VA_LIST_IS_ARRAY
 164         Py_MEMCPY(count, vargs, sizeof(va_list));
 165 #else
 166 #ifdef  __va_copy
 167         __va_copy(count, vargs);
 168 #else
 169         count = vargs;
 170 #endif
 171 #endif
 172         /* step 1: figure out how large a buffer we need */
 173         for (f = format; *f; f++) {
 174                 if (*f == '%') {
 175                         const char* p = f;
 176                         while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 177                                 ;
 178
 179                         /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
 180                          * they don't affect the amount of space we reserve.
 181                          */
 182                         if ((*f == 'l' || *f == 'z') &&
 183                                         (f[1] == 'd' || f[1] == 'u'))
 184                                 ++f;
 185
 186                         switch (*f) {
 187                         case 'c':
 188                                 (void)va_arg(count, int);
 189                                 /* fall through... */
 190                         case '%':
 191                                 n++;
 192                                 break;
 193                         case 'd': case 'u': case 'i': case 'x':
 194                                 (void) va_arg(count, int);
 195                                 /* 20 bytes is enough to hold a 64-bit integer; decimal takes
 196                                    the most space (this isn't enough for octal).  NOTE(review):
 197                                    read as int even after an 'l'/'z' prefix -- confirm. */
 198                                 n += 20;
 199                                 break;
 200                         case 's':
 201                                 s = va_arg(count, char*);
 202                                 n += strlen(s);
 203                                 break;
 204                         case 'p':
 205                                 (void) va_arg(count, int);
 206                                 /* maximum 64-bit pointer representation:
 207                                  * 0xffffffffffffffff, so 19 characters is enough.
 208                                  * XXX I count 18 -- what's the extra for?
 209                                  * NOTE(review): read as int here but as void* in step 2.
 210                                  */
 211                                 n += 19;
 212                                 break;
 213                         default:
 214                                 /* if we stumble upon an unknown
 215                                    formatting code, copy the rest of
 216                                    the format string to the output
 217                                    string. (we cannot just skip the
 218                                    code, since there's no way to know
 219                                    what's in the argument list) */
 220                                 n += strlen(p);
 221                                 goto expand;
 222                         }
 223                 } else
 224                         n++;
 225         }
 226  expand:
 227         /* step 2: fill the buffer */
 228         /* Since we've analyzed how much space we need for the worst case,
 229            use sprintf directly instead of the slower PyOS_snprintf. */
 230         string = PyString_FromStringAndSize(NULL, n);
 231         if (!string)
 232                 return NULL;
 233         /* `s' walks the new string's buffer as the write position */
 234         s = PyString_AsString(string);
 235
 236         for (f = format; *f; f++) {
 237                 if (*f == '%') {
 238                         const char* p = f++;
 239                         Py_ssize_t i;
 240                         int longflag = 0;
 241                         int size_tflag = 0;
 242                         /* parse the width.precision part (we're only
 243                            interested in the precision value, if any) */
 244                         n = 0;
 245                         while (isdigit(Py_CHARMASK(*f)))
 246                                 n = (n*10) + *f++ - '0';
 247                         if (*f == '.') {
 248                                 f++;
 249                                 n = 0;
 250                                 while (isdigit(Py_CHARMASK(*f)))
 251                                         n = (n*10) + *f++ - '0';
 252                         }
 253                         while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 254                                 f++;
 255                         /* handle the long flag, but only for %ld and %lu.
 256                            others can be added when necessary. */
 257                         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
 258                                 longflag = 1;
 259                                 ++f;
 260                         }
 261                         /* handle the size_t flag. */
 262                         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
 263                                 size_tflag = 1;
 264                                 ++f;
 265                         }
 266
 267                         switch (*f) {
 268                         case 'c':
 269                                 *s++ = va_arg(vargs, int);
 270                                 break;
 271                         case 'd':
 272                                 if (longflag)
 273                                         sprintf(s, "%ld", va_arg(vargs, long));
 274                                 else if (size_tflag)
 275                                         sprintf(s, "%" PY_FORMAT_SIZE_T "d",
 276                                                 va_arg(vargs, Py_ssize_t));
 277                                 else
 278                                         sprintf(s, "%d", va_arg(vargs, int));
 279                                 s += strlen(s);
 280                                 break;
 281                         case 'u':
 282                                 if (longflag)
 283                                         sprintf(s, "%lu",
 284                                                 va_arg(vargs, unsigned long));
 285                                 else if (size_tflag)
 286                                         sprintf(s, "%" PY_FORMAT_SIZE_T "u",
 287                                                 va_arg(vargs, size_t));
 288                                 else
 289                                         sprintf(s, "%u",
 290                                                 va_arg(vargs, unsigned int));
 291                                 s += strlen(s);
 292                                 break;
 293                         case 'i':
 294                                 sprintf(s, "%i", va_arg(vargs, int));
 295                                 s += strlen(s);
 296                                 break;
 297                         case 'x':
 298                                 sprintf(s, "%x", va_arg(vargs, int));
 299                                 s += strlen(s);
 300                                 break;
 301                         case 's':
 302                                 p = va_arg(vargs, char*);
 303                                 i = strlen(p);
 304                                 if (n > 0 && i > n)  /* a positive precision caps the copied length */
 305                                         i = n;
 306                                 Py_MEMCPY(s, p, i);
 307                                 s += i;
 308                                 break;
 309                         case 'p':
 310                                 sprintf(s, "%p", va_arg(vargs, void*));
 311                                 /* %p is ill-defined:  ensure leading 0x. */
 312                                 if (s[1] == 'X')
 313                                         s[1] = 'x';
 314                                 else if (s[1] != 'x') {
 315                                         memmove(s+2, s, strlen(s)+1);
 316                                         s[0] = '0';
 317                                         s[1] = 'x';
 318                                 }
 319                                 s += strlen(s);
 320                                 break;
 321                         case '%':
 322                                 *s++ = '%';
 323                                 break;
 324                         default:
 325                                 strcpy(s, p);
 326                                 s += strlen(s);
 327                                 goto end;
 328                         }
 329                 } else
 330                         *s++ = *f;
 331         }
 332         /* trim the string to the number of bytes actually written */
 333  end:
 334         _PyString_Resize(&string, s - PyString_AS_STRING(string));
 335         return string;
 336 }
 337
 338 /* Variadic wrapper: forwards `format' and its arguments to PyString_FromFormatV(). */
 339 PyObject *
 340 PyString_FromFormat(const char *format, ...)
 341 {
 342         va_list ap;
 343         PyObject *result;
 344 #ifdef HAVE_STDARG_PROTOTYPES
 345         va_start(ap, format);
 346 #else
 347         va_start(ap);
 348 #endif
 349         result = PyString_FromFormatV(format, ap);
 350         va_end(ap);
 351         return result;
 352 }
 353
 354
 355 /* Wrap the `size' bytes at `s' in a temporary string object and decode it
 356    with PyString_AsDecodedString(); returns its result (NULL on failure). */
 357 PyObject *PyString_Decode(const char *s, Py_ssize_t size,
 358                           const char *encoding, const char *errors)
 359 {
 360     PyObject *decoded = NULL;
 361     PyObject *tmp = PyString_FromStringAndSize(s, size);
 362
 363     if (tmp != NULL) {
 364         decoded = PyString_AsDecodedString(tmp, encoding, errors);
 365         Py_DECREF(tmp);
 366     }
 367     return decoded;
 368 }
 369
 370 /* Decode the string object `str' through the codec registry and return
 371    whatever object the codec produces, or NULL with an exception set.
 372    A non-string `str' is rejected via PyErr_BadArgument(). */
 373 PyObject *PyString_AsDecodedObject(PyObject *str,
 374                                    const char *encoding,
 375                                    const char *errors)
 376 {
 377     PyObject *decoded;
 378
 379     if (!PyString_Check(str)) {
 380         PyErr_BadArgument();
 381         return NULL;
 382     }
 383
 384 #ifdef Py_USING_UNICODE
 385     /* no encoding given: fall back to the default encoding */
 386     if (encoding == NULL)
 387         encoding = PyUnicode_GetDefaultEncoding();
 388 #else
 389     if (encoding == NULL) {
 390         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 391         return NULL;
 392     }
 393 #endif
 394
 395     /* Decode via the codec registry; a NULL result (error already set)
 396        is handed back to the caller unchanged. */
 397     decoded = PyCodec_Decode(str, encoding, errors);
 398     return decoded;
 399 }
 400
 401 /* Like PyString_AsDecodedObject(), but the result must be a string:
 402    a Unicode result is encoded with the default encoding, and anything
 403    that is still not a string raises TypeError.  Returns NULL on failure. */
 404 PyObject *PyString_AsDecodedString(PyObject *str,
 405                                    const char *encoding,
 406                                    const char *errors)
 407 {
 408     PyObject *result = PyString_AsDecodedObject(str, encoding, errors);
 409
 410     if (result == NULL)
 411         return NULL;
 412 #ifdef Py_USING_UNICODE
 413     /* Convert Unicode to a string using the default encoding */
 414     if (PyUnicode_Check(result)) {
 415         PyObject *encoded = PyUnicode_AsEncodedString(result, NULL, NULL);
 416
 417         Py_DECREF(result);
 418         result = encoded;
 419         if (result == NULL)
 420             return NULL;
 421     }
 422 #endif
 423
 424     if (PyString_Check(result))
 425         return result;
 426
 427     /* the codec produced something that is not a string: report its type */
 428     PyErr_Format(PyExc_TypeError,
 429                  "decoder did not return a string object (type=%.400s)",
 430                  result->ob_type->tp_name);
 431     Py_DECREF(result);
 432     return NULL;
 433 }
 434
 435 /* Wrap the `size' bytes at `s' in a temporary string object and encode it
 436    with PyString_AsEncodedString(); returns its result (NULL on failure). */
 437 PyObject *PyString_Encode(const char *s, Py_ssize_t size,
 438                           const char *encoding, const char *errors)
 439 {
 440     PyObject *encoded = NULL;
 441     PyObject *tmp = PyString_FromStringAndSize(s, size);
 442
 443     if (tmp != NULL) {
 444         encoded = PyString_AsEncodedString(tmp, encoding, errors);
 445         Py_DECREF(tmp);
 446     }
 447     return encoded;
 448 }
 449
 450 /* Encode the string object `str' through the codec registry and return
 451    whatever object the codec produces, or NULL with an exception set.
 452    A non-string `str' is rejected via PyErr_BadArgument(). */
 453 PyObject *PyString_AsEncodedObject(PyObject *str,
 454                                    const char *encoding,
 455                                    const char *errors)
 456 {
 457     PyObject *encoded;
 458
 459     if (!PyString_Check(str)) {
 460         PyErr_BadArgument();
 461         return NULL;
 462     }
 463
 464 #ifdef Py_USING_UNICODE
 465     /* no encoding given: fall back to the default encoding */
 466     if (encoding == NULL)
 467         encoding = PyUnicode_GetDefaultEncoding();
 468 #else
 469     if (encoding == NULL) {
 470         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 471         return NULL;
 472     }
 473 #endif
 474
 475     /* Encode via the codec registry; a NULL result (error already set)
 476        is handed back to the caller unchanged. */
 477     encoded = PyCodec_Encode(str, encoding, errors);
 478     return encoded;
 479 }
 480
 481 /* Like PyString_AsEncodedObject(), but the result must be a string:
 482    a Unicode result is encoded with the default encoding, and anything
 483    that is still not a string raises TypeError.  Returns NULL on failure. */
 484 PyObject *PyString_AsEncodedString(PyObject *str,
 485                                    const char *encoding,
 486                                    const char *errors)
 487 {
 488     PyObject *result = PyString_AsEncodedObject(str, encoding, errors);
 489
 490     if (result == NULL)
 491         return NULL;
 492 #ifdef Py_USING_UNICODE
 493     /* Convert Unicode to a string using the default encoding */
 494     if (PyUnicode_Check(result)) {
 495         PyObject *encoded = PyUnicode_AsEncodedString(result, NULL, NULL);
 496
 497         Py_DECREF(result);
 498         result = encoded;
 499         if (result == NULL)
 500             return NULL;
 501     }
 502 #endif
 503
 504     if (PyString_Check(result))
 505         return result;
 506
 507     /* the codec produced something that is not a string: report its type */
 508     PyErr_Format(PyExc_TypeError,
 509                  "encoder did not return a string object (type=%.400s)",
 510                  result->ob_type->tp_name);
 511     Py_DECREF(result);
 512     return NULL;
 513 }
 514
 515 /* Deallocator for string objects: a mortal interned string is first
 516    removed from the `interned' dict, then the memory goes to tp_free. */
 517 static void
 518 string_dealloc(PyObject *op)
 519 {
 520         const int state = PyString_CHECK_INTERNED(op);
 521
 522         if (state == SSTATE_INTERNED_MORTAL) {
 523                 /* revive dead object temporarily for DelItem */
 524                 op->ob_refcnt = 3;
 525                 if (PyDict_DelItem(interned, op) != 0)
 526                         Py_FatalError("deletion of interned string failed");
 527         }
 528         else if (state != SSTATE_NOT_INTERNED) {
 529                 /* an immortal string reports its own message before the
 530                    generic one */
 531                 if (state == SSTATE_INTERNED_IMMORTAL)
 532                         Py_FatalError("Immortal interned string died.");
 533                 Py_FatalError("Inconsistent interned string state.");
 534         }
 535
 536         op->ob_type->tp_free(op);
 537 }
 538
 539 /* Unescape a backslash-escaped string of `len' bytes at `s'.  If unicode
 540    is non-zero, the string is a u-literal.  If recode_encoding is non-zero,
 541    the string is UTF-8 encoded and should be re-encoded in the specified
 542    encoding.  `errors' picks the reaction to a malformed \x escape (NULL or
 543    "strict": ValueError, "replace": emit '?', "ignore": drop it).  Returns
 544    a new string, or NULL with an exception set.  */
 545 PyObject *PyString_DecodeEscape(const char *s, Py_ssize_t len,
 546                                 const char *errors, Py_ssize_t unicode,
 547                                 const char *recode_encoding)
 548 {
 549         int c;
 550         char *p, *buf;
 551         const char *end;
 552         PyObject *v;
 553         Py_ssize_t newlen = recode_encoding ? 4*len:len;
 554         v = PyString_FromStringAndSize((char *)NULL, newlen);
 555         if (v == NULL)
 556                 return NULL;
 557         p = buf = PyString_AsString(v);
 558         end = s + len;
 559         while (s < end) {
 560                 if (*s != '\\') {
 561                   non_esc:
 562 #ifdef Py_USING_UNICODE
 563                         if (recode_encoding && (*s & 0x80)) {
 564                                 PyObject *u, *w;
 565                                 char *r;
 566                                 const char* t;
 567                                 Py_ssize_t rn;
 568                                 t = s;
 569                                 /* Decode non-ASCII bytes as UTF-8. */
 570                                 while (t < end && (*t & 0x80)) t++;
 571                                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
 572                                 if(!u) goto failed;
 573
 574                                 /* Recode them in target encoding. */
 575                                 w = PyUnicode_AsEncodedString(
 576                                         u, recode_encoding, errors);
 577                                 Py_DECREF(u);
 578                                 if (!w) goto failed;
 579
 580                                 /* Append bytes to output buffer. */
 581                                 assert(PyString_Check(w));
 582                                 r = PyString_AS_STRING(w);
 583                                 rn = PyString_GET_SIZE(w);
 584                                 Py_MEMCPY(p, r, rn);
 585                                 p += rn;
 586                                 Py_DECREF(w);
 587                                 s = t;
 588                         } else {
 589                                 *p++ = *s++;
 590                         }
 591 #else
 592                         *p++ = *s++;
 593 #endif
 594                         continue;
 595                 }
 596                 s++;
 597                 if (s==end) {
 598                         PyErr_SetString(PyExc_ValueError, "Trailing \\ in string");
 599                         goto failed;
 600                 }
 601                 switch (*s++) {
 602                 /* XXX This assumes ASCII! */
 603                 case '\n': break;
 604                 case '\\': *p++ = '\\'; break;
 605                 case '\'': *p++ = '\''; break;
 606                 case '\"': *p++ = '\"'; break;
 607                 case 'b': *p++ = '\b'; break;
 608                 case 'f': *p++ = '\014'; break; /* FF */
 609                 case 't': *p++ = '\t'; break;
 610                 case 'n': *p++ = '\n'; break;
 611                 case 'r': *p++ = '\r'; break;
 612                 case 'v': *p++ = '\013'; break; /* VT */
 613                 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
 614                 case '0': case '1': case '2': case '3':
 615                 case '4': case '5': case '6': case '7':
 616                         c = s[-1] - '0';
 617                         if (s < end && '0' <= *s && *s <= '7') {
 618                                 c = (c<<3) + *s++ - '0';
 619                                 if (s < end && '0' <= *s && *s <= '7')
 620                                         c = (c<<3) + *s++ - '0';
 621                         }
 622                         *p++ = c;
 623                         break;
 624                 case 'x':
 625                         if (s+1 < end &&
 626                             isxdigit(Py_CHARMASK(s[0])) &&
 627                             isxdigit(Py_CHARMASK(s[1])))
 628                         {
 629                                 unsigned int x = 0;
 630                                 c = Py_CHARMASK(*s);
 631                                 s++;
 632                                 if (isdigit(c))
 633                                         x = c - '0';
 634                                 else if (islower(c))
 635                                         x = 10 + c - 'a';
 636                                 else
 637                                         x = 10 + c - 'A';
 638                                 x = x << 4;
 639                                 c = Py_CHARMASK(*s);
 640                                 s++;
 641                                 if (isdigit(c))
 642                                         x += c - '0';
 643                                 else if (islower(c))
 644                                         x += 10 + c - 'a';
 645                                 else
 646                                         x += 10 + c - 'A';
 647                                 *p++ = x;
 648                                 break;
 649                         }
 650                         if (!errors || strcmp(errors, "strict") == 0) {
 651                                 PyErr_SetString(PyExc_ValueError, "invalid \\x escape");
 652                                 goto failed;
 653                         }
 654                         if (strcmp(errors, "replace") == 0) {
 655                                 *p++ = '?';
 656                         } else if (strcmp(errors, "ignore") == 0)
 657                                 /* do nothing */;
 658                         else {
 659                                 PyErr_Format(PyExc_ValueError,
 660                                              "decoding error; unknown error handling code: %.400s",
 661                                              errors);
 662                                 goto failed;
 663                         }
 664                         /* skip \x: the bad escape is consumed here, not re-emitted by `default' */
 665                         if (s < end && isxdigit(Py_CHARMASK(s[0])))
 666                                 s++; /* and a hexdigit */
 667                         break;
 668 #ifndef Py_USING_UNICODE
 669                 case 'u':
 670                 case 'U':
 671                 case 'N':
 672                         if (unicode) {
 673                                 PyErr_SetString(PyExc_ValueError,
 674                                           "Unicode escapes not legal "
 675                                           "when Unicode disabled");
 676                                 goto failed;
 677                         }  /* otherwise fall through to `default' */
 678 #endif
 679                 default:
 680                         *p++ = '\\';
 681                         s--;
 682                         goto non_esc; /* an arbitrary number of unescaped
 683                                          UTF-8 bytes may follow. */
 684                 }
 685         }
 686         if (p-buf < newlen)
 687                 _PyString_Resize(&v, p - buf);
 688         return v;
 689   failed:
 690         Py_DECREF(v);
 691         return NULL;
 692 }
 693
 694 /* -------------------------------------------------------------------- */
 695 /* object api */
 696
 697 static Py_ssize_t
 698 string_getsize(register PyObject *op)
 699 {
 700         char *s;
 701         Py_ssize_t len;
 702         if (PyString_AsStringAndSize(op, &s, &len))
 703                 return -1;
 704         return len;
 705 }
 706
 707 static /*const*/ char *
 708 string_getbuffer(register PyObject *op)
 709 {
 710         char *s;
 711         Py_ssize_t len;
 712         if (PyString_AsStringAndSize(op, &s, &len))
 713                 return NULL;
 714         return s;
 715 }
 716
 717 /* Size of `op': ob_size for a string, else the string_getsize() fallback. */
 718 Py_ssize_t
 719 PyString_Size(register PyObject *op)
 720 {
 721         return PyString_Check(op) ? ((PyStringObject *)op)->ob_size
 722                                   : string_getsize(op);
 723 }
 724
 725 /* Buffer of `op': ob_sval for a string, else the string_getbuffer() fallback. */
 726 /*const*/ char *
 727 PyString_AsString(register PyObject *op)
 728 {
 729         return PyString_Check(op) ? ((PyStringObject *)op)->ob_sval
 730                                   : string_getbuffer(op);
 731 }
 732
 733 /* Expose the bytes of `obj' (a string, or a Unicode object converted by
 734    _PyUnicode_AsDefaultEncodedString()) via *s and, if non-NULL, *len; with a
 735    NULL `len' embedded null bytes are an error.  Returns 0, or -1 on error. */
 736 int
 737 PyString_AsStringAndSize(register PyObject *obj,
 738                          register char **s, register Py_ssize_t *len)
 739 {
 740         if (s == NULL) {
 741                 PyErr_BadInternalCall();
 742                 return -1;
 743         }
 744         if (!PyString_Check(obj)) {
 745 #ifdef Py_USING_UNICODE
 746                 if (!PyUnicode_Check(obj))
 747 #endif
 748                 {
 749                         PyErr_Format(PyExc_TypeError,
 750                                      "expected string or Unicode object, "
 751                                      "%.200s found", obj->ob_type->tp_name);
 752                         return -1;
 753                 }
 754 #ifdef Py_USING_UNICODE
 755                 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
 756                 if (obj == NULL)
 757                         return -1;
 758 #endif
 759         }
 760         *s = PyString_AS_STRING(obj);
 761         if (len != NULL) {
 762                 *len = PyString_GET_SIZE(obj);
 763                 return 0;
 764         }
 765         if (strlen(*s) == (size_t)PyString_GET_SIZE(obj))
 766                 return 0;
 767         PyErr_SetString(PyExc_TypeError, "expected string without null bytes");
 768         return -1;
 769 }
 770
 771 /* -------------------------------------------------------------------- */
 772 /* Methods */
 773
 774 #define STRINGLIB_CHAR char
 775 /* element type above; comparison, length, constructor and raw-data accessors below */
 776 #define STRINGLIB_CMP memcmp
 777 #define STRINGLIB_LEN PyString_GET_SIZE
 778 #define STRINGLIB_NEW PyString_FromStringAndSize
 779 #define STRINGLIB_STR PyString_AS_STRING
 780 /* STRINGLIB_EMPTY names this file's cached empty-string pointer */
 781 #define STRINGLIB_EMPTY nullstring
 782 /* NOTE(review): the STRINGLIB_* macros are presumably consumed by the headers included below -- confirm */
 783 #include "stringlib/fastsearch.h"
 784
 785 #include "stringlib/count.h"
 786 #include "stringlib/find.h"
 787 #include "stringlib/partition.h"
 788
 789
 790 /* Write `op' to `fp': raw bytes if Py_PRINT_RAW is set in `flags', else a
 791    quoted, escaped form.  Returns 0, or -1 if PyObject_Str() fails. */
 792 static int
 793 string_print(PyStringObject *op, FILE *fp, int flags)
 794 {
 795         const char *bytes;
 796         Py_ssize_t pos;
 797         int quote = '\'';
 798         /* XXX Ought to check for interrupts when writing long strings */
 799         if (! PyString_CheckExact(op)) {
 800                 /* A str subclass may have its own __str__ method. */
 801                 int status;
 802                 op = (PyStringObject *) PyObject_Str((PyObject *)op);
 803                 if (op == NULL)
 804                         return -1;
 805                 status = string_print(op, fp, flags);
 806                 Py_DECREF(op);
 807                 return status;
 808         }
 809         if (flags & Py_PRINT_RAW) {
 810                 /* Very long strings cannot be written atomically; chunks stay
 811                  * below INT_MAX to avoid memory alignment issues. */
 812                 const int chunk_size = INT_MAX & ~0x3FFF;
 813                 char *cursor = op->ob_sval;
 814                 Py_ssize_t remaining;
 815                 for (remaining = op->ob_size; remaining > INT_MAX;
 816                      remaining -= chunk_size) {
 817                         fwrite(cursor, 1, chunk_size, fp);
 818                         cursor += chunk_size;
 819                 }
 820 #ifdef __VMS
 821                 if (remaining) fwrite(cursor, (int)remaining, 1, fp);
 822 #else
 823                 fwrite(cursor, 1, (int)remaining, fp);
 824 #endif
 825                 return 0;
 826         }
 827
 828         /* figure out which quote to use; single is preferred */
 829         bytes = op->ob_sval;
 830         if (memchr(bytes, '\'', op->ob_size) != NULL &&
 831             memchr(bytes, '"', op->ob_size) == NULL)
 832                 quote = '"';
 833         fputc(quote, fp);
 834         for (pos = 0; pos < op->ob_size; pos++) {
 835                 const char ch = bytes[pos];
 836                 if (ch == quote || ch == '\\') {
 837                         fprintf(fp, "\\%c", ch);
 838                         continue;
 839                 }
 840                 switch (ch) {
 841                 case '\t': fprintf(fp, "\\t"); break;
 842                 case '\n': fprintf(fp, "\\n"); break;
 843                 case '\r': fprintf(fp, "\\r"); break;
 844                 default:
 845                         if (ch < ' ' || ch >= 0x7f)
 846                                 fprintf(fp, "\\x%02x", ch & 0xff);
 847                         else
 848                                 fputc(ch, fp);
 849                 }
 850         }
 851         fputc(quote, fp);
 852         return 0;
 853 }
 854
 855 PyObject *
 856 PyString_Repr(PyObject *obj, int smartquotes)
 857 {
 858         register PyStringObject* op = (PyStringObject*) obj;
 859         size_t newsize = 2 + 4 * op->ob_size;
 860         PyObject *v;
 861         if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
 862                 PyErr_SetString(PyExc_OverflowError,
 863                         "string is too large to make repr");
 864                 return NULL;
 865         }
 866         v = PyString_FromStringAndSize((char *)NULL, newsize);
 867         if (v == NULL) {
 868                 return NULL;
 869         }
 870         else {
 871                 register Py_ssize_t i;
 872                 register char c;
 873                 register char *p;
 874                 int quote;
 875
 876                 /* figure out which quote to use; single is preferred */
 877                 quote = '\'';
 878                 if (smartquotes &&
 879                     memchr(op->ob_sval, '\'', op->ob_size) &&
 880                     !memchr(op->ob_sval, '"', op->ob_size))
 881                         quote = '"';
 882
 883                 p = PyString_AS_STRING(v);
 884                 *p++ = quote;
 885                 for (i = 0; i < op->ob_size; i++) {
 886                         /* There's at least enough room for a hex escape
 887                            and a closing quote. */
 888                         assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
 889                         c = op->ob_sval[i];
 890                         if (c == quote || c == '\\')
 891                                 *p++ = '\\', *p++ = c;
 892                         else if (c == '\t')
 893                                 *p++ = '\\', *p++ = 't';
 894                         else if (c == '\n')
 895                                 *p++ = '\\', *p++ = 'n';
 896                         else if (c == '\r')
 897                                 *p++ = '\\', *p++ = 'r';
 898                         else if (c < ' ' || c >= 0x7f) {
 899                                 /* For performance, we don't want to call
 900                                    PyOS_snprintf here (extra layers of
 901                                    function call). */
 902                                 sprintf(p, "\\x%02x", c & 0xff);
 903                                 p += 4;
 904                         }
 905                         else
 906                                 *p++ = c;
 907                 }
 908                 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
 909                 *p++ = quote;
 910                 *p = '\0';
 911                 _PyString_Resize(
 912                         &v, (p - PyString_AS_STRING(v)));
 913                 return v;
 914         }
 915 }
 916
 917 static PyObject *
 918 string_repr(PyObject *op)
 919 {
 920         return PyString_Repr(op, 1);
 921 }
 922
 923 static PyObject *
 924 string_str(PyObject *s)
 925 {
 926         assert(PyString_Check(s));
 927         if (PyString_CheckExact(s)) {
 928                 Py_INCREF(s);
 929                 return s;
 930         }
 931         else {
 932                 /* Subtype -- return genuine string with the same value. */
 933                 PyStringObject *t = (PyStringObject *) s;
 934                 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
 935         }
 936 }
 937
 938 static Py_ssize_t
 939 string_length(PyStringObject *a)
 940 {
 941         return a->ob_size;
 942 }
 943
 944 static PyObject *
 945 string_concat(register PyStringObject *a, register PyObject *bb)
 946 {
 947         register Py_ssize_t size;
 948         register PyStringObject *op;
 949         if (!PyString_Check(bb)) {
 950 #ifdef Py_USING_UNICODE
 951                 if (PyUnicode_Check(bb))
 952                     return PyUnicode_Concat((PyObject *)a, bb);
 953 #endif
 954                 PyErr_Format(PyExc_TypeError,
 955                              "cannot concatenate 'str' and '%.200s' objects",
 956                              bb->ob_type->tp_name);
 957                 return NULL;
 958         }
 959 #define b ((PyStringObject *)bb)
 960         /* Optimize cases with empty left or right operand */
 961         if ((a->ob_size == 0 || b->ob_size == 0) &&
 962             PyString_CheckExact(a) && PyString_CheckExact(b)) {
 963                 if (a->ob_size == 0) {
 964                         Py_INCREF(bb);
 965                         return bb;
 966                 }
 967                 Py_INCREF(a);
 968                 return (PyObject *)a;
 969         }
 970         size = a->ob_size + b->ob_size;
 971         if (size < 0) {
 972                 PyErr_SetString(PyExc_OverflowError,
 973                                 "strings are too large to concat");
 974                 return NULL;
 975         }
 976
 977         /* Inline PyObject_NewVar */
 978         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
 979         if (op == NULL)
 980                 return PyErr_NoMemory();
 981         PyObject_INIT_VAR(op, &PyString_Type, size);
 982         op->ob_shash = -1;
 983         op->ob_sstate = SSTATE_NOT_INTERNED;
 984         Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
 985         Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
 986         op->ob_sval[size] = '\0';
 987         return (PyObject *) op;
 988 #undef b
 989 }
 990
 991 static PyObject *
 992 string_repeat(register PyStringObject *a, register Py_ssize_t n)
 993 {
 994         register Py_ssize_t i;
 995         register Py_ssize_t j;
 996         register Py_ssize_t size;
 997         register PyStringObject *op;
 998         size_t nbytes;
 999         if (n < 0)
1000                 n = 0;
1001         /* watch out for overflows:  the size can overflow int,
1002          * and the # of bytes needed can overflow size_t
1003          */
1004         size = a->ob_size * n;
1005         if (n && size / n != a->ob_size) {
1006                 PyErr_SetString(PyExc_OverflowError,
1007                         "repeated string is too long");
1008                 return NULL;
1009         }
1010         if (size == a->ob_size && PyString_CheckExact(a)) {
1011                 Py_INCREF(a);
1012                 return (PyObject *)a;
1013         }
1014         nbytes = (size_t)size;
1015         if (nbytes + sizeof(PyStringObject) <= nbytes) {
1016                 PyErr_SetString(PyExc_OverflowError,
1017                         "repeated string is too long");
1018                 return NULL;
1019         }
1020         op = (PyStringObject *)
1021                 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1022         if (op == NULL)
1023                 return PyErr_NoMemory();
1024         PyObject_INIT_VAR(op, &PyString_Type, size);
1025         op->ob_shash = -1;
1026         op->ob_sstate = SSTATE_NOT_INTERNED;
1027         op->ob_sval[size] = '\0';
1028         if (a->ob_size == 1 && n > 0) {
1029                 memset(op->ob_sval, a->ob_sval[0] , n);
1030                 return (PyObject *) op;
1031         }
1032         i = 0;
1033         if (i < size) {
1034                 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
1035                 i = a->ob_size;
1036         }
1037         while (i < size) {
1038                 j = (i <= size-i)  ?  i  :  size-i;
1039                 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1040                 i += j;
1041         }
1042         return (PyObject *) op;
1043 }
1044
1045 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1046
1047 static PyObject *
1048 string_slice(register PyStringObject *a, register Py_ssize_t i,
1049              register Py_ssize_t j)
1050      /* j -- may be negative! */
1051 {
1052         if (i < 0)
1053                 i = 0;
1054         if (j < 0)
1055                 j = 0; /* Avoid signed/unsigned bug in next line */
1056         if (j > a->ob_size)
1057                 j = a->ob_size;
1058         if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1059                 /* It's the same as a */
1060                 Py_INCREF(a);
1061                 return (PyObject *)a;
1062         }
1063         if (j < i)
1064                 j = i;
1065         return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1066 }
1067
1068 static int
1069 string_contains(PyObject *str_obj, PyObject *sub_obj)
1070 {
1071         if (!PyString_CheckExact(sub_obj)) {
1072 #ifdef Py_USING_UNICODE
1073                 if (PyUnicode_Check(sub_obj))
1074                         return PyUnicode_Contains(str_obj, sub_obj);
1075 #endif
1076                 if (!PyString_Check(sub_obj)) {
1077                         PyErr_SetString(PyExc_TypeError,
1078                             "'in <string>' requires string as left operand");
1079                         return -1;
1080                 }
1081         }
1082
1083         return stringlib_contains_obj(str_obj, sub_obj);
1084 }
1085
1086 static PyObject *
1087 string_item(PyStringObject *a, register Py_ssize_t i)
1088 {
1089         char pchar;
1090         PyObject *v;
1091         if (i < 0 || i >= a->ob_size) {
1092                 PyErr_SetString(PyExc_IndexError, "string index out of range");
1093                 return NULL;
1094         }
1095         pchar = a->ob_sval[i];
1096         v = (PyObject *)characters[pchar & UCHAR_MAX];
1097         if (v == NULL)
1098                 v = PyString_FromStringAndSize(&pchar, 1);
1099         else {
1100 #ifdef COUNT_ALLOCS
1101                 one_strings++;
1102 #endif
1103                 Py_INCREF(v);
1104         }
1105         return v;
1106 }
1107
1108 static PyObject*
1109 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1110 {
1111         int c;
1112         Py_ssize_t len_a, len_b;
1113         Py_ssize_t min_len;
1114         PyObject *result;
1115
1116         /* Make sure both arguments are strings. */
1117         if (!(PyString_Check(a) && PyString_Check(b))) {
1118                 result = Py_NotImplemented;
1119                 goto out;
1120         }
1121         if (a == b) {
1122                 switch (op) {
1123                 case Py_EQ:case Py_LE:case Py_GE:
1124                         result = Py_True;
1125                         goto out;
1126                 case Py_NE:case Py_LT:case Py_GT:
1127                         result = Py_False;
1128                         goto out;
1129                 }
1130         }
1131         if (op == Py_EQ) {
1132                 /* Supporting Py_NE here as well does not save
1133                    much time, since Py_NE is rarely used.  */
1134                 if (a->ob_size == b->ob_size
1135                     && (a->ob_sval[0] == b->ob_sval[0]
1136                         && memcmp(a->ob_sval, b->ob_sval,
1137                                   a->ob_size) == 0)) {
1138                         result = Py_True;
1139                 } else {
1140                         result = Py_False;
1141                 }
1142                 goto out;
1143         }
1144         len_a = a->ob_size; len_b = b->ob_size;
1145         min_len = (len_a < len_b) ? len_a : len_b;
1146         if (min_len > 0) {
1147                 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1148                 if (c==0)
1149                         c = memcmp(a->ob_sval, b->ob_sval, min_len);
1150         }else
1151                 c = 0;
1152         if (c == 0)
1153                 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1154         switch (op) {
1155         case Py_LT: c = c <  0; break;
1156         case Py_LE: c = c <= 0; break;
1157         case Py_EQ: assert(0);  break; /* unreachable */
1158         case Py_NE: c = c != 0; break;
1159         case Py_GT: c = c >  0; break;
1160         case Py_GE: c = c >= 0; break;
1161         default:
1162                 result = Py_NotImplemented;
1163                 goto out;
1164         }
1165         result = c ? Py_True : Py_False;
1166   out:
1167         Py_INCREF(result);
1168         return result;
1169 }
1170
1171 int
1172 _PyString_Eq(PyObject *o1, PyObject *o2)
1173 {
1174         PyStringObject *a = (PyStringObject*) o1;
1175         PyStringObject *b = (PyStringObject*) o2;
1176         return a->ob_size == b->ob_size
1177           && *a->ob_sval == *b->ob_sval
1178           && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
1179 }
1180
1181 static long
1182 string_hash(PyStringObject *a)
1183 {
1184         register Py_ssize_t len;
1185         register unsigned char *p;
1186         register long x;
1187
1188         if (a->ob_shash != -1)
1189                 return a->ob_shash;
1190         len = a->ob_size;
1191         p = (unsigned char *) a->ob_sval;
1192         x = *p << 7;
1193         while (--len >= 0)
1194                 x = (1000003*x) ^ *p++;
1195         x ^= a->ob_size;
1196         if (x == -1)
1197                 x = -2;
1198         a->ob_shash = x;
1199         return x;
1200 }
1201
1202 static PyObject*
1203 string_subscript(PyStringObject* self, PyObject* item)
1204 {
1205         if (PyIndex_Check(item)) {
1206                 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1207                 if (i == -1 && PyErr_Occurred())
1208                         return NULL;
1209                 if (i < 0)
1210                         i += PyString_GET_SIZE(self);
1211                 return string_item(self, i);
1212         }
1213         else if (PySlice_Check(item)) {
1214                 Py_ssize_t start, stop, step, slicelength, cur, i;
1215                 char* source_buf;
1216                 char* result_buf;
1217                 PyObject* result;
1218
1219                 if (PySlice_GetIndicesEx((PySliceObject*)item,
1220                                  PyString_GET_SIZE(self),
1221                                  &start, &stop, &step, &slicelength) < 0) {
1222                         return NULL;
1223                 }
1224
1225                 if (slicelength <= 0) {
1226                         return PyString_FromStringAndSize("", 0);
1227                 }
1228                 else {
1229                         source_buf = PyString_AsString((PyObject*)self);
1230                         result_buf = (char *)PyMem_Malloc(slicelength);
1231                         if (result_buf == NULL)
1232                                 return PyErr_NoMemory();
1233
1234                         for (cur = start, i = 0; i < slicelength;
1235                              cur += step, i++) {
1236                                 result_buf[i] = source_buf[cur];
1237                         }
1238
1239                         result = PyString_FromStringAndSize(result_buf,
1240                                                             slicelength);
1241                         PyMem_Free(result_buf);
1242                         return result;
1243                 }
1244         }
1245         else {
1246                 PyErr_SetString(PyExc_TypeError,
1247                                 "string indices must be integers");
1248                 return NULL;
1249         }
1250 }
1251
1252 static Py_ssize_t
1253 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1254 {
1255         if ( index != 0 ) {
1256                 PyErr_SetString(PyExc_SystemError,
1257                                 "accessing non-existent string segment");
1258                 return -1;
1259         }
1260         *ptr = (void *)self->ob_sval;
1261         return self->ob_size;
1262 }
1263
1264 static Py_ssize_t
1265 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1266 {
1267         PyErr_SetString(PyExc_TypeError,
1268                         "Cannot use string as modifiable buffer");
1269         return -1;
1270 }
1271
1272 static Py_ssize_t
1273 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1274 {
1275         if ( lenp )
1276                 *lenp = self->ob_size;
1277         return 1;
1278 }
1279
1280 static Py_ssize_t
1281 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1282 {
1283         if ( index != 0 ) {
1284                 PyErr_SetString(PyExc_SystemError,
1285                                 "accessing non-existent string segment");
1286                 return -1;
1287         }
1288         *ptr = self->ob_sval;
1289         return self->ob_size;
1290 }
1291
/* Sequence protocol table for str.  The two assignment slots are left
   empty (0). */
static PySequenceMethods string_as_sequence = {
        (lenfunc)string_length, /*sq_length*/
        (binaryfunc)string_concat, /*sq_concat*/
        (ssizeargfunc)string_repeat, /*sq_repeat*/
        (ssizeargfunc)string_item, /*sq_item*/
        (ssizessizeargfunc)string_slice, /*sq_slice*/
        0,              /*sq_ass_item*/
        0,              /*sq_ass_slice*/
        (objobjproc)string_contains /*sq_contains*/
};
1302
/* Mapping protocol table for str; the assignment slot is left empty (0). */
static PyMappingMethods string_as_mapping = {
        (lenfunc)string_length, /*mp_length*/
        (binaryfunc)string_subscript, /*mp_subscript*/
        0, /*mp_ass_subscript*/
};
1308
/* Buffer protocol table for str.  The write slot is filled with a
   function that always raises TypeError. */
static PyBufferProcs string_as_buffer = {
        (readbufferproc)string_buffer_getreadbuf, /*bf_getreadbuffer*/
        (writebufferproc)string_buffer_getwritebuf, /*bf_getwritebuffer*/
        (segcountproc)string_buffer_getsegcount, /*bf_getsegcount*/
        (charbufferproc)string_buffer_getcharbuf, /*bf_getcharbuffer*/
};
1315
1316
1317 \f
/* Selector values for the three strip variants. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Skip the leading "|O:" (3 characters) of the format string, leaving
   just the method name. */
#define STRIPNAME(i) (stripformat[i]+3)
1326
1327
/* True iff the `length' bytes of `target' starting at `offset' equal the
   first `length' bytes of `pattern'.  The first and last bytes are
   compared directly before memcmp() checks the interior.

   Don't call if length < 2.

   Every argument is parenthesized so that expression arguments expand
   correctly; note that arguments are still evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
  ((target)[(offset)] == (pattern)[0] &&                                \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&            \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1333
1334
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for `maxsplit' splits: one more
   than the split count (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that expression
   arguments expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
        ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1347
/* Create the substring data[left:right] and append it to `list'.
   Expects `str', `list' and an `onError' label in the expanding scope;
   jumps to onError if the substring cannot be created or appended.  The
   local reference to the substring is always released.
   NOTE: expands to several statements that are not wrapped in a block,
   so it must not be used as the unbraced body of an if/else/loop. */
#define SPLIT_APPEND(data, left, right)                         \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
                goto onError;                                   \
        if (PyList_Append(list, str)) {                         \
                Py_DECREF(str);                                 \
                goto onError;                                   \
        }                                                       \
        else                                                    \
                Py_DECREF(str);
1359
/* Create the substring data[left:right] and store it as element number
   `count' of `list', then increment `count'.
   While count < MAX_PREALLOC the item is written directly with
   PyList_SET_ITEM (which takes over the reference); beyond that it is
   appended with PyList_Append and the local reference is released.
   Expects `str', `list', `count' and an `onError' label in the expanding
   scope; jumps to onError on failure. */
#define SPLIT_ADD(data, left, right) {                          \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
                goto onError;                                   \
        if (count < MAX_PREALLOC) {                             \
                PyList_SET_ITEM(list, count, str);              \
        } else {                                                \
                if (PyList_Append(list, str)) {                 \
                        Py_DECREF(str);                         \
                        goto onError;                           \
                }                                               \
                else                                            \
                        Py_DECREF(str);                         \
        }                                                       \
        count++; }
1376
/* Always force the list to the expected size: overwrite the list's
   ob_size with `count' (a variable expected in the expanding scope), i.e.
   the number of items actually stored by SPLIT_ADD. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)list)->ob_size = count
1379
/* Scanning helpers; each one updates the index variable `i' in place.
   SKIP_SPACE / SKIP_NONSPACE move forward while i < len and s[i] is /
   is not whitespace per isspace().
   RSKIP_SPACE / RSKIP_NONSPACE move backward while i >= 0 and s[i] is /
   is not whitespace, so `i' may end up as -1. */
#define SKIP_SPACE(s, i, len)    { while (i<len &&  isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0  &&  isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0  && !isspace(Py_CHARMASK(s[i]))) i--; }
1384
1385 Py_LOCAL_INLINE(PyObject *)
1386 split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1387 {
1388         Py_ssize_t i, j, count=0;
1389         PyObject *str;
1390         PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1391
1392         if (list == NULL)
1393                 return NULL;
1394
1395         i = j = 0;
1396
1397         while (maxsplit-- > 0) {
1398                 SKIP_SPACE(s, i, len);
1399                 if (i==len) break;
1400                 j = i; i++;
1401                 SKIP_NONSPACE(s, i, len);
1402                 SPLIT_ADD(s, j, i);
1403         }
1404
1405         if (i < len) {
1406                 /* Only occurs when maxsplit was reached */
1407                 /* Skip any remaining whitespace and copy to end of string */
1408                 SKIP_SPACE(s, i, len);
1409                 if (i != len)
1410                         SPLIT_ADD(s, i, len);
1411         }
1412         FIX_PREALLOC_SIZE(list);
1413         return list;
1414   onError:
1415         Py_DECREF(list);
1416         return NULL;
1417 }
1418
1419 Py_LOCAL_INLINE(PyObject *)
1420 split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1421 {
1422         register Py_ssize_t i, j, count=0;
1423         PyObject *str;
1424         PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1425
1426         if (list == NULL)
1427                 return NULL;
1428
1429         i = j = 0;
1430         while ((j < len) && (maxcount-- > 0)) {
1431                 for(; j<len; j++) {
1432                         /* I found that using memchr makes no difference */
1433                         if (s[j] == ch) {
1434                                 SPLIT_ADD(s, i, j);
1435                                 i = j = j + 1;
1436                                 break;
1437                         }
1438                 }
1439         }
1440         if (i <= len) {
1441                 SPLIT_ADD(s, i, len);
1442         }
1443         FIX_PREALLOC_SIZE(list);
1444         return list;
1445
1446   onError:
1447         Py_DECREF(list);
1448         return NULL;
1449 }
1450
1451 PyDoc_STRVAR(split__doc__,
1452 "S.split([sep [,maxsplit]]) -> list of strings\n\
1453 \n\
1454 Return a list of the words in the string S, using sep as the\n\
1455 delimiter string.  If maxsplit is given, at most maxsplit\n\
1456 splits are done. If sep is not specified or is None, any\n\
1457 whitespace string is a separator.");
1458
1459 static PyObject *
1460 string_split(PyStringObject *self, PyObject *args)
1461 {
1462         Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1463         Py_ssize_t maxsplit = -1, count=0;
1464         const char *s = PyString_AS_STRING(self), *sub;
1465         PyObject *list, *str, *subobj = Py_None;
1466 #ifdef USE_FAST
1467         Py_ssize_t pos;
1468 #endif
1469
1470         if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1471                 return NULL;
1472         if (maxsplit < 0)
1473                 maxsplit = PY_SSIZE_T_MAX;
1474         if (subobj == Py_None)
1475                 return split_whitespace(s, len, maxsplit);
1476         if (PyString_Check(subobj)) {
1477                 sub = PyString_AS_STRING(subobj);
1478                 n = PyString_GET_SIZE(subobj);
1479         }
1480 #ifdef Py_USING_UNICODE
1481         else if (PyUnicode_Check(subobj))
1482                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1483 #endif
1484         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1485                 return NULL;
1486
1487         if (n == 0) {
1488                 PyErr_SetString(PyExc_ValueError, "empty separator");
1489                 return NULL;
1490         }
1491         else if (n == 1)
1492                 return split_char(s, len, sub[0], maxsplit);
1493
1494         list = PyList_New(PREALLOC_SIZE(maxsplit));
1495         if (list == NULL)
1496                 return NULL;
1497
1498 #ifdef USE_FAST
1499         i = j = 0;
1500         while (maxsplit-- > 0) {
1501                 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1502                 if (pos < 0)
1503                         break;
1504                 j = i+pos;
1505                 SPLIT_ADD(s, i, j);
1506                 i = j + n;
1507         }
1508 #else
1509         i = j = 0;
1510         while ((j+n <= len) && (maxsplit-- > 0)) {
1511                 for (; j+n <= len; j++) {
1512                         if (Py_STRING_MATCH(s, j, sub, n)) {
1513                                 SPLIT_ADD(s, i, j);
1514                                 i = j = j + n;
1515                                 break;
1516                         }
1517                 }
1518         }
1519 #endif
1520         SPLIT_ADD(s, i, len);
1521         FIX_PREALLOC_SIZE(list);
1522         return list;
1523
1524  onError:
1525         Py_DECREF(list);
1526         return NULL;
1527 }
1528
1529 PyDoc_STRVAR(partition__doc__,
1530 "S.partition(sep) -> (head, sep, tail)\n\
1531 \n\
1532 Searches for the separator sep in S, and returns the part before it,\n\
1533 the separator itself, and the part after it.  If the separator is not\n\
1534 found, returns S and two empty strings.");
1535
1536 static PyObject *
1537 string_partition(PyStringObject *self, PyObject *sep_obj)
1538 {
1539         const char *sep;
1540         Py_ssize_t sep_len;
1541
1542         if (PyString_Check(sep_obj)) {
1543                 sep = PyString_AS_STRING(sep_obj);
1544                 sep_len = PyString_GET_SIZE(sep_obj);
1545         }
1546 #ifdef Py_USING_UNICODE
1547         else if (PyUnicode_Check(sep_obj))
1548                 return PyUnicode_Partition((PyObject *) self, sep_obj);
1549 #endif
1550         else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1551                 return NULL;
1552
1553         return stringlib_partition(
1554                 (PyObject*) self,
1555                 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1556                 sep_obj, sep, sep_len
1557                 );
1558 }
1559
1560 PyDoc_STRVAR(rpartition__doc__,
1561 "S.rpartition(sep) -> (tail, sep, head)\n\
1562 \n\
1563 Searches for the separator sep in S, starting at the end of S, and returns\n\
1564 the part before it, the separator itself, and the part after it.  If the\n\
1565 separator is not found, returns two empty strings and S.");
1566
1567 static PyObject *
1568 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1569 {
1570         const char *sep;
1571         Py_ssize_t sep_len;
1572
1573         if (PyString_Check(sep_obj)) {
1574                 sep = PyString_AS_STRING(sep_obj);
1575                 sep_len = PyString_GET_SIZE(sep_obj);
1576         }
1577 #ifdef Py_USING_UNICODE
1578         else if (PyUnicode_Check(sep_obj))
1579                 return PyUnicode_Partition((PyObject *) self, sep_obj);
1580 #endif
1581         else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1582                 return NULL;
1583
1584         return stringlib_rpartition(
1585                 (PyObject*) self,
1586                 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1587                 sep_obj, sep, sep_len
1588                 );
1589 }
1590
1591 Py_LOCAL_INLINE(PyObject *)
1592 rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1593 {
1594         Py_ssize_t i, j, count=0;
1595         PyObject *str;
1596         PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1597
1598         if (list == NULL)
1599                 return NULL;
1600
1601         i = j = len-1;
1602
1603         while (maxsplit-- > 0) {
1604                 RSKIP_SPACE(s, i);
1605                 if (i<0) break;
1606                 j = i; i--;
1607                 RSKIP_NONSPACE(s, i);
1608                 SPLIT_ADD(s, i + 1, j + 1);
1609         }
1610         if (i >= 0) {
1611                 /* Only occurs when maxsplit was reached */
1612                 /* Skip any remaining whitespace and copy to beginning of string */
1613                 RSKIP_SPACE(s, i);
1614                 if (i >= 0)
1615                         SPLIT_ADD(s, 0, i + 1);
1616
1617         }
1618         FIX_PREALLOC_SIZE(list);
1619         if (PyList_Reverse(list) < 0)
1620                 goto onError;
1621         return list;
1622   onError:
1623         Py_DECREF(list);
1624         return NULL;
1625 }
1626
1627 Py_LOCAL_INLINE(PyObject *)
1628 rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1629 {
1630         register Py_ssize_t i, j, count=0;
1631         PyObject *str;
1632         PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1633
1634         if (list == NULL)
1635                 return NULL;
1636
1637         i = j = len - 1;
1638         while ((i >= 0) && (maxcount-- > 0)) {
1639                 for (; i >= 0; i--) {
1640                         if (s[i] == ch) {
1641                                 SPLIT_ADD(s, i + 1, j + 1);
1642                                 j = i = i - 1;
1643                                 break;
1644                         }
1645                 }
1646         }
1647         if (j >= -1) {
1648                 SPLIT_ADD(s, 0, j + 1);
1649         }
1650         FIX_PREALLOC_SIZE(list);
1651         if (PyList_Reverse(list) < 0)
1652                 goto onError;
1653         return list;
1654
1655  onError:
1656         Py_DECREF(list);
1657         return NULL;
1658 }
1659
1660 PyDoc_STRVAR(rsplit__doc__,
1661 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1662 \n\
1663 Return a list of the words in the string S, using sep as the\n\
1664 delimiter string, starting at the end of the string and working\n\
1665 to the front.  If maxsplit is given, at most maxsplit splits are\n\
1666 done. If sep is not specified or is None, any whitespace string\n\
1667 is a separator.");
1668
1669 static PyObject *
1670 string_rsplit(PyStringObject *self, PyObject *args)
1671 {
1672         Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1673         Py_ssize_t maxsplit = -1, count=0;
1674         const char *s = PyString_AS_STRING(self), *sub;
1675         PyObject *list, *str, *subobj = Py_None;
1676
1677         if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1678                 return NULL;
1679         if (maxsplit < 0)
1680                 maxsplit = PY_SSIZE_T_MAX;
1681         if (subobj == Py_None)
1682                 return rsplit_whitespace(s, len, maxsplit);
1683         if (PyString_Check(subobj)) {
1684                 sub = PyString_AS_STRING(subobj);
1685                 n = PyString_GET_SIZE(subobj);
1686         }
1687 #ifdef Py_USING_UNICODE
1688         else if (PyUnicode_Check(subobj))
1689                 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1690 #endif
1691         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1692                 return NULL;
1693
1694         if (n == 0) {
1695                 PyErr_SetString(PyExc_ValueError, "empty separator");
1696                 return NULL;
1697         }
1698         else if (n == 1)
1699                 return rsplit_char(s, len, sub[0], maxsplit);
1700
1701         list = PyList_New(PREALLOC_SIZE(maxsplit));
1702         if (list == NULL)
1703                 return NULL;
1704
1705         j = len;
1706         i = j - n;
1707
1708         while ( (i >= 0) && (maxsplit-- > 0) ) {
1709                 for (; i>=0; i--) {
1710                         if (Py_STRING_MATCH(s, i, sub, n)) {
1711                                 SPLIT_ADD(s, i + n, j);
1712                                 j = i;
1713                                 i -= n;
1714                                 break;
1715                         }
1716                 }
1717         }
1718         SPLIT_ADD(s, 0, j);
1719         FIX_PREALLOC_SIZE(list);
1720         if (PyList_Reverse(list) < 0)
1721                 goto onError;
1722         return list;
1723
1724 onError:
1725         Py_DECREF(list);
1726         return NULL;
1727 }
1728
1729
1730 PyDoc_STRVAR(join__doc__,
1731 "S.join(sequence) -> string\n\
1732 \n\
1733 Return a string which is the concatenation of the strings in the\n\
1734 sequence.  The separator between elements is S.");
1735
1736 static PyObject *
1737 string_join(PyStringObject *self, PyObject *orig)
1738 {
1739         char *sep = PyString_AS_STRING(self);
1740         const Py_ssize_t seplen = PyString_GET_SIZE(self);
1741         PyObject *res = NULL;
1742         char *p;
1743         Py_ssize_t seqlen = 0;
1744         size_t sz = 0;
1745         Py_ssize_t i;
1746         PyObject *seq, *item;
1747
1748         seq = PySequence_Fast(orig, "");
1749         if (seq == NULL) {
1750                 return NULL;
1751         }
1752
1753         seqlen = PySequence_Size(seq);
1754         if (seqlen == 0) {
1755                 Py_DECREF(seq);
1756                 return PyString_FromString("");
1757         }
1758         if (seqlen == 1) {
1759                 item = PySequence_Fast_GET_ITEM(seq, 0);
1760                 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1761                         Py_INCREF(item);
1762                         Py_DECREF(seq);
1763                         return item;
1764                 }
1765         }
1766
1767         /* There are at least two things to join, or else we have a subclass
1768          * of the builtin types in the sequence.
1769          * Do a pre-pass to figure out the total amount of space we'll
1770          * need (sz), see whether any argument is absurd, and defer to
1771          * the Unicode join if appropriate.
1772          */
1773         for (i = 0; i < seqlen; i++) {
1774                 const size_t old_sz = sz;
1775                 item = PySequence_Fast_GET_ITEM(seq, i);
1776                 if (!PyString_Check(item)){
1777 #ifdef Py_USING_UNICODE
1778                         if (PyUnicode_Check(item)) {
1779                                 /* Defer to Unicode join.
1780                                  * CAUTION:  There's no gurantee that the
1781                                  * original sequence can be iterated over
1782                                  * again, so we must pass seq here.
1783                                  */
1784                                 PyObject *result;
1785                                 result = PyUnicode_Join((PyObject *)self, seq);
1786                                 Py_DECREF(seq);
1787                                 return result;
1788                         }
1789 #endif
1790                         PyErr_Format(PyExc_TypeError,
1791                                      "sequence item %zd: expected string,"
1792                                      " %.80s found",
1793                                      i, item->ob_type->tp_name);
1794                         Py_DECREF(seq);
1795                         return NULL;
1796                 }
1797                 sz += PyString_GET_SIZE(item);
1798                 if (i != 0)
1799                         sz += seplen;
1800                 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1801                         PyErr_SetString(PyExc_OverflowError,
1802                                 "join() result is too long for a Python string");
1803                         Py_DECREF(seq);
1804                         return NULL;
1805                 }
1806         }
1807
1808         /* Allocate result space. */
1809         res = PyString_FromStringAndSize((char*)NULL, sz);
1810         if (res == NULL) {
1811                 Py_DECREF(seq);
1812                 return NULL;
1813         }
1814
1815         /* Catenate everything. */
1816         p = PyString_AS_STRING(res);
1817         for (i = 0; i < seqlen; ++i) {
1818                 size_t n;
1819                 item = PySequence_Fast_GET_ITEM(seq, i);
1820                 n = PyString_GET_SIZE(item);
1821                 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1822                 p += n;
1823                 if (i < seqlen - 1) {
1824                         Py_MEMCPY(p, sep, seplen);
1825                         p += seplen;
1826                 }
1827         }
1828
1829         Py_DECREF(seq);
1830         return res;
1831 }
1832
1833 PyObject *
1834 _PyString_Join(PyObject *sep, PyObject *x)
1835 {
1836         assert(sep != NULL && PyString_Check(sep));
1837         assert(x != NULL);
1838         return string_join((PyStringObject *)sep, x);
1839 }
1840
1841 Py_LOCAL_INLINE(void)
1842 string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1843 {
1844         if (*end > len)
1845                 *end = len;
1846         else if (*end < 0)
1847                 *end += len;
1848         if (*end < 0)
1849                 *end = 0;
1850         if (*start < 0)
1851                 *start += len;
1852         if (*start < 0)
1853                 *start = 0;
1854 }
1855
1856 Py_LOCAL_INLINE(Py_ssize_t)
1857 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1858 {
1859         PyObject *subobj;
1860         const char *sub;
1861         Py_ssize_t sub_len;
1862         Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1863
1864         if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1865                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1866                 return -2;
1867         if (PyString_Check(subobj)) {
1868                 sub = PyString_AS_STRING(subobj);
1869                 sub_len = PyString_GET_SIZE(subobj);
1870         }
1871 #ifdef Py_USING_UNICODE
1872         else if (PyUnicode_Check(subobj))
1873                 return PyUnicode_Find(
1874                         (PyObject *)self, subobj, start, end, dir);
1875 #endif
1876         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1877                 /* XXX - the "expected a character buffer object" is pretty
1878                    confusing for a non-expert.  remap to something else ? */
1879                 return -2;
1880
1881         if (dir > 0)
1882                 return stringlib_find_slice(
1883                         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1884                         sub, sub_len, start, end);
1885         else
1886                 return stringlib_rfind_slice(
1887                         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1888                         sub, sub_len, start, end);
1889 }
1890
1891
1892 PyDoc_STRVAR(find__doc__,
1893 "S.find(sub [,start [,end]]) -> int\n\
1894 \n\
1895 Return the lowest index in S where substring sub is found,\n\
1896 such that sub is contained within s[start:end].  Optional\n\
1897 arguments start and end are interpreted as in slice notation.\n\
1898 \n\
1899 Return -1 on failure.");
1900
1901 static PyObject *
1902 string_find(PyStringObject *self, PyObject *args)
1903 {
1904         Py_ssize_t result = string_find_internal(self, args, +1);
1905         if (result == -2)
1906                 return NULL;
1907         return PyInt_FromSsize_t(result);
1908 }
1909
1910
1911 PyDoc_STRVAR(index__doc__,
1912 "S.index(sub [,start [,end]]) -> int\n\
1913 \n\
1914 Like S.find() but raise ValueError when the substring is not found.");
1915
1916 static PyObject *
1917 string_index(PyStringObject *self, PyObject *args)
1918 {
1919         Py_ssize_t result = string_find_internal(self, args, +1);
1920         if (result == -2)
1921                 return NULL;
1922         if (result == -1) {
1923                 PyErr_SetString(PyExc_ValueError,
1924                                 "substring not found");
1925                 return NULL;
1926         }
1927         return PyInt_FromSsize_t(result);
1928 }
1929
1930
1931 PyDoc_STRVAR(rfind__doc__,
1932 "S.rfind(sub [,start [,end]]) -> int\n\
1933 \n\
1934 Return the highest index in S where substring sub is found,\n\
1935 such that sub is contained within s[start:end].  Optional\n\
1936 arguments start and end are interpreted as in slice notation.\n\
1937 \n\
1938 Return -1 on failure.");
1939
1940 static PyObject *
1941 string_rfind(PyStringObject *self, PyObject *args)
1942 {
1943         Py_ssize_t result = string_find_internal(self, args, -1);
1944         if (result == -2)
1945                 return NULL;
1946         return PyInt_FromSsize_t(result);
1947 }
1948
1949
1950 PyDoc_STRVAR(rindex__doc__,
1951 "S.rindex(sub [,start [,end]]) -> int\n\
1952 \n\
1953 Like S.rfind() but raise ValueError when the substring is not found.");
1954
1955 static PyObject *
1956 string_rindex(PyStringObject *self, PyObject *args)
1957 {
1958         Py_ssize_t result = string_find_internal(self, args, -1);
1959         if (result == -2)
1960                 return NULL;
1961         if (result == -1) {
1962                 PyErr_SetString(PyExc_ValueError,
1963                                 "substring not found");
1964                 return NULL;
1965         }
1966         return PyInt_FromSsize_t(result);
1967 }
1968
1969
1970 Py_LOCAL_INLINE(PyObject *)
1971 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1972 {
1973         char *s = PyString_AS_STRING(self);
1974         Py_ssize_t len = PyString_GET_SIZE(self);
1975         char *sep = PyString_AS_STRING(sepobj);
1976         Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1977         Py_ssize_t i, j;
1978
1979         i = 0;
1980         if (striptype != RIGHTSTRIP) {
1981                 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1982                         i++;
1983                 }
1984         }
1985
1986         j = len;
1987         if (striptype != LEFTSTRIP) {
1988                 do {
1989                         j--;
1990                 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1991                 j++;
1992         }
1993
1994         if (i == 0 && j == len && PyString_CheckExact(self)) {
1995                 Py_INCREF(self);
1996                 return (PyObject*)self;
1997         }
1998         else
1999                 return PyString_FromStringAndSize(s+i, j-i);
2000 }
2001
2002
2003 Py_LOCAL_INLINE(PyObject *)
2004 do_strip(PyStringObject *self, int striptype)
2005 {
2006         char *s = PyString_AS_STRING(self);
2007         Py_ssize_t len = PyString_GET_SIZE(self), i, j;
2008
2009         i = 0;
2010         if (striptype != RIGHTSTRIP) {
2011                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2012                         i++;
2013                 }
2014         }
2015
2016         j = len;
2017         if (striptype != LEFTSTRIP) {
2018                 do {
2019                         j--;
2020                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2021                 j++;
2022         }
2023
2024         if (i == 0 && j == len && PyString_CheckExact(self)) {
2025                 Py_INCREF(self);
2026                 return (PyObject*)self;
2027         }
2028         else
2029                 return PyString_FromStringAndSize(s+i, j-i);
2030 }
2031
2032
2033 Py_LOCAL_INLINE(PyObject *)
2034 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2035 {
2036         PyObject *sep = NULL;
2037
2038         if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2039                 return NULL;
2040
2041         if (sep != NULL && sep != Py_None) {
2042                 if (PyString_Check(sep))
2043                         return do_xstrip(self, striptype, sep);
2044 #ifdef Py_USING_UNICODE
2045                 else if (PyUnicode_Check(sep)) {
2046                         PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2047                         PyObject *res;
2048                         if (uniself==NULL)
2049                                 return NULL;
2050                         res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2051                                 striptype, sep);
2052                         Py_DECREF(uniself);
2053                         return res;
2054                 }
2055 #endif
2056                 PyErr_Format(PyExc_TypeError,
2057 #ifdef Py_USING_UNICODE
2058                              "%s arg must be None, str or unicode",
2059 #else
2060                              "%s arg must be None or str",
2061 #endif
2062                              STRIPNAME(striptype));
2063                 return NULL;
2064         }
2065
2066         return do_strip(self, striptype);
2067 }
2068
2069
2070 PyDoc_STRVAR(strip__doc__,
2071 "S.strip([chars]) -> string or unicode\n\
2072 \n\
2073 Return a copy of the string S with leading and trailing\n\
2074 whitespace removed.\n\
2075 If chars is given and not None, remove characters in chars instead.\n\
2076 If chars is unicode, S will be converted to unicode before stripping");
2077
2078 static PyObject *
2079 string_strip(PyStringObject *self, PyObject *args)
2080 {
2081         if (PyTuple_GET_SIZE(args) == 0)
2082                 return do_strip(self, BOTHSTRIP); /* Common case */
2083         else
2084                 return do_argstrip(self, BOTHSTRIP, args);
2085 }
2086
2087
2088 PyDoc_STRVAR(lstrip__doc__,
2089 "S.lstrip([chars]) -> string or unicode\n\
2090 \n\
2091 Return a copy of the string S with leading whitespace removed.\n\
2092 If chars is given and not None, remove characters in chars instead.\n\
2093 If chars is unicode, S will be converted to unicode before stripping");
2094
2095 static PyObject *
2096 string_lstrip(PyStringObject *self, PyObject *args)
2097 {
2098         if (PyTuple_GET_SIZE(args) == 0)
2099                 return do_strip(self, LEFTSTRIP); /* Common case */
2100         else
2101                 return do_argstrip(self, LEFTSTRIP, args);
2102 }
2103
2104
2105 PyDoc_STRVAR(rstrip__doc__,
2106 "S.rstrip([chars]) -> string or unicode\n\
2107 \n\
2108 Return a copy of the string S with trailing whitespace removed.\n\
2109 If chars is given and not None, remove characters in chars instead.\n\
2110 If chars is unicode, S will be converted to unicode before stripping");
2111
2112 static PyObject *
2113 string_rstrip(PyStringObject *self, PyObject *args)
2114 {
2115         if (PyTuple_GET_SIZE(args) == 0)
2116                 return do_strip(self, RIGHTSTRIP); /* Common case */
2117         else
2118                 return do_argstrip(self, RIGHTSTRIP, args);
2119 }
2120
2121
2122 PyDoc_STRVAR(lower__doc__,
2123 "S.lower() -> string\n\
2124 \n\
2125 Return a copy of the string S converted to lowercase.");
2126
2127 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2128 #ifndef _tolower
2129 #define _tolower tolower
2130 #endif
2131
2132 static PyObject *
2133 string_lower(PyStringObject *self)
2134 {
2135         char *s;
2136         Py_ssize_t i, n = PyString_GET_SIZE(self);
2137         PyObject *newobj;
2138
2139         newobj = PyString_FromStringAndSize(NULL, n);
2140         if (!newobj)
2141                 return NULL;
2142
2143         s = PyString_AS_STRING(newobj);
2144
2145         Py_MEMCPY(s, PyString_AS_STRING(self), n);
2146
2147         for (i = 0; i < n; i++) {
2148                 int c = Py_CHARMASK(s[i]);
2149                 if (isupper(c))
2150                         s[i] = _tolower(c);
2151         }
2152
2153         return newobj;
2154 }
2155
2156 PyDoc_STRVAR(upper__doc__,
2157 "S.upper() -> string\n\
2158 \n\
2159 Return a copy of the string S converted to uppercase.");
2160
2161 #ifndef _toupper
2162 #define _toupper toupper
2163 #endif
2164
2165 static PyObject *
2166 string_upper(PyStringObject *self)
2167 {
2168         char *s;
2169         Py_ssize_t i, n = PyString_GET_SIZE(self);
2170         PyObject *newobj;
2171
2172         newobj = PyString_FromStringAndSize(NULL, n);
2173         if (!newobj)
2174                 return NULL;
2175
2176         s = PyString_AS_STRING(newobj);
2177
2178         Py_MEMCPY(s, PyString_AS_STRING(self), n);
2179
2180         for (i = 0; i < n; i++) {
2181                 int c = Py_CHARMASK(s[i]);
2182                 if (islower(c))
2183                         s[i] = _toupper(c);
2184         }
2185
2186         return newobj;
2187 }
2188
2189 PyDoc_STRVAR(title__doc__,
2190 "S.title() -> string\n\
2191 \n\
2192 Return a titlecased version of S, i.e. words start with uppercase\n\
2193 characters, all remaining cased characters have lowercase.");
2194
2195 static PyObject*
2196 string_title(PyStringObject *self)
2197 {
2198         char *s = PyString_AS_STRING(self), *s_new;
2199         Py_ssize_t i, n = PyString_GET_SIZE(self);
2200         int previous_is_cased = 0;
2201         PyObject *newobj;
2202
2203         newobj = PyString_FromStringAndSize(NULL, n);
2204         if (newobj == NULL)
2205                 return NULL;
2206         s_new = PyString_AsString(newobj);
2207         for (i = 0; i < n; i++) {
2208                 int c = Py_CHARMASK(*s++);
2209                 if (islower(c)) {
2210                         if (!previous_is_cased)
2211                             c = toupper(c);
2212                         previous_is_cased = 1;
2213                 } else if (isupper(c)) {
2214                         if (previous_is_cased)
2215                             c = tolower(c);
2216                         previous_is_cased = 1;
2217                 } else
2218                         previous_is_cased = 0;
2219                 *s_new++ = c;
2220         }
2221         return newobj;
2222 }
2223
2224 PyDoc_STRVAR(capitalize__doc__,
2225 "S.capitalize() -> string\n\
2226 \n\
2227 Return a copy of the string S with only its first character\n\
2228 capitalized.");
2229
2230 static PyObject *
2231 string_capitalize(PyStringObject *self)
2232 {
2233         char *s = PyString_AS_STRING(self), *s_new;
2234         Py_ssize_t i, n = PyString_GET_SIZE(self);
2235         PyObject *newobj;
2236
2237         newobj = PyString_FromStringAndSize(NULL, n);
2238         if (newobj == NULL)
2239                 return NULL;
2240         s_new = PyString_AsString(newobj);
2241         if (0 < n) {
2242                 int c = Py_CHARMASK(*s++);
2243                 if (islower(c))
2244                         *s_new = toupper(c);
2245                 else
2246                         *s_new = c;
2247                 s_new++;
2248         }
2249         for (i = 1; i < n; i++) {
2250                 int c = Py_CHARMASK(*s++);
2251                 if (isupper(c))
2252                         *s_new = tolower(c);
2253                 else
2254                         *s_new = c;
2255                 s_new++;
2256         }
2257         return newobj;
2258 }
2259
2260
2261 PyDoc_STRVAR(count__doc__,
2262 "S.count(sub[, start[, end]]) -> int\n\
2263 \n\
2264 Return the number of non-overlapping occurrences of substring sub in\n\
2265 string S[start:end].  Optional arguments start and end are interpreted\n\
2266 as in slice notation.");
2267
2268 static PyObject *
2269 string_count(PyStringObject *self, PyObject *args)
2270 {
2271         PyObject *sub_obj;
2272         const char *str = PyString_AS_STRING(self), *sub;
2273         Py_ssize_t sub_len;
2274         Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2275
2276         if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2277                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2278                 return NULL;
2279
2280         if (PyString_Check(sub_obj)) {
2281                 sub = PyString_AS_STRING(sub_obj);
2282                 sub_len = PyString_GET_SIZE(sub_obj);
2283         }
2284 #ifdef Py_USING_UNICODE
2285         else if (PyUnicode_Check(sub_obj)) {
2286                 Py_ssize_t count;
2287                 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2288                 if (count == -1)
2289                         return NULL;
2290                 else
2291                         return PyInt_FromSsize_t(count);
2292         }
2293 #endif
2294         else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2295                 return NULL;
2296
2297         string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
2298
2299         return PyInt_FromSsize_t(
2300                 stringlib_count(str + start, end - start, sub, sub_len)
2301                 );
2302 }
2303
2304 PyDoc_STRVAR(swapcase__doc__,
2305 "S.swapcase() -> string\n\
2306 \n\
2307 Return a copy of the string S with uppercase characters\n\
2308 converted to lowercase and vice versa.");
2309
2310 static PyObject *
2311 string_swapcase(PyStringObject *self)
2312 {
2313         char *s = PyString_AS_STRING(self), *s_new;
2314         Py_ssize_t i, n = PyString_GET_SIZE(self);
2315         PyObject *newobj;
2316
2317         newobj = PyString_FromStringAndSize(NULL, n);
2318         if (newobj == NULL)
2319                 return NULL;
2320         s_new = PyString_AsString(newobj);
2321         for (i = 0; i < n; i++) {
2322                 int c = Py_CHARMASK(*s++);
2323                 if (islower(c)) {
2324                         *s_new = toupper(c);
2325                 }
2326                 else if (isupper(c)) {
2327                         *s_new = tolower(c);
2328                 }
2329                 else
2330                         *s_new = c;
2331                 s_new++;
2332         }
2333         return newobj;
2334 }
2335
2336
2337 PyDoc_STRVAR(translate__doc__,
2338 "S.translate(table [,deletechars]) -> string\n\
2339 \n\
2340 Return a copy of the string S, where all characters occurring\n\
2341 in the optional argument deletechars are removed, and the\n\
2342 remaining characters have been mapped through the given\n\
2343 translation table, which must be a string of length 256.");
2344
2345 static PyObject *
2346 string_translate(PyStringObject *self, PyObject *args)
2347 {
2348         register char *input, *output;
2349         register const char *table;
2350         register Py_ssize_t i, c, changed = 0;
2351         PyObject *input_obj = (PyObject*)self;
2352         const char *table1, *output_start, *del_table=NULL;
2353         Py_ssize_t inlen, tablen, dellen = 0;
2354         PyObject *result;
2355         int trans_table[256];
2356         PyObject *tableobj, *delobj = NULL;
2357
2358         if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2359                               &tableobj, &delobj))
2360                 return NULL;
2361
2362         if (PyString_Check(tableobj)) {
2363                 table1 = PyString_AS_STRING(tableobj);
2364                 tablen = PyString_GET_SIZE(tableobj);
2365         }
2366 #ifdef Py_USING_UNICODE
2367         else if (PyUnicode_Check(tableobj)) {
2368                 /* Unicode .translate() does not support the deletechars
2369                    parameter; instead a mapping to None will cause characters
2370                    to be deleted. */
2371                 if (delobj != NULL) {
2372                         PyErr_SetString(PyExc_TypeError,
2373                         "deletions are implemented differently for unicode");
2374                         return NULL;
2375                 }
2376                 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2377         }
2378 #endif
2379         else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
2380                 return NULL;
2381
2382         if (tablen != 256) {
2383                 PyErr_SetString(PyExc_ValueError,
2384                   "translation table must be 256 characters long");
2385                 return NULL;
2386         }
2387
2388         if (delobj != NULL) {
2389                 if (PyString_Check(delobj)) {
2390                         del_table = PyString_AS_STRING(delobj);
2391                         dellen = PyString_GET_SIZE(delobj);
2392                 }
2393 #ifdef Py_USING_UNICODE
2394                 else if (PyUnicode_Check(delobj)) {
2395                         PyErr_SetString(PyExc_TypeError,
2396                         "deletions are implemented differently for unicode");
2397                         return NULL;
2398                 }
2399 #endif
2400                 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2401                         return NULL;
2402         }
2403         else {
2404                 del_table = NULL;
2405                 dellen = 0;
2406         }
2407
2408         table = table1;
2409         inlen = PyString_GET_SIZE(input_obj);
2410         result = PyString_FromStringAndSize((char *)NULL, inlen);
2411         if (result == NULL)
2412                 return NULL;
2413         output_start = output = PyString_AsString(result);
2414         input = PyString_AS_STRING(input_obj);
2415
2416         if (dellen == 0) {
2417                 /* If no deletions are required, use faster code */
2418                 for (i = inlen; --i >= 0; ) {
2419                         c = Py_CHARMASK(*input++);
2420                         if (Py_CHARMASK((*output++ = table[c])) != c)
2421                                 changed = 1;
2422                 }
2423                 if (changed || !PyString_CheckExact(input_obj))
2424                         return result;
2425                 Py_DECREF(result);
2426                 Py_INCREF(input_obj);
2427                 return input_obj;
2428         }
2429
2430         for (i = 0; i < 256; i++)
2431                 trans_table[i] = Py_CHARMASK(table[i]);
2432
2433         for (i = 0; i < dellen; i++)
2434                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2435
2436         for (i = inlen; --i >= 0; ) {
2437                 c = Py_CHARMASK(*input++);
2438                 if (trans_table[c] != -1)
2439                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2440                                 continue;
2441                 changed = 1;
2442         }
2443         if (!changed && PyString_CheckExact(input_obj)) {
2444                 Py_DECREF(result);
2445                 Py_INCREF(input_obj);
2446                 return input_obj;
2447         }
2448         /* Fix the size of the resulting string */
2449         if (inlen > 0)
2450                 _PyString_Resize(&result, output - output_start);
2451         return result;
2452 }
2453
2454
/* Search-direction constants; presumably passed as the `direction`
   argument of findstring() and countstring() -- confirm at call sites. */
#define FORWARD (1)
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first byte equal to `c` within the first `target_len`
   bytes of `target`, or NULL if there is none (thin wrapper over memchr). */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2462
2463 /* String ops must return a string.  */
2464 /* If the object is subclass of string, create a copy */
2465 Py_LOCAL(PyStringObject *)
2466 return_self(PyStringObject *self)
2467 {
2468         if (PyString_CheckExact(self)) {
2469                 Py_INCREF(self);
2470                 return self;
2471         }
2472         return (PyStringObject *)PyString_FromStringAndSize(
2473                 PyString_AS_STRING(self),
2474                 PyString_GET_SIZE(self));
2475 }
2476
2477 Py_LOCAL_INLINE(Py_ssize_t)
2478 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2479 {
2480         Py_ssize_t count=0;
2481         const char *start=target;
2482         const char *end=target+target_len;
2483
2484         while ( (start=findchar(start, end-start, c)) != NULL ) {
2485                 count++;
2486                 if (count >= maxcount)
2487                         break;
2488                 start += 1;
2489         }
2490         return count;
2491 }
2492
2493 Py_LOCAL(Py_ssize_t)
2494 findstring(const char *target, Py_ssize_t target_len,
2495            const char *pattern, Py_ssize_t pattern_len,
2496            Py_ssize_t start,
2497            Py_ssize_t end,
2498            int direction)
2499 {
2500         if (start < 0) {
2501                 start += target_len;
2502                 if (start < 0)
2503                         start = 0;
2504         }
2505         if (end > target_len) {
2506                 end = target_len;
2507         } else if (end < 0) {
2508                 end += target_len;
2509                 if (end < 0)
2510                         end = 0;
2511         }
2512
2513         /* zero-length substrings always match at the first attempt */
2514         if (pattern_len == 0)
2515                 return (direction > 0) ? start : end;
2516
2517         end -= pattern_len;
2518
2519         if (direction < 0) {
2520                 for (; end >= start; end--)
2521                         if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2522                                 return end;
2523         } else {
2524                 for (; start <= end; start++)
2525                         if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2526                                 return start;
2527         }
2528         return -1;
2529 }
2530
2531 Py_LOCAL_INLINE(Py_ssize_t)
2532 countstring(const char *target, Py_ssize_t target_len,
2533             const char *pattern, Py_ssize_t pattern_len,
2534             Py_ssize_t start,
2535             Py_ssize_t end,
2536             int direction, Py_ssize_t maxcount)
2537 {
2538         Py_ssize_t count=0;
2539
2540         if (start < 0) {
2541                 start += target_len;
2542                 if (start < 0)
2543                         start = 0;
2544         }
2545         if (end > target_len) {
2546                 end = target_len;
2547         } else if (end < 0) {
2548                 end += target_len;
2549                 if (end < 0)
2550                         end = 0;
2551         }
2552
2553         /* zero-length substrings match everywhere */
2554         if (pattern_len == 0 || maxcount == 0) {
2555                 if (target_len+1 < maxcount)
2556                         return target_len+1;
2557                 return maxcount;
2558         }
2559
2560         end -= pattern_len;
2561         if (direction < 0) {
2562                 for (; (end >= start); end--)
2563                         if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2564                                 count++;
2565                                 if (--maxcount <= 0) break;
2566                                 end -= pattern_len-1;
2567                         }
2568         } else {
2569                 for (; (start <= end); start++)
2570                         if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2571                                 count++;
2572                                 if (--maxcount <= 0)
2573                                         break;
2574                                 start += pattern_len-1;
2575                         }
2576         }
2577         return count;
2578 }
2579
2580
2581 /* Algorithms for different cases of string replacement */
2582
2583 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2584 Py_LOCAL(PyStringObject *)
2585 replace_interleave(PyStringObject *self,
2586                    const char *to_s, Py_ssize_t to_len,
2587                    Py_ssize_t maxcount)
2588 {
2589         char *self_s, *result_s;
2590         Py_ssize_t self_len, result_len;
2591         Py_ssize_t count, i, product;
2592         PyStringObject *result;
2593
2594         self_len = PyString_GET_SIZE(self);
2595
2596         /* 1 at the end plus 1 after every character */
2597         count = self_len+1;
2598         if (maxcount < count)
2599                 count = maxcount;
2600
2601         /* Check for overflow */
2602         /*   result_len = count * to_len + self_len; */
2603         product = count * to_len;
2604         if (product / to_len != count) {
2605                 PyErr_SetString(PyExc_OverflowError,
2606                                 "replace string is too long");
2607                 return NULL;
2608         }
2609         result_len = product + self_len;
2610         if (result_len < 0) {
2611                 PyErr_SetString(PyExc_OverflowError,
2612                                 "replace string is too long");
2613                 return NULL;
2614         }
2615
2616         if (! (result = (PyStringObject *)
2617                          PyString_FromStringAndSize(NULL, result_len)) )
2618                 return NULL;
2619
2620         self_s = PyString_AS_STRING(self);
2621         result_s = PyString_AS_STRING(result);
2622
2623         /* TODO: special case single character, which doesn't need memcpy */
2624
2625         /* Lay the first one down (guaranteed this will occur) */
2626         Py_MEMCPY(result_s, to_s, to_len);
2627         result_s += to_len;
2628         count -= 1;
2629
2630         for (i=0; i<count; i++) {
2631                 *result_s++ = *self_s++;
2632                 Py_MEMCPY(result_s, to_s, to_len);
2633                 result_s += to_len;
2634         }
2635
2636         /* Copy the rest of the original string */
2637         Py_MEMCPY(result_s, self_s, self_len-i);
2638
2639         return result;
2640 }
2641
2642 /* Special case for deleting a single character */
2643 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2644 Py_LOCAL(PyStringObject *)
2645 replace_delete_single_character(PyStringObject *self,
2646                                 char from_c, Py_ssize_t maxcount)
2647 {
2648         char *self_s, *result_s;
2649         char *start, *next, *end;
2650         Py_ssize_t self_len, result_len;
2651         Py_ssize_t count;
2652         PyStringObject *result;
2653
2654         self_len = PyString_GET_SIZE(self);
2655         self_s = PyString_AS_STRING(self);
2656
2657         count = countchar(self_s, self_len, from_c, maxcount);
2658         if (count == 0) {
2659                 return return_self(self);
2660         }
2661
2662         result_len = self_len - count;  /* from_len == 1 */
2663         assert(result_len>=0);
2664
2665         if ( (result = (PyStringObject *)
2666                         PyString_FromStringAndSize(NULL, result_len)) == NULL)
2667                 return NULL;
2668         result_s = PyString_AS_STRING(result);
2669
2670         start = self_s;
2671         end = self_s + self_len;
2672         while (count-- > 0) {
2673                 next = findchar(start, end-start, from_c);
2674                 if (next == NULL)
2675                         break;
2676                 Py_MEMCPY(result_s, start, next-start);
2677                 result_s += (next-start);
2678                 start = next+1;
2679         }
2680         Py_MEMCPY(result_s, start, end-start);
2681
2682         return result;
2683 }
2684
2685 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2686
2687 Py_LOCAL(PyStringObject *)
2688 replace_delete_substring(PyStringObject *self,
2689                          const char *from_s, Py_ssize_t from_len,
2690                          Py_ssize_t maxcount) {
2691         char *self_s, *result_s;
2692         char *start, *next, *end;
2693         Py_ssize_t self_len, result_len;
2694         Py_ssize_t count, offset;
2695         PyStringObject *result;
2696
2697         self_len = PyString_GET_SIZE(self);
2698         self_s = PyString_AS_STRING(self);
2699
2700         count = countstring(self_s, self_len,
2701                             from_s, from_len,
2702                             0, self_len, 1,
2703                             maxcount);
2704
2705         if (count == 0) {
2706                 /* no matches */
2707                 return return_self(self);
2708         }
2709
2710         result_len = self_len - (count * from_len);
2711         assert (result_len>=0);
2712
2713         if ( (result = (PyStringObject *)
2714               PyString_FromStringAndSize(NULL, result_len)) == NULL )
2715                 return NULL;
2716
2717         result_s = PyString_AS_STRING(result);
2718
2719         start = self_s;
2720         end = self_s + self_len;
2721         while (count-- > 0) {
2722                 offset = findstring(start, end-start,
2723                                     from_s, from_len,
2724                                     0, end-start, FORWARD);
2725                 if (offset == -1)
2726                         break;
2727                 next = start + offset;
2728
2729                 Py_MEMCPY(result_s, start, next-start);
2730
2731                 result_s += (next-start);
2732                 start = next+from_len;
2733         }
2734         Py_MEMCPY(result_s, start, end-start);
2735         return result;
2736 }
2737
2738 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2739 Py_LOCAL(PyStringObject *)
2740 replace_single_character_in_place(PyStringObject *self,
2741                                   char from_c, char to_c,
2742                                   Py_ssize_t maxcount)
2743 {
2744         char *self_s, *result_s, *start, *end, *next;
2745         Py_ssize_t self_len;
2746         PyStringObject *result;
2747
2748         /* The result string will be the same size */
2749         self_s = PyString_AS_STRING(self);
2750         self_len = PyString_GET_SIZE(self);
2751
2752         next = findchar(self_s, self_len, from_c);
2753
2754         if (next == NULL) {
2755                 /* No matches; return the original string */
2756                 return return_self(self);
2757         }
2758
2759         /* Need to make a new string */
2760         result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2761         if (result == NULL)
2762                 return NULL;
2763         result_s = PyString_AS_STRING(result);
2764         Py_MEMCPY(result_s, self_s, self_len);
2765
2766         /* change everything in-place, starting with this one */
2767         start =  result_s + (next-self_s);
2768         *start = to_c;
2769         start++;
2770         end = result_s + self_len;
2771
2772         while (--maxcount > 0) {
2773                 next = findchar(start, end-start, from_c);
2774                 if (next == NULL)
2775                         break;
2776                 *next = to_c;
2777                 start = next+1;
2778         }
2779
2780         return result;
2781 }
2782
2783 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2784 Py_LOCAL(PyStringObject *)
2785 replace_substring_in_place(PyStringObject *self,
2786                            const char *from_s, Py_ssize_t from_len,
2787                            const char *to_s, Py_ssize_t to_len,
2788                            Py_ssize_t maxcount)
2789 {
2790         char *result_s, *start, *end;
2791         char *self_s;
2792         Py_ssize_t self_len, offset;
2793         PyStringObject *result;
2794
2795         /* The result string will be the same size */
2796
2797         self_s = PyString_AS_STRING(self);
2798         self_len = PyString_GET_SIZE(self);
2799
2800         offset = findstring(self_s, self_len,
2801                             from_s, from_len,
2802                             0, self_len, FORWARD);
2803         if (offset == -1) {
2804                 /* No matches; return the original string */
2805                 return return_self(self);
2806         }
2807
2808         /* Need to make a new string */
2809         result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2810         if (result == NULL)
2811                 return NULL;
2812         result_s = PyString_AS_STRING(result);
2813         Py_MEMCPY(result_s, self_s, self_len);
2814
2815         /* change everything in-place, starting with this one */
2816         start =  result_s + offset;
2817         Py_MEMCPY(start, to_s, from_len);
2818         start += from_len;
2819         end = result_s + self_len;
2820
2821         while ( --maxcount > 0) {
2822                 offset = findstring(start, end-start,
2823                                     from_s, from_len,
2824                                     0, end-start, FORWARD);
2825                 if (offset==-1)
2826                         break;
2827                 Py_MEMCPY(start+offset, to_s, from_len);
2828                 start += offset+from_len;
2829         }
2830
2831         return result;
2832 }
2833
2834 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2835 Py_LOCAL(PyStringObject *)
2836 replace_single_character(PyStringObject *self,
2837                          char from_c,
2838                          const char *to_s, Py_ssize_t to_len,
2839                          Py_ssize_t maxcount)
2840 {
2841         char *self_s, *result_s;
2842         char *start, *next, *end;
2843         Py_ssize_t self_len, result_len;
2844         Py_ssize_t count, product;
2845         PyStringObject *result;
2846
2847         self_s = PyString_AS_STRING(self);
2848         self_len = PyString_GET_SIZE(self);
2849
2850         count = countchar(self_s, self_len, from_c, maxcount);
2851         if (count == 0) {
2852                 /* no matches, return unchanged */
2853                 return return_self(self);
2854         }
2855
2856         /* use the difference between current and new, hence the "-1" */
2857         /*   result_len = self_len + count * (to_len-1)  */
2858         product = count * (to_len-1);
2859         if (product / (to_len-1) != count) {
2860                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2861                 return NULL;
2862         }
2863         result_len = self_len + product;
2864         if (result_len < 0) {
2865                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2866                 return NULL;
2867         }
2868
2869         if ( (result = (PyStringObject *)
2870               PyString_FromStringAndSize(NULL, result_len)) == NULL)
2871                 return NULL;
2872         result_s = PyString_AS_STRING(result);
2873
2874         start = self_s;
2875         end = self_s + self_len;
2876         while (count-- > 0) {
2877                 next = findchar(start, end-start, from_c);
2878                 if (next == NULL)
2879                         break;
2880
2881                 if (next == start) {
2882                         /* replace with the 'to' */
2883                         Py_MEMCPY(result_s, to_s, to_len);
2884                         result_s += to_len;
2885                         start += 1;
2886                 } else {
2887                         /* copy the unchanged old then the 'to' */
2888                         Py_MEMCPY(result_s, start, next-start);
2889                         result_s += (next-start);
2890                         Py_MEMCPY(result_s, to_s, to_len);
2891                         result_s += to_len;
2892                         start = next+1;
2893                 }
2894         }
2895         /* Copy the remainder of the remaining string */
2896         Py_MEMCPY(result_s, start, end-start);
2897
2898         return result;
2899 }
2900
2901 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2902 Py_LOCAL(PyStringObject *)
2903 replace_substring(PyStringObject *self,
2904                   const char *from_s, Py_ssize_t from_len,
2905                   const char *to_s, Py_ssize_t to_len,
2906                   Py_ssize_t maxcount) {
2907         char *self_s, *result_s;
2908         char *start, *next, *end;
2909         Py_ssize_t self_len, result_len;
2910         Py_ssize_t count, offset, product;
2911         PyStringObject *result;
2912
2913         self_s = PyString_AS_STRING(self);
2914         self_len = PyString_GET_SIZE(self);
2915
2916         count = countstring(self_s, self_len,
2917                             from_s, from_len,
2918                             0, self_len, FORWARD, maxcount);
2919         if (count == 0) {
2920                 /* no matches, return unchanged */
2921                 return return_self(self);
2922         }
2923
2924         /* Check for overflow */
2925         /*    result_len = self_len + count * (to_len-from_len) */
2926         product = count * (to_len-from_len);
2927         if (product / (to_len-from_len) != count) {
2928                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2929                 return NULL;
2930         }
2931         result_len = self_len + product;
2932         if (result_len < 0) {
2933                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2934                 return NULL;
2935         }
2936
2937         if ( (result = (PyStringObject *)
2938               PyString_FromStringAndSize(NULL, result_len)) == NULL)
2939                 return NULL;
2940         result_s = PyString_AS_STRING(result);
2941
2942         start = self_s;
2943         end = self_s + self_len;
2944         while (count-- > 0) {
2945                 offset = findstring(start, end-start,
2946                                     from_s, from_len,
2947                                     0, end-start, FORWARD);
2948                 if (offset == -1)
2949                         break;
2950                 next = start+offset;
2951                 if (next == start) {
2952                         /* replace with the 'to' */
2953                         Py_MEMCPY(result_s, to_s, to_len);
2954                         result_s += to_len;
2955                         start += from_len;
2956                 } else {
2957                         /* copy the unchanged old then the 'to' */
2958                         Py_MEMCPY(result_s, start, next-start);
2959                         result_s += (next-start);
2960                         Py_MEMCPY(result_s, to_s, to_len);
2961                         result_s += to_len;
2962                         start = next+from_len;
2963                 }
2964         }
2965         /* Copy the remainder of the remaining string */
2966         Py_MEMCPY(result_s, start, end-start);
2967
2968         return result;
2969 }
2970
2971
2972 Py_LOCAL(PyStringObject *)
2973 replace(PyStringObject *self,
2974         const char *from_s, Py_ssize_t from_len,
2975         const char *to_s, Py_ssize_t to_len,
2976         Py_ssize_t maxcount)
2977 {
2978         if (maxcount < 0) {
2979                 maxcount = PY_SSIZE_T_MAX;
2980         } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2981                 /* nothing to do; return the original string */
2982                 return return_self(self);
2983         }
2984
2985         if (maxcount == 0 ||
2986             (from_len == 0 && to_len == 0)) {
2987                 /* nothing to do; return the original string */
2988                 return return_self(self);
2989         }
2990
2991         /* Handle zero-length special cases */
2992
2993         if (from_len == 0) {
2994                 /* insert the 'to' string everywhere.   */
2995                 /*    >>> "Python".replace("", ".")     */
2996                 /*    '.P.y.t.h.o.n.'                   */
2997                 return replace_interleave(self, to_s, to_len, maxcount);
2998         }
2999
3000         /* Except for "".replace("", "A") == "A" there is no way beyond this */
3001         /* point for an empty self string to generate a non-empty string */
3002         /* Special case so the remaining code always gets a non-empty string */
3003         if (PyString_GET_SIZE(self) == 0) {
3004                 return return_self(self);
3005         }
3006
3007         if (to_len == 0) {
3008                 /* delete all occurances of 'from' string */
3009                 if (from_len == 1) {
3010                         return replace_delete_single_character(
3011                                 self, from_s[0], maxcount);
3012                 } else {
3013                         return replace_delete_substring(self, from_s, from_len, maxcount);
3014                 }
3015         }
3016
3017         /* Handle special case where both strings have the same length */
3018
3019         if (from_len == to_len) {
3020                 if (from_len == 1) {
3021                         return replace_single_character_in_place(
3022                                 self,
3023                                 from_s[0],
3024                                 to_s[0],
3025                                 maxcount);
3026                 } else {
3027                         return replace_substring_in_place(
3028                                 self, from_s, from_len, to_s, to_len, maxcount);
3029                 }
3030         }
3031
3032         /* Otherwise use the more generic algorithms */
3033         if (from_len == 1) {
3034                 return replace_single_character(self, from_s[0],
3035                                                 to_s, to_len, maxcount);
3036         } else {
3037                 /* len('from')>=2, len('to')>=1 */
3038                 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3039         }
3040 }
3041
3042 PyDoc_STRVAR(replace__doc__,
3043 "S.replace (old, new[, count]) -> string\n\
3044 \n\
3045 Return a copy of string S with all occurrences of substring\n\
3046 old replaced by new.  If the optional argument count is\n\
3047 given, only the first count occurrences are replaced.");
3048
3049 static PyObject *
3050 string_replace(PyStringObject *self, PyObject *args)
3051 {
3052         Py_ssize_t count = -1;
3053         PyObject *from, *to;
3054         const char *from_s, *to_s;
3055         Py_ssize_t from_len, to_len;
3056
3057         if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3058                 return NULL;
3059
3060         if (PyString_Check(from)) {
3061                 from_s = PyString_AS_STRING(from);
3062                 from_len = PyString_GET_SIZE(from);
3063         }
3064 #ifdef Py_USING_UNICODE
3065         if (PyUnicode_Check(from))
3066                 return PyUnicode_Replace((PyObject *)self,
3067                                          from, to, count);
3068 #endif
3069         else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3070                 return NULL;
3071
3072         if (PyString_Check(to)) {
3073                 to_s = PyString_AS_STRING(to);
3074                 to_len = PyString_GET_SIZE(to);
3075         }
3076 #ifdef Py_USING_UNICODE
3077         else if (PyUnicode_Check(to))
3078                 return PyUnicode_Replace((PyObject *)self,
3079                                          from, to, count);
3080 #endif
3081         else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3082                 return NULL;
3083
3084         return (PyObject *)replace((PyStringObject *) self,
3085                                    from_s, from_len,
3086                                    to_s, to_len, count);
3087 }
3088
3089 /** End DALKE **/
3090
3091 /* Matches the end (direction >= 0) or start (direction < 0) of self
3092  * against substr, using the start and end arguments. Returns
3093  * -1 on error, 0 if not found and 1 if found.
3094  */
3095 Py_LOCAL(int)
3096 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3097                   Py_ssize_t end, int direction)
3098 {
3099         Py_ssize_t len = PyString_GET_SIZE(self);
3100         Py_ssize_t slen;
3101         const char* sub;
3102         const char* str;
3103
3104         if (PyString_Check(substr)) {
3105                 sub = PyString_AS_STRING(substr);
3106                 slen = PyString_GET_SIZE(substr);
3107         }
3108 #ifdef Py_USING_UNICODE
3109         else if (PyUnicode_Check(substr))
3110                 return PyUnicode_Tailmatch((PyObject *)self,
3111                                            substr, start, end, direction);
3112 #endif
3113         else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3114                 return -1;
3115         str = PyString_AS_STRING(self);
3116
3117         string_adjust_indices(&start, &end, len);
3118
3119         if (direction < 0) {
3120                 /* startswith */
3121                 if (start+slen > len)
3122                         return 0;
3123         } else {
3124                 /* endswith */
3125                 if (end-start < slen || start > len)
3126                         return 0;
3127
3128                 if (end-slen > start)
3129                         start = end - slen;
3130         }
3131         if (end-start >= slen)
3132                 return ! memcmp(str+start, sub, slen);
3133         return 0;
3134 }
3135
3136
3137 PyDoc_STRVAR(startswith__doc__,
3138 "S.startswith(prefix[, start[, end]]) -> bool\n\
3139 \n\
3140 Return True if S starts with the specified prefix, False otherwise.\n\
3141 With optional start, test S beginning at that position.\n\
3142 With optional end, stop comparing S at that position.\n\
3143 prefix can also be a tuple of strings to try.");
3144
3145 static PyObject *
3146 string_startswith(PyStringObject *self, PyObject *args)
3147 {
3148         Py_ssize_t start = 0;
3149         Py_ssize_t end = PY_SSIZE_T_MAX;
3150         PyObject *subobj;
3151         int result;
3152
3153         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3154                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3155                 return NULL;
3156         if (PyTuple_Check(subobj)) {
3157                 Py_ssize_t i;
3158                 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3159                         result = _string_tailmatch(self,
3160                                         PyTuple_GET_ITEM(subobj, i),
3161                                         start, end, -1);
3162                         if (result == -1)
3163                                 return NULL;
3164                         else if (result) {
3165                                 Py_RETURN_TRUE;
3166                         }
3167                 }
3168                 Py_RETURN_FALSE;
3169         }
3170         result = _string_tailmatch(self, subobj, start, end, -1);
3171         if (result == -1)
3172                 return NULL;
3173         else
3174                 return PyBool_FromLong(result);
3175 }
3176
3177
3178 PyDoc_STRVAR(endswith__doc__,
3179 "S.endswith(suffix[, start[, end]]) -> bool\n\
3180 \n\
3181 Return True if S ends with the specified suffix, False otherwise.\n\
3182 With optional start, test S beginning at that position.\n\
3183 With optional end, stop comparing S at that position.\n\
3184 suffix can also be a tuple of strings to try.");
3185
3186 static PyObject *
3187 string_endswith(PyStringObject *self, PyObject *args)
3188 {
3189         Py_ssize_t start = 0;
3190         Py_ssize_t end = PY_SSIZE_T_MAX;
3191         PyObject *subobj;
3192         int result;
3193
3194         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3195                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3196                 return NULL;
3197         if (PyTuple_Check(subobj)) {
3198                 Py_ssize_t i;
3199                 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3200                         result = _string_tailmatch(self,
3201                                         PyTuple_GET_ITEM(subobj, i),
3202                                         start, end, +1);
3203                         if (result == -1)
3204                                 return NULL;
3205                         else if (result) {
3206                                 Py_RETURN_TRUE;
3207                         }
3208                 }
3209                 Py_RETURN_FALSE;
3210         }
3211         result = _string_tailmatch(self, subobj, start, end, +1);
3212         if (result == -1)
3213                 return NULL;
3214         else
3215                 return PyBool_FromLong(result);
3216 }
3217
3218
3219 PyDoc_STRVAR(encode__doc__,
3220 "S.encode([encoding[,errors]]) -> object\n\
3221 \n\
3222 Encodes S using the codec registered for encoding. encoding defaults\n\
3223 to the default encoding. errors may be given to set a different error\n\
3224 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3225 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3226 'xmlcharrefreplace' as well as any other name registered with\n\
3227 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3228
3229 static PyObject *
3230 string_encode(PyStringObject *self, PyObject *args)
3231 {
3232     char *encoding = NULL;
3233     char *errors = NULL;
3234     PyObject *v;
3235
3236     if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3237         return NULL;
3238     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3239     if (v == NULL)
3240         goto onError;
3241     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3242         PyErr_Format(PyExc_TypeError,
3243                      "encoder did not return a string/unicode object "
3244                      "(type=%.400s)",
3245                      v->ob_type->tp_name);
3246         Py_DECREF(v);
3247         return NULL;
3248     }
3249     return v;
3250
3251  onError:
3252     return NULL;
3253 }
3254
3255
3256 PyDoc_STRVAR(decode__doc__,
3257 "S.decode([encoding[,errors]]) -> object\n\
3258 \n\
3259 Decodes S using the codec registered for encoding. encoding defaults\n\
3260 to the default encoding. errors may be given to set a different error\n\
3261 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3262 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3263 as well as any other name registerd with codecs.register_error that is\n\
3264 able to handle UnicodeDecodeErrors.");
3265
3266 static PyObject *
3267 string_decode(PyStringObject *self, PyObject *args)
3268 {
3269     char *encoding = NULL;
3270     char *errors = NULL;
3271     PyObject *v;
3272
3273     if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3274         return NULL;
3275     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3276     if (v == NULL)
3277         goto onError;
3278     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3279         PyErr_Format(PyExc_TypeError,
3280                      "decoder did not return a string/unicode object "
3281                      "(type=%.400s)",
3282                      v->ob_type->tp_name);
3283         Py_DECREF(v);
3284         return NULL;
3285     }
3286     return v;
3287
3288  onError:
3289     return NULL;
3290 }
3291
3292
3293 PyDoc_STRVAR(expandtabs__doc__,
3294 "S.expandtabs([tabsize]) -> string\n\
3295 \n\
3296 Return a copy of S where all tab characters are expanded using spaces.\n\
3297 If tabsize is not given, a tab size of 8 characters is assumed.");
3298
3299 static PyObject*
3300 string_expandtabs(PyStringObject *self, PyObject *args)
3301 {
3302     const char *e, *p;
3303     char *q;
3304     Py_ssize_t i, j, old_j;
3305     PyObject *u;
3306     int tabsize = 8;
3307
3308     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3309         return NULL;
3310
3311     /* First pass: determine size of output string */
3312     i = j = old_j = 0;
3313     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3314     for (p = PyString_AS_STRING(self); p < e; p++)
3315         if (*p == '\t') {
3316             if (tabsize > 0) {
3317                 j += tabsize - (j % tabsize);
3318                 if (old_j > j) {
3319                     PyErr_SetString(PyExc_OverflowError,
3320                                     "new string is too long");
3321                     return NULL;
3322                 }
3323                 old_j = j;
3324             }
3325         }
3326         else {
3327             j++;
3328             if (*p == '\n' || *p == '\r') {
3329                 i += j;
3330                 old_j = j = 0;
3331                 if (i < 0) {
3332                     PyErr_SetString(PyExc_OverflowError,
3333                                     "new string is too long");
3334                     return NULL;
3335                 }
3336             }
3337         }
3338
3339     if ((i + j) < 0) {
3340         PyErr_SetString(PyExc_OverflowError, "new string is too long");
3341         return NULL;
3342     }
3343
3344     /* Second pass: create output string and fill it */
3345     u = PyString_FromStringAndSize(NULL, i + j);
3346     if (!u)
3347         return NULL;
3348
3349     j = 0;
3350     q = PyString_AS_STRING(u);
3351
3352     for (p = PyString_AS_STRING(self); p < e; p++)
3353         if (*p == '\t') {
3354             if (tabsize > 0) {
3355                 i = tabsize - (j % tabsize);
3356                 j += i;
3357                 while (i--)
3358                     *q++ = ' ';
3359             }
3360         }
3361         else {
3362             j++;
3363             *q++ = *p;
3364             if (*p == '\n' || *p == '\r')
3365                 j = 0;
3366         }
3367
3368     return u;
3369 }
3370
3371 Py_LOCAL_INLINE(PyObject *)
3372 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3373 {
3374     PyObject *u;
3375
3376     if (left < 0)
3377         left = 0;
3378     if (right < 0)
3379         right = 0;
3380
3381     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3382         Py_INCREF(self);
3383         return (PyObject *)self;
3384     }
3385
3386     u = PyString_FromStringAndSize(NULL,
3387                                    left + PyString_GET_SIZE(self) + right);
3388     if (u) {
3389         if (left)
3390             memset(PyString_AS_STRING(u), fill, left);
3391         Py_MEMCPY(PyString_AS_STRING(u) + left,
3392                PyString_AS_STRING(self),
3393                PyString_GET_SIZE(self));
3394         if (right)
3395             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3396                    fill, right);
3397     }
3398
3399     return u;
3400 }
3401
3402 PyDoc_STRVAR(ljust__doc__,
3403 "S.ljust(width[, fillchar]) -> string\n"
3404 "\n"
3405 "Return S left justified in a string of length width. Padding is\n"
3406 "done using the specified fill character (default is a space).");
3407
3408 static PyObject *
3409 string_ljust(PyStringObject *self, PyObject *args)
3410 {
3411     Py_ssize_t width;
3412     char fillchar = ' ';
3413
3414     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3415         return NULL;
3416
3417     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3418         Py_INCREF(self);
3419         return (PyObject*) self;
3420     }
3421
3422     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3423 }
3424
3425
3426 PyDoc_STRVAR(rjust__doc__,
3427 "S.rjust(width[, fillchar]) -> string\n"
3428 "\n"
3429 "Return S right justified in a string of length width. Padding is\n"
3430 "done using the specified fill character (default is a space)");
3431
3432 static PyObject *
3433 string_rjust(PyStringObject *self, PyObject *args)
3434 {
3435     Py_ssize_t width;
3436     char fillchar = ' ';
3437
3438     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3439         return NULL;
3440
3441     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3442         Py_INCREF(self);
3443         return (PyObject*) self;
3444     }
3445
3446     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3447 }
3448
3449
3450 PyDoc_STRVAR(center__doc__,
3451 "S.center(width[, fillchar]) -> string\n"
3452 "\n"
3453 "Return S centered in a string of length width. Padding is\n"
3454 "done using the specified fill character (default is a space)");
3455
3456 static PyObject *
3457 string_center(PyStringObject *self, PyObject *args)
3458 {
3459     Py_ssize_t marg, left;
3460     Py_ssize_t width;
3461     char fillchar = ' ';
3462
3463     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3464         return NULL;
3465
3466     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3467         Py_INCREF(self);
3468         return (PyObject*) self;
3469     }
3470
3471     marg = width - PyString_GET_SIZE(self);
3472     left = marg / 2 + (marg & width & 1);
3473
3474     return pad(self, left, marg - left, fillchar);
3475 }
3476
3477 PyDoc_STRVAR(zfill__doc__,
3478 "S.zfill(width) -> string\n"
3479 "\n"
3480 "Pad a numeric string S with zeros on the left, to fill a field\n"
3481 "of the specified width.  The string S is never truncated.");
3482
3483 static PyObject *
3484 string_zfill(PyStringObject *self, PyObject *args)
3485 {
3486     Py_ssize_t fill;
3487     PyObject *s;
3488     char *p;
3489     Py_ssize_t width;
3490
3491     if (!PyArg_ParseTuple(args, "n:zfill", &width))
3492         return NULL;
3493
3494     if (PyString_GET_SIZE(self) >= width) {
3495         if (PyString_CheckExact(self)) {
3496             Py_INCREF(self);
3497             return (PyObject*) self;
3498         }
3499         else
3500             return PyString_FromStringAndSize(
3501                 PyString_AS_STRING(self),
3502                 PyString_GET_SIZE(self)
3503             );
3504     }
3505
3506     fill = width - PyString_GET_SIZE(self);
3507
3508     s = pad(self, fill, 0, '0');
3509
3510     if (s == NULL)
3511         return NULL;
3512
3513     p = PyString_AS_STRING(s);
3514     if (p[fill] == '+' || p[fill] == '-') {
3515         /* move sign to beginning of string */
3516         p[0] = p[fill];
3517         p[fill] = '0';
3518     }
3519
3520     return (PyObject*) s;
3521 }
3522
3523 PyDoc_STRVAR(isspace__doc__,
3524 "S.isspace() -> bool\n\
3525 \n\
3526 Return True if all characters in S are whitespace\n\
3527 and there is at least one character in S, False otherwise.");
3528
3529 static PyObject*
3530 string_isspace(PyStringObject *self)
3531 {
3532     register const unsigned char *p
3533         = (unsigned char *) PyString_AS_STRING(self);
3534     register const unsigned char *e;
3535
3536     /* Shortcut for single character strings */
3537     if (PyString_GET_SIZE(self) == 1 &&
3538         isspace(*p))
3539         return PyBool_FromLong(1);
3540
3541     /* Special case for empty strings */
3542     if (PyString_GET_SIZE(self) == 0)
3543         return PyBool_FromLong(0);
3544
3545     e = p + PyString_GET_SIZE(self);
3546     for (; p < e; p++) {
3547         if (!isspace(*p))
3548             return PyBool_FromLong(0);
3549     }
3550     return PyBool_FromLong(1);
3551 }
3552
3553
3554 PyDoc_STRVAR(isalpha__doc__,
3555 "S.isalpha() -> bool\n\
3556 \n\
3557 Return True if all characters in S are alphabetic\n\
3558 and there is at least one character in S, False otherwise.");
3559
3560 static PyObject*
3561 string_isalpha(PyStringObject *self)
3562 {
3563     register const unsigned char *p
3564         = (unsigned char *) PyString_AS_STRING(self);
3565     register const unsigned char *e;
3566
3567     /* Shortcut for single character strings */
3568     if (PyString_GET_SIZE(self) == 1 &&
3569         isalpha(*p))
3570         return PyBool_FromLong(1);
3571
3572     /* Special case for empty strings */
3573     if (PyString_GET_SIZE(self) == 0)
3574         return PyBool_FromLong(0);
3575
3576     e = p + PyString_GET_SIZE(self);
3577     for (; p < e; p++) {
3578         if (!isalpha(*p))
3579             return PyBool_FromLong(0);
3580     }
3581     return PyBool_FromLong(1);
3582 }
3583
3584
3585 PyDoc_STRVAR(isalnum__doc__,
3586 "S.isalnum() -> bool\n\
3587 \n\
3588 Return True if all characters in S are alphanumeric\n\
3589 and there is at least one character in S, False otherwise.");
3590
3591 static PyObject*
3592 string_isalnum(PyStringObject *self)
3593 {
3594     register const unsigned char *p
3595         = (unsigned char *) PyString_AS_STRING(self);
3596     register const unsigned char *e;
3597
3598     /* Shortcut for single character strings */
3599     if (PyString_GET_SIZE(self) == 1 &&
3600         isalnum(*p))
3601         return PyBool_FromLong(1);
3602
3603     /* Special case for empty strings */
3604     if (PyString_GET_SIZE(self) == 0)
3605         return PyBool_FromLong(0);
3606
3607     e = p + PyString_GET_SIZE(self);
3608     for (; p < e; p++) {
3609         if (!isalnum(*p))
3610             return PyBool_FromLong(0);
3611     }
3612     return PyBool_FromLong(1);
3613 }
3614
3615
3616 PyDoc_STRVAR(isdigit__doc__,
3617 "S.isdigit() -> bool\n\
3618 \n\
3619 Return True if all characters in S are digits\n\
3620 and there is at least one character in S, False otherwise.");
3621
3622 static PyObject*
3623 string_isdigit(PyStringObject *self)
3624 {
3625     register const unsigned char *p
3626         = (unsigned char *) PyString_AS_STRING(self);
3627     register const unsigned char *e;
3628
3629     /* Shortcut for single character strings */
3630     if (PyString_GET_SIZE(self) == 1 &&
3631         isdigit(*p))
3632         return PyBool_FromLong(1);
3633
3634     /* Special case for empty strings */
3635     if (PyString_GET_SIZE(self) == 0)
3636         return PyBool_FromLong(0);
3637
3638     e = p + PyString_GET_SIZE(self);
3639     for (; p < e; p++) {
3640         if (!isdigit(*p))
3641             return PyBool_FromLong(0);
3642     }
3643     return PyBool_FromLong(1);
3644 }
3645
3646
3647 PyDoc_STRVAR(islower__doc__,
3648 "S.islower() -> bool\n\
3649 \n\
3650 Return True if all cased characters in S are lowercase and there is\n\
3651 at least one cased character in S, False otherwise.");
3652
3653 static PyObject*
3654 string_islower(PyStringObject *self)
3655 {
3656     register const unsigned char *p
3657         = (unsigned char *) PyString_AS_STRING(self);
3658     register const unsigned char *e;
3659     int cased;
3660
3661     /* Shortcut for single character strings */
3662     if (PyString_GET_SIZE(self) == 1)
3663         return PyBool_FromLong(islower(*p) != 0);
3664
3665     /* Special case for empty strings */
3666     if (PyString_GET_SIZE(self) == 0)
3667         return PyBool_FromLong(0);
3668
3669     e = p + PyString_GET_SIZE(self);
3670     cased = 0;
3671     for (; p < e; p++) {
3672         if (isupper(*p))
3673             return PyBool_FromLong(0);
3674         else if (!cased && islower(*p))
3675             cased = 1;
3676     }
3677     return PyBool_FromLong(cased);
3678 }
3679
3680
3681 PyDoc_STRVAR(isupper__doc__,
3682 "S.isupper() -> bool\n\
3683 \n\
3684 Return True if all cased characters in S are uppercase and there is\n\
3685 at least one cased character in S, False otherwise.");
3686
3687 static PyObject*
3688 string_isupper(PyStringObject *self)
3689 {
3690     register const unsigned char *p
3691         = (unsigned char *) PyString_AS_STRING(self);
3692     register const unsigned char *e;
3693     int cased;
3694
3695     /* Shortcut for single character strings */
3696     if (PyString_GET_SIZE(self) == 1)
3697         return PyBool_FromLong(isupper(*p) != 0);
3698
3699     /* Special case for empty strings */
3700     if (PyString_GET_SIZE(self) == 0)
3701         return PyBool_FromLong(0);
3702
3703     e = p + PyString_GET_SIZE(self);
3704     cased = 0;
3705     for (; p < e; p++) {
3706         if (islower(*p))
3707             return PyBool_FromLong(0);
3708         else if (!cased && isupper(*p))
3709             cased = 1;
3710     }
3711     return PyBool_FromLong(cased);
3712 }
3713
3714
3715 PyDoc_STRVAR(istitle__doc__,
3716 "S.istitle() -> bool\n\
3717 \n\
3718 Return True if S is a titlecased string and there is at least one\n\
3719 character in S, i.e. uppercase characters may only follow uncased\n\
3720 characters and lowercase characters only cased ones. Return False\n\
3721 otherwise.");
3722
3723 static PyObject*
3724 string_istitle(PyStringObject *self, PyObject *uncased)
3725 {
3726     register const unsigned char *p
3727         = (unsigned char *) PyString_AS_STRING(self);
3728     register const unsigned char *e;
3729     int cased, previous_is_cased;
3730
3731     /* Shortcut for single character strings */
3732     if (PyString_GET_SIZE(self) == 1)
3733         return PyBool_FromLong(isupper(*p) != 0);
3734
3735     /* Special case for empty strings */
3736     if (PyString_GET_SIZE(self) == 0)
3737         return PyBool_FromLong(0);
3738
3739     e = p + PyString_GET_SIZE(self);
3740     cased = 0;
3741     previous_is_cased = 0;
3742     for (; p < e; p++) {
3743         register const unsigned char ch = *p;
3744
3745         if (isupper(ch)) {
3746             if (previous_is_cased)
3747                 return PyBool_FromLong(0);
3748             previous_is_cased = 1;
3749             cased = 1;
3750         }
3751         else if (islower(ch)) {
3752             if (!previous_is_cased)
3753                 return PyBool_FromLong(0);
3754             previous_is_cased = 1;
3755             cased = 1;
3756         }
3757         else
3758             previous_is_cased = 0;
3759     }
3760     return PyBool_FromLong(cased);
3761 }
3762
3763
3764 PyDoc_STRVAR(splitlines__doc__,
3765 "S.splitlines([keepends]) -> list of strings\n\
3766 \n\
3767 Return a list of the lines in S, breaking at line boundaries.\n\
3768 Line breaks are not included in the resulting list unless keepends\n\
3769 is given and true.");
3770
3771 static PyObject*
3772 string_splitlines(PyStringObject *self, PyObject *args)
3773 {
3774     register Py_ssize_t i;
3775     register Py_ssize_t j;
3776     Py_ssize_t len;
3777     int keepends = 0;
3778     PyObject *list;
3779     PyObject *str;
3780     char *data;
3781
3782     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3783         return NULL;
3784
3785     data = PyString_AS_STRING(self);
3786     len = PyString_GET_SIZE(self);
3787
3788     /* This does not use the preallocated list because splitlines is
3789        usually run with hundreds of newlines.  The overhead of
3790        switching between PyList_SET_ITEM and append causes about a
3791        2-3% slowdown for that common case.  A smarter implementation
3792        could move the if check out, so the SET_ITEMs are done first
3793        and the appends only done when the prealloc buffer is full.
3794        That's too much work for little gain.*/
3795
3796     list = PyList_New(0);
3797     if (!list)
3798         goto onError;
3799
3800     for (i = j = 0; i < len; ) {
3801         Py_ssize_t eol;
3802
3803         /* Find a line and append it */
3804         while (i < len && data[i] != '\n' && data[i] != '\r')
3805             i++;
3806
3807         /* Skip the line break reading CRLF as one line break */
3808         eol = i;
3809         if (i < len) {
3810             if (data[i] == '\r' && i + 1 < len &&
3811                 data[i+1] == '\n')
3812                 i += 2;
3813             else
3814                 i++;
3815             if (keepends)
3816                 eol = i;
3817         }
3818         SPLIT_APPEND(data, j, eol);
3819         j = i;
3820     }
3821     if (j < len) {
3822         SPLIT_APPEND(data, j, len);
3823     }
3824
3825     return list;
3826
3827  onError:
3828     Py_XDECREF(list);
3829     return NULL;
3830 }
3831
3832 #undef SPLIT_APPEND
3833 #undef SPLIT_ADD
3834 #undef MAX_PREALLOC
3835 #undef PREALLOC_SIZE
3836
3837 static PyObject *
3838 string_getnewargs(PyStringObject *v)
3839 {
3840         return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3841 }
3842
3843 \f
3844 static PyMethodDef
3845 string_methods[] = {
3846         /* Counterparts of the obsolete stropmodule functions; except
3847            string.maketrans(). */
3848         {"join", (PyCFunction)string_join, METH_O, join__doc__},
3849         {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3850         {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3851         {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3852         {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3853         {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3854         {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3855         {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3856         {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3857         {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3858         {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3859         {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3860         {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3861          capitalize__doc__},
3862         {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3863         {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3864          endswith__doc__},
3865         {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3866         {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3867         {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3868         {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3869         {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3870         {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3871         {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3872         {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3873         {"rpartition", (PyCFunction)string_rpartition, METH_O,
3874          rpartition__doc__},
3875         {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3876          startswith__doc__},
3877         {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3878         {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3879          swapcase__doc__},
3880         {"translate", (PyCFunction)string_translate, METH_VARARGS,
3881          translate__doc__},
3882         {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3883         {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3884         {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3885         {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3886         {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3887         {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3888         {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3889         {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3890          expandtabs__doc__},
3891         {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3892          splitlines__doc__},
3893         {"__getnewargs__",      (PyCFunction)string_getnewargs, METH_NOARGS},
3894         {NULL,     NULL}                     /* sentinel */
3895 };
3896
3897 static PyObject *
3898 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3899
3900 static PyObject *
3901 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3902 {
3903         PyObject *x = NULL;
3904         static char *kwlist[] = {"object", 0};
3905
3906         if (type != &PyString_Type)
3907                 return str_subtype_new(type, args, kwds);
3908         if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3909                 return NULL;
3910         if (x == NULL)
3911                 return PyString_FromString("");
3912         return PyObject_Str(x);
3913 }
3914
3915 static PyObject *
3916 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3917 {
3918         PyObject *tmp, *pnew;
3919         Py_ssize_t n;
3920
3921         assert(PyType_IsSubtype(type, &PyString_Type));
3922         tmp = string_new(&PyString_Type, args, kwds);
3923         if (tmp == NULL)
3924                 return NULL;
3925         assert(PyString_CheckExact(tmp));
3926         n = PyString_GET_SIZE(tmp);
3927         pnew = type->tp_alloc(type, n);
3928         if (pnew != NULL) {
3929                 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3930                 ((PyStringObject *)pnew)->ob_shash =
3931                         ((PyStringObject *)tmp)->ob_shash;
3932                 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3933         }
3934         Py_DECREF(tmp);
3935         return pnew;
3936 }
3937
3938 static PyObject *
3939 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3940 {
3941         PyErr_SetString(PyExc_TypeError,
3942                         "The basestring type cannot be instantiated");
3943         return NULL;
3944 }
3945
3946 static PyObject *
3947 string_mod(PyObject *v, PyObject *w)
3948 {
3949         if (!PyString_Check(v)) {
3950                 Py_INCREF(Py_NotImplemented);
3951                 return Py_NotImplemented;
3952         }
3953         return PyString_Format(v, w);
3954 }
3955
3956 PyDoc_STRVAR(basestring_doc,
3957 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3958
3959 static PyNumberMethods string_as_number = {
3960         0,                      /*nb_add*/
3961         0,                      /*nb_subtract*/
3962         0,                      /*nb_multiply*/
3963         0,                      /*nb_divide*/
3964         string_mod,             /*nb_remainder*/
3965 };
3966
3967
3968 PyTypeObject PyBaseString_Type = {
3969         PyObject_HEAD_INIT(&PyType_Type)
3970         0,
3971         "basestring",
3972         0,
3973         0,
3974         0,                                      /* tp_dealloc */
3975         0,                                      /* tp_print */
3976         0,                                      /* tp_getattr */
3977         0,                                      /* tp_setattr */
3978         0,                                      /* tp_compare */
3979         0,                                      /* tp_repr */
3980         0,                                      /* tp_as_number */
3981         0,                                      /* tp_as_sequence */
3982         0,                                      /* tp_as_mapping */
3983         0,                                      /* tp_hash */
3984         0,                                      /* tp_call */
3985         0,                                      /* tp_str */
3986         0,                                      /* tp_getattro */
3987         0,                                      /* tp_setattro */
3988         0,                                      /* tp_as_buffer */
3989         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3990         basestring_doc,                         /* tp_doc */
3991         0,                                      /* tp_traverse */
3992         0,                                      /* tp_clear */
3993         0,                                      /* tp_richcompare */
3994         0,                                      /* tp_weaklistoffset */
3995         0,                                      /* tp_iter */
3996         0,                                      /* tp_iternext */
3997         0,                                      /* tp_methods */
3998         0,                                      /* tp_members */
3999         0,                                      /* tp_getset */
4000         &PyBaseObject_Type,                     /* tp_base */
4001         0,                                      /* tp_dict */
4002         0,                                      /* tp_descr_get */
4003         0,                                      /* tp_descr_set */
4004         0,                                      /* tp_dictoffset */
4005         0,                                      /* tp_init */
4006         0,                                      /* tp_alloc */
4007         basestring_new,                         /* tp_new */
4008         0,                                      /* tp_free */
4009 };
4010
4011 PyDoc_STRVAR(string_doc,
4012 "str(object) -> string\n\
4013 \n\
4014 Return a nice string representation of the object.\n\
4015 If the argument is a string, the return value is the same object.");
4016
4017 PyTypeObject PyString_Type = {
4018         PyObject_HEAD_INIT(&PyType_Type)
4019         0,
4020         "str",
4021         sizeof(PyStringObject),
4022         sizeof(char),
4023         string_dealloc,                         /* tp_dealloc */
4024         (printfunc)string_print,                /* tp_print */
4025         0,                                      /* tp_getattr */
4026         0,                                      /* tp_setattr */
4027         0,                                      /* tp_compare */
4028         string_repr,                            /* tp_repr */
4029         &string_as_number,                      /* tp_as_number */
4030         &string_as_sequence,                    /* tp_as_sequence */
4031         &string_as_mapping,                     /* tp_as_mapping */
4032         (hashfunc)string_hash,                  /* tp_hash */
4033         0,                                      /* tp_call */
4034         string_str,                             /* tp_str */
4035         PyObject_GenericGetAttr,                /* tp_getattro */
4036         0,                                      /* tp_setattro */
4037         &string_as_buffer,                      /* tp_as_buffer */
4038         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4039                 Py_TPFLAGS_BASETYPE,            /* tp_flags */
4040         string_doc,                             /* tp_doc */
4041         0,                                      /* tp_traverse */
4042         0,                                      /* tp_clear */
4043         (richcmpfunc)string_richcompare,        /* tp_richcompare */
4044         0,                                      /* tp_weaklistoffset */
4045         0,                                      /* tp_iter */
4046         0,                                      /* tp_iternext */
4047         string_methods,                         /* tp_methods */
4048         0,                                      /* tp_members */
4049         0,                                      /* tp_getset */
4050         &PyBaseString_Type,                     /* tp_base */
4051         0,                                      /* tp_dict */
4052         0,                                      /* tp_descr_get */
4053         0,                                      /* tp_descr_set */
4054         0,                                      /* tp_dictoffset */
4055         0,                                      /* tp_init */
4056         0,                                      /* tp_alloc */
4057         string_new,                             /* tp_new */
4058         PyObject_Del,                           /* tp_free */
4059 };
4060
4061 void
4062 PyString_Concat(register PyObject **pv, register PyObject *w)
4063 {
4064         register PyObject *v;
4065         if (*pv == NULL)
4066                 return;
4067         if (w == NULL || !PyString_Check(*pv)) {
4068                 Py_DECREF(*pv);
4069                 *pv = NULL;
4070                 return;
4071         }
4072         v = string_concat((PyStringObject *) *pv, w);
4073         Py_DECREF(*pv);
4074         *pv = v;
4075 }
4076
4077 void
4078 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
4079 {
4080         PyString_Concat(pv, w);
4081         Py_XDECREF(w);
4082 }
4083
4084
4085 /* The following function breaks the notion that strings are immutable:
4086    it changes the size of a string.  We get away with this only if there
4087    is only one module referencing the object.  You can also think of it
4088    as creating a new string object and destroying the old one, only
4089    more efficiently.  In any case, don't use this if the string may
4090    already be known to some other part of the code...
4091    Note that if there's not enough memory to resize the string, the original
4092    string object at *pv is deallocated, *pv is set to NULL, an "out of
4093    memory" exception is set, and -1 is returned.  Else (on success) 0 is
4094    returned, and the value in *pv may or may not be the same as on input.
4095    As always, an extra byte is allocated for a trailing \0 byte (newsize
4096    does *not* include that), and a trailing \0 byte is stored.
4097 */
4098
4099 int
4100 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
4101 {
4102         register PyObject *v;
4103         register PyStringObject *sv;
4104         v = *pv;
4105         if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4106             PyString_CHECK_INTERNED(v)) {
4107                 *pv = 0;
4108                 Py_DECREF(v);
4109                 PyErr_BadInternalCall();
4110                 return -1;
4111         }
4112         /* XXX UNREF/NEWREF interface should be more symmetrical */
4113         _Py_DEC_REFTOTAL;
4114         _Py_ForgetReference(v);
4115         *pv = (PyObject *)
4116                 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
4117         if (*pv == NULL) {
4118                 PyObject_Del(v);
4119                 PyErr_NoMemory();
4120                 return -1;
4121         }
4122         _Py_NewReference(*pv);
4123         sv = (PyStringObject *) *pv;
4124         sv->ob_size = newsize;
4125         sv->ob_sval[newsize] = '\0';
4126         sv->ob_shash = -1;      /* invalidate cached hash value */
4127         return 0;
4128 }
4129
4130 /* Helpers for formatstring */
4131
4132 Py_LOCAL_INLINE(PyObject *)
4133 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4134 {
4135         Py_ssize_t argidx = *p_argidx;
4136         if (argidx < arglen) {
4137                 (*p_argidx)++;
4138                 if (arglen < 0)
4139                         return args;
4140                 else
4141                         return PyTuple_GetItem(args, argidx);
4142         }
4143         PyErr_SetString(PyExc_TypeError,
4144                         "not enough arguments for format string");
4145         return NULL;
4146 }
4147
/* Format flag bits, one per printf-style flag character. */
#define F_LJUST (1 << 0)        /* '-' */
#define F_SIGN  (1 << 1)        /* '+' */
#define F_BLANK (1 << 2)        /* ' ' */
#define F_ALT   (1 << 3)        /* '#' */
#define F_ZERO  (1 << 4)        /* '0' */
4160
4161 Py_LOCAL_INLINE(int)
4162 formatfloat(char *buf, size_t buflen, int flags,
4163             int prec, int type, PyObject *v)
4164 {
4165         /* fmt = '%#.' + `prec` + `type`
4166            worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4167         char fmt[20];
4168         double x;
4169         x = PyFloat_AsDouble(v);
4170         if (x == -1.0 && PyErr_Occurred()) {
4171                 PyErr_SetString(PyExc_TypeError, "float argument required");
4172                 return -1;
4173         }
4174         if (prec < 0)
4175                 prec = 6;
4176         if (type == 'f' && fabs(x)/1e25 >= 1e25)
4177                 type = 'g';
4178         /* Worst case length calc to ensure no buffer overrun:
4179
4180            'g' formats:
4181              fmt = %#.<prec>g
4182              buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4183                 for any double rep.)
4184              len = 1 + prec + 1 + 2 + 5 = 9 + prec
4185
4186            'f' formats:
4187              buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4188              len = 1 + 50 + 1 + prec = 52 + prec
4189
4190            If prec=0 the effective precision is 1 (the leading digit is
4191            always given), therefore increase the length by one.
4192
4193         */
4194         if (((type == 'g' || type == 'G') &&
4195               buflen <= (size_t)10 + (size_t)prec) ||
4196             (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4197                 PyErr_SetString(PyExc_OverflowError,
4198                         "formatted float is too long (precision too large?)");
4199                 return -1;
4200         }
4201         PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4202                       (flags&F_ALT) ? "#" : "",
4203                       prec, type);
4204         PyOS_ascii_formatd(buf, buflen, fmt, x);
4205         return (int)strlen(buf);
4206 }
4207
4208 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4209  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
4210  * Python's regular ints.
4211  * Return value:  a new PyString*, or NULL if error.
4212  *  .  *pbuf is set to point into it,
4213  *     *plen set to the # of chars following that.
4214  *     Caller must decref it when done using pbuf.
4215  *     The string starting at *pbuf is of the form
4216  *         "-"? ("0x" | "0X")? digit+
4217  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
4218  *         set in flags.  The case of hex digits will be correct.
4219  *     There will be at least prec digits, zero-filled on the left if
4220  *         necessary to get that many.
      *  .  On every error return the intermediate string has already been
      *     released, so the caller owns nothing when NULL comes back.
4221  * val          object to be converted
4222  * flags        bitmask of format flags; only F_ALT is looked at
4223  * prec         minimum number of digits; 0-fill on left if needed
4224  * type         a character in [duoxX]; u acts the same as d
4225  *
4226  * CAUTION:  o, x and X conversions on regular ints can never
4227  * produce a '-' sign, but can for Python's unbounded ints.
4228  */
4229 PyObject*
4230 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4231                      char **pbuf, int *plen)
4232 {
4233         PyObject *result = NULL;
4234         char *buf;
4235         Py_ssize_t i;
4236         int sign;       /* 1 if '-', else 0 */
4237         int len;        /* number of characters */
4238         Py_ssize_t llen;
4239         int numdigits;  /* len == numnondigits + numdigits */
4240         int numnondigits = 0;
4241
             /* Let the object's own type slots produce the raw text. */
4242         switch (type) {
4243         case 'd':
4244         case 'u':
4245                 result = val->ob_type->tp_str(val);
4246                 break;
4247         case 'o':
4248                 result = val->ob_type->tp_as_number->nb_oct(val);
4249                 break;
4250         case 'x':
4251         case 'X':
4252                 numnondigits = 2;
4253                 result = val->ob_type->tp_as_number->nb_hex(val);
4254                 break;
4255         default:
4256                 assert(!"'type' not in [duoxX]");
4257         }
4258         if (!result)
4259                 return NULL;
4260
4261         buf = PyString_AsString(result);
4262         if (!buf) {
4263                 Py_DECREF(result);
4264                 return NULL;
4265         }
4266
4267         /* To modify the string in-place, there can only be one reference. */
4268         if (result->ob_refcnt != 1) {
                     /* Give up our reference first; returning without
                        doing so would leak the string. */
                     Py_DECREF(result);
4269                 PyErr_BadInternalCall();
4270                 return NULL;
4271         }
4272         llen = PyString_Size(result);
4273         if (llen > INT_MAX) {
                     /* Same here: release the string before reporting. */
                     Py_DECREF(result);
4274                 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4275                 return NULL;
4276         }
4277         len = (int)llen;
             /* Strip a trailing 'L' marker if the conversion produced one. */
4278         if (buf[len-1] == 'L') {
4279                 --len;
4280                 buf[len] = '\0';
4281         }
4282         sign = buf[0] == '-';
4283         numnondigits += sign;
4284         numdigits = len - numnondigits;
4285         assert(numdigits > 0);
4286
4287         /* Get rid of base marker unless F_ALT */
4288         if ((flags & F_ALT) == 0) {
4289                 /* Need to skip 0x, 0X or 0. */
4290                 int skipped = 0;
4291                 switch (type) {
4292                 case 'o':
4293                         assert(buf[sign] == '0');
4294                         /* If 0 is only digit, leave it alone. */
4295                         if (numdigits > 1) {
4296                                 skipped = 1;
4297                                 --numdigits;
4298                         }
4299                         break;
4300                 case 'x':
4301                 case 'X':
4302                         assert(buf[sign] == '0');
4303                         assert(buf[sign + 1] == 'x');
4304                         skipped = 2;
4305                         numnondigits -= 2;
4306                         break;
4307                 }
4308                 if (skipped) {
                             /* Advance past the marker and re-plant the sign
                                on the new first character. */
4309                         buf += skipped;
4310                         len -= skipped;
4311                         if (sign)
4312                                 buf[0] = '-';
4313                 }
4314                 assert(len == numnondigits + numdigits);
4315                 assert(numdigits > 0);
4316         }
4317
4318         /* Fill with leading zeroes to meet minimum width. */
4319         if (prec > numdigits) {
4320                 PyObject *r1 = PyString_FromStringAndSize(NULL,
4321                                         numnondigits + prec);
4322                 char *b1;
4323                 if (!r1) {
4324                         Py_DECREF(result);
4325                         return NULL;
4326                 }
4327                 b1 = PyString_AS_STRING(r1);
                     /* Copy sign/prefix, then the zero padding, then digits. */
4328                 for (i = 0; i < numnondigits; ++i)
4329                         *b1++ = *buf++;
4330                 for (i = 0; i < prec - numdigits; i++)
4331                         *b1++ = '0';
4332                 for (i = 0; i < numdigits; i++)
4333                         *b1++ = *buf++;
4334                 *b1 = '\0';
4335                 Py_DECREF(result);
4336                 result = r1;
4337                 buf = PyString_AS_STRING(result);
4338                 len = numnondigits + prec;
4339         }
4340
4341         /* Fix up case for hex conversions. */
4342         if (type == 'X') {
4343                 /* Need to convert all lower case letters to upper case.
4344                    and need to convert 0x to 0X (and -0x to -0X). */
4345                 for (i = 0; i < len; i++)
4346                         if (buf[i] >= 'a' && buf[i] <= 'x')
4347                                 buf[i] -= 'a'-'A';
4348         }
4349         *pbuf = buf;
4350         *plen = len;
4351         return result;
4352 }
4353
4354 Py_LOCAL_INLINE(int)
4355 formatint(char *buf, size_t buflen, int flags,
4356           int prec, int type, PyObject *v)
4357 {
             /* Format the C long value of v into buf (capacity buflen) for
                one of the conversions d, u, o, x or X.

                Returns the length of the text written, or -1 with an
                exception set: TypeError when v cannot be converted to a
                long, OverflowError when the requested precision would not
                fit in buf.

                Negative values under o, x and X are rendered as a literal
                '-' followed by the magnitude; a negative value under u is
                formatted as d.  A negative prec means "not given" and is
                treated as 1. */
4358         /* fmt = '%#.' + `prec` + 'l' + `type`
4359            worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4360            + 1 + 1 = 24 */
4361         char fmt[64];   /* plenty big enough! */
4362         char *sign;
4363         long x;
4364
4365         x = PyInt_AsLong(v);
4366         if (x == -1 && PyErr_Occurred()) {
4367                 PyErr_SetString(PyExc_TypeError, "int argument required");
4368                 return -1;
4369         }
4370         if (x < 0 && type == 'u') {
4371                 type = 'd';
4372         }
4373         if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4374                 sign = "-";
4375         else
4376                 sign = "";
4377         if (prec < 0)
4378                 prec = 1;
4379
4380         if ((flags & F_ALT) &&
4381             (type == 'x' || type == 'X')) {
4382                 /* When converting under %#x or %#X, there are a number
4383                  * of issues that cause pain:
4384                  * - when 0 is being converted, the C standard leaves off
4385                  *   the '0x' or '0X', which is inconsistent with other
4386                  *   %#x/%#X conversions and inconsistent with Python's
4387                  *   hex() function
4388                  * - there are platforms that violate the standard and
4389                  *   convert 0 with the '0x' or '0X'
4390                  *   (Metrowerks, Compaq Tru64)
4391                  * - there are platforms that give '0x' when converting
4392                  *   under %#X, but convert 0 in accordance with the
4393                  *   standard (OS/2 EMX)
4394                  *
4395                  * We can achieve the desired consistency by inserting our
4396                  * own '0x' or '0X' prefix, and substituting %x/%X in place
4397                  * of %#x/%#X.
4398                  *
4399                  * Note that this is the same approach as used in
4400                  * formatint() in unicodeobject.c
4401                  */
4402                 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4403                               sign, type, prec, type);
4404         }
4405         else {
4406                 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4407                               sign, (flags&F_ALT) ? "#" : "",
4408                               prec, type);
4409         }
4410
4411         /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4412          * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4413          */
4414         if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4415                 PyErr_SetString(PyExc_OverflowError,
4416                     "formatted integer is too long (precision too large?)");
4417                 return -1;
4418         }
4419         if (sign[0])
                     /* The '-' is already part of fmt, so print the
                        magnitude.  Negate in unsigned arithmetic: -x
                        overflows when x == LONG_MIN, and %lo/%lx/%lX take
                        an unsigned long anyway. */
4420                 PyOS_snprintf(buf, buflen, fmt, 0UL - (unsigned long)x);
4421         else
4422                 PyOS_snprintf(buf, buflen, fmt, x);
4423         return (int)strlen(buf);
4424 }
4425
4426 Py_LOCAL_INLINE(int)
4427 formatchar(char *buf, size_t buflen, PyObject *v)
4428 {
4429         /* Store the single character for a %c conversion in buf[0] and
4430            NUL-terminate it; buf is presumed to hold at least 2 bytes.
4431            String arguments are parsed with the "c" converter, anything
4432            else with "b".  Returns 1, or -1 if PyArg_Parse() fails. */
4433         const char *parse_spec = PyString_Check(v)
4434                 ? "c;%c requires int or char"
4435                 : "b;%c requires int or char";
4436         if (!PyArg_Parse(v, parse_spec, &buf[0]))
4437                 return -1;
4438         buf[1] = '\0';
4439         return 1;
4440 }
4441
4442 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4443
4444    FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4445    chars are formatted. XXX This is a magic number. Each formatting
4446    routine does bounds checking to ensure no overflow, but a better
4447    solution may be to malloc a buffer of appropriate size for each
4448    format. For now, the current solution is sufficient.

        The buffer itself is the `formatbuf' array that PyString_Format()
        declares for each conversion and hands to formatfloat(), formatint()
        and formatchar().  Python longs do not go through it: they are
        converted by _PyString_FormatLong(), which returns its own string.
4449 */
4450 #define FORMATBUFLEN (size_t)120
4451
4452 PyObject *
4453 PyString_Format(PyObject *format, PyObject *args)
4454 {
             /* Implements `format % args' for 8-bit strings.
              *
              * format  must be a string object; a non-string or NULL format,
              *         or NULL args, is reported via PyErr_BadInternalCall().
              * args    a tuple of positional values, a single non-tuple
              *         value, or -- when its type supports the mapping
              *         protocol and it is neither a tuple nor a basestring --
              *         a mapping used for %(key) lookups.
              *
              * Returns a new reference, or NULL with an exception set.  When
              * a unicode object turns up for %s or %c (or str() of the
              * argument yields unicode), the text built so far is kept, the
              * rest of the format is handed to PyUnicode_Format(), and the
              * value returned is whatever PyUnicode_Concat() makes of the
              * two pieces.
              *
              * Bookkeeping: reslen is the allocated size of result, rescnt
              * the number of bytes still free at res, fmtcnt the number of
              * format characters not yet consumed.  args_owned is set
              * whenever args holds a reference that must be released here.
              * Once result has been allocated, every failure leaves through
              * the error label, which releases result (if it still exists)
              * and args (if owned).  _PyString_Resize() sets result to NULL
              * when it fails, which is why the label uses Py_XDECREF.
              */
4455         char *fmt, *res;
4456         Py_ssize_t arglen, argidx;
4457         Py_ssize_t reslen, rescnt, fmtcnt;
4458         int args_owned = 0;
4459         PyObject *result, *orig_args;
4460 #ifdef Py_USING_UNICODE
4461         PyObject *v, *w;
4462 #endif
4463         PyObject *dict = NULL;
4464         if (format == NULL || !PyString_Check(format) || args == NULL) {
4465                 PyErr_BadInternalCall();
4466                 return NULL;
4467         }
4468         orig_args = args;
4469         fmt = PyString_AS_STRING(format);
4470         fmtcnt = PyString_GET_SIZE(format);
4471         reslen = rescnt = fmtcnt + 100;
4472         result = PyString_FromStringAndSize((char *)NULL, reslen);
4473         if (result == NULL)
4474                 return NULL;
4475         res = PyString_AsString(result);
4476         if (PyTuple_Check(args)) {
4477                 arglen = PyTuple_GET_SIZE(args);
4478                 argidx = 0;
4479         }
4480         else {
4481                 arglen = -1;
4482                 argidx = -2;
4483         }
4484         if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4485             !PyObject_TypeCheck(args, &PyBaseString_Type))
4486                 dict = args;
4487         while (--fmtcnt >= 0) {
4488                 if (*fmt != '%') {
4489                         if (--rescnt < 0) {
4490                                 rescnt = fmtcnt + 100;
4491                                 reslen += rescnt;
4492                                 if (_PyString_Resize(&result, reslen) < 0)
                                             /* result is NULL now; leave via
                                                error so that an owned args
                                                is still released. */
4493                                         goto error;
4494                                 res = PyString_AS_STRING(result)
4495                                         + reslen - rescnt;
4496                                 --rescnt;
4497                         }
4498                         *res++ = *fmt++;
4499                 }
4500                 else {
4501                         /* Got a format specifier */
4502                         int flags = 0;
4503                         Py_ssize_t width = -1;
4504                         int prec = -1;
4505                         int c = '\0';
4506                         int fill;
4507                         PyObject *v = NULL;
4508                         PyObject *temp = NULL;
4509                         char *pbuf;
4510                         int sign;
4511                         Py_ssize_t len;
4512                         char formatbuf[FORMATBUFLEN];
4513                              /* For format{float,int,char}() */
4514 #ifdef Py_USING_UNICODE
4515                         char *fmt_start = fmt;
4516                         Py_ssize_t argidx_start = argidx;
4517 #endif
4518
4519                         fmt++;
4520                         if (*fmt == '(') {
4521                                 char *keystart;
4522                                 Py_ssize_t keylen;
4523                                 PyObject *key;
4524                                 int pcount = 1;
4525
4526                                 if (dict == NULL) {
4527                                         PyErr_SetString(PyExc_TypeError,
4528                                                  "format requires a mapping");
4529                                         goto error;
4530                                 }
4531                                 ++fmt;
4532                                 --fmtcnt;
4533                                 keystart = fmt;
4534                                 /* Skip over balanced parentheses */
4535                                 while (pcount > 0 && --fmtcnt >= 0) {
4536                                         if (*fmt == ')')
4537                                                 --pcount;
4538                                         else if (*fmt == '(')
4539                                                 ++pcount;
4540                                         fmt++;
4541                                 }
4542                                 keylen = fmt - keystart - 1;
4543                                 if (fmtcnt < 0 || pcount > 0) {
4544                                         PyErr_SetString(PyExc_ValueError,
4545                                                    "incomplete format key");
4546                                         goto error;
4547                                 }
4548                                 key = PyString_FromStringAndSize(keystart,
4549                                                                  keylen);
4550                                 if (key == NULL)
4551                                         goto error;
4552                                 if (args_owned) {
4553                                         Py_DECREF(args);
4554                                         args_owned = 0;
4555                                 }
4556                                 args = PyObject_GetItem(dict, key);
4557                                 Py_DECREF(key);
4558                                 if (args == NULL) {
4559                                         goto error;
4560                                 }
4561                                 args_owned = 1;
4562                                 arglen = -1;
4563                                 argidx = -2;
4564                         }
4565                         while (--fmtcnt >= 0) {
4566                                 switch (c = *fmt++) {
4567                                 case '-': flags |= F_LJUST; continue;
4568                                 case '+': flags |= F_SIGN; continue;
4569                                 case ' ': flags |= F_BLANK; continue;
4570                                 case '#': flags |= F_ALT; continue;
4571                                 case '0': flags |= F_ZERO; continue;
4572                                 }
4573                                 break;
4574                         }
4575                         if (c == '*') {
4576                                 v = getnextarg(args, arglen, &argidx);
4577                                 if (v == NULL)
4578                                         goto error;
4579                                 if (!PyInt_Check(v)) {
4580                                         PyErr_SetString(PyExc_TypeError,
4581                                                         "* wants int");
4582                                         goto error;
4583                                 }
4584                                 width = PyInt_AsLong(v);
4585                                 if (width < 0) {
4586                                         flags |= F_LJUST;
4587                                         width = -width;
4588                                 }
4589                                 if (--fmtcnt >= 0)
4590                                         c = *fmt++;
4591                         }
4592                         else if (c >= 0 && isdigit(c)) {
4593                                 width = c - '0';
4594                                 while (--fmtcnt >= 0) {
4595                                         c = Py_CHARMASK(*fmt++);
4596                                         if (!isdigit(c))
4597                                                 break;
4598                                         if ((width*10) / 10 != width) {
4599                                                 PyErr_SetString(
4600                                                         PyExc_ValueError,
4601                                                         "width too big");
4602                                                 goto error;
4603                                         }
4604                                         width = width*10 + (c - '0');
4605                                 }
4606                         }
4607                         if (c == '.') {
4608                                 prec = 0;
4609                                 if (--fmtcnt >= 0)
4610                                         c = *fmt++;
4611                                 if (c == '*') {
4612                                         v = getnextarg(args, arglen, &argidx);
4613                                         if (v == NULL)
4614                                                 goto error;
4615                                         if (!PyInt_Check(v)) {
4616                                                 PyErr_SetString(
4617                                                         PyExc_TypeError,
4618                                                         "* wants int");
4619                                                 goto error;
4620                                         }
4621                                         prec = PyInt_AsLong(v);
4622                                         if (prec < 0)
4623                                                 prec = 0;
4624                                         if (--fmtcnt >= 0)
4625                                                 c = *fmt++;
4626                                 }
4627                                 else if (c >= 0 && isdigit(c)) {
4628                                         prec = c - '0';
4629                                         while (--fmtcnt >= 0) {
4630                                                 c = Py_CHARMASK(*fmt++);
4631                                                 if (!isdigit(c))
4632                                                         break;
4633                                                 if ((prec*10) / 10 != prec) {
4634                                                         PyErr_SetString(
4635                                                             PyExc_ValueError,
4636                                                             "prec too big");
4637                                                         goto error;
4638                                                 }
4639                                                 prec = prec*10 + (c - '0');
4640                                         }
4641                                 }
4642                         } /* prec */
4643                         if (fmtcnt >= 0) {
4644                                 if (c == 'h' || c == 'l' || c == 'L') {
4645                                         if (--fmtcnt >= 0)
4646                                                 c = *fmt++;
4647                                 }
4648                         }
4649                         if (fmtcnt < 0) {
4650                                 PyErr_SetString(PyExc_ValueError,
4651                                                 "incomplete format");
4652                                 goto error;
4653                         }
4654                         if (c != '%') {
4655                                 v = getnextarg(args, arglen, &argidx);
4656                                 if (v == NULL)
4657                                         goto error;
4658                         }
4659                         sign = 0;
4660                         fill = ' ';
4661                         switch (c) {
4662                         case '%':
4663                                 pbuf = "%";
4664                                 len = 1;
4665                                 break;
4666                         case 's':
4667 #ifdef Py_USING_UNICODE
4668                                 if (PyUnicode_Check(v)) {
4669                                         fmt = fmt_start;
4670                                         argidx = argidx_start;
4671                                         goto unicode;
4672                                 }
4673 #endif
4674                                 temp = _PyObject_Str(v);
4675 #ifdef Py_USING_UNICODE
4676                                 if (temp != NULL && PyUnicode_Check(temp)) {
4677                                         Py_DECREF(temp);
4678                                         fmt = fmt_start;
4679                                         argidx = argidx_start;
4680                                         goto unicode;
4681                                 }
4682 #endif
4683                                 /* Fall through */
4684                         case 'r':
4685                                 if (c == 'r')
4686                                         temp = PyObject_Repr(v);
4687                                 if (temp == NULL)
4688                                         goto error;
4689                                 if (!PyString_Check(temp)) {
4690                                         PyErr_SetString(PyExc_TypeError,
4691                                           "%s argument has non-string str()");
4692                                         Py_DECREF(temp);
4693                                         goto error;
4694                                 }
4695                                 pbuf = PyString_AS_STRING(temp);
4696                                 len = PyString_GET_SIZE(temp);
4697                                 if (prec >= 0 && len > prec)
4698                                         len = prec;
4699                                 break;
4700                         case 'i':
4701                         case 'd':
4702                         case 'u':
4703                         case 'o':
4704                         case 'x':
4705                         case 'X':
4706                                 if (c == 'i')
4707                                         c = 'd';
4708                                 if (PyLong_Check(v)) {
4709                                         int ilen;
4710                                         temp = _PyString_FormatLong(v, flags,
4711                                                 prec, c, &pbuf, &ilen);
4712                                         len = ilen;
4713                                         if (!temp)
4714                                                 goto error;
4715                                         sign = 1;
4716                                 }
4717                                 else {
4718                                         pbuf = formatbuf;
4719                                         len = formatint(pbuf,
4720                                                         sizeof(formatbuf),
4721                                                         flags, prec, c, v);
4722                                         if (len < 0)
4723                                                 goto error;
4724                                         sign = 1;
4725                                 }
4726                                 if (flags & F_ZERO)
4727                                         fill = '0';
4728                                 break;
4729                         case 'e':
4730                         case 'E':
4731                         case 'f':
4732                         case 'F':
4733                         case 'g':
4734                         case 'G':
4735                                 if (c == 'F')
4736                                         c = 'f';
4737                                 pbuf = formatbuf;
4738                                 len = formatfloat(pbuf, sizeof(formatbuf),
4739                                                   flags, prec, c, v);
4740                                 if (len < 0)
4741                                         goto error;
4742                                 sign = 1;
4743                                 if (flags & F_ZERO)
4744                                         fill = '0';
4745                                 break;
4746                         case 'c':
4747 #ifdef Py_USING_UNICODE
4748                                 if (PyUnicode_Check(v)) {
4749                                         fmt = fmt_start;
4750                                         argidx = argidx_start;
4751                                         goto unicode;
4752                                 }
4753 #endif
4754                                 pbuf = formatbuf;
4755                                 len = formatchar(pbuf, sizeof(formatbuf), v);
4756                                 if (len < 0)
4757                                         goto error;
4758                                 break;
4759                         default:
4760                                 PyErr_Format(PyExc_ValueError,
4761                                   "unsupported format character '%c' (0x%x) "
4762                                   "at index %zd",
4763                                   c, c,
4764                                   (Py_ssize_t)(fmt - 1 -
4765                                                PyString_AsString(format)));
4766                                 goto error;
4767                         }
4768                         if (sign) {
4769                                 if (*pbuf == '-' || *pbuf == '+') {
4770                                         sign = *pbuf++;
4771                                         len--;
4772                                 }
4773                                 else if (flags & F_SIGN)
4774                                         sign = '+';
4775                                 else if (flags & F_BLANK)
4776                                         sign = ' ';
4777                                 else
4778                                         sign = 0;
4779                         }
4780                         if (width < len)
4781                                 width = len;
4782                         if (rescnt - (sign != 0) < width) {
4783                                 reslen -= rescnt;
4784                                 rescnt = width + fmtcnt + 100;
4785                                 reslen += rescnt;
4786                                 if (reslen < 0) {
                                             /* Size overflowed: report it and
                                                let error release result and
                                                any owned args. */
4787                                         Py_XDECREF(temp);
4788                                         PyErr_NoMemory();
4789                                         goto error;
4790                                 }
4791                                 if (_PyString_Resize(&result, reslen) < 0) {
4792                                         Py_XDECREF(temp);
4793                                         goto error;
4794                                 }
4795                                 res = PyString_AS_STRING(result)
4796                                         + reslen - rescnt;
4797                         }
4798                         if (sign) {
4799                                 if (fill != ' ')
4800                                         *res++ = sign;
4801                                 rescnt--;
4802                                 if (width > len)
4803                                         width--;
4804                         }
4805                         if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4806                                 assert(pbuf[0] == '0');
4807                                 assert(pbuf[1] == c);
4808                                 if (fill != ' ') {
4809                                         *res++ = *pbuf++;
4810                                         *res++ = *pbuf++;
4811                                 }
4812                                 rescnt -= 2;
4813                                 width -= 2;
4814                                 if (width < 0)
4815                                         width = 0;
4816                                 len -= 2;
4817                         }
4818                         if (width > len && !(flags & F_LJUST)) {
4819                                 do {
4820                                         --rescnt;
4821                                         *res++ = fill;
4822                                 } while (--width > len);
4823                         }
4824                         if (fill == ' ') {
4825                                 if (sign)
4826                                         *res++ = sign;
4827                                 if ((flags & F_ALT) &&
4828                                     (c == 'x' || c == 'X')) {
4829                                         assert(pbuf[0] == '0');
4830                                         assert(pbuf[1] == c);
4831                                         *res++ = *pbuf++;
4832                                         *res++ = *pbuf++;
4833                                 }
4834                         }
4835                         Py_MEMCPY(res, pbuf, len);
4836                         res += len;
4837                         rescnt -= len;
4838                         while (--width >= len) {
4839                                 --rescnt;
4840                                 *res++ = ' ';
4841                         }
4842                         if (dict && (argidx < arglen) && c != '%') {
4843                                 PyErr_SetString(PyExc_TypeError,
4844                                            "not all arguments converted during string formatting");
4845                                 Py_XDECREF(temp);
4846                                 goto error;
4847                         }
4848                         Py_XDECREF(temp);
4849                 } /* '%' */
4850         } /* until end */
4851         if (argidx < arglen && !dict) {
4852                 PyErr_SetString(PyExc_TypeError,
4853                                 "not all arguments converted during string formatting");
4854                 goto error;
4855         }
4856         if (args_owned) {
4857                 Py_DECREF(args);
4858         }
4859         _PyString_Resize(&result, reslen - rescnt);
4860         return result;
4861
4862 #ifdef Py_USING_UNICODE
4863  unicode:
4864         if (args_owned) {
4865                 Py_DECREF(args);
4866                 args_owned = 0;
4867         }
4868         /* Fiddle args right (remove the first argidx arguments) */
4869         if (PyTuple_Check(orig_args) && argidx > 0) {
4870                 PyObject *v;
4871                 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4872                 v = PyTuple_New(n);
4873                 if (v == NULL)
4874                         goto error;
4875                 while (--n >= 0) {
4876                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4877                         Py_INCREF(w);
4878                         PyTuple_SET_ITEM(v, n, w);
4879                 }
4880                 args = v;
4881         } else {
4882                 Py_INCREF(orig_args);
4883                 args = orig_args;
4884         }
4885         args_owned = 1;
4886         /* Take what we have of the result and let the Unicode formatting
4887            function format the rest of the input. */
4888         rescnt = res - PyString_AS_STRING(result);
4889         if (_PyString_Resize(&result, rescnt))
4890                 goto error;
4891         fmtcnt = PyString_GET_SIZE(format) - \
4892                  (fmt - PyString_AS_STRING(format));
4893         format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4894         if (format == NULL)
4895                 goto error;
4896         v = PyUnicode_Format(format, args);
4897         Py_DECREF(format);
4898         if (v == NULL)
4899                 goto error;
4900         /* Paste what we have (result) to what the Unicode formatting
4901            function returned (v) and return the result (or error) */
4902         w = PyUnicode_Concat(result, v);
4903         Py_DECREF(result);
4904         Py_DECREF(v);
4905         Py_DECREF(args);
4906         return w;
4907 #endif /* Py_USING_UNICODE */
4908
4909  error:
             /* result may be NULL here: a failed _PyString_Resize() has
                already released it. */
4910         Py_XDECREF(result);
4911         if (args_owned) {
4912                 Py_DECREF(args);
4913         }
4914         return NULL;
4915 }
4916
4917 void
4918 PyString_InternInPlace(PyObject **p)
4919 {
4920         register PyStringObject *s = (PyStringObject *)(*p);
4921         PyObject *t;
4922         if (s == NULL || !PyString_Check(s))
4923                 Py_FatalError("PyString_InternInPlace: strings only please!");
4924         /* If it's a string subclass, we don't really know what putting
4925            it in the interned dict might do. */
4926         if (!PyString_CheckExact(s))
4927                 return;
4928         if (PyString_CHECK_INTERNED(s))
4929                 return;
4930         if (interned == NULL) {
4931                 interned = PyDict_New();
4932                 if (interned == NULL) {
4933                         PyErr_Clear(); /* Don't leave an exception */
4934                         return;
4935                 }
4936         }
4937         t = PyDict_GetItem(interned, (PyObject *)s);
4938         if (t) {
4939                 Py_INCREF(t);
4940                 Py_DECREF(*p);
4941                 *p = t;
4942                 return;
4943         }
4944
4945         if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4946                 PyErr_Clear();
4947                 return;
4948         }
4949         /* The two references in interned are not counted by refcnt.
4950            The string deallocator will take care of this */
4951         s->ob_refcnt -= 2;
4952         PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4953 }
4954
4955 void
4956 PyString_InternImmortal(PyObject **p)
4957 {
4958         PyString_InternInPlace(p);
4959         if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4960                 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4961                 Py_INCREF(*p);
4962         }
4963 }
4964
4965
4966 PyObject *
4967 PyString_InternFromString(const char *cp)
4968 {
4969         PyObject *s = PyString_FromString(cp);
4970         if (s == NULL)
4971                 return NULL;
4972         PyString_InternInPlace(&s);
4973         return s;
4974 }
4975
4976 void
4977 PyString_Fini(void)
4978 {
4979         int i;
4980         for (i = 0; i < UCHAR_MAX + 1; i++) {
4981                 Py_XDECREF(characters[i]);
4982                 characters[i] = NULL;
4983         }
4984         Py_XDECREF(nullstring);
4985         nullstring = NULL;
4986 }
4987
4988 void _Py_ReleaseInternedStrings(void)
4989 {
4990         PyObject *keys;
4991         PyStringObject *s;
4992         Py_ssize_t i, n;
4993
4994         if (interned == NULL || !PyDict_Check(interned))
4995                 return;
4996         keys = PyDict_Keys(interned);
4997         if (keys == NULL || !PyList_Check(keys)) {
4998                 PyErr_Clear();
4999                 return;
5000         }
5001
5002         /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5003            detector, interned strings are not forcibly deallocated; rather, we
5004            give them their stolen references back, and then clear and DECREF
5005            the interned dict. */
5006
5007         fprintf(stderr, "releasing interned strings\n");
5008         n = PyList_GET_SIZE(keys);
5009         for (i = 0; i < n; i++) {
5010                 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5011                 switch (s->ob_sstate) {
5012                 case SSTATE_NOT_INTERNED:
5013                         /* XXX Shouldn't happen */
5014                         break;
5015                 case SSTATE_INTERNED_IMMORTAL:
5016                         s->ob_refcnt += 1;
5017                         break;
5018                 case SSTATE_INTERNED_MORTAL:
5019                         s->ob_refcnt += 2;
5020                         break;
5021                 default:
5022                         Py_FatalError("Inconsistent interned string state.");
5023                 }
5024                 s->ob_sstate = SSTATE_NOT_INTERNED;
5025         }
5026         Py_DECREF(keys);
5027         PyDict_Clear(interned);
5028         Py_DECREF(interned);
5029         interned = NULL;
5030 }