Objects/stringobject.c

   1 /* String object implementation */
   2
   3 #define PY_SSIZE_T_CLEAN
   4
   5 #include "Python.h"
   6
   7 #include <ctype.h>
   8
#ifdef COUNT_ALLOCS
/* Allocation statistics: how many times the shared empty string
   (null_strings) or a cached one-character string (one_strings) was handed
   out instead of allocating a new object. */
int null_strings, one_strings;
#endif

/* Cache of one-character strings, indexed by byte value.  An entry stays
   NULL until the first string consisting of that byte has been created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string; NULL until the first empty string is created. */
static PyStringObject *nullstring;
  15
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;
  25
  26 /*
  27    For both PyString_FromString() and PyString_FromStringAndSize(), the
  28    parameter `size' denotes number of characters to allocate, not counting any
  29    null terminating character.
  30
  31    For PyString_FromString(), the parameter `str' points to a null-terminated
  32    string containing exactly `size' bytes.
  33
  34    For PyString_FromStringAndSize(), the parameter the parameter `str' is
  35    either NULL or else points to a string containing at least `size' bytes.
  36    For PyString_FromStringAndSize(), the string in the `str' parameter does
  37    not have to be null-terminated.  (Therefore it is safe to construct a
  38    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
  39    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
  40    bytes (setting the last byte to the null terminating character) and you can
  41    fill in the data yourself.  If `str' is non-NULL then the resulting
  42    PyString object must be treated as immutable and you must not fill in nor
  43    alter the data yourself, since the strings may be shared.
  44
  45    The PyObject member `op->ob_size', which denotes the number of "extra
  46    items" in a variable-size object, will contain the number of bytes
  47    allocated for string data, not counting the null terminating character.  It
  48    is therefore equal to the equal to the `size' parameter (for
  49    PyString_FromStringAndSize()) or the length of the string in the `str'
  50    parameter (for PyString_FromString()).
  51 */
  52 PyObject *
  53 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
  54 {
  55         register PyStringObject *op;
  56         assert(size >= 0);
  57         if (size == 0 && (op = nullstring) != NULL) {
  58 #ifdef COUNT_ALLOCS
  59                 null_strings++;
  60 #endif
  61                 Py_INCREF(op);
  62                 return (PyObject *)op;
  63         }
  64         if (size == 1 && str != NULL &&
  65             (op = characters[*str & UCHAR_MAX]) != NULL)
  66         {
  67 #ifdef COUNT_ALLOCS
  68                 one_strings++;
  69 #endif
  70                 Py_INCREF(op);
  71                 return (PyObject *)op;
  72         }
  73
  74         /* Inline PyObject_NewVar */
  75         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
  76         if (op == NULL)
  77                 return PyErr_NoMemory();
  78         PyObject_INIT_VAR(op, &PyString_Type, size);
  79         op->ob_shash = -1;
  80         op->ob_sstate = SSTATE_NOT_INTERNED;
  81         if (str != NULL)
  82                 Py_MEMCPY(op->ob_sval, str, size);
  83         op->ob_sval[size] = '\0';
  84         /* share short strings */
  85         if (size == 0) {
  86                 PyObject *t = (PyObject *)op;
  87                 PyString_InternInPlace(&t);
  88                 op = (PyStringObject *)t;
  89                 nullstring = op;
  90                 Py_INCREF(op);
  91         } else if (size == 1 && str != NULL) {
  92                 PyObject *t = (PyObject *)op;
  93                 PyString_InternInPlace(&t);
  94                 op = (PyStringObject *)t;
  95                 characters[*str & UCHAR_MAX] = op;
  96                 Py_INCREF(op);
  97         }
  98         return (PyObject *) op;
  99 }
 100
 101 PyObject *
 102 PyString_FromString(const char *str)
 103 {
 104         register size_t size;
 105         register PyStringObject *op;
 106
 107         assert(str != NULL);
 108         size = strlen(str);
 109         if (size > PY_SSIZE_T_MAX) {
 110                 PyErr_SetString(PyExc_OverflowError,
 111                         "string is too long for a Python string");
 112                 return NULL;
 113         }
 114         if (size == 0 && (op = nullstring) != NULL) {
 115 #ifdef COUNT_ALLOCS
 116                 null_strings++;
 117 #endif
 118                 Py_INCREF(op);
 119                 return (PyObject *)op;
 120         }
 121         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 122 #ifdef COUNT_ALLOCS
 123                 one_strings++;
 124 #endif
 125                 Py_INCREF(op);
 126                 return (PyObject *)op;
 127         }
 128
 129         /* Inline PyObject_NewVar */
 130         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
 131         if (op == NULL)
 132                 return PyErr_NoMemory();
 133         PyObject_INIT_VAR(op, &PyString_Type, size);
 134         op->ob_shash = -1;
 135         op->ob_sstate = SSTATE_NOT_INTERNED;
 136         Py_MEMCPY(op->ob_sval, str, size+1);
 137         /* share short strings */
 138         if (size == 0) {
 139                 PyObject *t = (PyObject *)op;
 140                 PyString_InternInPlace(&t);
 141                 op = (PyStringObject *)t;
 142                 nullstring = op;
 143                 Py_INCREF(op);
 144         } else if (size == 1) {
 145                 PyObject *t = (PyObject *)op;
 146                 PyString_InternInPlace(&t);
 147                 op = (PyStringObject *)t;
 148                 characters[*str & UCHAR_MAX] = op;
 149                 Py_INCREF(op);
 150         }
 151         return (PyObject *) op;
 152 }
 153
 154 PyObject *
 155 PyString_FromFormatV(const char *format, va_list vargs)
 156 {
 157         va_list count;
 158         Py_ssize_t n = 0;
 159         const char* f;
 160         char *s;
 161         PyObject* string;
 162
 163 #ifdef VA_LIST_IS_ARRAY
 164         Py_MEMCPY(count, vargs, sizeof(va_list));
 165 #else
 166 #ifdef  __va_copy
 167         __va_copy(count, vargs);
 168 #else
 169         count = vargs;
 170 #endif
 171 #endif
 172         /* step 1: figure out how large a buffer we need */
 173         for (f = format; *f; f++) {
 174                 if (*f == '%') {
 175                         const char* p = f;
 176                         while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 177                                 ;
 178
 179                         /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
 180                          * they don't affect the amount of space we reserve.
 181                          */
 182                         if ((*f == 'l' || *f == 'z') &&
 183                                         (f[1] == 'd' || f[1] == 'u'))
 184                                 ++f;
 185
 186                         switch (*f) {
 187                         case 'c':
 188                                 (void)va_arg(count, int);
 189                                 /* fall through... */
 190                         case '%':
 191                                 n++;
 192                                 break;
 193                         case 'd': case 'u': case 'i': case 'x':
 194                                 (void) va_arg(count, int);
 195                                 /* 20 bytes is enough to hold a 64-bit
 196                                    integer.  Decimal takes the most space.
 197                                    This isn't enough for octal. */
 198                                 n += 20;
 199                                 break;
 200                         case 's':
 201                                 s = va_arg(count, char*);
 202                                 n += strlen(s);
 203                                 break;
 204                         case 'p':
 205                                 (void) va_arg(count, int);
 206                                 /* maximum 64-bit pointer representation:
 207                                  * 0xffffffffffffffff
 208                                  * so 19 characters is enough.
 209                                  * XXX I count 18 -- what's the extra for?
 210                                  */
 211                                 n += 19;
 212                                 break;
 213                         default:
 214                                 /* if we stumble upon an unknown
 215                                    formatting code, copy the rest of
 216                                    the format string to the output
 217                                    string. (we cannot just skip the
 218                                    code, since there's no way to know
 219                                    what's in the argument list) */
 220                                 n += strlen(p);
 221                                 goto expand;
 222                         }
 223                 } else
 224                         n++;
 225         }
 226  expand:
 227         /* step 2: fill the buffer */
 228         /* Since we've analyzed how much space we need for the worst case,
 229            use sprintf directly instead of the slower PyOS_snprintf. */
 230         string = PyString_FromStringAndSize(NULL, n);
 231         if (!string)
 232                 return NULL;
 233
 234         s = PyString_AsString(string);
 235
 236         for (f = format; *f; f++) {
 237                 if (*f == '%') {
 238                         const char* p = f++;
 239                         Py_ssize_t i;
 240                         int longflag = 0;
 241                         int size_tflag = 0;
 242                         /* parse the width.precision part (we're only
 243                            interested in the precision value, if any) */
 244                         n = 0;
 245                         while (isdigit(Py_CHARMASK(*f)))
 246                                 n = (n*10) + *f++ - '0';
 247                         if (*f == '.') {
 248                                 f++;
 249                                 n = 0;
 250                                 while (isdigit(Py_CHARMASK(*f)))
 251                                         n = (n*10) + *f++ - '0';
 252                         }
 253                         while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 254                                 f++;
 255                         /* handle the long flag, but only for %ld and %lu.
 256                            others can be added when necessary. */
 257                         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
 258                                 longflag = 1;
 259                                 ++f;
 260                         }
 261                         /* handle the size_t flag. */
 262                         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
 263                                 size_tflag = 1;
 264                                 ++f;
 265                         }
 266
 267                         switch (*f) {
 268                         case 'c':
 269                                 *s++ = va_arg(vargs, int);
 270                                 break;
 271                         case 'd':
 272                                 if (longflag)
 273                                         sprintf(s, "%ld", va_arg(vargs, long));
 274                                 else if (size_tflag)
 275                                         sprintf(s, "%" PY_FORMAT_SIZE_T "d",
 276                                                 va_arg(vargs, Py_ssize_t));
 277                                 else
 278                                         sprintf(s, "%d", va_arg(vargs, int));
 279                                 s += strlen(s);
 280                                 break;
 281                         case 'u':
 282                                 if (longflag)
 283                                         sprintf(s, "%lu",
 284                                                 va_arg(vargs, unsigned long));
 285                                 else if (size_tflag)
 286                                         sprintf(s, "%" PY_FORMAT_SIZE_T "u",
 287                                                 va_arg(vargs, size_t));
 288                                 else
 289                                         sprintf(s, "%u",
 290                                                 va_arg(vargs, unsigned int));
 291                                 s += strlen(s);
 292                                 break;
 293                         case 'i':
 294                                 sprintf(s, "%i", va_arg(vargs, int));
 295                                 s += strlen(s);
 296                                 break;
 297                         case 'x':
 298                                 sprintf(s, "%x", va_arg(vargs, int));
 299                                 s += strlen(s);
 300                                 break;
 301                         case 's':
 302                                 p = va_arg(vargs, char*);
 303                                 i = strlen(p);
 304                                 if (n > 0 && i > n)
 305                                         i = n;
 306                                 Py_MEMCPY(s, p, i);
 307                                 s += i;
 308                                 break;
 309                         case 'p':
 310                                 sprintf(s, "%p", va_arg(vargs, void*));
 311                                 /* %p is ill-defined:  ensure leading 0x. */
 312                                 if (s[1] == 'X')
 313                                         s[1] = 'x';
 314                                 else if (s[1] != 'x') {
 315                                         memmove(s+2, s, strlen(s)+1);
 316                                         s[0] = '0';
 317                                         s[1] = 'x';
 318                                 }
 319                                 s += strlen(s);
 320                                 break;
 321                         case '%':
 322                                 *s++ = '%';
 323                                 break;
 324                         default:
 325                                 strcpy(s, p);
 326                                 s += strlen(s);
 327                                 goto end;
 328                         }
 329                 } else
 330                         *s++ = *f;
 331         }
 332
 333  end:
 334         _PyString_Resize(&string, s - PyString_AS_STRING(string));
 335         return string;
 336 }
 337
 338 PyObject *
 339 PyString_FromFormat(const char *format, ...)
 340 {
 341         PyObject* ret;
 342         va_list vargs;
 343
 344 #ifdef HAVE_STDARG_PROTOTYPES
 345         va_start(vargs, format);
 346 #else
 347         va_start(vargs);
 348 #endif
 349         ret = PyString_FromFormatV(format, vargs);
 350         va_end(vargs);
 351         return ret;
 352 }
 353
 354
 355 PyObject *PyString_Decode(const char *s,
 356                           Py_ssize_t size,
 357                           const char *encoding,
 358                           const char *errors)
 359 {
 360     PyObject *v, *str;
 361
 362     str = PyString_FromStringAndSize(s, size);
 363     if (str == NULL)
 364         return NULL;
 365     v = PyString_AsDecodedString(str, encoding, errors);
 366     Py_DECREF(str);
 367     return v;
 368 }
 369
 370 PyObject *PyString_AsDecodedObject(PyObject *str,
 371                                    const char *encoding,
 372                                    const char *errors)
 373 {
 374     PyObject *v;
 375
 376     if (!PyString_Check(str)) {
 377         PyErr_BadArgument();
 378         goto onError;
 379     }
 380
 381     if (encoding == NULL) {
 382 #ifdef Py_USING_UNICODE
 383         encoding = PyUnicode_GetDefaultEncoding();
 384 #else
 385         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 386         goto onError;
 387 #endif
 388     }
 389
 390     /* Decode via the codec registry */
 391     v = PyCodec_Decode(str, encoding, errors);
 392     if (v == NULL)
 393         goto onError;
 394
 395     return v;
 396
 397  onError:
 398     return NULL;
 399 }
 400
 401 PyObject *PyString_AsDecodedString(PyObject *str,
 402                                    const char *encoding,
 403                                    const char *errors)
 404 {
 405     PyObject *v;
 406
 407     v = PyString_AsDecodedObject(str, encoding, errors);
 408     if (v == NULL)
 409         goto onError;
 410
 411 #ifdef Py_USING_UNICODE
 412     /* Convert Unicode to a string using the default encoding */
 413     if (PyUnicode_Check(v)) {
 414         PyObject *temp = v;
 415         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 416         Py_DECREF(temp);
 417         if (v == NULL)
 418             goto onError;
 419     }
 420 #endif
 421     if (!PyString_Check(v)) {
 422         PyErr_Format(PyExc_TypeError,
 423                      "decoder did not return a string object (type=%.400s)",
 424                      v->ob_type->tp_name);
 425         Py_DECREF(v);
 426         goto onError;
 427     }
 428
 429     return v;
 430
 431  onError:
 432     return NULL;
 433 }
 434
 435 PyObject *PyString_Encode(const char *s,
 436                           Py_ssize_t size,
 437                           const char *encoding,
 438                           const char *errors)
 439 {
 440     PyObject *v, *str;
 441
 442     str = PyString_FromStringAndSize(s, size);
 443     if (str == NULL)
 444         return NULL;
 445     v = PyString_AsEncodedString(str, encoding, errors);
 446     Py_DECREF(str);
 447     return v;
 448 }
 449
 450 PyObject *PyString_AsEncodedObject(PyObject *str,
 451                                    const char *encoding,
 452                                    const char *errors)
 453 {
 454     PyObject *v;
 455
 456     if (!PyString_Check(str)) {
 457         PyErr_BadArgument();
 458         goto onError;
 459     }
 460
 461     if (encoding == NULL) {
 462 #ifdef Py_USING_UNICODE
 463         encoding = PyUnicode_GetDefaultEncoding();
 464 #else
 465         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 466         goto onError;
 467 #endif
 468     }
 469
 470     /* Encode via the codec registry */
 471     v = PyCodec_Encode(str, encoding, errors);
 472     if (v == NULL)
 473         goto onError;
 474
 475     return v;
 476
 477  onError:
 478     return NULL;
 479 }
 480
 481 PyObject *PyString_AsEncodedString(PyObject *str,
 482                                    const char *encoding,
 483                                    const char *errors)
 484 {
 485     PyObject *v;
 486
 487     v = PyString_AsEncodedObject(str, encoding, errors);
 488     if (v == NULL)
 489         goto onError;
 490
 491 #ifdef Py_USING_UNICODE
 492     /* Convert Unicode to a string using the default encoding */
 493     if (PyUnicode_Check(v)) {
 494         PyObject *temp = v;
 495         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 496         Py_DECREF(temp);
 497         if (v == NULL)
 498             goto onError;
 499     }
 500 #endif
 501     if (!PyString_Check(v)) {
 502         PyErr_Format(PyExc_TypeError,
 503                      "encoder did not return a string object (type=%.400s)",
 504                      v->ob_type->tp_name);
 505         Py_DECREF(v);
 506         goto onError;
 507     }
 508
 509     return v;
 510
 511  onError:
 512     return NULL;
 513 }
 514
 515 static void
 516 string_dealloc(PyObject *op)
 517 {
 518         switch (PyString_CHECK_INTERNED(op)) {
 519                 case SSTATE_NOT_INTERNED:
 520                         break;
 521
 522                 case SSTATE_INTERNED_MORTAL:
 523                         /* revive dead object temporarily for DelItem */
 524                         op->ob_refcnt = 3;
 525                         if (PyDict_DelItem(interned, op) != 0)
 526                                 Py_FatalError(
 527                                         "deletion of interned string failed");
 528                         break;
 529
 530                 case SSTATE_INTERNED_IMMORTAL:
 531                         Py_FatalError("Immortal interned string died.");
 532
 533                 default:
 534                         Py_FatalError("Inconsistent interned string state.");
 535         }
 536         op->ob_type->tp_free(op);
 537 }
 538
 539 /* Unescape a backslash-escaped string. If unicode is non-zero,
 540    the string is a u-literal. If recode_encoding is non-zero,
 541    the string is UTF-8 encoded and should be re-encoded in the
 542    specified encoding.  */
 543
 544 PyObject *PyString_DecodeEscape(const char *s,
 545                                 Py_ssize_t len,
 546                                 const char *errors,
 547                                 Py_ssize_t unicode,
 548                                 const char *recode_encoding)
 549 {
 550         int c;
 551         char *p, *buf;
 552         const char *end;
 553         PyObject *v;
 554         Py_ssize_t newlen = recode_encoding ? 4*len:len;
 555         v = PyString_FromStringAndSize((char *)NULL, newlen);
 556         if (v == NULL)
 557                 return NULL;
 558         p = buf = PyString_AsString(v);
 559         end = s + len;
 560         while (s < end) {
 561                 if (*s != '\\') {
 562                   non_esc:
 563 #ifdef Py_USING_UNICODE
 564                         if (recode_encoding && (*s & 0x80)) {
 565                                 PyObject *u, *w;
 566                                 char *r;
 567                                 const char* t;
 568                                 Py_ssize_t rn;
 569                                 t = s;
 570                                 /* Decode non-ASCII bytes as UTF-8. */
 571                                 while (t < end && (*t & 0x80)) t++;
 572                                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
 573                                 if(!u) goto failed;
 574
 575                                 /* Recode them in target encoding. */
 576                                 w = PyUnicode_AsEncodedString(
 577                                         u, recode_encoding, errors);
 578                                 Py_DECREF(u);
 579                                 if (!w) goto failed;
 580
 581                                 /* Append bytes to output buffer. */
 582                                 assert(PyString_Check(w));
 583                                 r = PyString_AS_STRING(w);
 584                                 rn = PyString_GET_SIZE(w);
 585                                 Py_MEMCPY(p, r, rn);
 586                                 p += rn;
 587                                 Py_DECREF(w);
 588                                 s = t;
 589                         } else {
 590                                 *p++ = *s++;
 591                         }
 592 #else
 593                         *p++ = *s++;
 594 #endif
 595                         continue;
 596                 }
 597                 s++;
 598                 if (s==end) {
 599                         PyErr_SetString(PyExc_ValueError,
 600                                         "Trailing \\ in string");
 601                         goto failed;
 602                 }
 603                 switch (*s++) {
 604                 /* XXX This assumes ASCII! */
 605                 case '\n': break;
 606                 case '\\': *p++ = '\\'; break;
 607                 case '\'': *p++ = '\''; break;
 608                 case '\"': *p++ = '\"'; break;
 609                 case 'b': *p++ = '\b'; break;
 610                 case 'f': *p++ = '\014'; break; /* FF */
 611                 case 't': *p++ = '\t'; break;
 612                 case 'n': *p++ = '\n'; break;
 613                 case 'r': *p++ = '\r'; break;
 614                 case 'v': *p++ = '\013'; break; /* VT */
 615                 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
 616                 case '0': case '1': case '2': case '3':
 617                 case '4': case '5': case '6': case '7':
 618                         c = s[-1] - '0';
 619                         if ('0' <= *s && *s <= '7') {
 620                                 c = (c<<3) + *s++ - '0';
 621                                 if ('0' <= *s && *s <= '7')
 622                                         c = (c<<3) + *s++ - '0';
 623                         }
 624                         *p++ = c;
 625                         break;
 626                 case 'x':
 627                         if (isxdigit(Py_CHARMASK(s[0]))
 628                             && isxdigit(Py_CHARMASK(s[1]))) {
 629                                 unsigned int x = 0;
 630                                 c = Py_CHARMASK(*s);
 631                                 s++;
 632                                 if (isdigit(c))
 633                                         x = c - '0';
 634                                 else if (islower(c))
 635                                         x = 10 + c - 'a';
 636                                 else
 637                                         x = 10 + c - 'A';
 638                                 x = x << 4;
 639                                 c = Py_CHARMASK(*s);
 640                                 s++;
 641                                 if (isdigit(c))
 642                                         x += c - '0';
 643                                 else if (islower(c))
 644                                         x += 10 + c - 'a';
 645                                 else
 646                                         x += 10 + c - 'A';
 647                                 *p++ = x;
 648                                 break;
 649                         }
 650                         if (!errors || strcmp(errors, "strict") == 0) {
 651                                 PyErr_SetString(PyExc_ValueError,
 652                                                 "invalid \\x escape");
 653                                 goto failed;
 654                         }
 655                         if (strcmp(errors, "replace") == 0) {
 656                                 *p++ = '?';
 657                         } else if (strcmp(errors, "ignore") == 0)
 658                                 /* do nothing */;
 659                         else {
 660                                 PyErr_Format(PyExc_ValueError,
 661                                              "decoding error; "
 662                                              "unknown error handling code: %.400s",
 663                                              errors);
 664                                 goto failed;
 665                         }
 666 #ifndef Py_USING_UNICODE
 667                 case 'u':
 668                 case 'U':
 669                 case 'N':
 670                         if (unicode) {
 671                                 PyErr_SetString(PyExc_ValueError,
 672                                           "Unicode escapes not legal "
 673                                           "when Unicode disabled");
 674                                 goto failed;
 675                         }
 676 #endif
 677                 default:
 678                         *p++ = '\\';
 679                         s--;
 680                         goto non_esc; /* an arbitry number of unescaped
 681                                          UTF-8 bytes may follow. */
 682                 }
 683         }
 684         if (p-buf < newlen)
 685                 _PyString_Resize(&v, p - buf);
 686         return v;
 687   failed:
 688         Py_DECREF(v);
 689         return NULL;
 690 }
 691
 692 /* -------------------------------------------------------------------- */
 693 /* object api */
 694
 695 static Py_ssize_t
 696 string_getsize(register PyObject *op)
 697 {
 698         char *s;
 699         Py_ssize_t len;
 700         if (PyString_AsStringAndSize(op, &s, &len))
 701                 return -1;
 702         return len;
 703 }
 704
 705 static /*const*/ char *
 706 string_getbuffer(register PyObject *op)
 707 {
 708         char *s;
 709         Py_ssize_t len;
 710         if (PyString_AsStringAndSize(op, &s, &len))
 711                 return NULL;
 712         return s;
 713 }
 714
 715 Py_ssize_t
 716 PyString_Size(register PyObject *op)
 717 {
 718         if (!PyString_Check(op))
 719                 return string_getsize(op);
 720         return ((PyStringObject *)op) -> ob_size;
 721 }
 722
 723 /*const*/ char *
 724 PyString_AsString(register PyObject *op)
 725 {
 726         if (!PyString_Check(op))
 727                 return string_getbuffer(op);
 728         return ((PyStringObject *)op) -> ob_sval;
 729 }
 730
 731 int
 732 PyString_AsStringAndSize(register PyObject *obj,
 733                          register char **s,
 734                          register Py_ssize_t *len)
 735 {
 736         if (s == NULL) {
 737                 PyErr_BadInternalCall();
 738                 return -1;
 739         }
 740
 741         if (!PyString_Check(obj)) {
 742 #ifdef Py_USING_UNICODE
 743                 if (PyUnicode_Check(obj)) {
 744                         obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
 745                         if (obj == NULL)
 746                                 return -1;
 747                 }
 748                 else
 749 #endif
 750                 {
 751                         PyErr_Format(PyExc_TypeError,
 752                                      "expected string or Unicode object, "
 753                                      "%.200s found", obj->ob_type->tp_name);
 754                         return -1;
 755                 }
 756         }
 757
 758         *s = PyString_AS_STRING(obj);
 759         if (len != NULL)
 760                 *len = PyString_GET_SIZE(obj);
 761         else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
 762                 PyErr_SetString(PyExc_TypeError,
 763                                 "expected string without null bytes");
 764                 return -1;
 765         }
 766         return 0;
 767 }
 768
 769 /* -------------------------------------------------------------------- */
 770 /* Methods */
 771
/* Configuration for the stringlib headers included below (presumably they
   are generic over these macros -- confirm against the headers). */

/* character type */
#define STRINGLIB_CHAR char

/* comparison, length, constructor and data-access primitives */
#define STRINGLIB_CMP memcmp
#define STRINGLIB_LEN PyString_GET_SIZE
#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_STR PyString_AS_STRING

/* the shared empty string of this file */
#define STRINGLIB_EMPTY nullstring

#include "stringlib/fastsearch.h"

#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"
 786
 787
 788 static int
 789 string_print(PyStringObject *op, FILE *fp, int flags)
 790 {
 791         Py_ssize_t i;
 792         char c;
 793         int quote;
 794
 795         /* XXX Ought to check for interrupts when writing long strings */
 796         if (! PyString_CheckExact(op)) {
 797                 int ret;
 798                 /* A str subclass may have its own __str__ method. */
 799                 op = (PyStringObject *) PyObject_Str((PyObject *)op);
 800                 if (op == NULL)
 801                         return -1;
 802                 ret = string_print(op, fp, flags);
 803                 Py_DECREF(op);
 804                 return ret;
 805         }
 806         if (flags & Py_PRINT_RAW) {
 807 #ifdef __VMS
 808                 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
 809 #else
 810                 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
 811 #endif
 812                 return 0;
 813         }
 814
 815         /* figure out which quote to use; single is preferred */
 816         quote = '\'';
 817         if (memchr(op->ob_sval, '\'', op->ob_size) &&
 818             !memchr(op->ob_sval, '"', op->ob_size))
 819                 quote = '"';
 820
 821         fputc(quote, fp);
 822         for (i = 0; i < op->ob_size; i++) {
 823                 c = op->ob_sval[i];
 824                 if (c == quote || c == '\\')
 825                         fprintf(fp, "\\%c", c);
 826                 else if (c == '\t')
 827                         fprintf(fp, "\\t");
 828                 else if (c == '\n')
 829                         fprintf(fp, "\\n");
 830                 else if (c == '\r')
 831                         fprintf(fp, "\\r");
 832                 else if (c < ' ' || c >= 0x7f)
 833                         fprintf(fp, "\\x%02x", c & 0xff);
 834                 else
 835                         fputc(c, fp);
 836         }
 837         fputc(quote, fp);
 838         return 0;
 839 }
 840
 841 PyObject *
 842 PyString_Repr(PyObject *obj, int smartquotes)
 843 {
 844         register PyStringObject* op = (PyStringObject*) obj;
 845         size_t newsize = 2 + 4 * op->ob_size;
 846         PyObject *v;
 847         if (newsize > PY_SSIZE_T_MAX) {
 848                 PyErr_SetString(PyExc_OverflowError,
 849                         "string is too large to make repr");
 850         }
 851         v = PyString_FromStringAndSize((char *)NULL, newsize);
 852         if (v == NULL) {
 853                 return NULL;
 854         }
 855         else {
 856                 register Py_ssize_t i;
 857                 register char c;
 858                 register char *p;
 859                 int quote;
 860
 861                 /* figure out which quote to use; single is preferred */
 862                 quote = '\'';
 863                 if (smartquotes &&
 864                     memchr(op->ob_sval, '\'', op->ob_size) &&
 865                     !memchr(op->ob_sval, '"', op->ob_size))
 866                         quote = '"';
 867
 868                 p = PyString_AS_STRING(v);
 869                 *p++ = quote;
 870                 for (i = 0; i < op->ob_size; i++) {
 871                         /* There's at least enough room for a hex escape
 872                            and a closing quote. */
 873                         assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
 874                         c = op->ob_sval[i];
 875                         if (c == quote || c == '\\')
 876                                 *p++ = '\\', *p++ = c;
 877                         else if (c == '\t')
 878                                 *p++ = '\\', *p++ = 't';
 879                         else if (c == '\n')
 880                                 *p++ = '\\', *p++ = 'n';
 881                         else if (c == '\r')
 882                                 *p++ = '\\', *p++ = 'r';
 883                         else if (c < ' ' || c >= 0x7f) {
 884                                 /* For performance, we don't want to call
 885                                    PyOS_snprintf here (extra layers of
 886                                    function call). */
 887                                 sprintf(p, "\\x%02x", c & 0xff);
 888                                 p += 4;
 889                         }
 890                         else
 891                                 *p++ = c;
 892                 }
 893                 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
 894                 *p++ = quote;
 895                 *p = '\0';
 896                 _PyString_Resize(
 897                         &v, (p - PyString_AS_STRING(v)));
 898                 return v;
 899         }
 900 }
 901
 902 static PyObject *
 903 string_repr(PyObject *op)
 904 {
 905         return PyString_Repr(op, 1);
 906 }
 907
 908 static PyObject *
 909 string_str(PyObject *s)
 910 {
 911         assert(PyString_Check(s));
 912         if (PyString_CheckExact(s)) {
 913                 Py_INCREF(s);
 914                 return s;
 915         }
 916         else {
 917                 /* Subtype -- return genuine string with the same value. */
 918                 PyStringObject *t = (PyStringObject *) s;
 919                 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
 920         }
 921 }
 922
 923 static Py_ssize_t
 924 string_length(PyStringObject *a)
 925 {
 926         return a->ob_size;
 927 }
 928
 929 static PyObject *
 930 string_concat(register PyStringObject *a, register PyObject *bb)
 931 {
 932         register Py_ssize_t size;
 933         register PyStringObject *op;
 934         if (!PyString_Check(bb)) {
 935 #ifdef Py_USING_UNICODE
 936                 if (PyUnicode_Check(bb))
 937                     return PyUnicode_Concat((PyObject *)a, bb);
 938 #endif
 939                 PyErr_Format(PyExc_TypeError,
 940                              "cannot concatenate 'str' and '%.200s' objects",
 941                              bb->ob_type->tp_name);
 942                 return NULL;
 943         }
 944 #define b ((PyStringObject *)bb)
 945         /* Optimize cases with empty left or right operand */
 946         if ((a->ob_size == 0 || b->ob_size == 0) &&
 947             PyString_CheckExact(a) && PyString_CheckExact(b)) {
 948                 if (a->ob_size == 0) {
 949                         Py_INCREF(bb);
 950                         return bb;
 951                 }
 952                 Py_INCREF(a);
 953                 return (PyObject *)a;
 954         }
 955         size = a->ob_size + b->ob_size;
 956         if (size < 0) {
 957                 PyErr_SetString(PyExc_OverflowError,
 958                                 "strings are too large to concat");
 959                 return NULL;
 960         }
 961
 962         /* Inline PyObject_NewVar */
 963         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
 964         if (op == NULL)
 965                 return PyErr_NoMemory();
 966         PyObject_INIT_VAR(op, &PyString_Type, size);
 967         op->ob_shash = -1;
 968         op->ob_sstate = SSTATE_NOT_INTERNED;
 969         Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
 970         Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
 971         op->ob_sval[size] = '\0';
 972         return (PyObject *) op;
 973 #undef b
 974 }
 975
 976 static PyObject *
 977 string_repeat(register PyStringObject *a, register Py_ssize_t n)
 978 {
 979         register Py_ssize_t i;
 980         register Py_ssize_t j;
 981         register Py_ssize_t size;
 982         register PyStringObject *op;
 983         size_t nbytes;
 984         if (n < 0)
 985                 n = 0;
 986         /* watch out for overflows:  the size can overflow int,
 987          * and the # of bytes needed can overflow size_t
 988          */
 989         size = a->ob_size * n;
 990         if (n && size / n != a->ob_size) {
 991                 PyErr_SetString(PyExc_OverflowError,
 992                         "repeated string is too long");
 993                 return NULL;
 994         }
 995         if (size == a->ob_size && PyString_CheckExact(a)) {
 996                 Py_INCREF(a);
 997                 return (PyObject *)a;
 998         }
 999         nbytes = (size_t)size;
1000         if (nbytes + sizeof(PyStringObject) <= nbytes) {
1001                 PyErr_SetString(PyExc_OverflowError,
1002                         "repeated string is too long");
1003                 return NULL;
1004         }
1005         op = (PyStringObject *)
1006                 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1007         if (op == NULL)
1008                 return PyErr_NoMemory();
1009         PyObject_INIT_VAR(op, &PyString_Type, size);
1010         op->ob_shash = -1;
1011         op->ob_sstate = SSTATE_NOT_INTERNED;
1012         op->ob_sval[size] = '\0';
1013         if (a->ob_size == 1 && n > 0) {
1014                 memset(op->ob_sval, a->ob_sval[0] , n);
1015                 return (PyObject *) op;
1016         }
1017         i = 0;
1018         if (i < size) {
1019                 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
1020                 i = a->ob_size;
1021         }
1022         while (i < size) {
1023                 j = (i <= size-i)  ?  i  :  size-i;
1024                 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1025                 i += j;
1026         }
1027         return (PyObject *) op;
1028 }
1029
1030 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1031
1032 static PyObject *
1033 string_slice(register PyStringObject *a, register Py_ssize_t i,
1034              register Py_ssize_t j)
1035      /* j -- may be negative! */
1036 {
1037         if (i < 0)
1038                 i = 0;
1039         if (j < 0)
1040                 j = 0; /* Avoid signed/unsigned bug in next line */
1041         if (j > a->ob_size)
1042                 j = a->ob_size;
1043         if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1044                 /* It's the same as a */
1045                 Py_INCREF(a);
1046                 return (PyObject *)a;
1047         }
1048         if (j < i)
1049                 j = i;
1050         return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1051 }
1052
1053 static int
1054 string_contains(PyObject *str_obj, PyObject *sub_obj)
1055 {
1056         if (!PyString_CheckExact(sub_obj)) {
1057 #ifdef Py_USING_UNICODE
1058                 if (PyUnicode_Check(sub_obj))
1059                         return PyUnicode_Contains(str_obj, sub_obj);
1060 #endif
1061                 if (!PyString_Check(sub_obj)) {
1062                         PyErr_SetString(PyExc_TypeError,
1063                             "'in <string>' requires string as left operand");
1064                         return -1;
1065                 }
1066         }
1067
1068         return stringlib_contains_obj(str_obj, sub_obj);
1069 }
1070
1071 static PyObject *
1072 string_item(PyStringObject *a, register Py_ssize_t i)
1073 {
1074         char pchar;
1075         PyObject *v;
1076         if (i < 0 || i >= a->ob_size) {
1077                 PyErr_SetString(PyExc_IndexError, "string index out of range");
1078                 return NULL;
1079         }
1080         pchar = a->ob_sval[i];
1081         v = (PyObject *)characters[pchar & UCHAR_MAX];
1082         if (v == NULL)
1083                 v = PyString_FromStringAndSize(&pchar, 1);
1084         else {
1085 #ifdef COUNT_ALLOCS
1086                 one_strings++;
1087 #endif
1088                 Py_INCREF(v);
1089         }
1090         return v;
1091 }
1092
1093 static PyObject*
1094 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1095 {
1096         int c;
1097         Py_ssize_t len_a, len_b;
1098         Py_ssize_t min_len;
1099         PyObject *result;
1100
1101         /* Make sure both arguments are strings. */
1102         if (!(PyString_Check(a) && PyString_Check(b))) {
1103                 result = Py_NotImplemented;
1104                 goto out;
1105         }
1106         if (a == b) {
1107                 switch (op) {
1108                 case Py_EQ:case Py_LE:case Py_GE:
1109                         result = Py_True;
1110                         goto out;
1111                 case Py_NE:case Py_LT:case Py_GT:
1112                         result = Py_False;
1113                         goto out;
1114                 }
1115         }
1116         if (op == Py_EQ) {
1117                 /* Supporting Py_NE here as well does not save
1118                    much time, since Py_NE is rarely used.  */
1119                 if (a->ob_size == b->ob_size
1120                     && (a->ob_sval[0] == b->ob_sval[0]
1121                         && memcmp(a->ob_sval, b->ob_sval,
1122                                   a->ob_size) == 0)) {
1123                         result = Py_True;
1124                 } else {
1125                         result = Py_False;
1126                 }
1127                 goto out;
1128         }
1129         len_a = a->ob_size; len_b = b->ob_size;
1130         min_len = (len_a < len_b) ? len_a : len_b;
1131         if (min_len > 0) {
1132                 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1133                 if (c==0)
1134                         c = memcmp(a->ob_sval, b->ob_sval, min_len);
1135         }else
1136                 c = 0;
1137         if (c == 0)
1138                 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1139         switch (op) {
1140         case Py_LT: c = c <  0; break;
1141         case Py_LE: c = c <= 0; break;
1142         case Py_EQ: assert(0);  break; /* unreachable */
1143         case Py_NE: c = c != 0; break;
1144         case Py_GT: c = c >  0; break;
1145         case Py_GE: c = c >= 0; break;
1146         default:
1147                 result = Py_NotImplemented;
1148                 goto out;
1149         }
1150         result = c ? Py_True : Py_False;
1151   out:
1152         Py_INCREF(result);
1153         return result;
1154 }
1155
1156 int
1157 _PyString_Eq(PyObject *o1, PyObject *o2)
1158 {
1159         PyStringObject *a = (PyStringObject*) o1;
1160         PyStringObject *b = (PyStringObject*) o2;
1161         return a->ob_size == b->ob_size
1162           && *a->ob_sval == *b->ob_sval
1163           && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
1164 }
1165
1166 static long
1167 string_hash(PyStringObject *a)
1168 {
1169         register Py_ssize_t len;
1170         register unsigned char *p;
1171         register long x;
1172
1173         if (a->ob_shash != -1)
1174                 return a->ob_shash;
1175         len = a->ob_size;
1176         p = (unsigned char *) a->ob_sval;
1177         x = *p << 7;
1178         while (--len >= 0)
1179                 x = (1000003*x) ^ *p++;
1180         x ^= a->ob_size;
1181         if (x == -1)
1182                 x = -2;
1183         a->ob_shash = x;
1184         return x;
1185 }
1186
1187 #define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1188
1189 static PyObject*
1190 string_subscript(PyStringObject* self, PyObject* item)
1191 {
1192         PyNumberMethods *nb = item->ob_type->tp_as_number;
1193         if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1194                 Py_ssize_t i = nb->nb_index(item);
1195                 if (i == -1 && PyErr_Occurred())
1196                         return NULL;
1197                 if (i < 0)
1198                         i += PyString_GET_SIZE(self);
1199                 return string_item(self, i);
1200         }
1201         else if (PySlice_Check(item)) {
1202                 Py_ssize_t start, stop, step, slicelength, cur, i;
1203                 char* source_buf;
1204                 char* result_buf;
1205                 PyObject* result;
1206
1207                 if (PySlice_GetIndicesEx((PySliceObject*)item,
1208                                  PyString_GET_SIZE(self),
1209                                  &start, &stop, &step, &slicelength) < 0) {
1210                         return NULL;
1211                 }
1212
1213                 if (slicelength <= 0) {
1214                         return PyString_FromStringAndSize("", 0);
1215                 }
1216                 else {
1217                         source_buf = PyString_AsString((PyObject*)self);
1218                         result_buf = (char *)PyMem_Malloc(slicelength);
1219                         if (result_buf == NULL)
1220                                 return PyErr_NoMemory();
1221
1222                         for (cur = start, i = 0; i < slicelength;
1223                              cur += step, i++) {
1224                                 result_buf[i] = source_buf[cur];
1225                         }
1226
1227                         result = PyString_FromStringAndSize(result_buf,
1228                                                             slicelength);
1229                         PyMem_Free(result_buf);
1230                         return result;
1231                 }
1232         }
1233         else {
1234                 PyErr_SetString(PyExc_TypeError,
1235                                 "string indices must be integers");
1236                 return NULL;
1237         }
1238 }
1239
1240 static Py_ssize_t
1241 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1242 {
1243         if ( index != 0 ) {
1244                 PyErr_SetString(PyExc_SystemError,
1245                                 "accessing non-existent string segment");
1246                 return -1;
1247         }
1248         *ptr = (void *)self->ob_sval;
1249         return self->ob_size;
1250 }
1251
1252 static Py_ssize_t
1253 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1254 {
1255         PyErr_SetString(PyExc_TypeError,
1256                         "Cannot use string as modifiable buffer");
1257         return -1;
1258 }
1259
1260 static Py_ssize_t
1261 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1262 {
1263         if ( lenp )
1264                 *lenp = self->ob_size;
1265         return 1;
1266 }
1267
1268 static Py_ssize_t
1269 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1270 {
1271         if ( index != 0 ) {
1272                 PyErr_SetString(PyExc_SystemError,
1273                                 "accessing non-existent string segment");
1274                 return -1;
1275         }
1276         *ptr = self->ob_sval;
1277         return self->ob_size;
1278 }
1279
1280 static PySequenceMethods string_as_sequence = {
1281         (lenfunc)string_length, /*sq_length*/
1282         (binaryfunc)string_concat, /*sq_concat*/
1283         (ssizeargfunc)string_repeat, /*sq_repeat*/
1284         (ssizeargfunc)string_item, /*sq_item*/
1285         (ssizessizeargfunc)string_slice, /*sq_slice*/
1286         0,              /*sq_ass_item*/
1287         0,              /*sq_ass_slice*/
1288         (objobjproc)string_contains /*sq_contains*/
1289 };
1290
1291 static PyMappingMethods string_as_mapping = {
1292         (lenfunc)string_length,
1293         (binaryfunc)string_subscript,
1294         0,
1295 };
1296
1297 static PyBufferProcs string_as_buffer = {
1298         (readbufferproc)string_buffer_getreadbuf,
1299         (writebufferproc)string_buffer_getwritebuf,
1300         (segcountproc)string_buffer_getsegcount,
1301         (charbufferproc)string_buffer_getcharbuf,
1302 };
1303
1304
1305 \f
/* Selectors for the three strip variants. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument format strings, indexed by the selectors above. */
static const char *stripformat[] = {
        "|O:lstrip",
        "|O:rstrip",
        "|O:strip"
};

/* Method name for selector i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (&stripformat[i][3])
1314
1315
/* Non-zero iff the `length' bytes of `target' starting at `offset' equal
   the first `length' bytes of `pattern'.  The first and last bytes are
   compared directly before memcmp() checks the rest.
   Don't call if length < 2 (the memcmp count would be negative).
   Every parameter is parenthesized so that expression arguments expand
   with the intended precedence; arguments are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
  ((target)[(offset)] == (pattern)[0] &&                                \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&            \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1321
1322
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit' splits: one more
   than the number of splits (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The parameter is parenthesized so that expression
   arguments expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
        ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1335
/* Append the substring data[left:right] to `list'.
   Relies on names from the enclosing function: `str' (scratch PyObject *),
   `list', and an `onError' label that is jumped to on failure.
   The body is wrapped in braces (like SPLIT_ADD) so that the macro
   expands to a single statement even when used under an `if'. */
#define SPLIT_APPEND(data, left, right) {                       \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
                goto onError;                                   \
        if (PyList_Append(list, str)) {                         \
                Py_DECREF(str);                                 \
                goto onError;                                   \
        }                                                       \
        else                                                    \
                Py_DECREF(str);                                 \
        }
1347
/* Add the substring data[left:right] as the next element of `list'.
   While `count' is below MAX_PREALLOC the new string is stored straight
   into slot `count' with PyList_SET_ITEM; after that it is appended with
   PyList_Append and the local reference is dropped.
   Relies on names from the enclosing function: `str', `list', `count'
   (incremented on success) and an `onError' label. */
#define SPLIT_ADD(data, left, right) {                          \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
                goto onError;                                   \
        if (count < MAX_PREALLOC) {                             \
                PyList_SET_ITEM(list, count, str);              \
        }                                                       \
        else if (PyList_Append(list, str) == 0) {               \
                Py_DECREF(str);                                 \
        }                                                       \
        else {                                                  \
                Py_DECREF(str);                                 \
                goto onError;                                   \
        }                                                       \
        count++; }
1364
/* Always force the list to the expected size: overwrite the list's
   ob_size with `count' from the enclosing function.  The parameter is
   parenthesized so that the cast applies to the whole argument. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count
1367
/* Cursor helpers for the whitespace splitters.  The SKIP_* forms advance
   index `i' forward (stopping at `len'); the RSKIP_* forms move it
   backward (stopping below 0).  Each stops at the first character that
   is not / is whitespace according to isspace().  Parameters are
   parenthesized so that expression arguments expand correctly; `i' must
   be a modifiable lvalue. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) &&  isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0  &&  isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0  && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1372
1373 Py_LOCAL_INLINE(PyObject *)
1374 split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1375 {
1376         Py_ssize_t i, j, count=0;
1377         PyObject *str;
1378         PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1379
1380         if (list == NULL)
1381                 return NULL;
1382
1383         i = j = 0;
1384
1385         while (maxsplit-- > 0) {
1386                 SKIP_SPACE(s, i, len);
1387                 if (i==len) break;
1388                 j = i; i++;
1389                 SKIP_NONSPACE(s, i, len);
1390                 SPLIT_ADD(s, j, i);
1391         }
1392
1393         if (i < len) {
1394                 /* Only occurs when maxsplit was reached */
1395                 /* Skip any remaining whitespace and copy to end of string */
1396                 SKIP_SPACE(s, i, len);
1397                 if (i != len)
1398                         SPLIT_ADD(s, i, len);
1399         }
1400         FIX_PREALLOC_SIZE(list);
1401         return list;
1402   onError:
1403         Py_DECREF(list);
1404         return NULL;
1405 }
1406
1407 Py_LOCAL_INLINE(PyObject *)
1408 split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1409 {
1410         register Py_ssize_t i, j, count=0;
1411         PyObject *str;
1412         PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1413
1414         if (list == NULL)
1415                 return NULL;
1416
1417         i = j = 0;
1418         while ((j < len) && (maxcount-- > 0)) {
1419                 for(; j<len; j++) {
1420                         /* I found that using memchr makes no difference */
1421                         if (s[j] == ch) {
1422                                 SPLIT_ADD(s, i, j);
1423                                 i = j = j + 1;
1424                                 break;
1425                         }
1426                 }
1427         }
1428         if (i <= len) {
1429                 SPLIT_ADD(s, i, len);
1430         }
1431         FIX_PREALLOC_SIZE(list);
1432         return list;
1433
1434   onError:
1435         Py_DECREF(list);
1436         return NULL;
1437 }
1438
1439 PyDoc_STRVAR(split__doc__,
1440 "S.split([sep [,maxsplit]]) -> list of strings\n\
1441 \n\
1442 Return a list of the words in the string S, using sep as the\n\
1443 delimiter string.  If maxsplit is given, at most maxsplit\n\
1444 splits are done. If sep is not specified or is None, any\n\
1445 whitespace string is a separator.");
1446
1447 static PyObject *
1448 string_split(PyStringObject *self, PyObject *args)
1449 {
1450         Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1451         Py_ssize_t maxsplit = -1, count=0;
1452         const char *s = PyString_AS_STRING(self), *sub;
1453         PyObject *list, *str, *subobj = Py_None;
1454 #ifdef USE_FAST
1455         Py_ssize_t pos;
1456 #endif
1457
1458         if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1459                 return NULL;
1460         if (maxsplit < 0)
1461                 maxsplit = PY_SSIZE_T_MAX;
1462         if (subobj == Py_None)
1463                 return split_whitespace(s, len, maxsplit);
1464         if (PyString_Check(subobj)) {
1465                 sub = PyString_AS_STRING(subobj);
1466                 n = PyString_GET_SIZE(subobj);
1467         }
1468 #ifdef Py_USING_UNICODE
1469         else if (PyUnicode_Check(subobj))
1470                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1471 #endif
1472         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1473                 return NULL;
1474
1475         if (n == 0) {
1476                 PyErr_SetString(PyExc_ValueError, "empty separator");
1477                 return NULL;
1478         }
1479         else if (n == 1)
1480                 return split_char(s, len, sub[0], maxsplit);
1481
1482         list = PyList_New(PREALLOC_SIZE(maxsplit));
1483         if (list == NULL)
1484                 return NULL;
1485
1486 #ifdef USE_FAST
1487         i = j = 0;
1488         while (maxsplit-- > 0) {
1489                 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1490                 if (pos < 0)
1491                         break;
1492                 j = i+pos;
1493                 SPLIT_ADD(s, i, j);
1494                 i = j + n;
1495
1496         }
1497 #else
1498         i = j = 0;
1499         while ((j+n <= len) && (maxsplit-- > 0)) {
1500                 for (; j+n <= len; j++) {
1501                         if (Py_STRING_MATCH(s, j, sub, n)) {
1502                                 SPLIT_ADD(s, i, j);
1503                                 i = j = j + n;
1504                                 break;
1505                         }
1506                 }
1507         }
1508 #endif
1509         SPLIT_ADD(s, i, len);
1510         FIX_PREALLOC_SIZE(list);
1511         return list;
1512
1513  onError:
1514         Py_DECREF(list);
1515         return NULL;
1516 }
1517
1518 PyDoc_STRVAR(partition__doc__,
1519 "S.partition(sep) -> (head, sep, tail)\n\
1520 \n\
1521 Searches for the separator sep in S, and returns the part before it,\n\
1522 the separator itself, and the part after it.  If the separator is not\n\
1523 found, returns S and two empty strings.");
1524
1525 static PyObject *
1526 string_partition(PyStringObject *self, PyObject *sep_obj)
1527 {
1528         const char *sep;
1529         Py_ssize_t sep_len;
1530
1531         if (PyString_Check(sep_obj)) {
1532                 sep = PyString_AS_STRING(sep_obj);
1533                 sep_len = PyString_GET_SIZE(sep_obj);
1534         }
1535 #ifdef Py_USING_UNICODE
1536         else if (PyUnicode_Check(sep_obj))
1537                 return PyUnicode_Partition((PyObject *) self, sep_obj);
1538 #endif
1539         else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1540                 return NULL;
1541
1542         return stringlib_partition(
1543                 (PyObject*) self,
1544                 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1545                 sep_obj, sep, sep_len
1546                 );
1547 }
1548
1549 PyDoc_STRVAR(rpartition__doc__,
1550 "S.rpartition(sep) -> (head, sep, tail)\n\
1551 \n\
1552 Searches for the separator sep in S, starting at the end of S, and returns\n\
1553 the part before it, the separator itself, and the part after it.  If the\n\
1554 separator is not found, returns S and two empty strings.");
1555
1556 static PyObject *
1557 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1558 {
1559         const char *sep;
1560         Py_ssize_t sep_len;
1561
1562         if (PyString_Check(sep_obj)) {
1563                 sep = PyString_AS_STRING(sep_obj);
1564                 sep_len = PyString_GET_SIZE(sep_obj);
1565         }
1566 #ifdef Py_USING_UNICODE
1567         else if (PyUnicode_Check(sep_obj))
1568                 return PyUnicode_Partition((PyObject *) self, sep_obj);
1569 #endif
1570         else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1571                 return NULL;
1572
1573         return stringlib_rpartition(
1574                 (PyObject*) self,
1575                 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1576                 sep_obj, sep, sep_len
1577                 );
1578 }
1579
1580 Py_LOCAL_INLINE(PyObject *)
1581 rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1582 {
1583         Py_ssize_t i, j, count=0;
1584         PyObject *str;
1585         PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1586
1587         if (list == NULL)
1588                 return NULL;
1589
1590         i = j = len-1;
1591
1592         while (maxsplit-- > 0) {
1593                 RSKIP_SPACE(s, i);
1594                 if (i<0) break;
1595                 j = i; i--;
1596                 RSKIP_NONSPACE(s, i);
1597                 SPLIT_ADD(s, i + 1, j + 1);
1598         }
1599         if (i >= 0) {
1600                 /* Only occurs when maxsplit was reached */
1601                 /* Skip any remaining whitespace and copy to beginning of string */
1602                 RSKIP_SPACE(s, i);
1603                 if (i >= 0)
1604                         SPLIT_ADD(s, 0, i + 1);
1605
1606         }
1607         FIX_PREALLOC_SIZE(list);
1608         if (PyList_Reverse(list) < 0)
1609                 goto onError;
1610         return list;
1611   onError:
1612         Py_DECREF(list);
1613         return NULL;
1614 }
1615
1616 Py_LOCAL_INLINE(PyObject *)
1617 rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1618 {
1619         register Py_ssize_t i, j, count=0;
1620         PyObject *str;
1621         PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1622
1623         if (list == NULL)
1624                 return NULL;
1625
1626         i = j = len - 1;
1627         while ((i >= 0) && (maxcount-- > 0)) {
1628                 for (; i >= 0; i--) {
1629                         if (s[i] == ch) {
1630                                 SPLIT_ADD(s, i + 1, j + 1);
1631                                 j = i = i - 1;
1632                                 break;
1633                         }
1634                 }
1635         }
1636         if (j >= -1) {
1637                 SPLIT_ADD(s, 0, j + 1);
1638         }
1639         FIX_PREALLOC_SIZE(list);
1640         if (PyList_Reverse(list) < 0)
1641                 goto onError;
1642         return list;
1643
1644  onError:
1645         Py_DECREF(list);
1646         return NULL;
1647 }
1648
1649 PyDoc_STRVAR(rsplit__doc__,
1650 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1651 \n\
1652 Return a list of the words in the string S, using sep as the\n\
1653 delimiter string, starting at the end of the string and working\n\
1654 to the front.  If maxsplit is given, at most maxsplit splits are\n\
1655 done. If sep is not specified or is None, any whitespace string\n\
1656 is a separator.");
1657
1658 static PyObject *
1659 string_rsplit(PyStringObject *self, PyObject *args)
1660 {
1661         Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1662         Py_ssize_t maxsplit = -1, count=0;
1663         const char *s = PyString_AS_STRING(self), *sub;
1664         PyObject *list, *str, *subobj = Py_None;
1665
1666         if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1667                 return NULL;
1668         if (maxsplit < 0)
1669                 maxsplit = PY_SSIZE_T_MAX;
1670         if (subobj == Py_None)
1671                 return rsplit_whitespace(s, len, maxsplit);
1672         if (PyString_Check(subobj)) {
1673                 sub = PyString_AS_STRING(subobj);
1674                 n = PyString_GET_SIZE(subobj);
1675         }
1676 #ifdef Py_USING_UNICODE
1677         else if (PyUnicode_Check(subobj))
1678                 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1679 #endif
1680         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1681                 return NULL;
1682
1683         if (n == 0) {
1684                 PyErr_SetString(PyExc_ValueError, "empty separator");
1685                 return NULL;
1686         }
1687         else if (n == 1)
1688                 return rsplit_char(s, len, sub[0], maxsplit);
1689
1690         list = PyList_New(PREALLOC_SIZE(maxsplit));
1691         if (list == NULL)
1692                 return NULL;
1693
1694         j = len;
1695         i = j - n;
1696
1697         while ( (i >= 0) && (maxsplit-- > 0) ) {
1698                 for (; i>=0; i--) {
1699                         if (Py_STRING_MATCH(s, i, sub, n)) {
1700                                 SPLIT_ADD(s, i + n, j);
1701                                 j = i;
1702                                 i -= n;
1703                                 break;
1704                         }
1705                 }
1706         }
1707         SPLIT_ADD(s, 0, j);
1708         FIX_PREALLOC_SIZE(list);
1709         if (PyList_Reverse(list) < 0)
1710                 goto onError;
1711         return list;
1712
1713 onError:
1714         Py_DECREF(list);
1715         return NULL;
1716 }
1717
1718
1719 PyDoc_STRVAR(join__doc__,
1720 "S.join(sequence) -> string\n\
1721 \n\
1722 Return a string which is the concatenation of the strings in the\n\
1723 sequence.  The separator between elements is S.");
1724
1725 static PyObject *
1726 string_join(PyStringObject *self, PyObject *orig)
1727 {
1728         char *sep = PyString_AS_STRING(self);
1729         const Py_ssize_t seplen = PyString_GET_SIZE(self);
1730         PyObject *res = NULL;
1731         char *p;
1732         Py_ssize_t seqlen = 0;
1733         size_t sz = 0;
1734         Py_ssize_t i;
1735         PyObject *seq, *item;
1736
1737         seq = PySequence_Fast(orig, "");
1738         if (seq == NULL) {
1739                 return NULL;
1740         }
1741
1742         seqlen = PySequence_Size(seq);
1743         if (seqlen == 0) {
1744                 Py_DECREF(seq);
1745                 return PyString_FromString("");
1746         }
1747         if (seqlen == 1) {
1748                 item = PySequence_Fast_GET_ITEM(seq, 0);
1749                 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1750                         Py_INCREF(item);
1751                         Py_DECREF(seq);
1752                         return item;
1753                 }
1754         }
1755
1756         /* There are at least two things to join, or else we have a subclass
1757          * of the builtin types in the sequence.
1758          * Do a pre-pass to figure out the total amount of space we'll
1759          * need (sz), see whether any argument is absurd, and defer to
1760          * the Unicode join if appropriate.
1761          */
1762         for (i = 0; i < seqlen; i++) {
1763                 const size_t old_sz = sz;
1764                 item = PySequence_Fast_GET_ITEM(seq, i);
1765                 if (!PyString_Check(item)){
1766 #ifdef Py_USING_UNICODE
1767                         if (PyUnicode_Check(item)) {
1768                                 /* Defer to Unicode join.
1769                                  * CAUTION:  There's no gurantee that the
1770                                  * original sequence can be iterated over
1771                                  * again, so we must pass seq here.
1772                                  */
1773                                 PyObject *result;
1774                                 result = PyUnicode_Join((PyObject *)self, seq);
1775                                 Py_DECREF(seq);
1776                                 return result;
1777                         }
1778 #endif
1779                         PyErr_Format(PyExc_TypeError,
1780                                      "sequence item %zd: expected string,"
1781                                      " %.80s found",
1782                                      i, item->ob_type->tp_name);
1783                         Py_DECREF(seq);
1784                         return NULL;
1785                 }
1786                 sz += PyString_GET_SIZE(item);
1787                 if (i != 0)
1788                         sz += seplen;
1789                 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1790                         PyErr_SetString(PyExc_OverflowError,
1791                                 "join() result is too long for a Python string");
1792                         Py_DECREF(seq);
1793                         return NULL;
1794                 }
1795         }
1796
1797         /* Allocate result space. */
1798         res = PyString_FromStringAndSize((char*)NULL, sz);
1799         if (res == NULL) {
1800                 Py_DECREF(seq);
1801                 return NULL;
1802         }
1803
1804         /* Catenate everything. */
1805         p = PyString_AS_STRING(res);
1806         for (i = 0; i < seqlen; ++i) {
1807                 size_t n;
1808                 item = PySequence_Fast_GET_ITEM(seq, i);
1809                 n = PyString_GET_SIZE(item);
1810                 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1811                 p += n;
1812                 if (i < seqlen - 1) {
1813                         Py_MEMCPY(p, sep, seplen);
1814                         p += seplen;
1815                 }
1816         }
1817
1818         Py_DECREF(seq);
1819         return res;
1820 }
1821
1822 PyObject *
1823 _PyString_Join(PyObject *sep, PyObject *x)
1824 {
1825         assert(sep != NULL && PyString_Check(sep));
1826         assert(x != NULL);
1827         return string_join((PyStringObject *)sep, x);
1828 }
1829
1830 Py_LOCAL_INLINE(void)
1831 string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1832 {
1833         if (*end > len)
1834                 *end = len;
1835         else if (*end < 0)
1836                 *end += len;
1837         if (*end < 0)
1838                 *end = 0;
1839         if (*start < 0)
1840                 *start += len;
1841         if (*start < 0)
1842                 *start = 0;
1843 }
1844
1845 Py_LOCAL_INLINE(Py_ssize_t)
1846 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1847 {
1848         PyObject *subobj;
1849         const char *sub;
1850         Py_ssize_t sub_len;
1851         Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1852
1853         if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1854                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1855                 return -2;
1856         if (PyString_Check(subobj)) {
1857                 sub = PyString_AS_STRING(subobj);
1858                 sub_len = PyString_GET_SIZE(subobj);
1859         }
1860 #ifdef Py_USING_UNICODE
1861         else if (PyUnicode_Check(subobj))
1862                 return PyUnicode_Find(
1863                         (PyObject *)self, subobj, start, end, dir);
1864 #endif
1865         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1866                 /* XXX - the "expected a character buffer object" is pretty
1867                    confusing for a non-expert.  remap to something else ? */
1868                 return -2;
1869
1870         if (dir > 0)
1871                 return stringlib_find_slice(
1872                         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1873                         sub, sub_len, start, end);
1874         else
1875                 return stringlib_rfind_slice(
1876                         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1877                         sub, sub_len, start, end);
1878 }
1879
1880
1881 PyDoc_STRVAR(find__doc__,
1882 "S.find(sub [,start [,end]]) -> int\n\
1883 \n\
1884 Return the lowest index in S where substring sub is found,\n\
1885 such that sub is contained within s[start,end].  Optional\n\
1886 arguments start and end are interpreted as in slice notation.\n\
1887 \n\
1888 Return -1 on failure.");
1889
1890 static PyObject *
1891 string_find(PyStringObject *self, PyObject *args)
1892 {
1893         Py_ssize_t result = string_find_internal(self, args, +1);
1894         if (result == -2)
1895                 return NULL;
1896         return PyInt_FromSsize_t(result);
1897 }
1898
1899
1900 PyDoc_STRVAR(index__doc__,
1901 "S.index(sub [,start [,end]]) -> int\n\
1902 \n\
1903 Like S.find() but raise ValueError when the substring is not found.");
1904
1905 static PyObject *
1906 string_index(PyStringObject *self, PyObject *args)
1907 {
1908         Py_ssize_t result = string_find_internal(self, args, +1);
1909         if (result == -2)
1910                 return NULL;
1911         if (result == -1) {
1912                 PyErr_SetString(PyExc_ValueError,
1913                                 "substring not found");
1914                 return NULL;
1915         }
1916         return PyInt_FromSsize_t(result);
1917 }
1918
1919
1920 PyDoc_STRVAR(rfind__doc__,
1921 "S.rfind(sub [,start [,end]]) -> int\n\
1922 \n\
1923 Return the highest index in S where substring sub is found,\n\
1924 such that sub is contained within s[start,end].  Optional\n\
1925 arguments start and end are interpreted as in slice notation.\n\
1926 \n\
1927 Return -1 on failure.");
1928
1929 static PyObject *
1930 string_rfind(PyStringObject *self, PyObject *args)
1931 {
1932         Py_ssize_t result = string_find_internal(self, args, -1);
1933         if (result == -2)
1934                 return NULL;
1935         return PyInt_FromSsize_t(result);
1936 }
1937
1938
1939 PyDoc_STRVAR(rindex__doc__,
1940 "S.rindex(sub [,start [,end]]) -> int\n\
1941 \n\
1942 Like S.rfind() but raise ValueError when the substring is not found.");
1943
1944 static PyObject *
1945 string_rindex(PyStringObject *self, PyObject *args)
1946 {
1947         Py_ssize_t result = string_find_internal(self, args, -1);
1948         if (result == -2)
1949                 return NULL;
1950         if (result == -1) {
1951                 PyErr_SetString(PyExc_ValueError,
1952                                 "substring not found");
1953                 return NULL;
1954         }
1955         return PyInt_FromSsize_t(result);
1956 }
1957
1958
1959 Py_LOCAL_INLINE(PyObject *)
1960 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1961 {
1962         char *s = PyString_AS_STRING(self);
1963         Py_ssize_t len = PyString_GET_SIZE(self);
1964         char *sep = PyString_AS_STRING(sepobj);
1965         Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1966         Py_ssize_t i, j;
1967
1968         i = 0;
1969         if (striptype != RIGHTSTRIP) {
1970                 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1971                         i++;
1972                 }
1973         }
1974
1975         j = len;
1976         if (striptype != LEFTSTRIP) {
1977                 do {
1978                         j--;
1979                 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1980                 j++;
1981         }
1982
1983         if (i == 0 && j == len && PyString_CheckExact(self)) {
1984                 Py_INCREF(self);
1985                 return (PyObject*)self;
1986         }
1987         else
1988                 return PyString_FromStringAndSize(s+i, j-i);
1989 }
1990
1991
1992 Py_LOCAL_INLINE(PyObject *)
1993 do_strip(PyStringObject *self, int striptype)
1994 {
1995         char *s = PyString_AS_STRING(self);
1996         Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1997
1998         i = 0;
1999         if (striptype != RIGHTSTRIP) {
2000                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2001                         i++;
2002                 }
2003         }
2004
2005         j = len;
2006         if (striptype != LEFTSTRIP) {
2007                 do {
2008                         j--;
2009                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2010                 j++;
2011         }
2012
2013         if (i == 0 && j == len && PyString_CheckExact(self)) {
2014                 Py_INCREF(self);
2015                 return (PyObject*)self;
2016         }
2017         else
2018                 return PyString_FromStringAndSize(s+i, j-i);
2019 }
2020
2021
2022 Py_LOCAL_INLINE(PyObject *)
2023 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2024 {
2025         PyObject *sep = NULL;
2026
2027         if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2028                 return NULL;
2029
2030         if (sep != NULL && sep != Py_None) {
2031                 if (PyString_Check(sep))
2032                         return do_xstrip(self, striptype, sep);
2033 #ifdef Py_USING_UNICODE
2034                 else if (PyUnicode_Check(sep)) {
2035                         PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2036                         PyObject *res;
2037                         if (uniself==NULL)
2038                                 return NULL;
2039                         res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2040                                 striptype, sep);
2041                         Py_DECREF(uniself);
2042                         return res;
2043                 }
2044 #endif
2045                 PyErr_Format(PyExc_TypeError,
2046 #ifdef Py_USING_UNICODE
2047                              "%s arg must be None, str or unicode",
2048 #else
2049                              "%s arg must be None or str",
2050 #endif
2051                              STRIPNAME(striptype));
2052                 return NULL;
2053         }
2054
2055         return do_strip(self, striptype);
2056 }
2057
2058
2059 PyDoc_STRVAR(strip__doc__,
2060 "S.strip([chars]) -> string or unicode\n\
2061 \n\
2062 Return a copy of the string S with leading and trailing\n\
2063 whitespace removed.\n\
2064 If chars is given and not None, remove characters in chars instead.\n\
2065 If chars is unicode, S will be converted to unicode before stripping");
2066
2067 static PyObject *
2068 string_strip(PyStringObject *self, PyObject *args)
2069 {
2070         if (PyTuple_GET_SIZE(args) == 0)
2071                 return do_strip(self, BOTHSTRIP); /* Common case */
2072         else
2073                 return do_argstrip(self, BOTHSTRIP, args);
2074 }
2075
2076
2077 PyDoc_STRVAR(lstrip__doc__,
2078 "S.lstrip([chars]) -> string or unicode\n\
2079 \n\
2080 Return a copy of the string S with leading whitespace removed.\n\
2081 If chars is given and not None, remove characters in chars instead.\n\
2082 If chars is unicode, S will be converted to unicode before stripping");
2083
2084 static PyObject *
2085 string_lstrip(PyStringObject *self, PyObject *args)
2086 {
2087         if (PyTuple_GET_SIZE(args) == 0)
2088                 return do_strip(self, LEFTSTRIP); /* Common case */
2089         else
2090                 return do_argstrip(self, LEFTSTRIP, args);
2091 }
2092
2093
2094 PyDoc_STRVAR(rstrip__doc__,
2095 "S.rstrip([chars]) -> string or unicode\n\
2096 \n\
2097 Return a copy of the string S with trailing whitespace removed.\n\
2098 If chars is given and not None, remove characters in chars instead.\n\
2099 If chars is unicode, S will be converted to unicode before stripping");
2100
2101 static PyObject *
2102 string_rstrip(PyStringObject *self, PyObject *args)
2103 {
2104         if (PyTuple_GET_SIZE(args) == 0)
2105                 return do_strip(self, RIGHTSTRIP); /* Common case */
2106         else
2107                 return do_argstrip(self, RIGHTSTRIP, args);
2108 }
2109
2110
2111 PyDoc_STRVAR(lower__doc__,
2112 "S.lower() -> string\n\
2113 \n\
2114 Return a copy of the string S converted to lowercase.");
2115
2116 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2117 #ifndef _tolower
2118 #define _tolower tolower
2119 #endif
2120
2121 static PyObject *
2122 string_lower(PyStringObject *self)
2123 {
2124         char *s;
2125         Py_ssize_t i, n = PyString_GET_SIZE(self);
2126         PyObject *newobj;
2127
2128         newobj = PyString_FromStringAndSize(NULL, n);
2129         if (!newobj)
2130                 return NULL;
2131
2132         s = PyString_AS_STRING(newobj);
2133
2134         Py_MEMCPY(s, PyString_AS_STRING(self), n);
2135
2136         for (i = 0; i < n; i++) {
2137                 int c = Py_CHARMASK(s[i]);
2138                 if (isupper(c))
2139                         s[i] = _tolower(c);
2140         }
2141
2142         return newobj;
2143 }
2144
2145 PyDoc_STRVAR(upper__doc__,
2146 "S.upper() -> string\n\
2147 \n\
2148 Return a copy of the string S converted to uppercase.");
2149
2150 #ifndef _toupper
2151 #define _toupper toupper
2152 #endif
2153
2154 static PyObject *
2155 string_upper(PyStringObject *self)
2156 {
2157         char *s;
2158         Py_ssize_t i, n = PyString_GET_SIZE(self);
2159         PyObject *newobj;
2160
2161         newobj = PyString_FromStringAndSize(NULL, n);
2162         if (!newobj)
2163                 return NULL;
2164
2165         s = PyString_AS_STRING(newobj);
2166
2167         Py_MEMCPY(s, PyString_AS_STRING(self), n);
2168
2169         for (i = 0; i < n; i++) {
2170                 int c = Py_CHARMASK(s[i]);
2171                 if (islower(c))
2172                         s[i] = _toupper(c);
2173         }
2174
2175         return newobj;
2176 }
2177
2178 PyDoc_STRVAR(title__doc__,
2179 "S.title() -> string\n\
2180 \n\
2181 Return a titlecased version of S, i.e. words start with uppercase\n\
2182 characters, all remaining cased characters have lowercase.");
2183
2184 static PyObject*
2185 string_title(PyStringObject *self)
2186 {
2187         char *s = PyString_AS_STRING(self), *s_new;
2188         Py_ssize_t i, n = PyString_GET_SIZE(self);
2189         int previous_is_cased = 0;
2190         PyObject *newobj;
2191
2192         newobj = PyString_FromStringAndSize(NULL, n);
2193         if (newobj == NULL)
2194                 return NULL;
2195         s_new = PyString_AsString(newobj);
2196         for (i = 0; i < n; i++) {
2197                 int c = Py_CHARMASK(*s++);
2198                 if (islower(c)) {
2199                         if (!previous_is_cased)
2200                             c = toupper(c);
2201                         previous_is_cased = 1;
2202                 } else if (isupper(c)) {
2203                         if (previous_is_cased)
2204                             c = tolower(c);
2205                         previous_is_cased = 1;
2206                 } else
2207                         previous_is_cased = 0;
2208                 *s_new++ = c;
2209         }
2210         return newobj;
2211 }
2212
2213 PyDoc_STRVAR(capitalize__doc__,
2214 "S.capitalize() -> string\n\
2215 \n\
2216 Return a copy of the string S with only its first character\n\
2217 capitalized.");
2218
2219 static PyObject *
2220 string_capitalize(PyStringObject *self)
2221 {
2222         char *s = PyString_AS_STRING(self), *s_new;
2223         Py_ssize_t i, n = PyString_GET_SIZE(self);
2224         PyObject *newobj;
2225
2226         newobj = PyString_FromStringAndSize(NULL, n);
2227         if (newobj == NULL)
2228                 return NULL;
2229         s_new = PyString_AsString(newobj);
2230         if (0 < n) {
2231                 int c = Py_CHARMASK(*s++);
2232                 if (islower(c))
2233                         *s_new = toupper(c);
2234                 else
2235                         *s_new = c;
2236                 s_new++;
2237         }
2238         for (i = 1; i < n; i++) {
2239                 int c = Py_CHARMASK(*s++);
2240                 if (isupper(c))
2241                         *s_new = tolower(c);
2242                 else
2243                         *s_new = c;
2244                 s_new++;
2245         }
2246         return newobj;
2247 }
2248
2249
2250 PyDoc_STRVAR(count__doc__,
2251 "S.count(sub[, start[, end]]) -> int\n\
2252 \n\
2253 Return the number of non-overlapping occurrences of substring sub in\n\
2254 string S[start:end].  Optional arguments start and end are interpreted\n\
2255 as in slice notation.");
2256
2257 static PyObject *
2258 string_count(PyStringObject *self, PyObject *args)
2259 {
2260         PyObject *sub_obj;
2261         const char *str = PyString_AS_STRING(self), *sub;
2262         Py_ssize_t sub_len;
2263         Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2264
2265         if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2266                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2267                 return NULL;
2268
2269         if (PyString_Check(sub_obj)) {
2270                 sub = PyString_AS_STRING(sub_obj);
2271                 sub_len = PyString_GET_SIZE(sub_obj);
2272         }
2273 #ifdef Py_USING_UNICODE
2274         else if (PyUnicode_Check(sub_obj)) {
2275                 Py_ssize_t count;
2276                 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2277                 if (count == -1)
2278                         return NULL;
2279                 else
2280                         return PyInt_FromSsize_t(count);
2281         }
2282 #endif
2283         else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2284                 return NULL;
2285
2286         string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
2287
2288         return PyInt_FromSsize_t(
2289                 stringlib_count(str + start, end - start, sub, sub_len)
2290                 );
2291 }
2292
2293 PyDoc_STRVAR(swapcase__doc__,
2294 "S.swapcase() -> string\n\
2295 \n\
2296 Return a copy of the string S with uppercase characters\n\
2297 converted to lowercase and vice versa.");
2298
2299 static PyObject *
2300 string_swapcase(PyStringObject *self)
2301 {
2302         char *s = PyString_AS_STRING(self), *s_new;
2303         Py_ssize_t i, n = PyString_GET_SIZE(self);
2304         PyObject *newobj;
2305
2306         newobj = PyString_FromStringAndSize(NULL, n);
2307         if (newobj == NULL)
2308                 return NULL;
2309         s_new = PyString_AsString(newobj);
2310         for (i = 0; i < n; i++) {
2311                 int c = Py_CHARMASK(*s++);
2312                 if (islower(c)) {
2313                         *s_new = toupper(c);
2314                 }
2315                 else if (isupper(c)) {
2316                         *s_new = tolower(c);
2317                 }
2318                 else
2319                         *s_new = c;
2320                 s_new++;
2321         }
2322         return newobj;
2323 }
2324
2325
2326 PyDoc_STRVAR(translate__doc__,
2327 "S.translate(table [,deletechars]) -> string\n\
2328 \n\
2329 Return a copy of the string S, where all characters occurring\n\
2330 in the optional argument deletechars are removed, and the\n\
2331 remaining characters have been mapped through the given\n\
2332 translation table, which must be a string of length 256.");
2333
2334 static PyObject *
2335 string_translate(PyStringObject *self, PyObject *args)
2336 {
2337         register char *input, *output;
2338         register const char *table;
2339         register Py_ssize_t i, c, changed = 0;
2340         PyObject *input_obj = (PyObject*)self;
2341         const char *table1, *output_start, *del_table=NULL;
2342         Py_ssize_t inlen, tablen, dellen = 0;
2343         PyObject *result;
2344         int trans_table[256];
2345         PyObject *tableobj, *delobj = NULL;
2346
2347         if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2348                               &tableobj, &delobj))
2349                 return NULL;
2350
2351         if (PyString_Check(tableobj)) {
2352                 table1 = PyString_AS_STRING(tableobj);
2353                 tablen = PyString_GET_SIZE(tableobj);
2354         }
2355 #ifdef Py_USING_UNICODE
2356         else if (PyUnicode_Check(tableobj)) {
2357                 /* Unicode .translate() does not support the deletechars
2358                    parameter; instead a mapping to None will cause characters
2359                    to be deleted. */
2360                 if (delobj != NULL) {
2361                         PyErr_SetString(PyExc_TypeError,
2362                         "deletions are implemented differently for unicode");
2363                         return NULL;
2364                 }
2365                 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2366         }
2367 #endif
2368         else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
2369                 return NULL;
2370
2371         if (tablen != 256) {
2372                 PyErr_SetString(PyExc_ValueError,
2373                   "translation table must be 256 characters long");
2374                 return NULL;
2375         }
2376
2377         if (delobj != NULL) {
2378                 if (PyString_Check(delobj)) {
2379                         del_table = PyString_AS_STRING(delobj);
2380                         dellen = PyString_GET_SIZE(delobj);
2381                 }
2382 #ifdef Py_USING_UNICODE
2383                 else if (PyUnicode_Check(delobj)) {
2384                         PyErr_SetString(PyExc_TypeError,
2385                         "deletions are implemented differently for unicode");
2386                         return NULL;
2387                 }
2388 #endif
2389                 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2390                         return NULL;
2391         }
2392         else {
2393                 del_table = NULL;
2394                 dellen = 0;
2395         }
2396
2397         table = table1;
2398         inlen = PyString_GET_SIZE(input_obj);
2399         result = PyString_FromStringAndSize((char *)NULL, inlen);
2400         if (result == NULL)
2401                 return NULL;
2402         output_start = output = PyString_AsString(result);
2403         input = PyString_AS_STRING(input_obj);
2404
2405         if (dellen == 0) {
2406                 /* If no deletions are required, use faster code */
2407                 for (i = inlen; --i >= 0; ) {
2408                         c = Py_CHARMASK(*input++);
2409                         if (Py_CHARMASK((*output++ = table[c])) != c)
2410                                 changed = 1;
2411                 }
2412                 if (changed || !PyString_CheckExact(input_obj))
2413                         return result;
2414                 Py_DECREF(result);
2415                 Py_INCREF(input_obj);
2416                 return input_obj;
2417         }
2418
2419         for (i = 0; i < 256; i++)
2420                 trans_table[i] = Py_CHARMASK(table[i]);
2421
2422         for (i = 0; i < dellen; i++)
2423                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2424
2425         for (i = inlen; --i >= 0; ) {
2426                 c = Py_CHARMASK(*input++);
2427                 if (trans_table[c] != -1)
2428                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2429                                 continue;
2430                 changed = 1;
2431         }
2432         if (!changed && PyString_CheckExact(input_obj)) {
2433                 Py_DECREF(result);
2434                 Py_INCREF(input_obj);
2435                 return input_obj;
2436         }
2437         /* Fix the size of the resulting string */
2438         if (inlen > 0)
2439                 _PyString_Resize(&result, output - output_start);
2440         return result;
2441 }
2442
2443
/* Search directions.  REVERSE is parenthesized so that the expansion
   stays a single operand wherever it is used. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first occurrence of byte `c' within the first
   `target_len' bytes of `target', or NULL if there is none.  Every
   argument is parenthesized in the expansion so that expressions can be
   passed safely. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2451
2452 /* String ops must return a string.  */
2453 /* If the object is subclass of string, create a copy */
2454 Py_LOCAL(PyStringObject *)
2455 return_self(PyStringObject *self)
2456 {
2457         if (PyString_CheckExact(self)) {
2458                 Py_INCREF(self);
2459                 return self;
2460         }
2461         return (PyStringObject *)PyString_FromStringAndSize(
2462                 PyString_AS_STRING(self),
2463                 PyString_GET_SIZE(self));
2464 }
2465
2466 Py_LOCAL_INLINE(Py_ssize_t)
2467 countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
2468 {
2469         Py_ssize_t count=0;
2470         char *start=target;
2471         char *end=target+target_len;
2472
2473         while ( (start=findchar(start, end-start, c)) != NULL ) {
2474                 count++;
2475                 if (count >= maxcount)
2476                         break;
2477                 start += 1;
2478         }
2479         return count;
2480 }
2481
2482 Py_LOCAL(Py_ssize_t)
2483 findstring(char *target, Py_ssize_t target_len,
2484            char *pattern, Py_ssize_t pattern_len,
2485            Py_ssize_t start,
2486            Py_ssize_t end,
2487            int direction)
2488 {
2489         if (start < 0) {
2490                 start += target_len;
2491                 if (start < 0)
2492                         start = 0;
2493         }
2494         if (end > target_len) {
2495                 end = target_len;
2496         } else if (end < 0) {
2497                 end += target_len;
2498                 if (end < 0)
2499                         end = 0;
2500         }
2501
2502         /* zero-length substrings always match at the first attempt */
2503         if (pattern_len == 0)
2504                 return (direction > 0) ? start : end;
2505
2506         end -= pattern_len;
2507
2508         if (direction < 0) {
2509                 for (; end >= start; end--)
2510                         if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2511                                 return end;
2512         } else {
2513                 for (; start <= end; start++)
2514                         if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2515                                 return start;
2516         }
2517         return -1;
2518 }
2519
2520 Py_LOCAL_INLINE(Py_ssize_t)
2521 countstring(char *target, Py_ssize_t target_len,
2522             char *pattern, Py_ssize_t pattern_len,
2523             Py_ssize_t start,
2524             Py_ssize_t end,
2525             int direction, Py_ssize_t maxcount)
2526 {
2527         Py_ssize_t count=0;
2528
2529         if (start < 0) {
2530                 start += target_len;
2531                 if (start < 0)
2532                         start = 0;
2533         }
2534         if (end > target_len) {
2535                 end = target_len;
2536         } else if (end < 0) {
2537                 end += target_len;
2538                 if (end < 0)
2539                         end = 0;
2540         }
2541
2542         /* zero-length substrings match everywhere */
2543         if (pattern_len == 0 || maxcount == 0) {
2544                 if (target_len+1 < maxcount)
2545                         return target_len+1;
2546                 return maxcount;
2547         }
2548
2549         end -= pattern_len;
2550         if (direction < 0) {
2551                 for (; (end >= start); end--)
2552                         if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2553                                 count++;
2554                                 if (--maxcount <= 0) break;
2555                                 end -= pattern_len-1;
2556                         }
2557         } else {
2558                 for (; (start <= end); start++)
2559                         if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2560                                 count++;
2561                                 if (--maxcount <= 0)
2562                                         break;
2563                                 start += pattern_len-1;
2564                         }
2565         }
2566         return count;
2567 }
2568
2569
2570 /* Algorithms for different cases of string replacement */
2571
2572 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2573 Py_LOCAL(PyStringObject *)
2574 replace_interleave(PyStringObject *self,
2575                    PyStringObject *to,
2576                    Py_ssize_t maxcount)
2577 {
2578         char *self_s, *to_s, *result_s;
2579         Py_ssize_t self_len, to_len, result_len;
2580         Py_ssize_t count, i, product;
2581         PyStringObject *result;
2582
2583         self_len = PyString_GET_SIZE(self);
2584         to_len = PyString_GET_SIZE(to);
2585
2586         /* 1 at the end plus 1 after every character */
2587         count = self_len+1;
2588         if (maxcount < count)
2589                 count = maxcount;
2590
2591         /* Check for overflow */
2592         /*   result_len = count * to_len + self_len; */
2593         product = count * to_len;
2594         if (product / to_len != count) {
2595                 PyErr_SetString(PyExc_OverflowError,
2596                                 "replace string is too long");
2597                 return NULL;
2598         }
2599         result_len = product + self_len;
2600         if (result_len < 0) {
2601                 PyErr_SetString(PyExc_OverflowError,
2602                                 "replace string is too long");
2603                 return NULL;
2604         }
2605
2606         if (! (result = (PyStringObject *)
2607                          PyString_FromStringAndSize(NULL, result_len)) )
2608                 return NULL;
2609
2610         self_s = PyString_AS_STRING(self);
2611         to_s = PyString_AS_STRING(to);
2612         to_len = PyString_GET_SIZE(to);
2613         result_s = PyString_AS_STRING(result);
2614
2615         /* TODO: special case single character, which doesn't need memcpy */
2616
2617         /* Lay the first one down (guaranteed this will occur) */
2618         Py_MEMCPY(result_s, to_s, to_len);
2619         result_s += to_len;
2620         count -= 1;
2621
2622         for (i=0; i<count; i++) {
2623                 *result_s++ = *self_s++;
2624                 Py_MEMCPY(result_s, to_s, to_len);
2625                 result_s += to_len;
2626         }
2627
2628         /* Copy the rest of the original string */
2629         Py_MEMCPY(result_s, self_s, self_len-i);
2630
2631         return result;
2632 }
2633
2634 /* Special case for deleting a single character */
2635 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2636 Py_LOCAL(PyStringObject *)
2637 replace_delete_single_character(PyStringObject *self,
2638                                 char from_c, Py_ssize_t maxcount)
2639 {
2640         char *self_s, *result_s;
2641         char *start, *next, *end;
2642         Py_ssize_t self_len, result_len;
2643         Py_ssize_t count;
2644         PyStringObject *result;
2645
2646         self_len = PyString_GET_SIZE(self);
2647         self_s = PyString_AS_STRING(self);
2648
2649         count = countchar(self_s, self_len, from_c, maxcount);
2650         if (count == 0) {
2651                 return return_self(self);
2652         }
2653
2654         result_len = self_len - count;  /* from_len == 1 */
2655         assert(result_len>=0);
2656
2657         if ( (result = (PyStringObject *)
2658                         PyString_FromStringAndSize(NULL, result_len)) == NULL)
2659                 return NULL;
2660         result_s = PyString_AS_STRING(result);
2661
2662         start = self_s;
2663         end = self_s + self_len;
2664         while (count-- > 0) {
2665                 next = findchar(start, end-start, from_c);
2666                 if (next == NULL)
2667                         break;
2668                 Py_MEMCPY(result_s, start, next-start);
2669                 result_s += (next-start);
2670                 start = next+1;
2671         }
2672         Py_MEMCPY(result_s, start, end-start);
2673
2674         return result;
2675 }
2676
2677 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2678
2679 Py_LOCAL(PyStringObject *)
2680 replace_delete_substring(PyStringObject *self, PyStringObject *from,
2681                          Py_ssize_t maxcount) {
2682         char *self_s, *from_s, *result_s;
2683         char *start, *next, *end;
2684         Py_ssize_t self_len, from_len, result_len;
2685         Py_ssize_t count, offset;
2686         PyStringObject *result;
2687
2688         self_len = PyString_GET_SIZE(self);
2689         self_s = PyString_AS_STRING(self);
2690         from_len = PyString_GET_SIZE(from);
2691         from_s = PyString_AS_STRING(from);
2692
2693         count = countstring(self_s, self_len,
2694                             from_s, from_len,
2695                             0, self_len, 1,
2696                             maxcount);
2697
2698         if (count == 0) {
2699                 /* no matches */
2700                 return return_self(self);
2701         }
2702
2703         result_len = self_len - (count * from_len);
2704         assert (result_len>=0);
2705
2706         if ( (result = (PyStringObject *)
2707               PyString_FromStringAndSize(NULL, result_len)) == NULL )
2708                 return NULL;
2709
2710         result_s = PyString_AS_STRING(result);
2711
2712         start = self_s;
2713         end = self_s + self_len;
2714         while (count-- > 0) {
2715                 offset = findstring(start, end-start,
2716                                     from_s, from_len,
2717                                     0, end-start, FORWARD);
2718                 if (offset == -1)
2719                         break;
2720                 next = start + offset;
2721
2722                 Py_MEMCPY(result_s, start, next-start);
2723
2724                 result_s += (next-start);
2725                 start = next+from_len;
2726         }
2727         Py_MEMCPY(result_s, start, end-start);
2728         return result;
2729 }
2730
2731 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2732 Py_LOCAL(PyStringObject *)
2733 replace_single_character_in_place(PyStringObject *self,
2734                                   char from_c, char to_c,
2735                                   Py_ssize_t maxcount)
2736 {
2737         char *self_s, *result_s, *start, *end, *next;
2738         Py_ssize_t self_len;
2739         PyStringObject *result;
2740
2741         /* The result string will be the same size */
2742         self_s = PyString_AS_STRING(self);
2743         self_len = PyString_GET_SIZE(self);
2744
2745         next = findchar(self_s, self_len, from_c);
2746
2747         if (next == NULL) {
2748                 /* No matches; return the original string */
2749                 return return_self(self);
2750         }
2751
2752         /* Need to make a new string */
2753         result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2754         if (result == NULL)
2755                 return NULL;
2756         result_s = PyString_AS_STRING(result);
2757         Py_MEMCPY(result_s, self_s, self_len);
2758
2759         /* change everything in-place, starting with this one */
2760         start =  result_s + (next-self_s);
2761         *start = to_c;
2762         start++;
2763         end = result_s + self_len;
2764
2765         while (--maxcount > 0) {
2766                 next = findchar(start, end-start, from_c);
2767                 if (next == NULL)
2768                         break;
2769                 *next = to_c;
2770                 start = next+1;
2771         }
2772
2773         return result;
2774 }
2775
2776 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2777 Py_LOCAL(PyStringObject *)
2778 replace_substring_in_place(PyStringObject *self,
2779                            PyStringObject *from,
2780                            PyStringObject *to,
2781                            Py_ssize_t maxcount)
2782 {
2783         char *result_s, *start, *end;
2784         char *self_s, *from_s, *to_s;
2785         Py_ssize_t self_len, from_len, offset;
2786         PyStringObject *result;
2787
2788         /* The result string will be the same size */
2789
2790         self_s = PyString_AS_STRING(self);
2791         self_len = PyString_GET_SIZE(self);
2792
2793         from_s = PyString_AS_STRING(from);
2794         from_len = PyString_GET_SIZE(from);
2795         to_s = PyString_AS_STRING(to);
2796
2797         offset = findstring(self_s, self_len,
2798                             from_s, from_len,
2799                             0, self_len, FORWARD);
2800
2801         if (offset == -1) {
2802                 /* No matches; return the original string */
2803                 return return_self(self);
2804         }
2805
2806         /* Need to make a new string */
2807         result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2808         if (result == NULL)
2809                 return NULL;
2810         result_s = PyString_AS_STRING(result);
2811         Py_MEMCPY(result_s, self_s, self_len);
2812
2813
2814         /* change everything in-place, starting with this one */
2815         start =  result_s + offset;
2816         Py_MEMCPY(start, to_s, from_len);
2817         start += from_len;
2818         end = result_s + self_len;
2819
2820         while ( --maxcount > 0) {
2821                 offset = findstring(start, end-start,
2822                                     from_s, from_len,
2823                                     0, end-start, FORWARD);
2824                 if (offset==-1)
2825                         break;
2826                 Py_MEMCPY(start+offset, to_s, from_len);
2827                 start += offset+from_len;
2828         }
2829
2830         return result;
2831 }
2832
2833 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2834 Py_LOCAL(PyStringObject *)
2835 replace_single_character(PyStringObject *self,
2836                          char from_c,
2837                          PyStringObject *to,
2838                          Py_ssize_t maxcount)
2839 {
2840         char *self_s, *to_s, *result_s;
2841         char *start, *next, *end;
2842         Py_ssize_t self_len, to_len, result_len;
2843         Py_ssize_t count, product;
2844         PyStringObject *result;
2845
2846         self_s = PyString_AS_STRING(self);
2847         self_len = PyString_GET_SIZE(self);
2848
2849         count = countchar(self_s, self_len, from_c, maxcount);
2850
2851         if (count == 0) {
2852                 /* no matches, return unchanged */
2853                 return return_self(self);
2854         }
2855
2856         to_s = PyString_AS_STRING(to);
2857         to_len = PyString_GET_SIZE(to);
2858
2859         /* use the difference between current and new, hence the "-1" */
2860         /*   result_len = self_len + count * (to_len-1)  */
2861         product = count * (to_len-1);
2862         if (product / (to_len-1) != count) {
2863                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2864                 return NULL;
2865         }
2866         result_len = self_len + product;
2867         if (result_len < 0) {
2868                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2869                 return NULL;
2870         }
2871
2872         if ( (result = (PyStringObject *)
2873               PyString_FromStringAndSize(NULL, result_len)) == NULL)
2874                 return NULL;
2875         result_s = PyString_AS_STRING(result);
2876
2877         start = self_s;
2878         end = self_s + self_len;
2879         while (count-- > 0) {
2880                 next = findchar(start, end-start, from_c);
2881                 if (next == NULL)
2882                         break;
2883
2884                 if (next == start) {
2885                         /* replace with the 'to' */
2886                         Py_MEMCPY(result_s, to_s, to_len);
2887                         result_s += to_len;
2888                         start += 1;
2889                 } else {
2890                         /* copy the unchanged old then the 'to' */
2891                         Py_MEMCPY(result_s, start, next-start);
2892                         result_s += (next-start);
2893                         Py_MEMCPY(result_s, to_s, to_len);
2894                         result_s += to_len;
2895                         start = next+1;
2896                 }
2897         }
2898         /* Copy the remainder of the remaining string */
2899         Py_MEMCPY(result_s, start, end-start);
2900
2901         return result;
2902 }
2903
2904 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2905 Py_LOCAL(PyStringObject *)
2906 replace_substring(PyStringObject *self,
2907                   PyStringObject *from,
2908                   PyStringObject *to,
2909                   Py_ssize_t maxcount) {
2910         char *self_s, *from_s, *to_s, *result_s;
2911         char *start, *next, *end;
2912         Py_ssize_t self_len, from_len, to_len, result_len;
2913         Py_ssize_t count, offset, product;
2914         PyStringObject *result;
2915
2916         self_s = PyString_AS_STRING(self);
2917         self_len = PyString_GET_SIZE(self);
2918         from_s = PyString_AS_STRING(from);
2919         from_len = PyString_GET_SIZE(from);
2920
2921         count = countstring(self_s, self_len,
2922                             from_s, from_len,
2923                             0, self_len, FORWARD, maxcount);
2924         if (count == 0) {
2925                 /* no matches, return unchanged */
2926                 return return_self(self);
2927         }
2928
2929         to_s = PyString_AS_STRING(to);
2930         to_len = PyString_GET_SIZE(to);
2931
2932         /* Check for overflow */
2933         /*    result_len = self_len + count * (to_len-from_len) */
2934         product = count * (to_len-from_len);
2935         if (product / (to_len-from_len) != count) {
2936                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2937                 return NULL;
2938         }
2939         result_len = self_len + product;
2940         if (result_len < 0) {
2941                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2942                 return NULL;
2943         }
2944
2945         if ( (result = (PyStringObject *)
2946               PyString_FromStringAndSize(NULL, result_len)) == NULL)
2947                 return NULL;
2948         result_s = PyString_AS_STRING(result);
2949
2950         start = self_s;
2951         end = self_s + self_len;
2952         while (count-- > 0) {
2953                 offset = findstring(start, end-start,
2954                                     from_s, from_len,
2955                                     0, end-start, FORWARD);
2956                 if (offset == -1)
2957                         break;
2958                 next = start+offset;
2959                 if (next == start) {
2960                         /* replace with the 'to' */
2961                         Py_MEMCPY(result_s, to_s, to_len);
2962                         result_s += to_len;
2963                         start += from_len;
2964                 } else {
2965                         /* copy the unchanged old then the 'to' */
2966                         Py_MEMCPY(result_s, start, next-start);
2967                         result_s += (next-start);
2968                         Py_MEMCPY(result_s, to_s, to_len);
2969                         result_s += to_len;
2970                         start = next+from_len;
2971                 }
2972         }
2973         /* Copy the remainder of the remaining string */
2974         Py_MEMCPY(result_s, start, end-start);
2975
2976         return result;
2977 }
2978
2979
2980 Py_LOCAL(PyStringObject *)
2981 replace(PyStringObject *self,
2982         PyStringObject *from,
2983         PyStringObject *to,
2984         Py_ssize_t maxcount)
2985 {
2986         Py_ssize_t from_len, to_len;
2987
2988         if (maxcount < 0) {
2989                 maxcount = PY_SSIZE_T_MAX;
2990         } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2991                 /* nothing to do; return the original string */
2992                 return return_self(self);
2993         }
2994
2995         from_len = PyString_GET_SIZE(from);
2996         to_len = PyString_GET_SIZE(to);
2997
2998         if (maxcount == 0 ||
2999             (from_len == 0 && to_len == 0)) {
3000                 /* nothing to do; return the original string */
3001                 return return_self(self);
3002         }
3003
3004         /* Handle zero-length special cases */
3005
3006         if (from_len == 0) {
3007                 /* insert the 'to' string everywhere.   */
3008                 /*    >>> "Python".replace("", ".")     */
3009                 /*    '.P.y.t.h.o.n.'                   */
3010                 return replace_interleave(self, to, maxcount);
3011         }
3012
3013         /* Except for "".replace("", "A") == "A" there is no way beyond this */
3014         /* point for an empty self string to generate a non-empty string */
3015         /* Special case so the remaining code always gets a non-empty string */
3016         if (PyString_GET_SIZE(self) == 0) {
3017                 return return_self(self);
3018         }
3019
3020         if (to_len == 0) {
3021                 /* delete all occurances of 'from' string */
3022                 if (from_len == 1) {
3023                         return replace_delete_single_character(
3024                                 self, PyString_AS_STRING(from)[0], maxcount);
3025                 } else {
3026                         return replace_delete_substring(self, from, maxcount);
3027                 }
3028         }
3029
3030         /* Handle special case where both strings have the same length */
3031
3032         if (from_len == to_len) {
3033                 if (from_len == 1) {
3034                         return replace_single_character_in_place(
3035                                 self,
3036                                 PyString_AS_STRING(from)[0],
3037                                 PyString_AS_STRING(to)[0],
3038                                 maxcount);
3039                 } else {
3040                         return replace_substring_in_place(
3041                                 self, from, to, maxcount);
3042                 }
3043         }
3044
3045         /* Otherwise use the more generic algorithms */
3046         if (from_len == 1) {
3047                 return replace_single_character(self, PyString_AS_STRING(from)[0],
3048                                                 to, maxcount);
3049         } else {
3050                 /* len('from')>=2, len('to')>=1 */
3051                 return replace_substring(self, from, to, maxcount);
3052         }
3053 }
3054
3055 PyDoc_STRVAR(replace__doc__,
3056 "S.replace (old, new[, count]) -> string\n\
3057 \n\
3058 Return a copy of string S with all occurrences of substring\n\
3059 old replaced by new.  If the optional argument count is\n\
3060 given, only the first count occurrences are replaced.");
3061
3062 static PyObject *
3063 string_replace(PyStringObject *self, PyObject *args)
3064 {
3065         Py_ssize_t count = -1;
3066         PyObject *from, *to;
3067         const char *tmp_s;
3068         Py_ssize_t tmp_len;
3069
3070         if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3071                 return NULL;
3072
3073         if (PyString_Check(from)) {
3074           /* Can this be made a '!check' after the Unicode check? */
3075         }
3076 #ifdef Py_USING_UNICODE
3077         if (PyUnicode_Check(from))
3078                 return PyUnicode_Replace((PyObject *)self,
3079                                          from, to, count);
3080 #endif
3081         else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
3082                 return NULL;
3083
3084         if (PyString_Check(to)) {
3085           /* Can this be made a '!check' after the Unicode check? */
3086         }
3087 #ifdef Py_USING_UNICODE
3088         else if (PyUnicode_Check(to))
3089                 return PyUnicode_Replace((PyObject *)self,
3090                                          from, to, count);
3091 #endif
3092         else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
3093                 return NULL;
3094
3095         return (PyObject *)replace((PyStringObject *) self,
3096                                    (PyStringObject *) from,
3097                                    (PyStringObject *) to, count);
3098 }
3099
3100 /** End DALKE **/
3101
3102 /* Matches the end (direction >= 0) or start (direction < 0) of self
3103  * against substr, using the start and end arguments. Returns
3104  * -1 on error, 0 if not found and 1 if found.
3105  */
3106 Py_LOCAL(int)
3107 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3108                   Py_ssize_t end, int direction)
3109 {
3110         Py_ssize_t len = PyString_GET_SIZE(self);
3111         Py_ssize_t slen;
3112         const char* sub;
3113         const char* str;
3114
3115         if (PyString_Check(substr)) {
3116                 sub = PyString_AS_STRING(substr);
3117                 slen = PyString_GET_SIZE(substr);
3118         }
3119 #ifdef Py_USING_UNICODE
3120         else if (PyUnicode_Check(substr))
3121                 return PyUnicode_Tailmatch((PyObject *)self,
3122                                            substr, start, end, direction);
3123 #endif
3124         else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3125                 return -1;
3126         str = PyString_AS_STRING(self);
3127
3128         string_adjust_indices(&start, &end, len);
3129
3130         if (direction < 0) {
3131                 /* startswith */
3132                 if (start+slen > len)
3133                         return 0;
3134         } else {
3135                 /* endswith */
3136                 if (end-start < slen || start > len)
3137                         return 0;
3138
3139                 if (end-slen > start)
3140                         start = end - slen;
3141         }
3142         if (end-start >= slen)
3143                 return ! memcmp(str+start, sub, slen);
3144         return 0;
3145 }
3146
3147
3148 PyDoc_STRVAR(startswith__doc__,
3149 "S.startswith(prefix[, start[, end]]) -> bool\n\
3150 \n\
3151 Return True if S starts with the specified prefix, False otherwise.\n\
3152 With optional start, test S beginning at that position.\n\
3153 With optional end, stop comparing S at that position.\n\
3154 prefix can also be a tuple of strings to try.");
3155
3156 static PyObject *
3157 string_startswith(PyStringObject *self, PyObject *args)
3158 {
3159         Py_ssize_t start = 0;
3160         Py_ssize_t end = PY_SSIZE_T_MAX;
3161         PyObject *subobj;
3162         int result;
3163
3164         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3165                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3166                 return NULL;
3167         if (PyTuple_Check(subobj)) {
3168                 Py_ssize_t i;
3169                 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3170                         result = _string_tailmatch(self,
3171                                         PyTuple_GET_ITEM(subobj, i),
3172                                         start, end, -1);
3173                         if (result == -1)
3174                                 return NULL;
3175                         else if (result) {
3176                                 Py_RETURN_TRUE;
3177                         }
3178                 }
3179                 Py_RETURN_FALSE;
3180         }
3181         result = _string_tailmatch(self, subobj, start, end, -1);
3182         if (result == -1)
3183                 return NULL;
3184         else
3185                 return PyBool_FromLong(result);
3186 }
3187
3188
3189 PyDoc_STRVAR(endswith__doc__,
3190 "S.endswith(suffix[, start[, end]]) -> bool\n\
3191 \n\
3192 Return True if S ends with the specified suffix, False otherwise.\n\
3193 With optional start, test S beginning at that position.\n\
3194 With optional end, stop comparing S at that position.\n\
3195 suffix can also be a tuple of strings to try.");
3196
3197 static PyObject *
3198 string_endswith(PyStringObject *self, PyObject *args)
3199 {
3200         Py_ssize_t start = 0;
3201         Py_ssize_t end = PY_SSIZE_T_MAX;
3202         PyObject *subobj;
3203         int result;
3204
3205         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3206                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3207                 return NULL;
3208         if (PyTuple_Check(subobj)) {
3209                 Py_ssize_t i;
3210                 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3211                         result = _string_tailmatch(self,
3212                                         PyTuple_GET_ITEM(subobj, i),
3213                                         start, end, +1);
3214                         if (result == -1)
3215                                 return NULL;
3216                         else if (result) {
3217                                 Py_RETURN_TRUE;
3218                         }
3219                 }
3220                 Py_RETURN_FALSE;
3221         }
3222         result = _string_tailmatch(self, subobj, start, end, +1);
3223         if (result == -1)
3224                 return NULL;
3225         else
3226                 return PyBool_FromLong(result);
3227 }
3228
3229
3230 PyDoc_STRVAR(encode__doc__,
3231 "S.encode([encoding[,errors]]) -> object\n\
3232 \n\
3233 Encodes S using the codec registered for encoding. encoding defaults\n\
3234 to the default encoding. errors may be given to set a different error\n\
3235 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3236 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3237 'xmlcharrefreplace' as well as any other name registered with\n\
3238 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3239
3240 static PyObject *
3241 string_encode(PyStringObject *self, PyObject *args)
3242 {
3243     char *encoding = NULL;
3244     char *errors = NULL;
3245     PyObject *v;
3246
3247     if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3248         return NULL;
3249     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3250     if (v == NULL)
3251         goto onError;
3252     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3253         PyErr_Format(PyExc_TypeError,
3254                      "encoder did not return a string/unicode object "
3255                      "(type=%.400s)",
3256                      v->ob_type->tp_name);
3257         Py_DECREF(v);
3258         return NULL;
3259     }
3260     return v;
3261
3262  onError:
3263     return NULL;
3264 }
3265
3266
3267 PyDoc_STRVAR(decode__doc__,
3268 "S.decode([encoding[,errors]]) -> object\n\
3269 \n\
3270 Decodes S using the codec registered for encoding. encoding defaults\n\
3271 to the default encoding. errors may be given to set a different error\n\
3272 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3273 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3274 as well as any other name registerd with codecs.register_error that is\n\
3275 able to handle UnicodeDecodeErrors.");
3276
3277 static PyObject *
3278 string_decode(PyStringObject *self, PyObject *args)
3279 {
3280     char *encoding = NULL;
3281     char *errors = NULL;
3282     PyObject *v;
3283
3284     if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3285         return NULL;
3286     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3287     if (v == NULL)
3288         goto onError;
3289     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3290         PyErr_Format(PyExc_TypeError,
3291                      "decoder did not return a string/unicode object "
3292                      "(type=%.400s)",
3293                      v->ob_type->tp_name);
3294         Py_DECREF(v);
3295         return NULL;
3296     }
3297     return v;
3298
3299  onError:
3300     return NULL;
3301 }
3302
3303
3304 PyDoc_STRVAR(expandtabs__doc__,
3305 "S.expandtabs([tabsize]) -> string\n\
3306 \n\
3307 Return a copy of S where all tab characters are expanded using spaces.\n\
3308 If tabsize is not given, a tab size of 8 characters is assumed.");
3309
3310 static PyObject*
3311 string_expandtabs(PyStringObject *self, PyObject *args)
3312 {
3313     const char *e, *p;
3314     char *q;
3315     Py_ssize_t i, j;
3316     PyObject *u;
3317     int tabsize = 8;
3318
3319     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3320         return NULL;
3321
3322     /* First pass: determine size of output string */
3323     i = j = 0;
3324     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3325     for (p = PyString_AS_STRING(self); p < e; p++)
3326         if (*p == '\t') {
3327             if (tabsize > 0)
3328                 j += tabsize - (j % tabsize);
3329         }
3330         else {
3331             j++;
3332             if (*p == '\n' || *p == '\r') {
3333                 i += j;
3334                 j = 0;
3335             }
3336         }
3337
3338     /* Second pass: create output string and fill it */
3339     u = PyString_FromStringAndSize(NULL, i + j);
3340     if (!u)
3341         return NULL;
3342
3343     j = 0;
3344     q = PyString_AS_STRING(u);
3345
3346     for (p = PyString_AS_STRING(self); p < e; p++)
3347         if (*p == '\t') {
3348             if (tabsize > 0) {
3349                 i = tabsize - (j % tabsize);
3350                 j += i;
3351                 while (i--)
3352                     *q++ = ' ';
3353             }
3354         }
3355         else {
3356             j++;
3357             *q++ = *p;
3358             if (*p == '\n' || *p == '\r')
3359                 j = 0;
3360         }
3361
3362     return u;
3363 }
3364
3365 Py_LOCAL_INLINE(PyObject *)
3366 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3367 {
3368     PyObject *u;
3369
3370     if (left < 0)
3371         left = 0;
3372     if (right < 0)
3373         right = 0;
3374
3375     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3376         Py_INCREF(self);
3377         return (PyObject *)self;
3378     }
3379
3380     u = PyString_FromStringAndSize(NULL,
3381                                    left + PyString_GET_SIZE(self) + right);
3382     if (u) {
3383         if (left)
3384             memset(PyString_AS_STRING(u), fill, left);
3385         Py_MEMCPY(PyString_AS_STRING(u) + left,
3386                PyString_AS_STRING(self),
3387                PyString_GET_SIZE(self));
3388         if (right)
3389             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3390                    fill, right);
3391     }
3392
3393     return u;
3394 }
3395
3396 PyDoc_STRVAR(ljust__doc__,
3397 "S.ljust(width[, fillchar]) -> string\n"
3398 "\n"
3399 "Return S left justified in a string of length width. Padding is\n"
3400 "done using the specified fill character (default is a space).");
3401
3402 static PyObject *
3403 string_ljust(PyStringObject *self, PyObject *args)
3404 {
3405     Py_ssize_t width;
3406     char fillchar = ' ';
3407
3408     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3409         return NULL;
3410
3411     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3412         Py_INCREF(self);
3413         return (PyObject*) self;
3414     }
3415
3416     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3417 }
3418
3419
3420 PyDoc_STRVAR(rjust__doc__,
3421 "S.rjust(width[, fillchar]) -> string\n"
3422 "\n"
3423 "Return S right justified in a string of length width. Padding is\n"
3424 "done using the specified fill character (default is a space)");
3425
3426 static PyObject *
3427 string_rjust(PyStringObject *self, PyObject *args)
3428 {
3429     Py_ssize_t width;
3430     char fillchar = ' ';
3431
3432     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3433         return NULL;
3434
3435     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3436         Py_INCREF(self);
3437         return (PyObject*) self;
3438     }
3439
3440     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3441 }
3442
3443
3444 PyDoc_STRVAR(center__doc__,
3445 "S.center(width[, fillchar]) -> string\n"
3446 "\n"
3447 "Return S centered in a string of length width. Padding is\n"
3448 "done using the specified fill character (default is a space)");
3449
3450 static PyObject *
3451 string_center(PyStringObject *self, PyObject *args)
3452 {
3453     Py_ssize_t marg, left;
3454     Py_ssize_t width;
3455     char fillchar = ' ';
3456
3457     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3458         return NULL;
3459
3460     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3461         Py_INCREF(self);
3462         return (PyObject*) self;
3463     }
3464
3465     marg = width - PyString_GET_SIZE(self);
3466     left = marg / 2 + (marg & width & 1);
3467
3468     return pad(self, left, marg - left, fillchar);
3469 }
3470
3471 PyDoc_STRVAR(zfill__doc__,
3472 "S.zfill(width) -> string\n"
3473 "\n"
3474 "Pad a numeric string S with zeros on the left, to fill a field\n"
3475 "of the specified width.  The string S is never truncated.");
3476
3477 static PyObject *
3478 string_zfill(PyStringObject *self, PyObject *args)
3479 {
3480     Py_ssize_t fill;
3481     PyObject *s;
3482     char *p;
3483     Py_ssize_t width;
3484
3485     if (!PyArg_ParseTuple(args, "n:zfill", &width))
3486         return NULL;
3487
3488     if (PyString_GET_SIZE(self) >= width) {
3489         if (PyString_CheckExact(self)) {
3490             Py_INCREF(self);
3491             return (PyObject*) self;
3492         }
3493         else
3494             return PyString_FromStringAndSize(
3495                 PyString_AS_STRING(self),
3496                 PyString_GET_SIZE(self)
3497             );
3498     }
3499
3500     fill = width - PyString_GET_SIZE(self);
3501
3502     s = pad(self, fill, 0, '0');
3503
3504     if (s == NULL)
3505         return NULL;
3506
3507     p = PyString_AS_STRING(s);
3508     if (p[fill] == '+' || p[fill] == '-') {
3509         /* move sign to beginning of string */
3510         p[0] = p[fill];
3511         p[fill] = '0';
3512     }
3513
3514     return (PyObject*) s;
3515 }
3516
3517 PyDoc_STRVAR(isspace__doc__,
3518 "S.isspace() -> bool\n\
3519 \n\
3520 Return True if all characters in S are whitespace\n\
3521 and there is at least one character in S, False otherwise.");
3522
3523 static PyObject*
3524 string_isspace(PyStringObject *self)
3525 {
3526     register const unsigned char *p
3527         = (unsigned char *) PyString_AS_STRING(self);
3528     register const unsigned char *e;
3529
3530     /* Shortcut for single character strings */
3531     if (PyString_GET_SIZE(self) == 1 &&
3532         isspace(*p))
3533         return PyBool_FromLong(1);
3534
3535     /* Special case for empty strings */
3536     if (PyString_GET_SIZE(self) == 0)
3537         return PyBool_FromLong(0);
3538
3539     e = p + PyString_GET_SIZE(self);
3540     for (; p < e; p++) {
3541         if (!isspace(*p))
3542             return PyBool_FromLong(0);
3543     }
3544     return PyBool_FromLong(1);
3545 }
3546
3547
3548 PyDoc_STRVAR(isalpha__doc__,
3549 "S.isalpha() -> bool\n\
3550 \n\
3551 Return True if all characters in S are alphabetic\n\
3552 and there is at least one character in S, False otherwise.");
3553
3554 static PyObject*
3555 string_isalpha(PyStringObject *self)
3556 {
3557     register const unsigned char *p
3558         = (unsigned char *) PyString_AS_STRING(self);
3559     register const unsigned char *e;
3560
3561     /* Shortcut for single character strings */
3562     if (PyString_GET_SIZE(self) == 1 &&
3563         isalpha(*p))
3564         return PyBool_FromLong(1);
3565
3566     /* Special case for empty strings */
3567     if (PyString_GET_SIZE(self) == 0)
3568         return PyBool_FromLong(0);
3569
3570     e = p + PyString_GET_SIZE(self);
3571     for (; p < e; p++) {
3572         if (!isalpha(*p))
3573             return PyBool_FromLong(0);
3574     }
3575     return PyBool_FromLong(1);
3576 }
3577
3578
3579 PyDoc_STRVAR(isalnum__doc__,
3580 "S.isalnum() -> bool\n\
3581 \n\
3582 Return True if all characters in S are alphanumeric\n\
3583 and there is at least one character in S, False otherwise.");
3584
3585 static PyObject*
3586 string_isalnum(PyStringObject *self)
3587 {
3588     register const unsigned char *p
3589         = (unsigned char *) PyString_AS_STRING(self);
3590     register const unsigned char *e;
3591
3592     /* Shortcut for single character strings */
3593     if (PyString_GET_SIZE(self) == 1 &&
3594         isalnum(*p))
3595         return PyBool_FromLong(1);
3596
3597     /* Special case for empty strings */
3598     if (PyString_GET_SIZE(self) == 0)
3599         return PyBool_FromLong(0);
3600
3601     e = p + PyString_GET_SIZE(self);
3602     for (; p < e; p++) {
3603         if (!isalnum(*p))
3604             return PyBool_FromLong(0);
3605     }
3606     return PyBool_FromLong(1);
3607 }
3608
3609
3610 PyDoc_STRVAR(isdigit__doc__,
3611 "S.isdigit() -> bool\n\
3612 \n\
3613 Return True if all characters in S are digits\n\
3614 and there is at least one character in S, False otherwise.");
3615
3616 static PyObject*
3617 string_isdigit(PyStringObject *self)
3618 {
3619     register const unsigned char *p
3620         = (unsigned char *) PyString_AS_STRING(self);
3621     register const unsigned char *e;
3622
3623     /* Shortcut for single character strings */
3624     if (PyString_GET_SIZE(self) == 1 &&
3625         isdigit(*p))
3626         return PyBool_FromLong(1);
3627
3628     /* Special case for empty strings */
3629     if (PyString_GET_SIZE(self) == 0)
3630         return PyBool_FromLong(0);
3631
3632     e = p + PyString_GET_SIZE(self);
3633     for (; p < e; p++) {
3634         if (!isdigit(*p))
3635             return PyBool_FromLong(0);
3636     }
3637     return PyBool_FromLong(1);
3638 }
3639
3640
3641 PyDoc_STRVAR(islower__doc__,
3642 "S.islower() -> bool\n\
3643 \n\
3644 Return True if all cased characters in S are lowercase and there is\n\
3645 at least one cased character in S, False otherwise.");
3646
3647 static PyObject*
3648 string_islower(PyStringObject *self)
3649 {
3650     register const unsigned char *p
3651         = (unsigned char *) PyString_AS_STRING(self);
3652     register const unsigned char *e;
3653     int cased;
3654
3655     /* Shortcut for single character strings */
3656     if (PyString_GET_SIZE(self) == 1)
3657         return PyBool_FromLong(islower(*p) != 0);
3658
3659     /* Special case for empty strings */
3660     if (PyString_GET_SIZE(self) == 0)
3661         return PyBool_FromLong(0);
3662
3663     e = p + PyString_GET_SIZE(self);
3664     cased = 0;
3665     for (; p < e; p++) {
3666         if (isupper(*p))
3667             return PyBool_FromLong(0);
3668         else if (!cased && islower(*p))
3669             cased = 1;
3670     }
3671     return PyBool_FromLong(cased);
3672 }
3673
3674
3675 PyDoc_STRVAR(isupper__doc__,
3676 "S.isupper() -> bool\n\
3677 \n\
3678 Return True if all cased characters in S are uppercase and there is\n\
3679 at least one cased character in S, False otherwise.");
3680
3681 static PyObject*
3682 string_isupper(PyStringObject *self)
3683 {
3684     register const unsigned char *p
3685         = (unsigned char *) PyString_AS_STRING(self);
3686     register const unsigned char *e;
3687     int cased;
3688
3689     /* Shortcut for single character strings */
3690     if (PyString_GET_SIZE(self) == 1)
3691         return PyBool_FromLong(isupper(*p) != 0);
3692
3693     /* Special case for empty strings */
3694     if (PyString_GET_SIZE(self) == 0)
3695         return PyBool_FromLong(0);
3696
3697     e = p + PyString_GET_SIZE(self);
3698     cased = 0;
3699     for (; p < e; p++) {
3700         if (islower(*p))
3701             return PyBool_FromLong(0);
3702         else if (!cased && isupper(*p))
3703             cased = 1;
3704     }
3705     return PyBool_FromLong(cased);
3706 }
3707
3708
3709 PyDoc_STRVAR(istitle__doc__,
3710 "S.istitle() -> bool\n\
3711 \n\
3712 Return True if S is a titlecased string and there is at least one\n\
3713 character in S, i.e. uppercase characters may only follow uncased\n\
3714 characters and lowercase characters only cased ones. Return False\n\
3715 otherwise.");
3716
3717 static PyObject*
3718 string_istitle(PyStringObject *self, PyObject *uncased)
3719 {
3720     register const unsigned char *p
3721         = (unsigned char *) PyString_AS_STRING(self);
3722     register const unsigned char *e;
3723     int cased, previous_is_cased;
3724
3725     /* Shortcut for single character strings */
3726     if (PyString_GET_SIZE(self) == 1)
3727         return PyBool_FromLong(isupper(*p) != 0);
3728
3729     /* Special case for empty strings */
3730     if (PyString_GET_SIZE(self) == 0)
3731         return PyBool_FromLong(0);
3732
3733     e = p + PyString_GET_SIZE(self);
3734     cased = 0;
3735     previous_is_cased = 0;
3736     for (; p < e; p++) {
3737         register const unsigned char ch = *p;
3738
3739         if (isupper(ch)) {
3740             if (previous_is_cased)
3741                 return PyBool_FromLong(0);
3742             previous_is_cased = 1;
3743             cased = 1;
3744         }
3745         else if (islower(ch)) {
3746             if (!previous_is_cased)
3747                 return PyBool_FromLong(0);
3748             previous_is_cased = 1;
3749             cased = 1;
3750         }
3751         else
3752             previous_is_cased = 0;
3753     }
3754     return PyBool_FromLong(cased);
3755 }
3756
3757
3758 PyDoc_STRVAR(splitlines__doc__,
3759 "S.splitlines([keepends]) -> list of strings\n\
3760 \n\
3761 Return a list of the lines in S, breaking at line boundaries.\n\
3762 Line breaks are not included in the resulting list unless keepends\n\
3763 is given and true.");
3764
3765 static PyObject*
3766 string_splitlines(PyStringObject *self, PyObject *args)
3767 {
3768     register Py_ssize_t i;
3769     register Py_ssize_t j;
3770     Py_ssize_t len;
3771     int keepends = 0;
3772     PyObject *list;
3773     PyObject *str;
3774     char *data;
3775
3776     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3777         return NULL;
3778
3779     data = PyString_AS_STRING(self);
3780     len = PyString_GET_SIZE(self);
3781
3782     /* This does not use the preallocated list because splitlines is
3783        usually run with hundreds of newlines.  The overhead of
3784        switching between PyList_SET_ITEM and append causes about a
3785        2-3% slowdown for that common case.  A smarter implementation
3786        could move the if check out, so the SET_ITEMs are done first
3787        and the appends only done when the prealloc buffer is full.
3788        That's too much work for little gain.*/
3789
3790     list = PyList_New(0);
3791     if (!list)
3792         goto onError;
3793
3794     for (i = j = 0; i < len; ) {
3795         Py_ssize_t eol;
3796
3797         /* Find a line and append it */
3798         while (i < len && data[i] != '\n' && data[i] != '\r')
3799             i++;
3800
3801         /* Skip the line break reading CRLF as one line break */
3802         eol = i;
3803         if (i < len) {
3804             if (data[i] == '\r' && i + 1 < len &&
3805                 data[i+1] == '\n')
3806                 i += 2;
3807             else
3808                 i++;
3809             if (keepends)
3810                 eol = i;
3811         }
3812         SPLIT_APPEND(data, j, eol);
3813         j = i;
3814     }
3815     if (j < len) {
3816         SPLIT_APPEND(data, j, len);
3817     }
3818
3819     return list;
3820
3821  onError:
3822     Py_XDECREF(list);
3823     return NULL;
3824 }
3825
3826 #undef SPLIT_APPEND
3827 #undef SPLIT_ADD
3828 #undef MAX_PREALLOC
3829 #undef PREALLOC_SIZE
3830
3831 static PyObject *
3832 string_getnewargs(PyStringObject *v)
3833 {
3834         return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3835 }
3836
3837 \f
/* Method table for str; PyString_Type refers to it through tp_methods.
   Each entry gives the Python-level method name, the C function that
   implements it, the calling-convention flag (METH_NOARGS, METH_O or
   METH_VARARGS) and the docstring.  __getnewargs__ is registered without a
   docstring.  The table ends with a NULL sentinel entry. */
3838 static PyMethodDef
3839 string_methods[] = {
3840         /* Counterparts of the obsolete stropmodule functions; except
3841            string.maketrans(). */
3842         {"join", (PyCFunction)string_join, METH_O, join__doc__},
3843         {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3844         {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3845         {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3846         {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3847         {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3848         {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3849         {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3850         {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3851         {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3852         {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3853         {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3854         {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3855          capitalize__doc__},
3856         {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3857         {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3858          endswith__doc__},
3859         {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3860         {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3861         {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3862         {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3863         {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3864         {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3865         {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3866         {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3867         {"rpartition", (PyCFunction)string_rpartition, METH_O,
3868          rpartition__doc__},
3869         {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3870          startswith__doc__},
3871         {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3872         {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3873          swapcase__doc__},
3874         {"translate", (PyCFunction)string_translate, METH_VARARGS,
3875          translate__doc__},
3876         {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3877         {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3878         {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3879         {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3880         {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3881         {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3882         {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3883         {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3884          expandtabs__doc__},
3885         {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3886          splitlines__doc__},
3887         {"__getnewargs__",      (PyCFunction)string_getnewargs, METH_NOARGS},
3888         {NULL,     NULL}                     /* sentinel */
3889 };
3890
3891 static PyObject *
3892 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3893
3894 static PyObject *
3895 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3896 {
3897         PyObject *x = NULL;
3898         static char *kwlist[] = {"object", 0};
3899
3900         if (type != &PyString_Type)
3901                 return str_subtype_new(type, args, kwds);
3902         if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3903                 return NULL;
3904         if (x == NULL)
3905                 return PyString_FromString("");
3906         return PyObject_Str(x);
3907 }
3908
3909 static PyObject *
3910 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3911 {
3912         PyObject *tmp, *pnew;
3913         Py_ssize_t n;
3914
3915         assert(PyType_IsSubtype(type, &PyString_Type));
3916         tmp = string_new(&PyString_Type, args, kwds);
3917         if (tmp == NULL)
3918                 return NULL;
3919         assert(PyString_CheckExact(tmp));
3920         n = PyString_GET_SIZE(tmp);
3921         pnew = type->tp_alloc(type, n);
3922         if (pnew != NULL) {
3923                 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3924                 ((PyStringObject *)pnew)->ob_shash =
3925                         ((PyStringObject *)tmp)->ob_shash;
3926                 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3927         }
3928         Py_DECREF(tmp);
3929         return pnew;
3930 }
3931
3932 static PyObject *
3933 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3934 {
3935         PyErr_SetString(PyExc_TypeError,
3936                         "The basestring type cannot be instantiated");
3937         return NULL;
3938 }
3939
3940 static PyObject *
3941 string_mod(PyObject *v, PyObject *w)
3942 {
3943         if (!PyString_Check(v)) {
3944                 Py_INCREF(Py_NotImplemented);
3945                 return Py_NotImplemented;
3946         }
3947         return PyString_Format(v, w);
3948 }
3949
/* Docstring for the basestring type; used as tp_doc of PyBaseString_Type. */
3950 PyDoc_STRVAR(basestring_doc,
3951 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3952
/* Number-protocol slots for str; PyString_Type refers to this table through
   tp_as_number.  Only nb_remainder is filled in, routing it to string_mod.
   Every other slot, including those after nb_remainder that are not listed
   here, is left zero. */
3953 static PyNumberMethods string_as_number = {
3954         0,                      /*nb_add*/
3955         0,                      /*nb_subtract*/
3956         0,                      /*nb_multiply*/
3957         0,                      /*nb_divide*/
3958         string_mod,             /*nb_remainder*/
3959 };
3960
3961
/* Type object for basestring.  It fills in almost no slots: its tp_new is
   basestring_new, which always raises TypeError, so the type cannot be
   instantiated directly.  Py_TPFLAGS_BASETYPE allows it to be used as a
   base type; PyString_Type names it as tp_base. */
3962 PyTypeObject PyBaseString_Type = {
3963         PyObject_HEAD_INIT(&PyType_Type)
3964         0,                                      /* ob_size */
3965         "basestring",                           /* tp_name */
3966         0,                                      /* tp_basicsize */
3967         0,                                      /* tp_itemsize */
3968         0,                                      /* tp_dealloc */
3969         0,                                      /* tp_print */
3970         0,                                      /* tp_getattr */
3971         0,                                      /* tp_setattr */
3972         0,                                      /* tp_compare */
3973         0,                                      /* tp_repr */
3974         0,                                      /* tp_as_number */
3975         0,                                      /* tp_as_sequence */
3976         0,                                      /* tp_as_mapping */
3977         0,                                      /* tp_hash */
3978         0,                                      /* tp_call */
3979         0,                                      /* tp_str */
3980         0,                                      /* tp_getattro */
3981         0,                                      /* tp_setattro */
3982         0,                                      /* tp_as_buffer */
3983         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3984         basestring_doc,                         /* tp_doc */
3985         0,                                      /* tp_traverse */
3986         0,                                      /* tp_clear */
3987         0,                                      /* tp_richcompare */
3988         0,                                      /* tp_weaklistoffset */
3989         0,                                      /* tp_iter */
3990         0,                                      /* tp_iternext */
3991         0,                                      /* tp_methods */
3992         0,                                      /* tp_members */
3993         0,                                      /* tp_getset */
3994         &PyBaseObject_Type,                     /* tp_base */
3995         0,                                      /* tp_dict */
3996         0,                                      /* tp_descr_get */
3997         0,                                      /* tp_descr_set */
3998         0,                                      /* tp_dictoffset */
3999         0,                                      /* tp_init */
4000         0,                                      /* tp_alloc */
4001         basestring_new,                         /* tp_new */
4002         0,                                      /* tp_free */
4003 };
4004
/* Docstring for the str type; used as tp_doc of PyString_Type below. */
4005 PyDoc_STRVAR(string_doc,
4006 "str(object) -> string\n\
4007 \n\
4008 Return a nice string representation of the object.\n\
4009 If the argument is a string, the return value is the same object.");
4010
/* Type object for str.  Instances are variable-sized: the basic size is
   sizeof(PyStringObject) and each item is one char.  The type derives from
   PyBaseString_Type, may itself be subclassed (Py_TPFLAGS_BASETYPE), is
   created through string_new and gets its methods from string_methods.
   Number, sequence, mapping and buffer behaviour come from the
   string_as_* tables. */
4011 PyTypeObject PyString_Type = {
4012         PyObject_HEAD_INIT(&PyType_Type)
4013         0,                                      /* ob_size */
4014         "str",                                  /* tp_name */
4015         sizeof(PyStringObject),                 /* tp_basicsize */
4016         sizeof(char),                           /* tp_itemsize */
4017         string_dealloc,                         /* tp_dealloc */
4018         (printfunc)string_print,                /* tp_print */
4019         0,                                      /* tp_getattr */
4020         0,                                      /* tp_setattr */
4021         0,                                      /* tp_compare */
4022         string_repr,                            /* tp_repr */
4023         &string_as_number,                      /* tp_as_number */
4024         &string_as_sequence,                    /* tp_as_sequence */
4025         &string_as_mapping,                     /* tp_as_mapping */
4026         (hashfunc)string_hash,                  /* tp_hash */
4027         0,                                      /* tp_call */
4028         string_str,                             /* tp_str */
4029         PyObject_GenericGetAttr,                /* tp_getattro */
4030         0,                                      /* tp_setattro */
4031         &string_as_buffer,                      /* tp_as_buffer */
4032         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4033                 Py_TPFLAGS_BASETYPE,            /* tp_flags */
4034         string_doc,                             /* tp_doc */
4035         0,                                      /* tp_traverse */
4036         0,                                      /* tp_clear */
4037         (richcmpfunc)string_richcompare,        /* tp_richcompare */
4038         0,                                      /* tp_weaklistoffset */
4039         0,                                      /* tp_iter */
4040         0,                                      /* tp_iternext */
4041         string_methods,                         /* tp_methods */
4042         0,                                      /* tp_members */
4043         0,                                      /* tp_getset */
4044         &PyBaseString_Type,                     /* tp_base */
4045         0,                                      /* tp_dict */
4046         0,                                      /* tp_descr_get */
4047         0,                                      /* tp_descr_set */
4048         0,                                      /* tp_dictoffset */
4049         0,                                      /* tp_init */
4050         0,                                      /* tp_alloc */
4051         string_new,                             /* tp_new */
4052         PyObject_Del,                           /* tp_free */
4053 };
4054
4055 void
4056 PyString_Concat(register PyObject **pv, register PyObject *w)
4057 {
4058         register PyObject *v;
4059         if (*pv == NULL)
4060                 return;
4061         if (w == NULL || !PyString_Check(*pv)) {
4062                 Py_DECREF(*pv);
4063                 *pv = NULL;
4064                 return;
4065         }
4066         v = string_concat((PyStringObject *) *pv, w);
4067         Py_DECREF(*pv);
4068         *pv = v;
4069 }
4070
4071 void
4072 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
4073 {
4074         PyString_Concat(pv, w);
4075         Py_XDECREF(w);
4076 }
4077
4078
4079 /* The following function breaks the notion that strings are immutable:
4080    it changes the size of a string.  We get away with this only if there
4081    is only one module referencing the object.  You can also think of it
4082    as creating a new string object and destroying the old one, only
4083    more efficiently.  In any case, don't use this if the string may
4084    already be known to some other part of the code...
4085    Note that if there's not enough memory to resize the string, the original
4086    string object at *pv is deallocated, *pv is set to NULL, an "out of
4087    memory" exception is set, and -1 is returned.  Else (on success) 0 is
4088    returned, and the value in *pv may or may not be the same as on input.
4089    As always, an extra byte is allocated for a trailing \0 byte (newsize
4090    does *not* include that), and a trailing \0 byte is stored.
4091 */
4092
4093 int
4094 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
4095 {
4096         register PyObject *v;
4097         register PyStringObject *sv;
4098         v = *pv;
4099         if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4100             PyString_CHECK_INTERNED(v)) {
4101                 *pv = 0;
4102                 Py_DECREF(v);
4103                 PyErr_BadInternalCall();
4104                 return -1;
4105         }
4106         /* XXX UNREF/NEWREF interface should be more symmetrical */
4107         _Py_DEC_REFTOTAL;
4108         _Py_ForgetReference(v);
4109         *pv = (PyObject *)
4110                 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
4111         if (*pv == NULL) {
4112                 PyObject_Del(v);
4113                 PyErr_NoMemory();
4114                 return -1;
4115         }
4116         _Py_NewReference(*pv);
4117         sv = (PyStringObject *) *pv;
4118         sv->ob_size = newsize;
4119         sv->ob_sval[newsize] = '\0';
4120         sv->ob_shash = -1;      /* invalidate cached hash value */
4121         return 0;
4122 }
4123
4124 /* Helpers for formatstring */
4125
4126 Py_LOCAL_INLINE(PyObject *)
4127 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4128 {
4129         Py_ssize_t argidx = *p_argidx;
4130         if (argidx < arglen) {
4131                 (*p_argidx)++;
4132                 if (arglen < 0)
4133                         return args;
4134                 else
4135                         return PyTuple_GetItem(args, argidx);
4136         }
4137         PyErr_SetString(PyExc_TypeError,
4138                         "not enough arguments for format string");
4139         return NULL;
4140 }
4141
4142 /* Format codes
4143  * F_LJUST      '-'
4144  * F_SIGN       '+'
4145  * F_BLANK      ' '
4146  * F_ALT        '#'
4147  * F_ZERO       '0'
4148  */
4149 #define F_LJUST (1<<0)
4150 #define F_SIGN  (1<<1)
4151 #define F_BLANK (1<<2)
4152 #define F_ALT   (1<<3)
4153 #define F_ZERO  (1<<4)
4154
4155 Py_LOCAL_INLINE(int)
4156 formatfloat(char *buf, size_t buflen, int flags,
4157             int prec, int type, PyObject *v)
4158 {
4159         /* fmt = '%#.' + `prec` + `type`
4160            worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4161         char fmt[20];
4162         double x;
4163         x = PyFloat_AsDouble(v);
4164         if (x == -1.0 && PyErr_Occurred()) {
4165                 PyErr_SetString(PyExc_TypeError, "float argument required");
4166                 return -1;
4167         }
4168         if (prec < 0)
4169                 prec = 6;
4170         if (type == 'f' && fabs(x)/1e25 >= 1e25)
4171                 type = 'g';
4172         /* Worst case length calc to ensure no buffer overrun:
4173
4174            'g' formats:
4175              fmt = %#.<prec>g
4176              buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4177                 for any double rep.)
4178              len = 1 + prec + 1 + 2 + 5 = 9 + prec
4179
4180            'f' formats:
4181              buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4182              len = 1 + 50 + 1 + prec = 52 + prec
4183
4184            If prec=0 the effective precision is 1 (the leading digit is
4185            always given), therefore increase the length by one.
4186
4187         */
4188         if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4189             (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4190                 PyErr_SetString(PyExc_OverflowError,
4191                         "formatted float is too long (precision too large?)");
4192                 return -1;
4193         }
4194         PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4195                       (flags&F_ALT) ? "#" : "",
4196                       prec, type);
4197         PyOS_ascii_formatd(buf, buflen, fmt, x);
4198         return (int)strlen(buf);
4199 }
4200
4201 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4202  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
4203  * Python's regular ints.
4204  * Return value:  a new PyString*, or NULL if error.
4205  *  .  *pbuf is set to point into it,
4206  *     *plen set to the # of chars following that.
4207  *     Caller must decref it when done using pbuf.
4208  *     The string starting at *pbuf is of the form
4209  *         "-"? ("0x" | "0X")? digit+
4210  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
4211  *         set in flags.  The case of hex digits will be correct,
4212  *     There will be at least prec digits, zero-filled on the left if
4213  *         necessary to get that many.
4214  * val          object to be converted
4215  * flags        bitmask of format flags; only F_ALT is looked at
4216  * prec         minimum number of digits; 0-fill on left if needed
4217  * type         a character in [duoxX]; u acts the same as d
4218  *
4219  * CAUTION:  o, x and X conversions on regular ints can never
4220  * produce a '-' sign, but can for Python's unbounded ints.
4221  */
4222 PyObject*
4223 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4224                      char **pbuf, int *plen)
4225 {
4226         PyObject *result = NULL;
4227         char *buf;
4228         Py_ssize_t i;
4229         int sign;       /* 1 if '-', else 0 */
4230         int len;        /* number of characters */
4231         Py_ssize_t llen;
4232         int numdigits;  /* len == numnondigits + numdigits */
4233         int numnondigits = 0;
4234
4235         switch (type) {
4236         case 'd':
4237         case 'u':
4238                 result = val->ob_type->tp_str(val);
4239                 break;
4240         case 'o':
4241                 result = val->ob_type->tp_as_number->nb_oct(val);
4242                 break;
4243         case 'x':
4244         case 'X':
4245                 numnondigits = 2;
4246                 result = val->ob_type->tp_as_number->nb_hex(val);
4247                 break;
4248         default:
4249                 assert(!"'type' not in [duoxX]");
4250         }
4251         if (!result)
4252                 return NULL;
4253
4254         /* To modify the string in-place, there can only be one reference. */
4255         if (result->ob_refcnt != 1) {
4256                 PyErr_BadInternalCall();
4257                 return NULL;
4258         }
4259         buf = PyString_AsString(result);
4260         llen = PyString_Size(result);
4261         if (llen > PY_SSIZE_T_MAX) {
4262                 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4263                 return NULL;
4264         }
4265         len = (int)llen;
4266         if (buf[len-1] == 'L') {
4267                 --len;
4268                 buf[len] = '\0';
4269         }
4270         sign = buf[0] == '-';
4271         numnondigits += sign;
4272         numdigits = len - numnondigits;
4273         assert(numdigits > 0);
4274
4275         /* Get rid of base marker unless F_ALT */
4276         if ((flags & F_ALT) == 0) {
4277                 /* Need to skip 0x, 0X or 0. */
4278                 int skipped = 0;
4279                 switch (type) {
4280                 case 'o':
4281                         assert(buf[sign] == '0');
4282                         /* If 0 is only digit, leave it alone. */
4283                         if (numdigits > 1) {
4284                                 skipped = 1;
4285                                 --numdigits;
4286                         }
4287                         break;
4288                 case 'x':
4289                 case 'X':
4290                         assert(buf[sign] == '0');
4291                         assert(buf[sign + 1] == 'x');
4292                         skipped = 2;
4293                         numnondigits -= 2;
4294                         break;
4295                 }
4296                 if (skipped) {
4297                         buf += skipped;
4298                         len -= skipped;
4299                         if (sign)
4300                                 buf[0] = '-';
4301                 }
4302                 assert(len == numnondigits + numdigits);
4303                 assert(numdigits > 0);
4304         }
4305
4306         /* Fill with leading zeroes to meet minimum width. */
4307         if (prec > numdigits) {
4308                 PyObject *r1 = PyString_FromStringAndSize(NULL,
4309                                         numnondigits + prec);
4310                 char *b1;
4311                 if (!r1) {
4312                         Py_DECREF(result);
4313                         return NULL;
4314                 }
4315                 b1 = PyString_AS_STRING(r1);
4316                 for (i = 0; i < numnondigits; ++i)
4317                         *b1++ = *buf++;
4318                 for (i = 0; i < prec - numdigits; i++)
4319                         *b1++ = '0';
4320                 for (i = 0; i < numdigits; i++)
4321                         *b1++ = *buf++;
4322                 *b1 = '\0';
4323                 Py_DECREF(result);
4324                 result = r1;
4325                 buf = PyString_AS_STRING(result);
4326                 len = numnondigits + prec;
4327         }
4328
4329         /* Fix up case for hex conversions. */
4330         if (type == 'X') {
4331                 /* Need to convert all lower case letters to upper case.
4332                    and need to convert 0x to 0X (and -0x to -0X). */
4333                 for (i = 0; i < len; i++)
4334                         if (buf[i] >= 'a' && buf[i] <= 'x')
4335                                 buf[i] -= 'a'-'A';
4336         }
4337         *pbuf = buf;
4338         *plen = len;
4339         return result;
4340 }
4341
4342 Py_LOCAL_INLINE(int)
4343 formatint(char *buf, size_t buflen, int flags,
4344           int prec, int type, PyObject *v)
4345 {
4346         /* fmt = '%#.' + `prec` + 'l' + `type`
4347            worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4348            + 1 + 1 = 24 */
4349         char fmt[64];   /* plenty big enough! */
4350         char *sign;
4351         long x;
4352
4353         x = PyInt_AsLong(v);
4354         if (x == -1 && PyErr_Occurred()) {
4355                 PyErr_SetString(PyExc_TypeError, "int argument required");
4356                 return -1;
4357         }
4358         if (x < 0 && type == 'u') {
4359                 type = 'd';
4360         }
4361         if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4362                 sign = "-";
4363         else
4364                 sign = "";
4365         if (prec < 0)
4366                 prec = 1;
4367
4368         if ((flags & F_ALT) &&
4369             (type == 'x' || type == 'X')) {
4370                 /* When converting under %#x or %#X, there are a number
4371                  * of issues that cause pain:
4372                  * - when 0 is being converted, the C standard leaves off
4373                  *   the '0x' or '0X', which is inconsistent with other
4374                  *   %#x/%#X conversions and inconsistent with Python's
4375                  *   hex() function
4376                  * - there are platforms that violate the standard and
4377                  *   convert 0 with the '0x' or '0X'
4378                  *   (Metrowerks, Compaq Tru64)
4379                  * - there are platforms that give '0x' when converting
4380                  *   under %#X, but convert 0 in accordance with the
4381                  *   standard (OS/2 EMX)
4382                  *
4383                  * We can achieve the desired consistency by inserting our
4384                  * own '0x' or '0X' prefix, and substituting %x/%X in place
4385                  * of %#x/%#X.
4386                  *
4387                  * Note that this is the same approach as used in
4388                  * formatint() in unicodeobject.c
4389                  */
4390                 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4391                               sign, type, prec, type);
4392         }
4393         else {
4394                 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4395                               sign, (flags&F_ALT) ? "#" : "",
4396                               prec, type);
4397         }
4398
4399         /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4400          * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4401          */
4402         if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4403                 PyErr_SetString(PyExc_OverflowError,
4404                     "formatted integer is too long (precision too large?)");
4405                 return -1;
4406         }
4407         if (sign[0])
4408                 PyOS_snprintf(buf, buflen, fmt, -x);
4409         else
4410                 PyOS_snprintf(buf, buflen, fmt, x);
4411         return (int)strlen(buf);
4412 }
4413
4414 Py_LOCAL_INLINE(int)
4415 formatchar(char *buf, size_t buflen, PyObject *v)
4416 {
4417         /* presume that the buffer is at least 2 characters long */
4418         if (PyString_Check(v)) {
4419                 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4420                         return -1;
4421         }
4422         else {
4423                 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4424                         return -1;
4425         }
4426         buf[1] = '\0';
4427         return 1;
4428 }
4429
4430 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4431
4432    FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4433    chars are formatted. XXX This is a magic number. Each formatting
4434    routine does bounds checking to ensure no overflow, but a better
4435    solution may be to malloc a buffer of appropriate size for each
4436    format. For now, the current solution is sufficient.
4437 */
4438 #define FORMATBUFLEN (size_t)120
4439
4440 PyObject *
4441 PyString_Format(PyObject *format, PyObject *args)
4442 {
4443         char *fmt, *res;
4444         Py_ssize_t arglen, argidx;
4445         Py_ssize_t reslen, rescnt, fmtcnt;
4446         int args_owned = 0;
4447         PyObject *result, *orig_args;
4448 #ifdef Py_USING_UNICODE
4449         PyObject *v, *w;
4450 #endif
4451         PyObject *dict = NULL;
4452         if (format == NULL || !PyString_Check(format) || args == NULL) {
4453                 PyErr_BadInternalCall();
4454                 return NULL;
4455         }
4456         orig_args = args;
4457         fmt = PyString_AS_STRING(format);
4458         fmtcnt = PyString_GET_SIZE(format);
4459         reslen = rescnt = fmtcnt + 100;
4460         result = PyString_FromStringAndSize((char *)NULL, reslen);
4461         if (result == NULL)
4462                 return NULL;
4463         res = PyString_AsString(result);
4464         if (PyTuple_Check(args)) {
4465                 arglen = PyTuple_GET_SIZE(args);
4466                 argidx = 0;
4467         }
4468         else {
4469                 arglen = -1;
4470                 argidx = -2;
4471         }
4472         if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4473             !PyObject_TypeCheck(args, &PyBaseString_Type))
4474                 dict = args;
4475         while (--fmtcnt >= 0) {
4476                 if (*fmt != '%') {
4477                         if (--rescnt < 0) {
4478                                 rescnt = fmtcnt + 100;
4479                                 reslen += rescnt;
4480                                 if (_PyString_Resize(&result, reslen) < 0)
4481                                         return NULL;
4482                                 res = PyString_AS_STRING(result)
4483                                         + reslen - rescnt;
4484                                 --rescnt;
4485                         }
4486                         *res++ = *fmt++;
4487                 }
4488                 else {
4489                         /* Got a format specifier */
4490                         int flags = 0;
4491                         Py_ssize_t width = -1;
4492                         int prec = -1;
4493                         int c = '\0';
4494                         int fill;
4495                         PyObject *v = NULL;
4496                         PyObject *temp = NULL;
4497                         char *pbuf;
4498                         int sign;
4499                         Py_ssize_t len;
4500                         char formatbuf[FORMATBUFLEN];
4501                              /* For format{float,int,char}() */
4502 #ifdef Py_USING_UNICODE
4503                         char *fmt_start = fmt;
4504                         Py_ssize_t argidx_start = argidx;
4505 #endif
4506
4507                         fmt++;
4508                         if (*fmt == '(') {
4509                                 char *keystart;
4510                                 Py_ssize_t keylen;
4511                                 PyObject *key;
4512                                 int pcount = 1;
4513
4514                                 if (dict == NULL) {
4515                                         PyErr_SetString(PyExc_TypeError,
4516                                                  "format requires a mapping");
4517                                         goto error;
4518                                 }
4519                                 ++fmt;
4520                                 --fmtcnt;
4521                                 keystart = fmt;
4522                                 /* Skip over balanced parentheses */
4523                                 while (pcount > 0 && --fmtcnt >= 0) {
4524                                         if (*fmt == ')')
4525                                                 --pcount;
4526                                         else if (*fmt == '(')
4527                                                 ++pcount;
4528                                         fmt++;
4529                                 }
4530                                 keylen = fmt - keystart - 1;
4531                                 if (fmtcnt < 0 || pcount > 0) {
4532                                         PyErr_SetString(PyExc_ValueError,
4533                                                    "incomplete format key");
4534                                         goto error;
4535                                 }
4536                                 key = PyString_FromStringAndSize(keystart,
4537                                                                  keylen);
4538                                 if (key == NULL)
4539                                         goto error;
4540                                 if (args_owned) {
4541                                         Py_DECREF(args);
4542                                         args_owned = 0;
4543                                 }
4544                                 args = PyObject_GetItem(dict, key);
4545                                 Py_DECREF(key);
4546                                 if (args == NULL) {
4547                                         goto error;
4548                                 }
4549                                 args_owned = 1;
4550                                 arglen = -1;
4551                                 argidx = -2;
4552                         }
4553                         while (--fmtcnt >= 0) {
4554                                 switch (c = *fmt++) {
4555                                 case '-': flags |= F_LJUST; continue;
4556                                 case '+': flags |= F_SIGN; continue;
4557                                 case ' ': flags |= F_BLANK; continue;
4558                                 case '#': flags |= F_ALT; continue;
4559                                 case '0': flags |= F_ZERO; continue;
4560                                 }
4561                                 break;
4562                         }
4563                         if (c == '*') {
4564                                 v = getnextarg(args, arglen, &argidx);
4565                                 if (v == NULL)
4566                                         goto error;
4567                                 if (!PyInt_Check(v)) {
4568                                         PyErr_SetString(PyExc_TypeError,
4569                                                         "* wants int");
4570                                         goto error;
4571                                 }
4572                                 width = PyInt_AsLong(v);
4573                                 if (width < 0) {
4574                                         flags |= F_LJUST;
4575                                         width = -width;
4576                                 }
4577                                 if (--fmtcnt >= 0)
4578                                         c = *fmt++;
4579                         }
4580                         else if (c >= 0 && isdigit(c)) {
4581                                 width = c - '0';
4582                                 while (--fmtcnt >= 0) {
4583                                         c = Py_CHARMASK(*fmt++);
4584                                         if (!isdigit(c))
4585                                                 break;
4586                                         if ((width*10) / 10 != width) {
4587                                                 PyErr_SetString(
4588                                                         PyExc_ValueError,
4589                                                         "width too big");
4590                                                 goto error;
4591                                         }
4592                                         width = width*10 + (c - '0');
4593                                 }
4594                         }
4595                         if (c == '.') {
4596                                 prec = 0;
4597                                 if (--fmtcnt >= 0)
4598                                         c = *fmt++;
4599                                 if (c == '*') {
4600                                         v = getnextarg(args, arglen, &argidx);
4601                                         if (v == NULL)
4602                                                 goto error;
4603                                         if (!PyInt_Check(v)) {
4604                                                 PyErr_SetString(
4605                                                         PyExc_TypeError,
4606                                                         "* wants int");
4607                                                 goto error;
4608                                         }
4609                                         prec = PyInt_AsLong(v);
4610                                         if (prec < 0)
4611                                                 prec = 0;
4612                                         if (--fmtcnt >= 0)
4613                                                 c = *fmt++;
4614                                 }
4615                                 else if (c >= 0 && isdigit(c)) {
4616                                         prec = c - '0';
4617                                         while (--fmtcnt >= 0) {
4618                                                 c = Py_CHARMASK(*fmt++);
4619                                                 if (!isdigit(c))
4620                                                         break;
4621                                                 if ((prec*10) / 10 != prec) {
4622                                                         PyErr_SetString(
4623                                                             PyExc_ValueError,
4624                                                             "prec too big");
4625                                                         goto error;
4626                                                 }
4627                                                 prec = prec*10 + (c - '0');
4628                                         }
4629                                 }
4630                         } /* prec */
4631                         if (fmtcnt >= 0) {
4632                                 if (c == 'h' || c == 'l' || c == 'L') {
4633                                         if (--fmtcnt >= 0)
4634                                                 c = *fmt++;
4635                                 }
4636                         }
4637                         if (fmtcnt < 0) {
4638                                 PyErr_SetString(PyExc_ValueError,
4639                                                 "incomplete format");
4640                                 goto error;
4641                         }
4642                         if (c != '%') {
4643                                 v = getnextarg(args, arglen, &argidx);
4644                                 if (v == NULL)
4645                                         goto error;
4646                         }
4647                         sign = 0;
4648                         fill = ' ';
4649                         switch (c) {
4650                         case '%':
4651                                 pbuf = "%";
4652                                 len = 1;
4653                                 break;
4654                         case 's':
4655 #ifdef Py_USING_UNICODE
4656                                 if (PyUnicode_Check(v)) {
4657                                         fmt = fmt_start;
4658                                         argidx = argidx_start;
4659                                         goto unicode;
4660                                 }
4661 #endif
4662                                 temp = _PyObject_Str(v);
4663 #ifdef Py_USING_UNICODE
4664                                 if (temp != NULL && PyUnicode_Check(temp)) {
4665                                         Py_DECREF(temp);
4666                                         fmt = fmt_start;
4667                                         argidx = argidx_start;
4668                                         goto unicode;
4669                                 }
4670 #endif
4671                                 /* Fall through */
4672                         case 'r':
4673                                 if (c == 'r')
4674                                         temp = PyObject_Repr(v);
4675                                 if (temp == NULL)
4676                                         goto error;
4677                                 if (!PyString_Check(temp)) {
4678                                         PyErr_SetString(PyExc_TypeError,
4679                                           "%s argument has non-string str()");
4680                                         Py_DECREF(temp);
4681                                         goto error;
4682                                 }
4683                                 pbuf = PyString_AS_STRING(temp);
4684                                 len = PyString_GET_SIZE(temp);
4685                                 if (prec >= 0 && len > prec)
4686                                         len = prec;
4687                                 break;
4688                         case 'i':
4689                         case 'd':
4690                         case 'u':
4691                         case 'o':
4692                         case 'x':
4693                         case 'X':
4694                                 if (c == 'i')
4695                                         c = 'd';
4696                                 if (PyLong_Check(v)) {
4697                                         int ilen;
4698                                         temp = _PyString_FormatLong(v, flags,
4699                                                 prec, c, &pbuf, &ilen);
4700                                         len = ilen;
4701                                         if (!temp)
4702                                                 goto error;
4703                                         sign = 1;
4704                                 }
4705                                 else {
4706                                         pbuf = formatbuf;
4707                                         len = formatint(pbuf,
4708                                                         sizeof(formatbuf),
4709                                                         flags, prec, c, v);
4710                                         if (len < 0)
4711                                                 goto error;
4712                                         sign = 1;
4713                                 }
4714                                 if (flags & F_ZERO)
4715                                         fill = '0';
4716                                 break;
4717                         case 'e':
4718                         case 'E':
4719                         case 'f':
4720                         case 'F':
4721                         case 'g':
4722                         case 'G':
4723                                 if (c == 'F')
4724                                         c = 'f';
4725                                 pbuf = formatbuf;
4726                                 len = formatfloat(pbuf, sizeof(formatbuf),
4727                                                   flags, prec, c, v);
4728                                 if (len < 0)
4729                                         goto error;
4730                                 sign = 1;
4731                                 if (flags & F_ZERO)
4732                                         fill = '0';
4733                                 break;
4734                         case 'c':
4735 #ifdef Py_USING_UNICODE
4736                                 if (PyUnicode_Check(v)) {
4737                                         fmt = fmt_start;
4738                                         argidx = argidx_start;
4739                                         goto unicode;
4740                                 }
4741 #endif
4742                                 pbuf = formatbuf;
4743                                 len = formatchar(pbuf, sizeof(formatbuf), v);
4744                                 if (len < 0)
4745                                         goto error;
4746                                 break;
4747                         default:
4748                                 PyErr_Format(PyExc_ValueError,
4749                                   "unsupported format character '%c' (0x%x) "
4750                                   "at index %i",
4751                                   c, c,
4752                                   (int)(fmt - 1 - PyString_AsString(format)));
4753                                 goto error;
4754                         }
4755                         if (sign) {
4756                                 if (*pbuf == '-' || *pbuf == '+') {
4757                                         sign = *pbuf++;
4758                                         len--;
4759                                 }
4760                                 else if (flags & F_SIGN)
4761                                         sign = '+';
4762                                 else if (flags & F_BLANK)
4763                                         sign = ' ';
4764                                 else
4765                                         sign = 0;
4766                         }
4767                         if (width < len)
4768                                 width = len;
4769                         if (rescnt - (sign != 0) < width) {
4770                                 reslen -= rescnt;
4771                                 rescnt = width + fmtcnt + 100;
4772                                 reslen += rescnt;
4773                                 if (reslen < 0) {
4774                                         Py_DECREF(result);
4775                                         return PyErr_NoMemory();
4776                                 }
4777                                 if (_PyString_Resize(&result, reslen) < 0)
4778                                         return NULL;
4779                                 res = PyString_AS_STRING(result)
4780                                         + reslen - rescnt;
4781                         }
4782                         if (sign) {
4783                                 if (fill != ' ')
4784                                         *res++ = sign;
4785                                 rescnt--;
4786                                 if (width > len)
4787                                         width--;
4788                         }
4789                         if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4790                                 assert(pbuf[0] == '0');
4791                                 assert(pbuf[1] == c);
4792                                 if (fill != ' ') {
4793                                         *res++ = *pbuf++;
4794                                         *res++ = *pbuf++;
4795                                 }
4796                                 rescnt -= 2;
4797                                 width -= 2;
4798                                 if (width < 0)
4799                                         width = 0;
4800                                 len -= 2;
4801                         }
4802                         if (width > len && !(flags & F_LJUST)) {
4803                                 do {
4804                                         --rescnt;
4805                                         *res++ = fill;
4806                                 } while (--width > len);
4807                         }
4808                         if (fill == ' ') {
4809                                 if (sign)
4810                                         *res++ = sign;
4811                                 if ((flags & F_ALT) &&
4812                                     (c == 'x' || c == 'X')) {
4813                                         assert(pbuf[0] == '0');
4814                                         assert(pbuf[1] == c);
4815                                         *res++ = *pbuf++;
4816                                         *res++ = *pbuf++;
4817                                 }
4818                         }
4819                         Py_MEMCPY(res, pbuf, len);
4820                         res += len;
4821                         rescnt -= len;
4822                         while (--width >= len) {
4823                                 --rescnt;
4824                                 *res++ = ' ';
4825                         }
4826                         if (dict && (argidx < arglen) && c != '%') {
4827                                 PyErr_SetString(PyExc_TypeError,
4828                                            "not all arguments converted during string formatting");
4829                                 goto error;
4830                         }
4831                         Py_XDECREF(temp);
4832                 } /* '%' */
4833         } /* until end */
4834         if (argidx < arglen && !dict) {
4835                 PyErr_SetString(PyExc_TypeError,
4836                                 "not all arguments converted during string formatting");
4837                 goto error;
4838         }
4839         if (args_owned) {
4840                 Py_DECREF(args);
4841         }
4842         _PyString_Resize(&result, reslen - rescnt);
4843         return result;
4844
4845 #ifdef Py_USING_UNICODE
4846  unicode:
4847         if (args_owned) {
4848                 Py_DECREF(args);
4849                 args_owned = 0;
4850         }
4851         /* Fiddle args right (remove the first argidx arguments) */
4852         if (PyTuple_Check(orig_args) && argidx > 0) {
4853                 PyObject *v;
4854                 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4855                 v = PyTuple_New(n);
4856                 if (v == NULL)
4857                         goto error;
4858                 while (--n >= 0) {
4859                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4860                         Py_INCREF(w);
4861                         PyTuple_SET_ITEM(v, n, w);
4862                 }
4863                 args = v;
4864         } else {
4865                 Py_INCREF(orig_args);
4866                 args = orig_args;
4867         }
4868         args_owned = 1;
4869         /* Take what we have of the result and let the Unicode formatting
4870            function format the rest of the input. */
4871         rescnt = res - PyString_AS_STRING(result);
4872         if (_PyString_Resize(&result, rescnt))
4873                 goto error;
4874         fmtcnt = PyString_GET_SIZE(format) - \
4875                  (fmt - PyString_AS_STRING(format));
4876         format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4877         if (format == NULL)
4878                 goto error;
4879         v = PyUnicode_Format(format, args);
4880         Py_DECREF(format);
4881         if (v == NULL)
4882                 goto error;
4883         /* Paste what we have (result) to what the Unicode formatting
4884            function returned (v) and return the result (or error) */
4885         w = PyUnicode_Concat(result, v);
4886         Py_DECREF(result);
4887         Py_DECREF(v);
4888         Py_DECREF(args);
4889         return w;
4890 #endif /* Py_USING_UNICODE */
4891
4892  error:
4893         Py_DECREF(result);
4894         if (args_owned) {
4895                 Py_DECREF(args);
4896         }
4897         return NULL;
4898 }
4899
4900 void
4901 PyString_InternInPlace(PyObject **p)
4902 {
4903         register PyStringObject *s = (PyStringObject *)(*p);
4904         PyObject *t;
4905         if (s == NULL || !PyString_Check(s))
4906                 Py_FatalError("PyString_InternInPlace: strings only please!");
4907         /* If it's a string subclass, we don't really know what putting
4908            it in the interned dict might do. */
4909         if (!PyString_CheckExact(s))
4910                 return;
4911         if (PyString_CHECK_INTERNED(s))
4912                 return;
4913         if (interned == NULL) {
4914                 interned = PyDict_New();
4915                 if (interned == NULL) {
4916                         PyErr_Clear(); /* Don't leave an exception */
4917                         return;
4918                 }
4919         }
4920         t = PyDict_GetItem(interned, (PyObject *)s);
4921         if (t) {
4922                 Py_INCREF(t);
4923                 Py_DECREF(*p);
4924                 *p = t;
4925                 return;
4926         }
4927
4928         if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4929                 PyErr_Clear();
4930                 return;
4931         }
4932         /* The two references in interned are not counted by refcnt.
4933            The string deallocator will take care of this */
4934         s->ob_refcnt -= 2;
4935         PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4936 }
4937
4938 void
4939 PyString_InternImmortal(PyObject **p)
4940 {
4941         PyString_InternInPlace(p);
4942         if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4943                 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4944                 Py_INCREF(*p);
4945         }
4946 }
4947
4948
4949 PyObject *
4950 PyString_InternFromString(const char *cp)
4951 {
4952         PyObject *s = PyString_FromString(cp);
4953         if (s == NULL)
4954                 return NULL;
4955         PyString_InternInPlace(&s);
4956         return s;
4957 }
4958
4959 void
4960 PyString_Fini(void)
4961 {
4962         int i;
4963         for (i = 0; i < UCHAR_MAX + 1; i++) {
4964                 Py_XDECREF(characters[i]);
4965                 characters[i] = NULL;
4966         }
4967         Py_XDECREF(nullstring);
4968         nullstring = NULL;
4969 }
4970
4971 void _Py_ReleaseInternedStrings(void)
4972 {
4973         PyObject *keys;
4974         PyStringObject *s;
4975         Py_ssize_t i, n;
4976
4977         if (interned == NULL || !PyDict_Check(interned))
4978                 return;
4979         keys = PyDict_Keys(interned);
4980         if (keys == NULL || !PyList_Check(keys)) {
4981                 PyErr_Clear();
4982                 return;
4983         }
4984
4985         /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4986            detector, interned strings are not forcibly deallocated; rather, we
4987            give them their stolen references back, and then clear and DECREF
4988            the interned dict. */
4989
4990         fprintf(stderr, "releasing interned strings\n");
4991         n = PyList_GET_SIZE(keys);
4992         for (i = 0; i < n; i++) {
4993                 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4994                 switch (s->ob_sstate) {
4995                 case SSTATE_NOT_INTERNED:
4996                         /* XXX Shouldn't happen */
4997                         break;
4998                 case SSTATE_INTERNED_IMMORTAL:
4999                         s->ob_refcnt += 1;
5000                         break;
5001                 case SSTATE_INTERNED_MORTAL:
5002                         s->ob_refcnt += 2;
5003                         break;
5004                 default:
5005                         Py_FatalError("Inconsistent interned string state.");
5006                 }
5007                 s->ob_sstate = SSTATE_NOT_INTERNED;
5008         }
5009         Py_DECREF(keys);
5010         PyDict_Clear(interned);
5011         Py_DECREF(interned);
5012         interned = NULL;
5013 }