Objects/stringobject.c

   1 /* String (str/bytes) object implementation */
   2
   3 #define PY_SSIZE_T_CLEAN
   4
   5 #include "Python.h"
   6 #include <ctype.h>
   7
#ifdef COUNT_ALLOCS
/* Allocation statistics: how many requests were answered with the shared
   empty string and with a shared one-character string, respectively. */
int null_strings, one_strings;
#endif

/* Cache of shared one-character strings, indexed by the character's value
   masked with UCHAR_MAX.  A slot stays NULL until the string constructors
   create and store the corresponding string. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string; NULL until the string constructors create it. */
static PyStringObject *nullstring;
  14
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;
  24
  25 /*
  26    For both PyString_FromString() and PyString_FromStringAndSize(), the
  27    parameter `size' denotes number of characters to allocate, not counting any
  28    null terminating character.
  29
  30    For PyString_FromString(), the parameter `str' points to a null-terminated
  31    string containing exactly `size' bytes.
  32
  33    For PyString_FromStringAndSize(), the parameter the parameter `str' is
  34    either NULL or else points to a string containing at least `size' bytes.
  35    For PyString_FromStringAndSize(), the string in the `str' parameter does
  36    not have to be null-terminated.  (Therefore it is safe to construct a
  37    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
  38    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
  39    bytes (setting the last byte to the null terminating character) and you can
  40    fill in the data yourself.  If `str' is non-NULL then the resulting
  41    PyString object must be treated as immutable and you must not fill in nor
  42    alter the data yourself, since the strings may be shared.
  43
  44    The PyObject member `op->ob_size', which denotes the number of "extra
  45    items" in a variable-size object, will contain the number of bytes
  46    allocated for string data, not counting the null terminating character.  It
  47    is therefore equal to the equal to the `size' parameter (for
  48    PyString_FromStringAndSize()) or the length of the string in the `str'
  49    parameter (for PyString_FromString()).
  50 */
  51 PyObject *
  52 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
  53 {
  54         register PyStringObject *op;
  55         if (size < 0) {
  56                 PyErr_SetString(PyExc_SystemError,
  57                     "Negative size passed to PyString_FromStringAndSize");
  58                 return NULL;
  59         }
  60         if (size == 0 && (op = nullstring) != NULL) {
  61 #ifdef COUNT_ALLOCS
  62                 null_strings++;
  63 #endif
  64                 Py_INCREF(op);
  65                 return (PyObject *)op;
  66         }
  67         if (size == 1 && str != NULL &&
  68             (op = characters[*str & UCHAR_MAX]) != NULL)
  69         {
  70 #ifdef COUNT_ALLOCS
  71                 one_strings++;
  72 #endif
  73                 Py_INCREF(op);
  74                 return (PyObject *)op;
  75         }
  76
  77         if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
  78                 PyErr_SetString(PyExc_OverflowError, "string is too large");
  79                 return NULL;
  80         }
  81
  82         /* Inline PyObject_NewVar */
  83         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
  84         if (op == NULL)
  85                 return PyErr_NoMemory();
  86         PyObject_INIT_VAR(op, &PyString_Type, size);
  87         op->ob_shash = -1;
  88         op->ob_sstate = SSTATE_NOT_INTERNED;
  89         if (str != NULL)
  90                 Py_MEMCPY(op->ob_sval, str, size);
  91         op->ob_sval[size] = '\0';
  92         /* share short strings */
  93         if (size == 0) {
  94                 PyObject *t = (PyObject *)op;
  95                 PyString_InternInPlace(&t);
  96                 op = (PyStringObject *)t;
  97                 nullstring = op;
  98                 Py_INCREF(op);
  99         } else if (size == 1 && str != NULL) {
 100                 PyObject *t = (PyObject *)op;
 101                 PyString_InternInPlace(&t);
 102                 op = (PyStringObject *)t;
 103                 characters[*str & UCHAR_MAX] = op;
 104                 Py_INCREF(op);
 105         }
 106         return (PyObject *) op;
 107 }
 108
 109 PyObject *
 110 PyString_FromString(const char *str)
 111 {
 112         register size_t size;
 113         register PyStringObject *op;
 114
 115         assert(str != NULL);
 116         size = strlen(str);
 117         if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
 118                 PyErr_SetString(PyExc_OverflowError,
 119                         "string is too long for a Python string");
 120                 return NULL;
 121         }
 122         if (size == 0 && (op = nullstring) != NULL) {
 123 #ifdef COUNT_ALLOCS
 124                 null_strings++;
 125 #endif
 126                 Py_INCREF(op);
 127                 return (PyObject *)op;
 128         }
 129         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 130 #ifdef COUNT_ALLOCS
 131                 one_strings++;
 132 #endif
 133                 Py_INCREF(op);
 134                 return (PyObject *)op;
 135         }
 136
 137         /* Inline PyObject_NewVar */
 138         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
 139         if (op == NULL)
 140                 return PyErr_NoMemory();
 141         PyObject_INIT_VAR(op, &PyString_Type, size);
 142         op->ob_shash = -1;
 143         op->ob_sstate = SSTATE_NOT_INTERNED;
 144         Py_MEMCPY(op->ob_sval, str, size+1);
 145         /* share short strings */
 146         if (size == 0) {
 147                 PyObject *t = (PyObject *)op;
 148                 PyString_InternInPlace(&t);
 149                 op = (PyStringObject *)t;
 150                 nullstring = op;
 151                 Py_INCREF(op);
 152         } else if (size == 1) {
 153                 PyObject *t = (PyObject *)op;
 154                 PyString_InternInPlace(&t);
 155                 op = (PyStringObject *)t;
 156                 characters[*str & UCHAR_MAX] = op;
 157                 Py_INCREF(op);
 158         }
 159         return (PyObject *) op;
 160 }
 161
 162 PyObject *
 163 PyString_FromFormatV(const char *format, va_list vargs)
 164 {
 165         va_list count;
 166         Py_ssize_t n = 0;
 167         const char* f;
 168         char *s;
 169         PyObject* string;
 170
 171 #ifdef VA_LIST_IS_ARRAY
 172         Py_MEMCPY(count, vargs, sizeof(va_list));
 173 #else
 174 #ifdef  __va_copy
 175         __va_copy(count, vargs);
 176 #else
 177         count = vargs;
 178 #endif
 179 #endif
 180         /* step 1: figure out how large a buffer we need */
 181         for (f = format; *f; f++) {
 182                 if (*f == '%') {
 183                         const char* p = f;
 184                         while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 185                                 ;
 186
 187                         /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
 188                          * they don't affect the amount of space we reserve.
 189                          */
 190                         if ((*f == 'l' || *f == 'z') &&
 191                                         (f[1] == 'd' || f[1] == 'u'))
 192                                 ++f;
 193
 194                         switch (*f) {
 195                         case 'c':
 196                                 (void)va_arg(count, int);
 197                                 /* fall through... */
 198                         case '%':
 199                                 n++;
 200                                 break;
 201                         case 'd': case 'u': case 'i': case 'x':
 202                                 (void) va_arg(count, int);
 203                                 /* 20 bytes is enough to hold a 64-bit
 204                                    integer.  Decimal takes the most space.
 205                                    This isn't enough for octal. */
 206                                 n += 20;
 207                                 break;
 208                         case 's':
 209                                 s = va_arg(count, char*);
 210                                 n += strlen(s);
 211                                 break;
 212                         case 'p':
 213                                 (void) va_arg(count, int);
 214                                 /* maximum 64-bit pointer representation:
 215                                  * 0xffffffffffffffff
 216                                  * so 19 characters is enough.
 217                                  * XXX I count 18 -- what's the extra for?
 218                                  */
 219                                 n += 19;
 220                                 break;
 221                         default:
 222                                 /* if we stumble upon an unknown
 223                                    formatting code, copy the rest of
 224                                    the format string to the output
 225                                    string. (we cannot just skip the
 226                                    code, since there's no way to know
 227                                    what's in the argument list) */
 228                                 n += strlen(p);
 229                                 goto expand;
 230                         }
 231                 } else
 232                         n++;
 233         }
 234  expand:
 235         /* step 2: fill the buffer */
 236         /* Since we've analyzed how much space we need for the worst case,
 237            use sprintf directly instead of the slower PyOS_snprintf. */
 238         string = PyString_FromStringAndSize(NULL, n);
 239         if (!string)
 240                 return NULL;
 241
 242         s = PyString_AsString(string);
 243
 244         for (f = format; *f; f++) {
 245                 if (*f == '%') {
 246                         const char* p = f++;
 247                         Py_ssize_t i;
 248                         int longflag = 0;
 249                         int size_tflag = 0;
 250                         /* parse the width.precision part (we're only
 251                            interested in the precision value, if any) */
 252                         n = 0;
 253                         while (isdigit(Py_CHARMASK(*f)))
 254                                 n = (n*10) + *f++ - '0';
 255                         if (*f == '.') {
 256                                 f++;
 257                                 n = 0;
 258                                 while (isdigit(Py_CHARMASK(*f)))
 259                                         n = (n*10) + *f++ - '0';
 260                         }
 261                         while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 262                                 f++;
 263                         /* handle the long flag, but only for %ld and %lu.
 264                            others can be added when necessary. */
 265                         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
 266                                 longflag = 1;
 267                                 ++f;
 268                         }
 269                         /* handle the size_t flag. */
 270                         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
 271                                 size_tflag = 1;
 272                                 ++f;
 273                         }
 274
 275                         switch (*f) {
 276                         case 'c':
 277                                 *s++ = va_arg(vargs, int);
 278                                 break;
 279                         case 'd':
 280                                 if (longflag)
 281                                         sprintf(s, "%ld", va_arg(vargs, long));
 282                                 else if (size_tflag)
 283                                         sprintf(s, "%" PY_FORMAT_SIZE_T "d",
 284                                                 va_arg(vargs, Py_ssize_t));
 285                                 else
 286                                         sprintf(s, "%d", va_arg(vargs, int));
 287                                 s += strlen(s);
 288                                 break;
 289                         case 'u':
 290                                 if (longflag)
 291                                         sprintf(s, "%lu",
 292                                                 va_arg(vargs, unsigned long));
 293                                 else if (size_tflag)
 294                                         sprintf(s, "%" PY_FORMAT_SIZE_T "u",
 295                                                 va_arg(vargs, size_t));
 296                                 else
 297                                         sprintf(s, "%u",
 298                                                 va_arg(vargs, unsigned int));
 299                                 s += strlen(s);
 300                                 break;
 301                         case 'i':
 302                                 sprintf(s, "%i", va_arg(vargs, int));
 303                                 s += strlen(s);
 304                                 break;
 305                         case 'x':
 306                                 sprintf(s, "%x", va_arg(vargs, int));
 307                                 s += strlen(s);
 308                                 break;
 309                         case 's':
 310                                 p = va_arg(vargs, char*);
 311                                 i = strlen(p);
 312                                 if (n > 0 && i > n)
 313                                         i = n;
 314                                 Py_MEMCPY(s, p, i);
 315                                 s += i;
 316                                 break;
 317                         case 'p':
 318                                 sprintf(s, "%p", va_arg(vargs, void*));
 319                                 /* %p is ill-defined:  ensure leading 0x. */
 320                                 if (s[1] == 'X')
 321                                         s[1] = 'x';
 322                                 else if (s[1] != 'x') {
 323                                         memmove(s+2, s, strlen(s)+1);
 324                                         s[0] = '0';
 325                                         s[1] = 'x';
 326                                 }
 327                                 s += strlen(s);
 328                                 break;
 329                         case '%':
 330                                 *s++ = '%';
 331                                 break;
 332                         default:
 333                                 strcpy(s, p);
 334                                 s += strlen(s);
 335                                 goto end;
 336                         }
 337                 } else
 338                         *s++ = *f;
 339         }
 340
 341  end:
 342         _PyString_Resize(&string, s - PyString_AS_STRING(string));
 343         return string;
 344 }
 345
 346 PyObject *
 347 PyString_FromFormat(const char *format, ...)
 348 {
 349         PyObject* ret;
 350         va_list vargs;
 351
 352 #ifdef HAVE_STDARG_PROTOTYPES
 353         va_start(vargs, format);
 354 #else
 355         va_start(vargs);
 356 #endif
 357         ret = PyString_FromFormatV(format, vargs);
 358         va_end(vargs);
 359         return ret;
 360 }
 361
 362
 363 PyObject *PyString_Decode(const char *s,
 364                           Py_ssize_t size,
 365                           const char *encoding,
 366                           const char *errors)
 367 {
 368     PyObject *v, *str;
 369
 370     str = PyString_FromStringAndSize(s, size);
 371     if (str == NULL)
 372         return NULL;
 373     v = PyString_AsDecodedString(str, encoding, errors);
 374     Py_DECREF(str);
 375     return v;
 376 }
 377
 378 PyObject *PyString_AsDecodedObject(PyObject *str,
 379                                    const char *encoding,
 380                                    const char *errors)
 381 {
 382     PyObject *v;
 383
 384     if (!PyString_Check(str)) {
 385         PyErr_BadArgument();
 386         goto onError;
 387     }
 388
 389     if (encoding == NULL) {
 390 #ifdef Py_USING_UNICODE
 391         encoding = PyUnicode_GetDefaultEncoding();
 392 #else
 393         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 394         goto onError;
 395 #endif
 396     }
 397
 398     /* Decode via the codec registry */
 399     v = PyCodec_Decode(str, encoding, errors);
 400     if (v == NULL)
 401         goto onError;
 402
 403     return v;
 404
 405  onError:
 406     return NULL;
 407 }
 408
 409 PyObject *PyString_AsDecodedString(PyObject *str,
 410                                    const char *encoding,
 411                                    const char *errors)
 412 {
 413     PyObject *v;
 414
 415     v = PyString_AsDecodedObject(str, encoding, errors);
 416     if (v == NULL)
 417         goto onError;
 418
 419 #ifdef Py_USING_UNICODE
 420     /* Convert Unicode to a string using the default encoding */
 421     if (PyUnicode_Check(v)) {
 422         PyObject *temp = v;
 423         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 424         Py_DECREF(temp);
 425         if (v == NULL)
 426             goto onError;
 427     }
 428 #endif
 429     if (!PyString_Check(v)) {
 430         PyErr_Format(PyExc_TypeError,
 431                      "decoder did not return a string object (type=%.400s)",
 432                      Py_TYPE(v)->tp_name);
 433         Py_DECREF(v);
 434         goto onError;
 435     }
 436
 437     return v;
 438
 439  onError:
 440     return NULL;
 441 }
 442
 443 PyObject *PyString_Encode(const char *s,
 444                           Py_ssize_t size,
 445                           const char *encoding,
 446                           const char *errors)
 447 {
 448     PyObject *v, *str;
 449
 450     str = PyString_FromStringAndSize(s, size);
 451     if (str == NULL)
 452         return NULL;
 453     v = PyString_AsEncodedString(str, encoding, errors);
 454     Py_DECREF(str);
 455     return v;
 456 }
 457
 458 PyObject *PyString_AsEncodedObject(PyObject *str,
 459                                    const char *encoding,
 460                                    const char *errors)
 461 {
 462     PyObject *v;
 463
 464     if (!PyString_Check(str)) {
 465         PyErr_BadArgument();
 466         goto onError;
 467     }
 468
 469     if (encoding == NULL) {
 470 #ifdef Py_USING_UNICODE
 471         encoding = PyUnicode_GetDefaultEncoding();
 472 #else
 473         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 474         goto onError;
 475 #endif
 476     }
 477
 478     /* Encode via the codec registry */
 479     v = PyCodec_Encode(str, encoding, errors);
 480     if (v == NULL)
 481         goto onError;
 482
 483     return v;
 484
 485  onError:
 486     return NULL;
 487 }
 488
 489 PyObject *PyString_AsEncodedString(PyObject *str,
 490                                    const char *encoding,
 491                                    const char *errors)
 492 {
 493     PyObject *v;
 494
 495     v = PyString_AsEncodedObject(str, encoding, errors);
 496     if (v == NULL)
 497         goto onError;
 498
 499 #ifdef Py_USING_UNICODE
 500     /* Convert Unicode to a string using the default encoding */
 501     if (PyUnicode_Check(v)) {
 502         PyObject *temp = v;
 503         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 504         Py_DECREF(temp);
 505         if (v == NULL)
 506             goto onError;
 507     }
 508 #endif
 509     if (!PyString_Check(v)) {
 510         PyErr_Format(PyExc_TypeError,
 511                      "encoder did not return a string object (type=%.400s)",
 512                      Py_TYPE(v)->tp_name);
 513         Py_DECREF(v);
 514         goto onError;
 515     }
 516
 517     return v;
 518
 519  onError:
 520     return NULL;
 521 }
 522
 523 static void
 524 string_dealloc(PyObject *op)
 525 {
 526         switch (PyString_CHECK_INTERNED(op)) {
 527                 case SSTATE_NOT_INTERNED:
 528                         break;
 529
 530                 case SSTATE_INTERNED_MORTAL:
 531                         /* revive dead object temporarily for DelItem */
 532                         Py_REFCNT(op) = 3;
 533                         if (PyDict_DelItem(interned, op) != 0)
 534                                 Py_FatalError(
 535                                         "deletion of interned string failed");
 536                         break;
 537
 538                 case SSTATE_INTERNED_IMMORTAL:
 539                         Py_FatalError("Immortal interned string died.");
 540
 541                 default:
 542                         Py_FatalError("Inconsistent interned string state.");
 543         }
 544         Py_TYPE(op)->tp_free(op);
 545 }
 546
 547 /* Unescape a backslash-escaped string. If unicode is non-zero,
 548    the string is a u-literal. If recode_encoding is non-zero,
 549    the string is UTF-8 encoded and should be re-encoded in the
 550    specified encoding.  */
 551
 552 PyObject *PyString_DecodeEscape(const char *s,
 553                                 Py_ssize_t len,
 554                                 const char *errors,
 555                                 Py_ssize_t unicode,
 556                                 const char *recode_encoding)
 557 {
 558         int c;
 559         char *p, *buf;
 560         const char *end;
 561         PyObject *v;
 562         Py_ssize_t newlen = recode_encoding ? 4*len:len;
 563         v = PyString_FromStringAndSize((char *)NULL, newlen);
 564         if (v == NULL)
 565                 return NULL;
 566         p = buf = PyString_AsString(v);
 567         end = s + len;
 568         while (s < end) {
 569                 if (*s != '\\') {
 570                   non_esc:
 571 #ifdef Py_USING_UNICODE
 572                         if (recode_encoding && (*s & 0x80)) {
 573                                 PyObject *u, *w;
 574                                 char *r;
 575                                 const char* t;
 576                                 Py_ssize_t rn;
 577                                 t = s;
 578                                 /* Decode non-ASCII bytes as UTF-8. */
 579                                 while (t < end && (*t & 0x80)) t++;
 580                                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
 581                                 if(!u) goto failed;
 582
 583                                 /* Recode them in target encoding. */
 584                                 w = PyUnicode_AsEncodedString(
 585                                         u, recode_encoding, errors);
 586                                 Py_DECREF(u);
 587                                 if (!w) goto failed;
 588
 589                                 /* Append bytes to output buffer. */
 590                                 assert(PyString_Check(w));
 591                                 r = PyString_AS_STRING(w);
 592                                 rn = PyString_GET_SIZE(w);
 593                                 Py_MEMCPY(p, r, rn);
 594                                 p += rn;
 595                                 Py_DECREF(w);
 596                                 s = t;
 597                         } else {
 598                                 *p++ = *s++;
 599                         }
 600 #else
 601                         *p++ = *s++;
 602 #endif
 603                         continue;
 604                 }
 605                 s++;
 606                 if (s==end) {
 607                         PyErr_SetString(PyExc_ValueError,
 608                                         "Trailing \\ in string");
 609                         goto failed;
 610                 }
 611                 switch (*s++) {
 612                 /* XXX This assumes ASCII! */
 613                 case '\n': break;
 614                 case '\\': *p++ = '\\'; break;
 615                 case '\'': *p++ = '\''; break;
 616                 case '\"': *p++ = '\"'; break;
 617                 case 'b': *p++ = '\b'; break;
 618                 case 'f': *p++ = '\014'; break; /* FF */
 619                 case 't': *p++ = '\t'; break;
 620                 case 'n': *p++ = '\n'; break;
 621                 case 'r': *p++ = '\r'; break;
 622                 case 'v': *p++ = '\013'; break; /* VT */
 623                 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
 624                 case '0': case '1': case '2': case '3':
 625                 case '4': case '5': case '6': case '7':
 626                         c = s[-1] - '0';
 627                         if (s < end && '0' <= *s && *s <= '7') {
 628                                 c = (c<<3) + *s++ - '0';
 629                                 if (s < end && '0' <= *s && *s <= '7')
 630                                         c = (c<<3) + *s++ - '0';
 631                         }
 632                         *p++ = c;
 633                         break;
 634                 case 'x':
 635                         if (s+1 < end &&
 636                             isxdigit(Py_CHARMASK(s[0])) &&
 637                             isxdigit(Py_CHARMASK(s[1])))
 638                         {
 639                                 unsigned int x = 0;
 640                                 c = Py_CHARMASK(*s);
 641                                 s++;
 642                                 if (isdigit(c))
 643                                         x = c - '0';
 644                                 else if (islower(c))
 645                                         x = 10 + c - 'a';
 646                                 else
 647                                         x = 10 + c - 'A';
 648                                 x = x << 4;
 649                                 c = Py_CHARMASK(*s);
 650                                 s++;
 651                                 if (isdigit(c))
 652                                         x += c - '0';
 653                                 else if (islower(c))
 654                                         x += 10 + c - 'a';
 655                                 else
 656                                         x += 10 + c - 'A';
 657                                 *p++ = x;
 658                                 break;
 659                         }
 660                         if (!errors || strcmp(errors, "strict") == 0) {
 661                                 PyErr_SetString(PyExc_ValueError,
 662                                                 "invalid \\x escape");
 663                                 goto failed;
 664                         }
 665                         if (strcmp(errors, "replace") == 0) {
 666                                 *p++ = '?';
 667                         } else if (strcmp(errors, "ignore") == 0)
 668                                 /* do nothing */;
 669                         else {
 670                                 PyErr_Format(PyExc_ValueError,
 671                                              "decoding error; "
 672                                              "unknown error handling code: %.400s",
 673                                              errors);
 674                                 goto failed;
 675                         }
 676 #ifndef Py_USING_UNICODE
 677                 case 'u':
 678                 case 'U':
 679                 case 'N':
 680                         if (unicode) {
 681                                 PyErr_SetString(PyExc_ValueError,
 682                                           "Unicode escapes not legal "
 683                                           "when Unicode disabled");
 684                                 goto failed;
 685                         }
 686 #endif
 687                 default:
 688                         *p++ = '\\';
 689                         s--;
 690                         goto non_esc; /* an arbitry number of unescaped
 691                                          UTF-8 bytes may follow. */
 692                 }
 693         }
 694         if (p-buf < newlen)
 695                 _PyString_Resize(&v, p - buf);
 696         return v;
 697   failed:
 698         Py_DECREF(v);
 699         return NULL;
 700 }
 701
 702 /* -------------------------------------------------------------------- */
 703 /* object api */
 704
 705 static Py_ssize_t
 706 string_getsize(register PyObject *op)
 707 {
 708         char *s;
 709         Py_ssize_t len;
 710         if (PyString_AsStringAndSize(op, &s, &len))
 711                 return -1;
 712         return len;
 713 }
 714
 715 static /*const*/ char *
 716 string_getbuffer(register PyObject *op)
 717 {
 718         char *s;
 719         Py_ssize_t len;
 720         if (PyString_AsStringAndSize(op, &s, &len))
 721                 return NULL;
 722         return s;
 723 }
 724
 725 Py_ssize_t
 726 PyString_Size(register PyObject *op)
 727 {
 728         if (!PyString_Check(op))
 729                 return string_getsize(op);
 730         return Py_SIZE(op);
 731 }
 732
 733 /*const*/ char *
 734 PyString_AsString(register PyObject *op)
 735 {
 736         if (!PyString_Check(op))
 737                 return string_getbuffer(op);
 738         return ((PyStringObject *)op) -> ob_sval;
 739 }
 740
 741 int
 742 PyString_AsStringAndSize(register PyObject *obj,
 743                          register char **s,
 744                          register Py_ssize_t *len)
 745 {
 746         if (s == NULL) {
 747                 PyErr_BadInternalCall();
 748                 return -1;
 749         }
 750
 751         if (!PyString_Check(obj)) {
 752 #ifdef Py_USING_UNICODE
 753                 if (PyUnicode_Check(obj)) {
 754                         obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
 755                         if (obj == NULL)
 756                                 return -1;
 757                 }
 758                 else
 759 #endif
 760                 {
 761                         PyErr_Format(PyExc_TypeError,
 762                                      "expected string or Unicode object, "
 763                                      "%.200s found", Py_TYPE(obj)->tp_name);
 764                         return -1;
 765                 }
 766         }
 767
 768         *s = PyString_AS_STRING(obj);
 769         if (len != NULL)
 770                 *len = PyString_GET_SIZE(obj);
 771         else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
 772                 PyErr_SetString(PyExc_TypeError,
 773                                 "expected string without null bytes");
 774                 return -1;
 775         }
 776         return 0;
 777 }
 778
 779 /* -------------------------------------------------------------------- */
 780 /* Methods */
 781
 782 #include "stringlib/stringdefs.h"
 783 #include "stringlib/fastsearch.h"
 784
 785 #include "stringlib/count.h"
 786 #include "stringlib/find.h"
 787 #include "stringlib/partition.h"
 788
 789 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
 790 #include "stringlib/localeutil.h"
 791
 792
 793
 794 static int
 795 string_print(PyStringObject *op, FILE *fp, int flags)
 796 {
 797         Py_ssize_t i, str_len;
 798         char c;
 799         int quote;
 800
 801         /* XXX Ought to check for interrupts when writing long strings */
 802         if (! PyString_CheckExact(op)) {
 803                 int ret;
 804                 /* A str subclass may have its own __str__ method. */
 805                 op = (PyStringObject *) PyObject_Str((PyObject *)op);
 806                 if (op == NULL)
 807                         return -1;
 808                 ret = string_print(op, fp, flags);
 809                 Py_DECREF(op);
 810                 return ret;
 811         }
 812         if (flags & Py_PRINT_RAW) {
 813                 char *data = op->ob_sval;
 814                 Py_ssize_t size = Py_SIZE(op);
 815                 Py_BEGIN_ALLOW_THREADS
 816                 while (size > INT_MAX) {
 817                         /* Very long strings cannot be written atomically.
 818                          * But don't write exactly INT_MAX bytes at a time
 819                          * to avoid memory aligment issues.
 820                          */
 821                         const int chunk_size = INT_MAX & ~0x3FFF;
 822                         fwrite(data, 1, chunk_size, fp);
 823                         data += chunk_size;
 824                         size -= chunk_size;
 825                 }
 826 #ifdef __VMS
 827                 if (size) fwrite(data, (int)size, 1, fp);
 828 #else
 829                 fwrite(data, 1, (int)size, fp);
 830 #endif
 831                 Py_END_ALLOW_THREADS
 832                 return 0;
 833         }
 834
 835         /* figure out which quote to use; single is preferred */
 836         quote = '\'';
 837         if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
 838             !memchr(op->ob_sval, '"', Py_SIZE(op)))
 839                 quote = '"';
 840
 841         str_len = Py_SIZE(op);
 842         Py_BEGIN_ALLOW_THREADS
 843         fputc(quote, fp);
 844         for (i = 0; i < str_len; i++) {
 845                 /* Since strings are immutable and the caller should have a
 846                 reference, accessing the interal buffer should not be an issue
 847                 with the GIL released. */
 848                 c = op->ob_sval[i];
 849                 if (c == quote || c == '\\')
 850                         fprintf(fp, "\\%c", c);
 851                 else if (c == '\t')
 852                         fprintf(fp, "\\t");
 853                 else if (c == '\n')
 854                         fprintf(fp, "\\n");
 855                 else if (c == '\r')
 856                         fprintf(fp, "\\r");
 857                 else if (c < ' ' || c >= 0x7f)
 858                         fprintf(fp, "\\x%02x", c & 0xff);
 859                 else
 860                         fputc(c, fp);
 861         }
 862         fputc(quote, fp);
 863         Py_END_ALLOW_THREADS
 864         return 0;
 865 }
 866
 867 PyObject *
 868 PyString_Repr(PyObject *obj, int smartquotes)
 869 {
 870         register PyStringObject* op = (PyStringObject*) obj;
 871         size_t newsize = 2 + 4 * Py_SIZE(op);
 872         PyObject *v;
 873         if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
 874                 PyErr_SetString(PyExc_OverflowError,
 875                         "string is too large to make repr");
 876                 return NULL;
 877         }
 878         v = PyString_FromStringAndSize((char *)NULL, newsize);
 879         if (v == NULL) {
 880                 return NULL;
 881         }
 882         else {
 883                 register Py_ssize_t i;
 884                 register char c;
 885                 register char *p;
 886                 int quote;
 887
 888                 /* figure out which quote to use; single is preferred */
 889                 quote = '\'';
 890                 if (smartquotes &&
 891                     memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
 892                     !memchr(op->ob_sval, '"', Py_SIZE(op)))
 893                         quote = '"';
 894
 895                 p = PyString_AS_STRING(v);
 896                 *p++ = quote;
 897                 for (i = 0; i < Py_SIZE(op); i++) {
 898                         /* There's at least enough room for a hex escape
 899                            and a closing quote. */
 900                         assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
 901                         c = op->ob_sval[i];
 902                         if (c == quote || c == '\\')
 903                                 *p++ = '\\', *p++ = c;
 904                         else if (c == '\t')
 905                                 *p++ = '\\', *p++ = 't';
 906                         else if (c == '\n')
 907                                 *p++ = '\\', *p++ = 'n';
 908                         else if (c == '\r')
 909                                 *p++ = '\\', *p++ = 'r';
 910                         else if (c < ' ' || c >= 0x7f) {
 911                                 /* For performance, we don't want to call
 912                                    PyOS_snprintf here (extra layers of
 913                                    function call). */
 914                                 sprintf(p, "\\x%02x", c & 0xff);
 915                                 p += 4;
 916                         }
 917                         else
 918                                 *p++ = c;
 919                 }
 920                 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
 921                 *p++ = quote;
 922                 *p = '\0';
 923                 _PyString_Resize(
 924                         &v, (p - PyString_AS_STRING(v)));
 925                 return v;
 926         }
 927 }
 928
 929 static PyObject *
 930 string_repr(PyObject *op)
 931 {
 932         return PyString_Repr(op, 1);
 933 }
 934
 935 static PyObject *
 936 string_str(PyObject *s)
 937 {
 938         assert(PyString_Check(s));
 939         if (PyString_CheckExact(s)) {
 940                 Py_INCREF(s);
 941                 return s;
 942         }
 943         else {
 944                 /* Subtype -- return genuine string with the same value. */
 945                 PyStringObject *t = (PyStringObject *) s;
 946                 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
 947         }
 948 }
 949
 950 static Py_ssize_t
 951 string_length(PyStringObject *a)
 952 {
 953         return Py_SIZE(a);
 954 }
 955
 956 static PyObject *
 957 string_concat(register PyStringObject *a, register PyObject *bb)
 958 {
 959         register Py_ssize_t size;
 960         register PyStringObject *op;
 961         if (!PyString_Check(bb)) {
 962 #ifdef Py_USING_UNICODE
 963                 if (PyUnicode_Check(bb))
 964                     return PyUnicode_Concat((PyObject *)a, bb);
 965 #endif
 966                 if (PyByteArray_Check(bb))
 967                     return PyByteArray_Concat((PyObject *)a, bb);
 968                 PyErr_Format(PyExc_TypeError,
 969                              "cannot concatenate 'str' and '%.200s' objects",
 970                              Py_TYPE(bb)->tp_name);
 971                 return NULL;
 972         }
 973 #define b ((PyStringObject *)bb)
 974         /* Optimize cases with empty left or right operand */
 975         if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
 976             PyString_CheckExact(a) && PyString_CheckExact(b)) {
 977                 if (Py_SIZE(a) == 0) {
 978                         Py_INCREF(bb);
 979                         return bb;
 980                 }
 981                 Py_INCREF(a);
 982                 return (PyObject *)a;
 983         }
 984         size = Py_SIZE(a) + Py_SIZE(b);
 985         /* Check that string sizes are not negative, to prevent an
 986            overflow in cases where we are passed incorrectly-created
 987            strings with negative lengths (due to a bug in other code).
 988         */
 989         if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
 990             Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
 991                 PyErr_SetString(PyExc_OverflowError,
 992                                 "strings are too large to concat");
 993                 return NULL;
 994         }
 995
 996         /* Inline PyObject_NewVar */
 997         if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
 998                 PyErr_SetString(PyExc_OverflowError,
 999                                 "strings are too large to concat");
1000                 return NULL;
1001         }
1002         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
1003         if (op == NULL)
1004                 return PyErr_NoMemory();
1005         PyObject_INIT_VAR(op, &PyString_Type, size);
1006         op->ob_shash = -1;
1007         op->ob_sstate = SSTATE_NOT_INTERNED;
1008         Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1009         Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1010         op->ob_sval[size] = '\0';
1011         return (PyObject *) op;
1012 #undef b
1013 }
1014
1015 static PyObject *
1016 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1017 {
1018         register Py_ssize_t i;
1019         register Py_ssize_t j;
1020         register Py_ssize_t size;
1021         register PyStringObject *op;
1022         size_t nbytes;
1023         if (n < 0)
1024                 n = 0;
1025         /* watch out for overflows:  the size can overflow int,
1026          * and the # of bytes needed can overflow size_t
1027          */
1028         size = Py_SIZE(a) * n;
1029         if (n && size / n != Py_SIZE(a)) {
1030                 PyErr_SetString(PyExc_OverflowError,
1031                         "repeated string is too long");
1032                 return NULL;
1033         }
1034         if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1035                 Py_INCREF(a);
1036                 return (PyObject *)a;
1037         }
1038         nbytes = (size_t)size;
1039         if (nbytes + sizeof(PyStringObject) <= nbytes) {
1040                 PyErr_SetString(PyExc_OverflowError,
1041                         "repeated string is too long");
1042                 return NULL;
1043         }
1044         op = (PyStringObject *)
1045                 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1046         if (op == NULL)
1047                 return PyErr_NoMemory();
1048         PyObject_INIT_VAR(op, &PyString_Type, size);
1049         op->ob_shash = -1;
1050         op->ob_sstate = SSTATE_NOT_INTERNED;
1051         op->ob_sval[size] = '\0';
1052         if (Py_SIZE(a) == 1 && n > 0) {
1053                 memset(op->ob_sval, a->ob_sval[0] , n);
1054                 return (PyObject *) op;
1055         }
1056         i = 0;
1057         if (i < size) {
1058                 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1059                 i = Py_SIZE(a);
1060         }
1061         while (i < size) {
1062                 j = (i <= size-i)  ?  i  :  size-i;
1063                 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1064                 i += j;
1065         }
1066         return (PyObject *) op;
1067 }
1068
1069 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1070
1071 static PyObject *
1072 string_slice(register PyStringObject *a, register Py_ssize_t i,
1073              register Py_ssize_t j)
1074      /* j -- may be negative! */
1075 {
1076         if (i < 0)
1077                 i = 0;
1078         if (j < 0)
1079                 j = 0; /* Avoid signed/unsigned bug in next line */
1080         if (j > Py_SIZE(a))
1081                 j = Py_SIZE(a);
1082         if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1083                 /* It's the same as a */
1084                 Py_INCREF(a);
1085                 return (PyObject *)a;
1086         }
1087         if (j < i)
1088                 j = i;
1089         return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1090 }
1091
1092 static int
1093 string_contains(PyObject *str_obj, PyObject *sub_obj)
1094 {
1095         if (!PyString_CheckExact(sub_obj)) {
1096 #ifdef Py_USING_UNICODE
1097                 if (PyUnicode_Check(sub_obj))
1098                         return PyUnicode_Contains(str_obj, sub_obj);
1099 #endif
1100                 if (!PyString_Check(sub_obj)) {
1101                         PyErr_Format(PyExc_TypeError,
1102                             "'in <string>' requires string as left operand, "
1103                             "not %.200s", Py_TYPE(sub_obj)->tp_name);
1104                         return -1;
1105                 }
1106         }
1107
1108         return stringlib_contains_obj(str_obj, sub_obj);
1109 }
1110
1111 static PyObject *
1112 string_item(PyStringObject *a, register Py_ssize_t i)
1113 {
1114         char pchar;
1115         PyObject *v;
1116         if (i < 0 || i >= Py_SIZE(a)) {
1117                 PyErr_SetString(PyExc_IndexError, "string index out of range");
1118                 return NULL;
1119         }
1120         pchar = a->ob_sval[i];
1121         v = (PyObject *)characters[pchar & UCHAR_MAX];
1122         if (v == NULL)
1123                 v = PyString_FromStringAndSize(&pchar, 1);
1124         else {
1125 #ifdef COUNT_ALLOCS
1126                 one_strings++;
1127 #endif
1128                 Py_INCREF(v);
1129         }
1130         return v;
1131 }
1132
1133 static PyObject*
1134 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1135 {
1136         int c;
1137         Py_ssize_t len_a, len_b;
1138         Py_ssize_t min_len;
1139         PyObject *result;
1140
1141         /* Make sure both arguments are strings. */
1142         if (!(PyString_Check(a) && PyString_Check(b))) {
1143                 result = Py_NotImplemented;
1144                 goto out;
1145         }
1146         if (a == b) {
1147                 switch (op) {
1148                 case Py_EQ:case Py_LE:case Py_GE:
1149                         result = Py_True;
1150                         goto out;
1151                 case Py_NE:case Py_LT:case Py_GT:
1152                         result = Py_False;
1153                         goto out;
1154                 }
1155         }
1156         if (op == Py_EQ) {
1157                 /* Supporting Py_NE here as well does not save
1158                    much time, since Py_NE is rarely used.  */
1159                 if (Py_SIZE(a) == Py_SIZE(b)
1160                     && (a->ob_sval[0] == b->ob_sval[0]
1161                         && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1162                         result = Py_True;
1163                 } else {
1164                         result = Py_False;
1165                 }
1166                 goto out;
1167         }
1168         len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1169         min_len = (len_a < len_b) ? len_a : len_b;
1170         if (min_len > 0) {
1171                 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1172                 if (c==0)
1173                         c = memcmp(a->ob_sval, b->ob_sval, min_len);
1174         } else
1175                 c = 0;
1176         if (c == 0)
1177                 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1178         switch (op) {
1179         case Py_LT: c = c <  0; break;
1180         case Py_LE: c = c <= 0; break;
1181         case Py_EQ: assert(0);  break; /* unreachable */
1182         case Py_NE: c = c != 0; break;
1183         case Py_GT: c = c >  0; break;
1184         case Py_GE: c = c >= 0; break;
1185         default:
1186                 result = Py_NotImplemented;
1187                 goto out;
1188         }
1189         result = c ? Py_True : Py_False;
1190   out:
1191         Py_INCREF(result);
1192         return result;
1193 }
1194
1195 int
1196 _PyString_Eq(PyObject *o1, PyObject *o2)
1197 {
1198         PyStringObject *a = (PyStringObject*) o1;
1199         PyStringObject *b = (PyStringObject*) o2;
1200         return Py_SIZE(a) == Py_SIZE(b)
1201           && *a->ob_sval == *b->ob_sval
1202           && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1203 }
1204
1205 static long
1206 string_hash(PyStringObject *a)
1207 {
1208         register Py_ssize_t len;
1209         register unsigned char *p;
1210         register long x;
1211
1212         if (a->ob_shash != -1)
1213                 return a->ob_shash;
1214         len = Py_SIZE(a);
1215         p = (unsigned char *) a->ob_sval;
1216         x = *p << 7;
1217         while (--len >= 0)
1218                 x = (1000003*x) ^ *p++;
1219         x ^= Py_SIZE(a);
1220         if (x == -1)
1221                 x = -2;
1222         a->ob_shash = x;
1223         return x;
1224 }
1225
1226 static PyObject*
1227 string_subscript(PyStringObject* self, PyObject* item)
1228 {
1229         if (PyIndex_Check(item)) {
1230                 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1231                 if (i == -1 && PyErr_Occurred())
1232                         return NULL;
1233                 if (i < 0)
1234                         i += PyString_GET_SIZE(self);
1235                 return string_item(self, i);
1236         }
1237         else if (PySlice_Check(item)) {
1238                 Py_ssize_t start, stop, step, slicelength, cur, i;
1239                 char* source_buf;
1240                 char* result_buf;
1241                 PyObject* result;
1242
1243                 if (PySlice_GetIndicesEx((PySliceObject*)item,
1244                                  PyString_GET_SIZE(self),
1245                                  &start, &stop, &step, &slicelength) < 0) {
1246                         return NULL;
1247                 }
1248
1249                 if (slicelength <= 0) {
1250                         return PyString_FromStringAndSize("", 0);
1251                 }
1252                 else if (start == 0 && step == 1 &&
1253                          slicelength == PyString_GET_SIZE(self) &&
1254                          PyString_CheckExact(self)) {
1255                         Py_INCREF(self);
1256                         return (PyObject *)self;
1257                 }
1258                 else if (step == 1) {
1259                         return PyString_FromStringAndSize(
1260                                 PyString_AS_STRING(self) + start,
1261                                 slicelength);
1262                 }
1263                 else {
1264                         source_buf = PyString_AsString((PyObject*)self);
1265                         result_buf = (char *)PyMem_Malloc(slicelength);
1266                         if (result_buf == NULL)
1267                                 return PyErr_NoMemory();
1268
1269                         for (cur = start, i = 0; i < slicelength;
1270                              cur += step, i++) {
1271                                 result_buf[i] = source_buf[cur];
1272                         }
1273
1274                         result = PyString_FromStringAndSize(result_buf,
1275                                                             slicelength);
1276                         PyMem_Free(result_buf);
1277                         return result;
1278                 }
1279         }
1280         else {
1281                 PyErr_Format(PyExc_TypeError,
1282                              "string indices must be integers, not %.200s",
1283                              Py_TYPE(item)->tp_name);
1284                 return NULL;
1285         }
1286 }
1287
1288 static Py_ssize_t
1289 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1290 {
1291         if ( index != 0 ) {
1292                 PyErr_SetString(PyExc_SystemError,
1293                                 "accessing non-existent string segment");
1294                 return -1;
1295         }
1296         *ptr = (void *)self->ob_sval;
1297         return Py_SIZE(self);
1298 }
1299
1300 static Py_ssize_t
1301 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1302 {
1303         PyErr_SetString(PyExc_TypeError,
1304                         "Cannot use string as modifiable buffer");
1305         return -1;
1306 }
1307
1308 static Py_ssize_t
1309 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1310 {
1311         if ( lenp )
1312                 *lenp = Py_SIZE(self);
1313         return 1;
1314 }
1315
1316 static Py_ssize_t
1317 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1318 {
1319         if ( index != 0 ) {
1320                 PyErr_SetString(PyExc_SystemError,
1321                                 "accessing non-existent string segment");
1322                 return -1;
1323         }
1324         *ptr = self->ob_sval;
1325         return Py_SIZE(self);
1326 }
1327
1328 static int
1329 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1330 {
1331         return PyBuffer_FillInfo(view, (PyObject*)self,
1332                                  (void *)self->ob_sval, Py_SIZE(self),
1333                                  1, flags);
1334 }
1335
1336 static PySequenceMethods string_as_sequence = {
1337         (lenfunc)string_length, /*sq_length*/
1338         (binaryfunc)string_concat, /*sq_concat*/
1339         (ssizeargfunc)string_repeat, /*sq_repeat*/
1340         (ssizeargfunc)string_item, /*sq_item*/
1341         (ssizessizeargfunc)string_slice, /*sq_slice*/
1342         0,              /*sq_ass_item*/
1343         0,              /*sq_ass_slice*/
1344         (objobjproc)string_contains /*sq_contains*/
1345 };
1346
1347 static PyMappingMethods string_as_mapping = {
1348         (lenfunc)string_length,
1349         (binaryfunc)string_subscript,
1350         0,
1351 };
1352
1353 static PyBufferProcs string_as_buffer = {
1354         (readbufferproc)string_buffer_getreadbuf,
1355         (writebufferproc)string_buffer_getwritebuf,
1356         (segcountproc)string_buffer_getsegcount,
1357         (charbufferproc)string_buffer_getcharbuf,
1358         (getbufferproc)string_buffer_getbuffer,
1359         0, /* XXX */
1360 };
1361
1362
1363
/* Strip-direction selectors; they double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above.  Each entry has the form "|O:<method name>". */
static const char *stripformat[] = {
        "|O:lstrip",            /* LEFTSTRIP */
        "|O:rstrip",            /* RIGHTSTRIP */
        "|O:strip"              /* BOTHSTRIP */
};

/* The bare method name: the format string with its leading "|O:" (three
   characters) skipped. */
#define STRIPNAME(i) (&stripformat[i][3])
1372
1373
/* True iff the `length' bytes of `target' starting at `offset' equal the
   first `length' bytes of `pattern'.  The first and last bytes are
   compared directly and only the interior goes through memcmp().

   Don't call if length < 2: the interior length is computed as
   length-2.

   Every argument is parenthesised in the expansion so that expression
   arguments (e.g. `buf + k' or `a ? b : c') expand correctly; arguments
   are evaluated more than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
  ((target)[(offset)] == (pattern)[0] &&                                \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&            \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1379
1380
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for at most `maxsplit' splits: maxsplit+1 elements
   (5 splits gives 6 elements), capped at MAX_PREALLOC.  The argument is
   parenthesised so that an expression argument is compared and
   incremented as a whole. */
#define PREALLOC_SIZE(maxsplit) \
        ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1393
/* Append the substring data[left:right] to `list' as a new string.

   Uses the caller's locals `str' and `list', and jumps to the caller's
   `onError' label if the string cannot be created or appended.  The
   temporary reference in `str' is released on both paths.

   The expansion is a single brace-enclosed block (like SPLIT_ADD), so
   the macro acts as one statement even as the unbraced body of an
   `if' or loop. */
#define SPLIT_APPEND(data, left, right) {                       \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
                goto onError;                                   \
        if (PyList_Append(list, str)) {                         \
                Py_DECREF(str);                                 \
                goto onError;                                   \
        }                                                       \
        else                                                    \
                Py_DECREF(str);                                 \
        }
1405
/* Add the substring data[left:right] to `list' as element number `count',
   then increment `count'.

   While `count' is below MAX_PREALLOC the new string is stored directly
   into the list slot (the list keeps the reference); after that it is
   appended and the temporary reference dropped.  Uses the caller's
   locals `str', `list' and `count', and jumps to the caller's `onError'
   label on failure. */
#define SPLIT_ADD(data, left, right) {                          \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
                goto onError;                                   \
        if (count >= MAX_PREALLOC) {                            \
                if (PyList_Append(list, str)) {                 \
                        Py_DECREF(str);                         \
                        goto onError;                           \
                }                                               \
                Py_DECREF(str);                                 \
        } else {                                                \
                PyList_SET_ITEM(list, count, str);              \
        }                                                       \
        count++; }
1422
/* Always force the list to the expected size: overwrite the list's size
   field with the caller's local `count'.  Parenthesised so the
   assignment stays one expression wherever the macro is used. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)

/* Whitespace scanners.  `i' must be a modifiable lvalue; it is advanced
   (SKIP_*) or moved back (RSKIP_*) while the byte s[i] is / is not
   whitespace according to isspace().  The forward forms stop at `len',
   the reverse forms stop once `i' drops below 0.  `s' is parenthesised
   so that a pointer expression can be passed. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) &&  isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0  &&  isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0  && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1430
1431 Py_LOCAL_INLINE(PyObject *)
1432 split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
1433 {
1434         const char *s = PyString_AS_STRING(self);
1435         Py_ssize_t i, j, count=0;
1436         PyObject *str;
1437         PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1438
1439         if (list == NULL)
1440                 return NULL;
1441
1442         i = j = 0;
1443
1444         while (maxsplit-- > 0) {
1445                 SKIP_SPACE(s, i, len);
1446                 if (i==len) break;
1447                 j = i; i++;
1448                 SKIP_NONSPACE(s, i, len);
1449                 if (j == 0 && i == len && PyString_CheckExact(self)) {
1450                         /* No whitespace in self, so just use it as list[0] */
1451                         Py_INCREF(self);
1452                         PyList_SET_ITEM(list, 0, (PyObject *)self);
1453                         count++;
1454                         break;
1455                 }
1456                 SPLIT_ADD(s, j, i);
1457         }
1458
1459         if (i < len) {
1460                 /* Only occurs when maxsplit was reached */
1461                 /* Skip any remaining whitespace and copy to end of string */
1462                 SKIP_SPACE(s, i, len);
1463                 if (i != len)
1464                         SPLIT_ADD(s, i, len);
1465         }
1466         FIX_PREALLOC_SIZE(list);
1467         return list;
1468   onError:
1469         Py_DECREF(list);
1470         return NULL;
1471 }
1472
1473 Py_LOCAL_INLINE(PyObject *)
1474 split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1475 {
1476         const char *s = PyString_AS_STRING(self);
1477         register Py_ssize_t i, j, count=0;
1478         PyObject *str;
1479         PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1480
1481         if (list == NULL)
1482                 return NULL;
1483
1484         i = j = 0;
1485         while ((j < len) && (maxcount-- > 0)) {
1486                 for(; j<len; j++) {
1487                         /* I found that using memchr makes no difference */
1488                         if (s[j] == ch) {
1489                                 SPLIT_ADD(s, i, j);
1490                                 i = j = j + 1;
1491                                 break;
1492                         }
1493                 }
1494         }
1495         if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1496                 /* ch not in self, so just use self as list[0] */
1497                 Py_INCREF(self);
1498                 PyList_SET_ITEM(list, 0, (PyObject *)self);
1499                 count++;
1500         }
1501         else if (i <= len) {
1502                 SPLIT_ADD(s, i, len);
1503         }
1504         FIX_PREALLOC_SIZE(list);
1505         return list;
1506
1507   onError:
1508         Py_DECREF(list);
1509         return NULL;
1510 }
1511
1512 PyDoc_STRVAR(split__doc__,
1513 "S.split([sep [,maxsplit]]) -> list of strings\n\
1514 \n\
1515 Return a list of the words in the string S, using sep as the\n\
1516 delimiter string.  If maxsplit is given, at most maxsplit\n\
1517 splits are done. If sep is not specified or is None, any\n\
1518 whitespace string is a separator and empty strings are removed\n\
1519 from the result.");
1520
1521 static PyObject *
1522 string_split(PyStringObject *self, PyObject *args)
1523 {
1524         Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1525         Py_ssize_t maxsplit = -1, count=0;
1526         const char *s = PyString_AS_STRING(self), *sub;
1527         PyObject *list, *str, *subobj = Py_None;
1528 #ifdef USE_FAST
1529         Py_ssize_t pos;
1530 #endif
1531
1532         if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1533                 return NULL;
1534         if (maxsplit < 0)
1535                 maxsplit = PY_SSIZE_T_MAX;
1536         if (subobj == Py_None)
1537                 return split_whitespace(self, len, maxsplit);
1538         if (PyString_Check(subobj)) {
1539                 sub = PyString_AS_STRING(subobj);
1540                 n = PyString_GET_SIZE(subobj);
1541         }
1542 #ifdef Py_USING_UNICODE
1543         else if (PyUnicode_Check(subobj))
1544                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1545 #endif
1546         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1547                 return NULL;
1548
1549         if (n == 0) {
1550                 PyErr_SetString(PyExc_ValueError, "empty separator");
1551                 return NULL;
1552         }
1553         else if (n == 1)
1554                 return split_char(self, len, sub[0], maxsplit);
1555
1556         list = PyList_New(PREALLOC_SIZE(maxsplit));
1557         if (list == NULL)
1558                 return NULL;
1559
1560 #ifdef USE_FAST
1561         i = j = 0;
1562         while (maxsplit-- > 0) {
1563                 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1564                 if (pos < 0)
1565                         break;
1566                 j = i+pos;
1567                 SPLIT_ADD(s, i, j);
1568                 i = j + n;
1569         }
1570 #else
1571         i = j = 0;
1572         while ((j+n <= len) && (maxsplit-- > 0)) {
1573                 for (; j+n <= len; j++) {
1574                         if (Py_STRING_MATCH(s, j, sub, n)) {
1575                                 SPLIT_ADD(s, i, j);
1576                                 i = j = j + n;
1577                                 break;
1578                         }
1579                 }
1580         }
1581 #endif
1582         SPLIT_ADD(s, i, len);
1583         FIX_PREALLOC_SIZE(list);
1584         return list;
1585
1586  onError:
1587         Py_DECREF(list);
1588         return NULL;
1589 }
1590
1591 PyDoc_STRVAR(partition__doc__,
1592 "S.partition(sep) -> (head, sep, tail)\n\
1593 \n\
1594 Searches for the separator sep in S, and returns the part before it,\n\
1595 the separator itself, and the part after it.  If the separator is not\n\
1596 found, returns S and two empty strings.");
1597
1598 static PyObject *
1599 string_partition(PyStringObject *self, PyObject *sep_obj)
1600 {
1601         const char *sep;
1602         Py_ssize_t sep_len;
1603
1604         if (PyString_Check(sep_obj)) {
1605                 sep = PyString_AS_STRING(sep_obj);
1606                 sep_len = PyString_GET_SIZE(sep_obj);
1607         }
1608 #ifdef Py_USING_UNICODE
1609         else if (PyUnicode_Check(sep_obj))
1610                 return PyUnicode_Partition((PyObject *) self, sep_obj);
1611 #endif
1612         else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1613                 return NULL;
1614
1615         return stringlib_partition(
1616                 (PyObject*) self,
1617                 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1618                 sep_obj, sep, sep_len
1619                 );
1620 }
1621
1622 PyDoc_STRVAR(rpartition__doc__,
1623 "S.rpartition(sep) -> (tail, sep, head)\n\
1624 \n\
1625 Searches for the separator sep in S, starting at the end of S, and returns\n\
1626 the part before it, the separator itself, and the part after it.  If the\n\
1627 separator is not found, returns two empty strings and S.");
1628
1629 static PyObject *
1630 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1631 {
1632         const char *sep;
1633         Py_ssize_t sep_len;
1634
1635         if (PyString_Check(sep_obj)) {
1636                 sep = PyString_AS_STRING(sep_obj);
1637                 sep_len = PyString_GET_SIZE(sep_obj);
1638         }
1639 #ifdef Py_USING_UNICODE
1640         else if (PyUnicode_Check(sep_obj))
1641                 return PyUnicode_Partition((PyObject *) self, sep_obj);
1642 #endif
1643         else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1644                 return NULL;
1645
1646         return stringlib_rpartition(
1647                 (PyObject*) self,
1648                 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1649                 sep_obj, sep, sep_len
1650                 );
1651 }
1652
1653 Py_LOCAL_INLINE(PyObject *)
1654 rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
1655 {
1656         const char *s = PyString_AS_STRING(self);
1657         Py_ssize_t i, j, count=0;
1658         PyObject *str;
1659         PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1660
1661         if (list == NULL)
1662                 return NULL;
1663
1664         i = j = len-1;
1665
1666         while (maxsplit-- > 0) {
1667                 RSKIP_SPACE(s, i);
1668                 if (i<0) break;
1669                 j = i; i--;
1670                 RSKIP_NONSPACE(s, i);
1671                 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1672                         /* No whitespace in self, so just use it as list[0] */
1673                         Py_INCREF(self);
1674                         PyList_SET_ITEM(list, 0, (PyObject *)self);
1675                         count++;
1676                         break;
1677                 }
1678                 SPLIT_ADD(s, i + 1, j + 1);
1679         }
1680         if (i >= 0) {
1681                 /* Only occurs when maxsplit was reached */
1682                 /* Skip any remaining whitespace and copy to beginning of string */
1683                 RSKIP_SPACE(s, i);
1684                 if (i >= 0)
1685                         SPLIT_ADD(s, 0, i + 1);
1686
1687         }
1688         FIX_PREALLOC_SIZE(list);
1689         if (PyList_Reverse(list) < 0)
1690                 goto onError;
1691         return list;
1692   onError:
1693         Py_DECREF(list);
1694         return NULL;
1695 }
1696
1697 Py_LOCAL_INLINE(PyObject *)
1698 rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1699 {
1700         const char *s = PyString_AS_STRING(self);
1701         register Py_ssize_t i, j, count=0;
1702         PyObject *str;
1703         PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1704
1705         if (list == NULL)
1706                 return NULL;
1707
1708         i = j = len - 1;
1709         while ((i >= 0) && (maxcount-- > 0)) {
1710                 for (; i >= 0; i--) {
1711                         if (s[i] == ch) {
1712                                 SPLIT_ADD(s, i + 1, j + 1);
1713                                 j = i = i - 1;
1714                                 break;
1715                         }
1716                 }
1717         }
1718         if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1719                 /* ch not in self, so just use self as list[0] */
1720                 Py_INCREF(self);
1721                 PyList_SET_ITEM(list, 0, (PyObject *)self);
1722                 count++;
1723         }
1724         else if (j >= -1) {
1725                 SPLIT_ADD(s, 0, j + 1);
1726         }
1727         FIX_PREALLOC_SIZE(list);
1728         if (PyList_Reverse(list) < 0)
1729                 goto onError;
1730         return list;
1731
1732  onError:
1733         Py_DECREF(list);
1734         return NULL;
1735 }
1736
1737 PyDoc_STRVAR(rsplit__doc__,
1738 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1739 \n\
1740 Return a list of the words in the string S, using sep as the\n\
1741 delimiter string, starting at the end of the string and working\n\
1742 to the front.  If maxsplit is given, at most maxsplit splits are\n\
1743 done. If sep is not specified or is None, any whitespace string\n\
1744 is a separator.");
1745
1746 static PyObject *
1747 string_rsplit(PyStringObject *self, PyObject *args)
1748 {
1749         Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1750         Py_ssize_t maxsplit = -1, count=0;
1751         const char *s, *sub;
1752         PyObject *list, *str, *subobj = Py_None;
1753
1754         if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1755                 return NULL;
1756         if (maxsplit < 0)
1757                 maxsplit = PY_SSIZE_T_MAX;
1758         if (subobj == Py_None)
1759                 return rsplit_whitespace(self, len, maxsplit);
1760         if (PyString_Check(subobj)) {
1761                 sub = PyString_AS_STRING(subobj);
1762                 n = PyString_GET_SIZE(subobj);
1763         }
1764 #ifdef Py_USING_UNICODE
1765         else if (PyUnicode_Check(subobj))
1766                 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1767 #endif
1768         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1769                 return NULL;
1770
1771         if (n == 0) {
1772                 PyErr_SetString(PyExc_ValueError, "empty separator");
1773                 return NULL;
1774         }
1775         else if (n == 1)
1776                 return rsplit_char(self, len, sub[0], maxsplit);
1777
1778         list = PyList_New(PREALLOC_SIZE(maxsplit));
1779         if (list == NULL)
1780                 return NULL;
1781
1782         j = len;
1783         i = j - n;
1784
1785         s = PyString_AS_STRING(self);
1786         while ( (i >= 0) && (maxsplit-- > 0) ) {
1787                 for (; i>=0; i--) {
1788                         if (Py_STRING_MATCH(s, i, sub, n)) {
1789                                 SPLIT_ADD(s, i + n, j);
1790                                 j = i;
1791                                 i -= n;
1792                                 break;
1793                         }
1794                 }
1795         }
1796         SPLIT_ADD(s, 0, j);
1797         FIX_PREALLOC_SIZE(list);
1798         if (PyList_Reverse(list) < 0)
1799                 goto onError;
1800         return list;
1801
1802 onError:
1803         Py_DECREF(list);
1804         return NULL;
1805 }
1806
1807
1808 PyDoc_STRVAR(join__doc__,
1809 "S.join(sequence) -> string\n\
1810 \n\
1811 Return a string which is the concatenation of the strings in the\n\
1812 sequence.  The separator between elements is S.");
1813
1814 static PyObject *
1815 string_join(PyStringObject *self, PyObject *orig)
1816 {
1817         char *sep = PyString_AS_STRING(self);
1818         const Py_ssize_t seplen = PyString_GET_SIZE(self);
1819         PyObject *res = NULL;
1820         char *p;
1821         Py_ssize_t seqlen = 0;
1822         size_t sz = 0;
1823         Py_ssize_t i;
1824         PyObject *seq, *item;
1825
1826         seq = PySequence_Fast(orig, "");
1827         if (seq == NULL) {
1828                 return NULL;
1829         }
1830
1831         seqlen = PySequence_Size(seq);
1832         if (seqlen == 0) {
1833                 Py_DECREF(seq);
1834                 return PyString_FromString("");
1835         }
1836         if (seqlen == 1) {
1837                 item = PySequence_Fast_GET_ITEM(seq, 0);
1838                 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1839                         Py_INCREF(item);
1840                         Py_DECREF(seq);
1841                         return item;
1842                 }
1843         }
1844
1845         /* There are at least two things to join, or else we have a subclass
1846          * of the builtin types in the sequence.
1847          * Do a pre-pass to figure out the total amount of space we'll
1848          * need (sz), see whether any argument is absurd, and defer to
1849          * the Unicode join if appropriate.
1850          */
1851         for (i = 0; i < seqlen; i++) {
1852                 const size_t old_sz = sz;
1853                 item = PySequence_Fast_GET_ITEM(seq, i);
1854                 if (!PyString_Check(item)){
1855 #ifdef Py_USING_UNICODE
1856                         if (PyUnicode_Check(item)) {
1857                                 /* Defer to Unicode join.
1858                                  * CAUTION:  There's no gurantee that the
1859                                  * original sequence can be iterated over
1860                                  * again, so we must pass seq here.
1861                                  */
1862                                 PyObject *result;
1863                                 result = PyUnicode_Join((PyObject *)self, seq);
1864                                 Py_DECREF(seq);
1865                                 return result;
1866                         }
1867 #endif
1868                         PyErr_Format(PyExc_TypeError,
1869                                      "sequence item %zd: expected string,"
1870                                      " %.80s found",
1871                                      i, Py_TYPE(item)->tp_name);
1872                         Py_DECREF(seq);
1873                         return NULL;
1874                 }
1875                 sz += PyString_GET_SIZE(item);
1876                 if (i != 0)
1877                         sz += seplen;
1878                 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1879                         PyErr_SetString(PyExc_OverflowError,
1880                                 "join() result is too long for a Python string");
1881                         Py_DECREF(seq);
1882                         return NULL;
1883                 }
1884         }
1885
1886         /* Allocate result space. */
1887         res = PyString_FromStringAndSize((char*)NULL, sz);
1888         if (res == NULL) {
1889                 Py_DECREF(seq);
1890                 return NULL;
1891         }
1892
1893         /* Catenate everything. */
1894         p = PyString_AS_STRING(res);
1895         for (i = 0; i < seqlen; ++i) {
1896                 size_t n;
1897                 item = PySequence_Fast_GET_ITEM(seq, i);
1898                 n = PyString_GET_SIZE(item);
1899                 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1900                 p += n;
1901                 if (i < seqlen - 1) {
1902                         Py_MEMCPY(p, sep, seplen);
1903                         p += seplen;
1904                 }
1905         }
1906
1907         Py_DECREF(seq);
1908         return res;
1909 }
1910
1911 PyObject *
1912 _PyString_Join(PyObject *sep, PyObject *x)
1913 {
1914         assert(sep != NULL && PyString_Check(sep));
1915         assert(x != NULL);
1916         return string_join((PyStringObject *)sep, x);
1917 }
1918
1919 Py_LOCAL_INLINE(void)
1920 string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1921 {
1922         if (*end > len)
1923                 *end = len;
1924         else if (*end < 0)
1925                 *end += len;
1926         if (*end < 0)
1927                 *end = 0;
1928         if (*start < 0)
1929                 *start += len;
1930         if (*start < 0)
1931                 *start = 0;
1932 }
1933
1934 Py_LOCAL_INLINE(Py_ssize_t)
1935 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1936 {
1937         PyObject *subobj;
1938         const char *sub;
1939         Py_ssize_t sub_len;
1940         Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1941         PyObject *obj_start=Py_None, *obj_end=Py_None;
1942
1943         if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1944                 &obj_start, &obj_end))
1945                 return -2;
1946         /* To support None in "start" and "end" arguments, meaning
1947            the same as if they were not passed.
1948         */
1949         if (obj_start != Py_None)
1950                 if (!_PyEval_SliceIndex(obj_start, &start))
1951                 return -2;
1952         if (obj_end != Py_None)
1953                 if (!_PyEval_SliceIndex(obj_end, &end))
1954                 return -2;
1955
1956         if (PyString_Check(subobj)) {
1957                 sub = PyString_AS_STRING(subobj);
1958                 sub_len = PyString_GET_SIZE(subobj);
1959         }
1960 #ifdef Py_USING_UNICODE
1961         else if (PyUnicode_Check(subobj))
1962                 return PyUnicode_Find(
1963                         (PyObject *)self, subobj, start, end, dir);
1964 #endif
1965         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1966                 /* XXX - the "expected a character buffer object" is pretty
1967                    confusing for a non-expert.  remap to something else ? */
1968                 return -2;
1969
1970         if (dir > 0)
1971                 return stringlib_find_slice(
1972                         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1973                         sub, sub_len, start, end);
1974         else
1975                 return stringlib_rfind_slice(
1976                         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1977                         sub, sub_len, start, end);
1978 }
1979
1980
1981 PyDoc_STRVAR(find__doc__,
1982 "S.find(sub [,start [,end]]) -> int\n\
1983 \n\
1984 Return the lowest index in S where substring sub is found,\n\
1985 such that sub is contained within s[start:end].  Optional\n\
1986 arguments start and end are interpreted as in slice notation.\n\
1987 \n\
1988 Return -1 on failure.");
1989
1990 static PyObject *
1991 string_find(PyStringObject *self, PyObject *args)
1992 {
1993         Py_ssize_t result = string_find_internal(self, args, +1);
1994         if (result == -2)
1995                 return NULL;
1996         return PyInt_FromSsize_t(result);
1997 }
1998
1999
2000 PyDoc_STRVAR(index__doc__,
2001 "S.index(sub [,start [,end]]) -> int\n\
2002 \n\
2003 Like S.find() but raise ValueError when the substring is not found.");
2004
2005 static PyObject *
2006 string_index(PyStringObject *self, PyObject *args)
2007 {
2008         Py_ssize_t result = string_find_internal(self, args, +1);
2009         if (result == -2)
2010                 return NULL;
2011         if (result == -1) {
2012                 PyErr_SetString(PyExc_ValueError,
2013                                 "substring not found");
2014                 return NULL;
2015         }
2016         return PyInt_FromSsize_t(result);
2017 }
2018
2019
2020 PyDoc_STRVAR(rfind__doc__,
2021 "S.rfind(sub [,start [,end]]) -> int\n\
2022 \n\
2023 Return the highest index in S where substring sub is found,\n\
2024 such that sub is contained within s[start:end].  Optional\n\
2025 arguments start and end are interpreted as in slice notation.\n\
2026 \n\
2027 Return -1 on failure.");
2028
2029 static PyObject *
2030 string_rfind(PyStringObject *self, PyObject *args)
2031 {
2032         Py_ssize_t result = string_find_internal(self, args, -1);
2033         if (result == -2)
2034                 return NULL;
2035         return PyInt_FromSsize_t(result);
2036 }
2037
2038
2039 PyDoc_STRVAR(rindex__doc__,
2040 "S.rindex(sub [,start [,end]]) -> int\n\
2041 \n\
2042 Like S.rfind() but raise ValueError when the substring is not found.");
2043
2044 static PyObject *
2045 string_rindex(PyStringObject *self, PyObject *args)
2046 {
2047         Py_ssize_t result = string_find_internal(self, args, -1);
2048         if (result == -2)
2049                 return NULL;
2050         if (result == -1) {
2051                 PyErr_SetString(PyExc_ValueError,
2052                                 "substring not found");
2053                 return NULL;
2054         }
2055         return PyInt_FromSsize_t(result);
2056 }
2057
2058
2059 Py_LOCAL_INLINE(PyObject *)
2060 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2061 {
2062         char *s = PyString_AS_STRING(self);
2063         Py_ssize_t len = PyString_GET_SIZE(self);
2064         char *sep = PyString_AS_STRING(sepobj);
2065         Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2066         Py_ssize_t i, j;
2067
2068         i = 0;
2069         if (striptype != RIGHTSTRIP) {
2070                 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2071                         i++;
2072                 }
2073         }
2074
2075         j = len;
2076         if (striptype != LEFTSTRIP) {
2077                 do {
2078                         j--;
2079                 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2080                 j++;
2081         }
2082
2083         if (i == 0 && j == len && PyString_CheckExact(self)) {
2084                 Py_INCREF(self);
2085                 return (PyObject*)self;
2086         }
2087         else
2088                 return PyString_FromStringAndSize(s+i, j-i);
2089 }
2090
2091
2092 Py_LOCAL_INLINE(PyObject *)
2093 do_strip(PyStringObject *self, int striptype)
2094 {
2095         char *s = PyString_AS_STRING(self);
2096         Py_ssize_t len = PyString_GET_SIZE(self), i, j;
2097
2098         i = 0;
2099         if (striptype != RIGHTSTRIP) {
2100                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2101                         i++;
2102                 }
2103         }
2104
2105         j = len;
2106         if (striptype != LEFTSTRIP) {
2107                 do {
2108                         j--;
2109                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2110                 j++;
2111         }
2112
2113         if (i == 0 && j == len && PyString_CheckExact(self)) {
2114                 Py_INCREF(self);
2115                 return (PyObject*)self;
2116         }
2117         else
2118                 return PyString_FromStringAndSize(s+i, j-i);
2119 }
2120
2121
2122 Py_LOCAL_INLINE(PyObject *)
2123 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2124 {
2125         PyObject *sep = NULL;
2126
2127         if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2128                 return NULL;
2129
2130         if (sep != NULL && sep != Py_None) {
2131                 if (PyString_Check(sep))
2132                         return do_xstrip(self, striptype, sep);
2133 #ifdef Py_USING_UNICODE
2134                 else if (PyUnicode_Check(sep)) {
2135                         PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2136                         PyObject *res;
2137                         if (uniself==NULL)
2138                                 return NULL;
2139                         res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2140                                 striptype, sep);
2141                         Py_DECREF(uniself);
2142                         return res;
2143                 }
2144 #endif
2145                 PyErr_Format(PyExc_TypeError,
2146 #ifdef Py_USING_UNICODE
2147                              "%s arg must be None, str or unicode",
2148 #else
2149                              "%s arg must be None or str",
2150 #endif
2151                              STRIPNAME(striptype));
2152                 return NULL;
2153         }
2154
2155         return do_strip(self, striptype);
2156 }
2157
2158
2159 PyDoc_STRVAR(strip__doc__,
2160 "S.strip([chars]) -> string or unicode\n\
2161 \n\
2162 Return a copy of the string S with leading and trailing\n\
2163 whitespace removed.\n\
2164 If chars is given and not None, remove characters in chars instead.\n\
2165 If chars is unicode, S will be converted to unicode before stripping");
2166
2167 static PyObject *
2168 string_strip(PyStringObject *self, PyObject *args)
2169 {
2170         if (PyTuple_GET_SIZE(args) == 0)
2171                 return do_strip(self, BOTHSTRIP); /* Common case */
2172         else
2173                 return do_argstrip(self, BOTHSTRIP, args);
2174 }
2175
2176
2177 PyDoc_STRVAR(lstrip__doc__,
2178 "S.lstrip([chars]) -> string or unicode\n\
2179 \n\
2180 Return a copy of the string S with leading whitespace removed.\n\
2181 If chars is given and not None, remove characters in chars instead.\n\
2182 If chars is unicode, S will be converted to unicode before stripping");
2183
2184 static PyObject *
2185 string_lstrip(PyStringObject *self, PyObject *args)
2186 {
2187         if (PyTuple_GET_SIZE(args) == 0)
2188                 return do_strip(self, LEFTSTRIP); /* Common case */
2189         else
2190                 return do_argstrip(self, LEFTSTRIP, args);
2191 }
2192
2193
2194 PyDoc_STRVAR(rstrip__doc__,
2195 "S.rstrip([chars]) -> string or unicode\n\
2196 \n\
2197 Return a copy of the string S with trailing whitespace removed.\n\
2198 If chars is given and not None, remove characters in chars instead.\n\
2199 If chars is unicode, S will be converted to unicode before stripping");
2200
2201 static PyObject *
2202 string_rstrip(PyStringObject *self, PyObject *args)
2203 {
2204         if (PyTuple_GET_SIZE(args) == 0)
2205                 return do_strip(self, RIGHTSTRIP); /* Common case */
2206         else
2207                 return do_argstrip(self, RIGHTSTRIP, args);
2208 }
2209
2210
2211 PyDoc_STRVAR(lower__doc__,
2212 "S.lower() -> string\n\
2213 \n\
2214 Return a copy of the string S converted to lowercase.");
2215
2216 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2217 #ifndef _tolower
2218 #define _tolower tolower
2219 #endif
2220
2221 static PyObject *
2222 string_lower(PyStringObject *self)
2223 {
2224         char *s;
2225         Py_ssize_t i, n = PyString_GET_SIZE(self);
2226         PyObject *newobj;
2227
2228         newobj = PyString_FromStringAndSize(NULL, n);
2229         if (!newobj)
2230                 return NULL;
2231
2232         s = PyString_AS_STRING(newobj);
2233
2234         Py_MEMCPY(s, PyString_AS_STRING(self), n);
2235
2236         for (i = 0; i < n; i++) {
2237                 int c = Py_CHARMASK(s[i]);
2238                 if (isupper(c))
2239                         s[i] = _tolower(c);
2240         }
2241
2242         return newobj;
2243 }
2244
2245 PyDoc_STRVAR(upper__doc__,
2246 "S.upper() -> string\n\
2247 \n\
2248 Return a copy of the string S converted to uppercase.");
2249
2250 #ifndef _toupper
2251 #define _toupper toupper
2252 #endif
2253
2254 static PyObject *
2255 string_upper(PyStringObject *self)
2256 {
2257         char *s;
2258         Py_ssize_t i, n = PyString_GET_SIZE(self);
2259         PyObject *newobj;
2260
2261         newobj = PyString_FromStringAndSize(NULL, n);
2262         if (!newobj)
2263                 return NULL;
2264
2265         s = PyString_AS_STRING(newobj);
2266
2267         Py_MEMCPY(s, PyString_AS_STRING(self), n);
2268
2269         for (i = 0; i < n; i++) {
2270                 int c = Py_CHARMASK(s[i]);
2271                 if (islower(c))
2272                         s[i] = _toupper(c);
2273         }
2274
2275         return newobj;
2276 }
2277
2278 PyDoc_STRVAR(title__doc__,
2279 "S.title() -> string\n\
2280 \n\
2281 Return a titlecased version of S, i.e. words start with uppercase\n\
2282 characters, all remaining cased characters have lowercase.");
2283
2284 static PyObject*
2285 string_title(PyStringObject *self)
2286 {
2287         char *s = PyString_AS_STRING(self), *s_new;
2288         Py_ssize_t i, n = PyString_GET_SIZE(self);
2289         int previous_is_cased = 0;
2290         PyObject *newobj;
2291
2292         newobj = PyString_FromStringAndSize(NULL, n);
2293         if (newobj == NULL)
2294                 return NULL;
2295         s_new = PyString_AsString(newobj);
2296         for (i = 0; i < n; i++) {
2297                 int c = Py_CHARMASK(*s++);
2298                 if (islower(c)) {
2299                         if (!previous_is_cased)
2300                             c = toupper(c);
2301                         previous_is_cased = 1;
2302                 } else if (isupper(c)) {
2303                         if (previous_is_cased)
2304                             c = tolower(c);
2305                         previous_is_cased = 1;
2306                 } else
2307                         previous_is_cased = 0;
2308                 *s_new++ = c;
2309         }
2310         return newobj;
2311 }
2312
2313 PyDoc_STRVAR(capitalize__doc__,
2314 "S.capitalize() -> string\n\
2315 \n\
2316 Return a copy of the string S with only its first character\n\
2317 capitalized.");
2318
2319 static PyObject *
2320 string_capitalize(PyStringObject *self)
2321 {
2322         char *s = PyString_AS_STRING(self), *s_new;
2323         Py_ssize_t i, n = PyString_GET_SIZE(self);
2324         PyObject *newobj;
2325
2326         newobj = PyString_FromStringAndSize(NULL, n);
2327         if (newobj == NULL)
2328                 return NULL;
2329         s_new = PyString_AsString(newobj);
2330         if (0 < n) {
2331                 int c = Py_CHARMASK(*s++);
2332                 if (islower(c))
2333                         *s_new = toupper(c);
2334                 else
2335                         *s_new = c;
2336                 s_new++;
2337         }
2338         for (i = 1; i < n; i++) {
2339                 int c = Py_CHARMASK(*s++);
2340                 if (isupper(c))
2341                         *s_new = tolower(c);
2342                 else
2343                         *s_new = c;
2344                 s_new++;
2345         }
2346         return newobj;
2347 }
2348
2349
2350 PyDoc_STRVAR(count__doc__,
2351 "S.count(sub[, start[, end]]) -> int\n\
2352 \n\
2353 Return the number of non-overlapping occurrences of substring sub in\n\
2354 string S[start:end].  Optional arguments start and end are interpreted\n\
2355 as in slice notation.");
2356
2357 static PyObject *
2358 string_count(PyStringObject *self, PyObject *args)
2359 {
2360         PyObject *sub_obj;
2361         const char *str = PyString_AS_STRING(self), *sub;
2362         Py_ssize_t sub_len;
2363         Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2364
2365         if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2366                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2367                 return NULL;
2368
2369         if (PyString_Check(sub_obj)) {
2370                 sub = PyString_AS_STRING(sub_obj);
2371                 sub_len = PyString_GET_SIZE(sub_obj);
2372         }
2373 #ifdef Py_USING_UNICODE
2374         else if (PyUnicode_Check(sub_obj)) {
2375                 Py_ssize_t count;
2376                 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2377                 if (count == -1)
2378                         return NULL;
2379                 else
2380                         return PyInt_FromSsize_t(count);
2381         }
2382 #endif
2383         else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2384                 return NULL;
2385
2386         string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
2387
2388         return PyInt_FromSsize_t(
2389                 stringlib_count(str + start, end - start, sub, sub_len)
2390                 );
2391 }
2392
2393 PyDoc_STRVAR(swapcase__doc__,
2394 "S.swapcase() -> string\n\
2395 \n\
2396 Return a copy of the string S with uppercase characters\n\
2397 converted to lowercase and vice versa.");
2398
2399 static PyObject *
2400 string_swapcase(PyStringObject *self)
2401 {
2402         char *s = PyString_AS_STRING(self), *s_new;
2403         Py_ssize_t i, n = PyString_GET_SIZE(self);
2404         PyObject *newobj;
2405
2406         newobj = PyString_FromStringAndSize(NULL, n);
2407         if (newobj == NULL)
2408                 return NULL;
2409         s_new = PyString_AsString(newobj);
2410         for (i = 0; i < n; i++) {
2411                 int c = Py_CHARMASK(*s++);
2412                 if (islower(c)) {
2413                         *s_new = toupper(c);
2414                 }
2415                 else if (isupper(c)) {
2416                         *s_new = tolower(c);
2417                 }
2418                 else
2419                         *s_new = c;
2420                 s_new++;
2421         }
2422         return newobj;
2423 }
2424
2425
2426 PyDoc_STRVAR(translate__doc__,
2427 "S.translate(table [,deletechars]) -> string\n\
2428 \n\
2429 Return a copy of the string S, where all characters occurring\n\
2430 in the optional argument deletechars are removed, and the\n\
2431 remaining characters have been mapped through the given\n\
2432 translation table, which must be a string of length 256.");
2433
2434 static PyObject *
2435 string_translate(PyStringObject *self, PyObject *args)
2436 {
2437         register char *input, *output;
2438         const char *table;
2439         register Py_ssize_t i, c, changed = 0;
2440         PyObject *input_obj = (PyObject*)self;
2441         const char *output_start, *del_table=NULL;
2442         Py_ssize_t inlen, tablen, dellen = 0;
2443         PyObject *result;
2444         int trans_table[256];
2445         PyObject *tableobj, *delobj = NULL;
2446
2447         if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2448                               &tableobj, &delobj))
2449                 return NULL;
2450
2451         if (PyString_Check(tableobj)) {
2452                 table = PyString_AS_STRING(tableobj);
2453                 tablen = PyString_GET_SIZE(tableobj);
2454         }
2455         else if (tableobj == Py_None) {
2456                 table = NULL;
2457                 tablen = 256;
2458         }
2459 #ifdef Py_USING_UNICODE
2460         else if (PyUnicode_Check(tableobj)) {
2461                 /* Unicode .translate() does not support the deletechars
2462                    parameter; instead a mapping to None will cause characters
2463                    to be deleted. */
2464                 if (delobj != NULL) {
2465                         PyErr_SetString(PyExc_TypeError,
2466                         "deletions are implemented differently for unicode");
2467                         return NULL;
2468                 }
2469                 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2470         }
2471 #endif
2472         else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2473                 return NULL;
2474
2475         if (tablen != 256) {
2476                 PyErr_SetString(PyExc_ValueError,
2477                   "translation table must be 256 characters long");
2478                 return NULL;
2479         }
2480
2481         if (delobj != NULL) {
2482                 if (PyString_Check(delobj)) {
2483                         del_table = PyString_AS_STRING(delobj);
2484                         dellen = PyString_GET_SIZE(delobj);
2485                 }
2486 #ifdef Py_USING_UNICODE
2487                 else if (PyUnicode_Check(delobj)) {
2488                         PyErr_SetString(PyExc_TypeError,
2489                         "deletions are implemented differently for unicode");
2490                         return NULL;
2491                 }
2492 #endif
2493                 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2494                         return NULL;
2495         }
2496         else {
2497                 del_table = NULL;
2498                 dellen = 0;
2499         }
2500
2501         inlen = PyString_GET_SIZE(input_obj);
2502         result = PyString_FromStringAndSize((char *)NULL, inlen);
2503         if (result == NULL)
2504                 return NULL;
2505         output_start = output = PyString_AsString(result);
2506         input = PyString_AS_STRING(input_obj);
2507
2508         if (dellen == 0 && table != NULL) {
2509                 /* If no deletions are required, use faster code */
2510                 for (i = inlen; --i >= 0; ) {
2511                         c = Py_CHARMASK(*input++);
2512                         if (Py_CHARMASK((*output++ = table[c])) != c)
2513                                 changed = 1;
2514                 }
2515                 if (changed || !PyString_CheckExact(input_obj))
2516                         return result;
2517                 Py_DECREF(result);
2518                 Py_INCREF(input_obj);
2519                 return input_obj;
2520         }
2521
2522         if (table == NULL) {
2523                 for (i = 0; i < 256; i++)
2524                         trans_table[i] = Py_CHARMASK(i);
2525         } else {
2526                 for (i = 0; i < 256; i++)
2527                         trans_table[i] = Py_CHARMASK(table[i]);
2528         }
2529
2530         for (i = 0; i < dellen; i++)
2531                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2532
2533         for (i = inlen; --i >= 0; ) {
2534                 c = Py_CHARMASK(*input++);
2535                 if (trans_table[c] != -1)
2536                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2537                                 continue;
2538                 changed = 1;
2539         }
2540         if (!changed && PyString_CheckExact(input_obj)) {
2541                 Py_DECREF(result);
2542                 Py_INCREF(input_obj);
2543                 return input_obj;
2544         }
2545         /* Fix the size of the resulting string */
2546         if (inlen > 0)
2547                 _PyString_Resize(&result, output - output_start);
2548         return result;
2549 }
2550
2551
/* Search directions accepted by findstring() and countstring(): those
   functions scan from the right when the direction is negative and from
   the left otherwise. */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* Locate byte `c' within the first `target_len' bytes of `target' using
   memchr(); evaluates to a `char *' to the first occurrence, or NULL when
   the byte is not present. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), c, target_len))
2559
2560 /* String ops must return a string.  */
2561 /* If the object is subclass of string, create a copy */
2562 Py_LOCAL(PyStringObject *)
2563 return_self(PyStringObject *self)
2564 {
2565         if (PyString_CheckExact(self)) {
2566                 Py_INCREF(self);
2567                 return self;
2568         }
2569         return (PyStringObject *)PyString_FromStringAndSize(
2570                 PyString_AS_STRING(self),
2571                 PyString_GET_SIZE(self));
2572 }
2573
2574 Py_LOCAL_INLINE(Py_ssize_t)
2575 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2576 {
2577         Py_ssize_t count=0;
2578         const char *start=target;
2579         const char *end=target+target_len;
2580
2581         while ( (start=findchar(start, end-start, c)) != NULL ) {
2582                 count++;
2583                 if (count >= maxcount)
2584                         break;
2585                 start += 1;
2586         }
2587         return count;
2588 }
2589
2590 Py_LOCAL(Py_ssize_t)
2591 findstring(const char *target, Py_ssize_t target_len,
2592            const char *pattern, Py_ssize_t pattern_len,
2593            Py_ssize_t start,
2594            Py_ssize_t end,
2595            int direction)
2596 {
2597         if (start < 0) {
2598                 start += target_len;
2599                 if (start < 0)
2600                         start = 0;
2601         }
2602         if (end > target_len) {
2603                 end = target_len;
2604         } else if (end < 0) {
2605                 end += target_len;
2606                 if (end < 0)
2607                         end = 0;
2608         }
2609
2610         /* zero-length substrings always match at the first attempt */
2611         if (pattern_len == 0)
2612                 return (direction > 0) ? start : end;
2613
2614         end -= pattern_len;
2615
2616         if (direction < 0) {
2617                 for (; end >= start; end--)
2618                         if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2619                                 return end;
2620         } else {
2621                 for (; start <= end; start++)
2622                         if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2623                                 return start;
2624         }
2625         return -1;
2626 }
2627
2628 Py_LOCAL_INLINE(Py_ssize_t)
2629 countstring(const char *target, Py_ssize_t target_len,
2630             const char *pattern, Py_ssize_t pattern_len,
2631             Py_ssize_t start,
2632             Py_ssize_t end,
2633             int direction, Py_ssize_t maxcount)
2634 {
2635         Py_ssize_t count=0;
2636
2637         if (start < 0) {
2638                 start += target_len;
2639                 if (start < 0)
2640                         start = 0;
2641         }
2642         if (end > target_len) {
2643                 end = target_len;
2644         } else if (end < 0) {
2645                 end += target_len;
2646                 if (end < 0)
2647                         end = 0;
2648         }
2649
2650         /* zero-length substrings match everywhere */
2651         if (pattern_len == 0 || maxcount == 0) {
2652                 if (target_len+1 < maxcount)
2653                         return target_len+1;
2654                 return maxcount;
2655         }
2656
2657         end -= pattern_len;
2658         if (direction < 0) {
2659                 for (; (end >= start); end--)
2660                         if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2661                                 count++;
2662                                 if (--maxcount <= 0) break;
2663                                 end -= pattern_len-1;
2664                         }
2665         } else {
2666                 for (; (start <= end); start++)
2667                         if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2668                                 count++;
2669                                 if (--maxcount <= 0)
2670                                         break;
2671                                 start += pattern_len-1;
2672                         }
2673         }
2674         return count;
2675 }
2676
2677
2678 /* Algorithms for different cases of string replacement */
2679
2680 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2681 Py_LOCAL(PyStringObject *)
2682 replace_interleave(PyStringObject *self,
2683                    const char *to_s, Py_ssize_t to_len,
2684                    Py_ssize_t maxcount)
2685 {
2686         char *self_s, *result_s;
2687         Py_ssize_t self_len, result_len;
2688         Py_ssize_t count, i, product;
2689         PyStringObject *result;
2690
2691         self_len = PyString_GET_SIZE(self);
2692
2693         /* 1 at the end plus 1 after every character */
2694         count = self_len+1;
2695         if (maxcount < count)
2696                 count = maxcount;
2697
2698         /* Check for overflow */
2699         /*   result_len = count * to_len + self_len; */
2700         product = count * to_len;
2701         if (product / to_len != count) {
2702                 PyErr_SetString(PyExc_OverflowError,
2703                                 "replace string is too long");
2704                 return NULL;
2705         }
2706         result_len = product + self_len;
2707         if (result_len < 0) {
2708                 PyErr_SetString(PyExc_OverflowError,
2709                                 "replace string is too long");
2710                 return NULL;
2711         }
2712
2713         if (! (result = (PyStringObject *)
2714                          PyString_FromStringAndSize(NULL, result_len)) )
2715                 return NULL;
2716
2717         self_s = PyString_AS_STRING(self);
2718         result_s = PyString_AS_STRING(result);
2719
2720         /* TODO: special case single character, which doesn't need memcpy */
2721
2722         /* Lay the first one down (guaranteed this will occur) */
2723         Py_MEMCPY(result_s, to_s, to_len);
2724         result_s += to_len;
2725         count -= 1;
2726
2727         for (i=0; i<count; i++) {
2728                 *result_s++ = *self_s++;
2729                 Py_MEMCPY(result_s, to_s, to_len);
2730                 result_s += to_len;
2731         }
2732
2733         /* Copy the rest of the original string */
2734         Py_MEMCPY(result_s, self_s, self_len-i);
2735
2736         return result;
2737 }
2738
2739 /* Special case for deleting a single character */
2740 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2741 Py_LOCAL(PyStringObject *)
2742 replace_delete_single_character(PyStringObject *self,
2743                                 char from_c, Py_ssize_t maxcount)
2744 {
2745         char *self_s, *result_s;
2746         char *start, *next, *end;
2747         Py_ssize_t self_len, result_len;
2748         Py_ssize_t count;
2749         PyStringObject *result;
2750
2751         self_len = PyString_GET_SIZE(self);
2752         self_s = PyString_AS_STRING(self);
2753
2754         count = countchar(self_s, self_len, from_c, maxcount);
2755         if (count == 0) {
2756                 return return_self(self);
2757         }
2758
2759         result_len = self_len - count;  /* from_len == 1 */
2760         assert(result_len>=0);
2761
2762         if ( (result = (PyStringObject *)
2763                         PyString_FromStringAndSize(NULL, result_len)) == NULL)
2764                 return NULL;
2765         result_s = PyString_AS_STRING(result);
2766
2767         start = self_s;
2768         end = self_s + self_len;
2769         while (count-- > 0) {
2770                 next = findchar(start, end-start, from_c);
2771                 if (next == NULL)
2772                         break;
2773                 Py_MEMCPY(result_s, start, next-start);
2774                 result_s += (next-start);
2775                 start = next+1;
2776         }
2777         Py_MEMCPY(result_s, start, end-start);
2778
2779         return result;
2780 }
2781
2782 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2783
2784 Py_LOCAL(PyStringObject *)
2785 replace_delete_substring(PyStringObject *self,
2786                          const char *from_s, Py_ssize_t from_len,
2787                          Py_ssize_t maxcount) {
2788         char *self_s, *result_s;
2789         char *start, *next, *end;
2790         Py_ssize_t self_len, result_len;
2791         Py_ssize_t count, offset;
2792         PyStringObject *result;
2793
2794         self_len = PyString_GET_SIZE(self);
2795         self_s = PyString_AS_STRING(self);
2796
2797         count = countstring(self_s, self_len,
2798                             from_s, from_len,
2799                             0, self_len, 1,
2800                             maxcount);
2801
2802         if (count == 0) {
2803                 /* no matches */
2804                 return return_self(self);
2805         }
2806
2807         result_len = self_len - (count * from_len);
2808         assert (result_len>=0);
2809
2810         if ( (result = (PyStringObject *)
2811               PyString_FromStringAndSize(NULL, result_len)) == NULL )
2812                 return NULL;
2813
2814         result_s = PyString_AS_STRING(result);
2815
2816         start = self_s;
2817         end = self_s + self_len;
2818         while (count-- > 0) {
2819                 offset = findstring(start, end-start,
2820                                     from_s, from_len,
2821                                     0, end-start, FORWARD);
2822                 if (offset == -1)
2823                         break;
2824                 next = start + offset;
2825
2826                 Py_MEMCPY(result_s, start, next-start);
2827
2828                 result_s += (next-start);
2829                 start = next+from_len;
2830         }
2831         Py_MEMCPY(result_s, start, end-start);
2832         return result;
2833 }
2834
2835 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2836 Py_LOCAL(PyStringObject *)
2837 replace_single_character_in_place(PyStringObject *self,
2838                                   char from_c, char to_c,
2839                                   Py_ssize_t maxcount)
2840 {
2841         char *self_s, *result_s, *start, *end, *next;
2842         Py_ssize_t self_len;
2843         PyStringObject *result;
2844
2845         /* The result string will be the same size */
2846         self_s = PyString_AS_STRING(self);
2847         self_len = PyString_GET_SIZE(self);
2848
2849         next = findchar(self_s, self_len, from_c);
2850
2851         if (next == NULL) {
2852                 /* No matches; return the original string */
2853                 return return_self(self);
2854         }
2855
2856         /* Need to make a new string */
2857         result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2858         if (result == NULL)
2859                 return NULL;
2860         result_s = PyString_AS_STRING(result);
2861         Py_MEMCPY(result_s, self_s, self_len);
2862
2863         /* change everything in-place, starting with this one */
2864         start =  result_s + (next-self_s);
2865         *start = to_c;
2866         start++;
2867         end = result_s + self_len;
2868
2869         while (--maxcount > 0) {
2870                 next = findchar(start, end-start, from_c);
2871                 if (next == NULL)
2872                         break;
2873                 *next = to_c;
2874                 start = next+1;
2875         }
2876
2877         return result;
2878 }
2879
2880 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2881 Py_LOCAL(PyStringObject *)
2882 replace_substring_in_place(PyStringObject *self,
2883                            const char *from_s, Py_ssize_t from_len,
2884                            const char *to_s, Py_ssize_t to_len,
2885                            Py_ssize_t maxcount)
2886 {
2887         char *result_s, *start, *end;
2888         char *self_s;
2889         Py_ssize_t self_len, offset;
2890         PyStringObject *result;
2891
2892         /* The result string will be the same size */
2893
2894         self_s = PyString_AS_STRING(self);
2895         self_len = PyString_GET_SIZE(self);
2896
2897         offset = findstring(self_s, self_len,
2898                             from_s, from_len,
2899                             0, self_len, FORWARD);
2900         if (offset == -1) {
2901                 /* No matches; return the original string */
2902                 return return_self(self);
2903         }
2904
2905         /* Need to make a new string */
2906         result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2907         if (result == NULL)
2908                 return NULL;
2909         result_s = PyString_AS_STRING(result);
2910         Py_MEMCPY(result_s, self_s, self_len);
2911
2912         /* change everything in-place, starting with this one */
2913         start =  result_s + offset;
2914         Py_MEMCPY(start, to_s, from_len);
2915         start += from_len;
2916         end = result_s + self_len;
2917
2918         while ( --maxcount > 0) {
2919                 offset = findstring(start, end-start,
2920                                     from_s, from_len,
2921                                     0, end-start, FORWARD);
2922                 if (offset==-1)
2923                         break;
2924                 Py_MEMCPY(start+offset, to_s, from_len);
2925                 start += offset+from_len;
2926         }
2927
2928         return result;
2929 }
2930
2931 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2932 Py_LOCAL(PyStringObject *)
2933 replace_single_character(PyStringObject *self,
2934                          char from_c,
2935                          const char *to_s, Py_ssize_t to_len,
2936                          Py_ssize_t maxcount)
2937 {
2938         char *self_s, *result_s;
2939         char *start, *next, *end;
2940         Py_ssize_t self_len, result_len;
2941         Py_ssize_t count, product;
2942         PyStringObject *result;
2943
2944         self_s = PyString_AS_STRING(self);
2945         self_len = PyString_GET_SIZE(self);
2946
2947         count = countchar(self_s, self_len, from_c, maxcount);
2948         if (count == 0) {
2949                 /* no matches, return unchanged */
2950                 return return_self(self);
2951         }
2952
2953         /* use the difference between current and new, hence the "-1" */
2954         /*   result_len = self_len + count * (to_len-1)  */
2955         product = count * (to_len-1);
2956         if (product / (to_len-1) != count) {
2957                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2958                 return NULL;
2959         }
2960         result_len = self_len + product;
2961         if (result_len < 0) {
2962                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2963                 return NULL;
2964         }
2965
2966         if ( (result = (PyStringObject *)
2967               PyString_FromStringAndSize(NULL, result_len)) == NULL)
2968                 return NULL;
2969         result_s = PyString_AS_STRING(result);
2970
2971         start = self_s;
2972         end = self_s + self_len;
2973         while (count-- > 0) {
2974                 next = findchar(start, end-start, from_c);
2975                 if (next == NULL)
2976                         break;
2977
2978                 if (next == start) {
2979                         /* replace with the 'to' */
2980                         Py_MEMCPY(result_s, to_s, to_len);
2981                         result_s += to_len;
2982                         start += 1;
2983                 } else {
2984                         /* copy the unchanged old then the 'to' */
2985                         Py_MEMCPY(result_s, start, next-start);
2986                         result_s += (next-start);
2987                         Py_MEMCPY(result_s, to_s, to_len);
2988                         result_s += to_len;
2989                         start = next+1;
2990                 }
2991         }
2992         /* Copy the remainder of the remaining string */
2993         Py_MEMCPY(result_s, start, end-start);
2994
2995         return result;
2996 }
2997
2998 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2999 Py_LOCAL(PyStringObject *)
3000 replace_substring(PyStringObject *self,
3001                   const char *from_s, Py_ssize_t from_len,
3002                   const char *to_s, Py_ssize_t to_len,
3003                   Py_ssize_t maxcount) {
3004         char *self_s, *result_s;
3005         char *start, *next, *end;
3006         Py_ssize_t self_len, result_len;
3007         Py_ssize_t count, offset, product;
3008         PyStringObject *result;
3009
3010         self_s = PyString_AS_STRING(self);
3011         self_len = PyString_GET_SIZE(self);
3012
3013         count = countstring(self_s, self_len,
3014                             from_s, from_len,
3015                             0, self_len, FORWARD, maxcount);
3016         if (count == 0) {
3017                 /* no matches, return unchanged */
3018                 return return_self(self);
3019         }
3020
3021         /* Check for overflow */
3022         /*    result_len = self_len + count * (to_len-from_len) */
3023         product = count * (to_len-from_len);
3024         if (product / (to_len-from_len) != count) {
3025                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3026                 return NULL;
3027         }
3028         result_len = self_len + product;
3029         if (result_len < 0) {
3030                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3031                 return NULL;
3032         }
3033
3034         if ( (result = (PyStringObject *)
3035               PyString_FromStringAndSize(NULL, result_len)) == NULL)
3036                 return NULL;
3037         result_s = PyString_AS_STRING(result);
3038
3039         start = self_s;
3040         end = self_s + self_len;
3041         while (count-- > 0) {
3042                 offset = findstring(start, end-start,
3043                                     from_s, from_len,
3044                                     0, end-start, FORWARD);
3045                 if (offset == -1)
3046                         break;
3047                 next = start+offset;
3048                 if (next == start) {
3049                         /* replace with the 'to' */
3050                         Py_MEMCPY(result_s, to_s, to_len);
3051                         result_s += to_len;
3052                         start += from_len;
3053                 } else {
3054                         /* copy the unchanged old then the 'to' */
3055                         Py_MEMCPY(result_s, start, next-start);
3056                         result_s += (next-start);
3057                         Py_MEMCPY(result_s, to_s, to_len);
3058                         result_s += to_len;
3059                         start = next+from_len;
3060                 }
3061         }
3062         /* Copy the remainder of the remaining string */
3063         Py_MEMCPY(result_s, start, end-start);
3064
3065         return result;
3066 }
3067
3068
3069 Py_LOCAL(PyStringObject *)
3070 replace(PyStringObject *self,
3071         const char *from_s, Py_ssize_t from_len,
3072         const char *to_s, Py_ssize_t to_len,
3073         Py_ssize_t maxcount)
3074 {
3075         if (maxcount < 0) {
3076                 maxcount = PY_SSIZE_T_MAX;
3077         } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3078                 /* nothing to do; return the original string */
3079                 return return_self(self);
3080         }
3081
3082         if (maxcount == 0 ||
3083             (from_len == 0 && to_len == 0)) {
3084                 /* nothing to do; return the original string */
3085                 return return_self(self);
3086         }
3087
3088         /* Handle zero-length special cases */
3089
3090         if (from_len == 0) {
3091                 /* insert the 'to' string everywhere.   */
3092                 /*    >>> "Python".replace("", ".")     */
3093                 /*    '.P.y.t.h.o.n.'                   */
3094                 return replace_interleave(self, to_s, to_len, maxcount);
3095         }
3096
3097         /* Except for "".replace("", "A") == "A" there is no way beyond this */
3098         /* point for an empty self string to generate a non-empty string */
3099         /* Special case so the remaining code always gets a non-empty string */
3100         if (PyString_GET_SIZE(self) == 0) {
3101                 return return_self(self);
3102         }
3103
3104         if (to_len == 0) {
3105                 /* delete all occurances of 'from' string */
3106                 if (from_len == 1) {
3107                         return replace_delete_single_character(
3108                                 self, from_s[0], maxcount);
3109                 } else {
3110                         return replace_delete_substring(self, from_s, from_len, maxcount);
3111                 }
3112         }
3113
3114         /* Handle special case where both strings have the same length */
3115
3116         if (from_len == to_len) {
3117                 if (from_len == 1) {
3118                         return replace_single_character_in_place(
3119                                 self,
3120                                 from_s[0],
3121                                 to_s[0],
3122                                 maxcount);
3123                 } else {
3124                         return replace_substring_in_place(
3125                                 self, from_s, from_len, to_s, to_len, maxcount);
3126                 }
3127         }
3128
3129         /* Otherwise use the more generic algorithms */
3130         if (from_len == 1) {
3131                 return replace_single_character(self, from_s[0],
3132                                                 to_s, to_len, maxcount);
3133         } else {
3134                 /* len('from')>=2, len('to')>=1 */
3135                 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3136         }
3137 }
3138
3139 PyDoc_STRVAR(replace__doc__,
3140 "S.replace (old, new[, count]) -> string\n\
3141 \n\
3142 Return a copy of string S with all occurrences of substring\n\
3143 old replaced by new.  If the optional argument count is\n\
3144 given, only the first count occurrences are replaced.");
3145
3146 static PyObject *
3147 string_replace(PyStringObject *self, PyObject *args)
3148 {
3149         Py_ssize_t count = -1;
3150         PyObject *from, *to;
3151         const char *from_s, *to_s;
3152         Py_ssize_t from_len, to_len;
3153
3154         if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3155                 return NULL;
3156
3157         if (PyString_Check(from)) {
3158                 from_s = PyString_AS_STRING(from);
3159                 from_len = PyString_GET_SIZE(from);
3160         }
3161 #ifdef Py_USING_UNICODE
3162         if (PyUnicode_Check(from))
3163                 return PyUnicode_Replace((PyObject *)self,
3164                                          from, to, count);
3165 #endif
3166         else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3167                 return NULL;
3168
3169         if (PyString_Check(to)) {
3170                 to_s = PyString_AS_STRING(to);
3171                 to_len = PyString_GET_SIZE(to);
3172         }
3173 #ifdef Py_USING_UNICODE
3174         else if (PyUnicode_Check(to))
3175                 return PyUnicode_Replace((PyObject *)self,
3176                                          from, to, count);
3177 #endif
3178         else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3179                 return NULL;
3180
3181         return (PyObject *)replace((PyStringObject *) self,
3182                                    from_s, from_len,
3183                                    to_s, to_len, count);
3184 }
3185
3186 /** End DALKE **/
3187
3188 /* Matches the end (direction >= 0) or start (direction < 0) of self
3189  * against substr, using the start and end arguments. Returns
3190  * -1 on error, 0 if not found and 1 if found.
3191  */
3192 Py_LOCAL(int)
3193 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3194                   Py_ssize_t end, int direction)
3195 {
3196         Py_ssize_t len = PyString_GET_SIZE(self);
3197         Py_ssize_t slen;
3198         const char* sub;
3199         const char* str;
3200
3201         if (PyString_Check(substr)) {
3202                 sub = PyString_AS_STRING(substr);
3203                 slen = PyString_GET_SIZE(substr);
3204         }
3205 #ifdef Py_USING_UNICODE
3206         else if (PyUnicode_Check(substr))
3207                 return PyUnicode_Tailmatch((PyObject *)self,
3208                                            substr, start, end, direction);
3209 #endif
3210         else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3211                 return -1;
3212         str = PyString_AS_STRING(self);
3213
3214         string_adjust_indices(&start, &end, len);
3215
3216         if (direction < 0) {
3217                 /* startswith */
3218                 if (start+slen > len)
3219                         return 0;
3220         } else {
3221                 /* endswith */
3222                 if (end-start < slen || start > len)
3223                         return 0;
3224
3225                 if (end-slen > start)
3226                         start = end - slen;
3227         }
3228         if (end-start >= slen)
3229                 return ! memcmp(str+start, sub, slen);
3230         return 0;
3231 }
3232
3233
3234 PyDoc_STRVAR(startswith__doc__,
3235 "S.startswith(prefix[, start[, end]]) -> bool\n\
3236 \n\
3237 Return True if S starts with the specified prefix, False otherwise.\n\
3238 With optional start, test S beginning at that position.\n\
3239 With optional end, stop comparing S at that position.\n\
3240 prefix can also be a tuple of strings to try.");
3241
3242 static PyObject *
3243 string_startswith(PyStringObject *self, PyObject *args)
3244 {
3245         Py_ssize_t start = 0;
3246         Py_ssize_t end = PY_SSIZE_T_MAX;
3247         PyObject *subobj;
3248         int result;
3249
3250         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3251                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3252                 return NULL;
3253         if (PyTuple_Check(subobj)) {
3254                 Py_ssize_t i;
3255                 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3256                         result = _string_tailmatch(self,
3257                                         PyTuple_GET_ITEM(subobj, i),
3258                                         start, end, -1);
3259                         if (result == -1)
3260                                 return NULL;
3261                         else if (result) {
3262                                 Py_RETURN_TRUE;
3263                         }
3264                 }
3265                 Py_RETURN_FALSE;
3266         }
3267         result = _string_tailmatch(self, subobj, start, end, -1);
3268         if (result == -1)
3269                 return NULL;
3270         else
3271                 return PyBool_FromLong(result);
3272 }
3273
3274
3275 PyDoc_STRVAR(endswith__doc__,
3276 "S.endswith(suffix[, start[, end]]) -> bool\n\
3277 \n\
3278 Return True if S ends with the specified suffix, False otherwise.\n\
3279 With optional start, test S beginning at that position.\n\
3280 With optional end, stop comparing S at that position.\n\
3281 suffix can also be a tuple of strings to try.");
3282
3283 static PyObject *
3284 string_endswith(PyStringObject *self, PyObject *args)
3285 {
3286         Py_ssize_t start = 0;
3287         Py_ssize_t end = PY_SSIZE_T_MAX;
3288         PyObject *subobj;
3289         int result;
3290
3291         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3292                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3293                 return NULL;
3294         if (PyTuple_Check(subobj)) {
3295                 Py_ssize_t i;
3296                 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3297                         result = _string_tailmatch(self,
3298                                         PyTuple_GET_ITEM(subobj, i),
3299                                         start, end, +1);
3300                         if (result == -1)
3301                                 return NULL;
3302                         else if (result) {
3303                                 Py_RETURN_TRUE;
3304                         }
3305                 }
3306                 Py_RETURN_FALSE;
3307         }
3308         result = _string_tailmatch(self, subobj, start, end, +1);
3309         if (result == -1)
3310                 return NULL;
3311         else
3312                 return PyBool_FromLong(result);
3313 }
3314
3315
3316 PyDoc_STRVAR(encode__doc__,
3317 "S.encode([encoding[,errors]]) -> object\n\
3318 \n\
3319 Encodes S using the codec registered for encoding. encoding defaults\n\
3320 to the default encoding. errors may be given to set a different error\n\
3321 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3322 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3323 'xmlcharrefreplace' as well as any other name registered with\n\
3324 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3325
3326 static PyObject *
3327 string_encode(PyStringObject *self, PyObject *args)
3328 {
3329     char *encoding = NULL;
3330     char *errors = NULL;
3331     PyObject *v;
3332
3333     if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3334         return NULL;
3335     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3336     if (v == NULL)
3337         goto onError;
3338     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3339         PyErr_Format(PyExc_TypeError,
3340                      "encoder did not return a string/unicode object "
3341                      "(type=%.400s)",
3342                      Py_TYPE(v)->tp_name);
3343         Py_DECREF(v);
3344         return NULL;
3345     }
3346     return v;
3347
3348  onError:
3349     return NULL;
3350 }
3351
3352
3353 PyDoc_STRVAR(decode__doc__,
3354 "S.decode([encoding[,errors]]) -> object\n\
3355 \n\
3356 Decodes S using the codec registered for encoding. encoding defaults\n\
3357 to the default encoding. errors may be given to set a different error\n\
3358 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3359 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3360 as well as any other name registerd with codecs.register_error that is\n\
3361 able to handle UnicodeDecodeErrors.");
3362
3363 static PyObject *
3364 string_decode(PyStringObject *self, PyObject *args)
3365 {
3366     char *encoding = NULL;
3367     char *errors = NULL;
3368     PyObject *v;
3369
3370     if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3371         return NULL;
3372     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3373     if (v == NULL)
3374         goto onError;
3375     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3376         PyErr_Format(PyExc_TypeError,
3377                      "decoder did not return a string/unicode object "
3378                      "(type=%.400s)",
3379                      Py_TYPE(v)->tp_name);
3380         Py_DECREF(v);
3381         return NULL;
3382     }
3383     return v;
3384
3385  onError:
3386     return NULL;
3387 }
3388
3389
3390 PyDoc_STRVAR(expandtabs__doc__,
3391 "S.expandtabs([tabsize]) -> string\n\
3392 \n\
3393 Return a copy of S where all tab characters are expanded using spaces.\n\
3394 If tabsize is not given, a tab size of 8 characters is assumed.");
3395
3396 static PyObject*
3397 string_expandtabs(PyStringObject *self, PyObject *args)
3398 {
3399     const char *e, *p, *qe;
3400     char *q;
3401     Py_ssize_t i, j, incr;
3402     PyObject *u;
3403     int tabsize = 8;
3404
3405     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3406         return NULL;
3407
3408     /* First pass: determine size of output string */
3409     i = 0; /* chars up to and including most recent \n or \r */
3410     j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3411     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3412     for (p = PyString_AS_STRING(self); p < e; p++)
3413         if (*p == '\t') {
3414             if (tabsize > 0) {
3415                 incr = tabsize - (j % tabsize);
3416                 if (j > PY_SSIZE_T_MAX - incr)
3417                     goto overflow1;
3418                 j += incr;
3419             }
3420         }
3421         else {
3422             if (j > PY_SSIZE_T_MAX - 1)
3423                 goto overflow1;
3424             j++;
3425             if (*p == '\n' || *p == '\r') {
3426                 if (i > PY_SSIZE_T_MAX - j)
3427                     goto overflow1;
3428                 i += j;
3429                 j = 0;
3430             }
3431         }
3432
3433     if (i > PY_SSIZE_T_MAX - j)
3434         goto overflow1;
3435
3436     /* Second pass: create output string and fill it */
3437     u = PyString_FromStringAndSize(NULL, i + j);
3438     if (!u)
3439         return NULL;
3440
3441     j = 0; /* same as in first pass */
3442     q = PyString_AS_STRING(u); /* next output char */
3443     qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3444
3445     for (p = PyString_AS_STRING(self); p < e; p++)
3446         if (*p == '\t') {
3447             if (tabsize > 0) {
3448                 i = tabsize - (j % tabsize);
3449                 j += i;
3450                 while (i--) {
3451                     if (q >= qe)
3452                         goto overflow2;
3453                     *q++ = ' ';
3454                 }
3455             }
3456         }
3457         else {
3458             if (q >= qe)
3459                 goto overflow2;
3460             *q++ = *p;
3461             j++;
3462             if (*p == '\n' || *p == '\r')
3463                 j = 0;
3464         }
3465
3466     return u;
3467
3468   overflow2:
3469     Py_DECREF(u);
3470   overflow1:
3471     PyErr_SetString(PyExc_OverflowError, "new string is too long");
3472     return NULL;
3473 }
3474
3475 Py_LOCAL_INLINE(PyObject *)
3476 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3477 {
3478     PyObject *u;
3479
3480     if (left < 0)
3481         left = 0;
3482     if (right < 0)
3483         right = 0;
3484
3485     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3486         Py_INCREF(self);
3487         return (PyObject *)self;
3488     }
3489
3490     u = PyString_FromStringAndSize(NULL,
3491                                    left + PyString_GET_SIZE(self) + right);
3492     if (u) {
3493         if (left)
3494             memset(PyString_AS_STRING(u), fill, left);
3495         Py_MEMCPY(PyString_AS_STRING(u) + left,
3496                PyString_AS_STRING(self),
3497                PyString_GET_SIZE(self));
3498         if (right)
3499             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3500                    fill, right);
3501     }
3502
3503     return u;
3504 }
3505
3506 PyDoc_STRVAR(ljust__doc__,
3507 "S.ljust(width[, fillchar]) -> string\n"
3508 "\n"
3509 "Return S left justified in a string of length width. Padding is\n"
3510 "done using the specified fill character (default is a space).");
3511
3512 static PyObject *
3513 string_ljust(PyStringObject *self, PyObject *args)
3514 {
3515     Py_ssize_t width;
3516     char fillchar = ' ';
3517
3518     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3519         return NULL;
3520
3521     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3522         Py_INCREF(self);
3523         return (PyObject*) self;
3524     }
3525
3526     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3527 }
3528
3529
3530 PyDoc_STRVAR(rjust__doc__,
3531 "S.rjust(width[, fillchar]) -> string\n"
3532 "\n"
3533 "Return S right justified in a string of length width. Padding is\n"
3534 "done using the specified fill character (default is a space)");
3535
3536 static PyObject *
3537 string_rjust(PyStringObject *self, PyObject *args)
3538 {
3539     Py_ssize_t width;
3540     char fillchar = ' ';
3541
3542     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3543         return NULL;
3544
3545     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3546         Py_INCREF(self);
3547         return (PyObject*) self;
3548     }
3549
3550     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3551 }
3552
3553
3554 PyDoc_STRVAR(center__doc__,
3555 "S.center(width[, fillchar]) -> string\n"
3556 "\n"
3557 "Return S centered in a string of length width. Padding is\n"
3558 "done using the specified fill character (default is a space)");
3559
3560 static PyObject *
3561 string_center(PyStringObject *self, PyObject *args)
3562 {
3563     Py_ssize_t marg, left;
3564     Py_ssize_t width;
3565     char fillchar = ' ';
3566
3567     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3568         return NULL;
3569
3570     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3571         Py_INCREF(self);
3572         return (PyObject*) self;
3573     }
3574
3575     marg = width - PyString_GET_SIZE(self);
3576     left = marg / 2 + (marg & width & 1);
3577
3578     return pad(self, left, marg - left, fillchar);
3579 }
3580
3581 PyDoc_STRVAR(zfill__doc__,
3582 "S.zfill(width) -> string\n"
3583 "\n"
3584 "Pad a numeric string S with zeros on the left, to fill a field\n"
3585 "of the specified width.  The string S is never truncated.");
3586
3587 static PyObject *
3588 string_zfill(PyStringObject *self, PyObject *args)
3589 {
3590     Py_ssize_t fill;
3591     PyObject *s;
3592     char *p;
3593     Py_ssize_t width;
3594
3595     if (!PyArg_ParseTuple(args, "n:zfill", &width))
3596         return NULL;
3597
3598     if (PyString_GET_SIZE(self) >= width) {
3599         if (PyString_CheckExact(self)) {
3600             Py_INCREF(self);
3601             return (PyObject*) self;
3602         }
3603         else
3604             return PyString_FromStringAndSize(
3605                 PyString_AS_STRING(self),
3606                 PyString_GET_SIZE(self)
3607             );
3608     }
3609
3610     fill = width - PyString_GET_SIZE(self);
3611
3612     s = pad(self, fill, 0, '0');
3613
3614     if (s == NULL)
3615         return NULL;
3616
3617     p = PyString_AS_STRING(s);
3618     if (p[fill] == '+' || p[fill] == '-') {
3619         /* move sign to beginning of string */
3620         p[0] = p[fill];
3621         p[fill] = '0';
3622     }
3623
3624     return (PyObject*) s;
3625 }
3626
3627 PyDoc_STRVAR(isspace__doc__,
3628 "S.isspace() -> bool\n\
3629 \n\
3630 Return True if all characters in S are whitespace\n\
3631 and there is at least one character in S, False otherwise.");
3632
3633 static PyObject*
3634 string_isspace(PyStringObject *self)
3635 {
3636     register const unsigned char *p
3637         = (unsigned char *) PyString_AS_STRING(self);
3638     register const unsigned char *e;
3639
3640     /* Shortcut for single character strings */
3641     if (PyString_GET_SIZE(self) == 1 &&
3642         isspace(*p))
3643         return PyBool_FromLong(1);
3644
3645     /* Special case for empty strings */
3646     if (PyString_GET_SIZE(self) == 0)
3647         return PyBool_FromLong(0);
3648
3649     e = p + PyString_GET_SIZE(self);
3650     for (; p < e; p++) {
3651         if (!isspace(*p))
3652             return PyBool_FromLong(0);
3653     }
3654     return PyBool_FromLong(1);
3655 }
3656
3657
3658 PyDoc_STRVAR(isalpha__doc__,
3659 "S.isalpha() -> bool\n\
3660 \n\
3661 Return True if all characters in S are alphabetic\n\
3662 and there is at least one character in S, False otherwise.");
3663
3664 static PyObject*
3665 string_isalpha(PyStringObject *self)
3666 {
3667     register const unsigned char *p
3668         = (unsigned char *) PyString_AS_STRING(self);
3669     register const unsigned char *e;
3670
3671     /* Shortcut for single character strings */
3672     if (PyString_GET_SIZE(self) == 1 &&
3673         isalpha(*p))
3674         return PyBool_FromLong(1);
3675
3676     /* Special case for empty strings */
3677     if (PyString_GET_SIZE(self) == 0)
3678         return PyBool_FromLong(0);
3679
3680     e = p + PyString_GET_SIZE(self);
3681     for (; p < e; p++) {
3682         if (!isalpha(*p))
3683             return PyBool_FromLong(0);
3684     }
3685     return PyBool_FromLong(1);
3686 }
3687
3688
3689 PyDoc_STRVAR(isalnum__doc__,
3690 "S.isalnum() -> bool\n\
3691 \n\
3692 Return True if all characters in S are alphanumeric\n\
3693 and there is at least one character in S, False otherwise.");
3694
3695 static PyObject*
3696 string_isalnum(PyStringObject *self)
3697 {
3698     register const unsigned char *p
3699         = (unsigned char *) PyString_AS_STRING(self);
3700     register const unsigned char *e;
3701
3702     /* Shortcut for single character strings */
3703     if (PyString_GET_SIZE(self) == 1 &&
3704         isalnum(*p))
3705         return PyBool_FromLong(1);
3706
3707     /* Special case for empty strings */
3708     if (PyString_GET_SIZE(self) == 0)
3709         return PyBool_FromLong(0);
3710
3711     e = p + PyString_GET_SIZE(self);
3712     for (; p < e; p++) {
3713         if (!isalnum(*p))
3714             return PyBool_FromLong(0);
3715     }
3716     return PyBool_FromLong(1);
3717 }
3718
3719
3720 PyDoc_STRVAR(isdigit__doc__,
3721 "S.isdigit() -> bool\n\
3722 \n\
3723 Return True if all characters in S are digits\n\
3724 and there is at least one character in S, False otherwise.");
3725
3726 static PyObject*
3727 string_isdigit(PyStringObject *self)
3728 {
3729     register const unsigned char *p
3730         = (unsigned char *) PyString_AS_STRING(self);
3731     register const unsigned char *e;
3732
3733     /* Shortcut for single character strings */
3734     if (PyString_GET_SIZE(self) == 1 &&
3735         isdigit(*p))
3736         return PyBool_FromLong(1);
3737
3738     /* Special case for empty strings */
3739     if (PyString_GET_SIZE(self) == 0)
3740         return PyBool_FromLong(0);
3741
3742     e = p + PyString_GET_SIZE(self);
3743     for (; p < e; p++) {
3744         if (!isdigit(*p))
3745             return PyBool_FromLong(0);
3746     }
3747     return PyBool_FromLong(1);
3748 }
3749
3750
3751 PyDoc_STRVAR(islower__doc__,
3752 "S.islower() -> bool\n\
3753 \n\
3754 Return True if all cased characters in S are lowercase and there is\n\
3755 at least one cased character in S, False otherwise.");
3756
3757 static PyObject*
3758 string_islower(PyStringObject *self)
3759 {
3760     register const unsigned char *p
3761         = (unsigned char *) PyString_AS_STRING(self);
3762     register const unsigned char *e;
3763     int cased;
3764
3765     /* Shortcut for single character strings */
3766     if (PyString_GET_SIZE(self) == 1)
3767         return PyBool_FromLong(islower(*p) != 0);
3768
3769     /* Special case for empty strings */
3770     if (PyString_GET_SIZE(self) == 0)
3771         return PyBool_FromLong(0);
3772
3773     e = p + PyString_GET_SIZE(self);
3774     cased = 0;
3775     for (; p < e; p++) {
3776         if (isupper(*p))
3777             return PyBool_FromLong(0);
3778         else if (!cased && islower(*p))
3779             cased = 1;
3780     }
3781     return PyBool_FromLong(cased);
3782 }
3783
3784
3785 PyDoc_STRVAR(isupper__doc__,
3786 "S.isupper() -> bool\n\
3787 \n\
3788 Return True if all cased characters in S are uppercase and there is\n\
3789 at least one cased character in S, False otherwise.");
3790
3791 static PyObject*
3792 string_isupper(PyStringObject *self)
3793 {
3794     register const unsigned char *p
3795         = (unsigned char *) PyString_AS_STRING(self);
3796     register const unsigned char *e;
3797     int cased;
3798
3799     /* Shortcut for single character strings */
3800     if (PyString_GET_SIZE(self) == 1)
3801         return PyBool_FromLong(isupper(*p) != 0);
3802
3803     /* Special case for empty strings */
3804     if (PyString_GET_SIZE(self) == 0)
3805         return PyBool_FromLong(0);
3806
3807     e = p + PyString_GET_SIZE(self);
3808     cased = 0;
3809     for (; p < e; p++) {
3810         if (islower(*p))
3811             return PyBool_FromLong(0);
3812         else if (!cased && isupper(*p))
3813             cased = 1;
3814     }
3815     return PyBool_FromLong(cased);
3816 }
3817
3818
3819 PyDoc_STRVAR(istitle__doc__,
3820 "S.istitle() -> bool\n\
3821 \n\
3822 Return True if S is a titlecased string and there is at least one\n\
3823 character in S, i.e. uppercase characters may only follow uncased\n\
3824 characters and lowercase characters only cased ones. Return False\n\
3825 otherwise.");
3826
3827 static PyObject*
3828 string_istitle(PyStringObject *self, PyObject *uncased)
3829 {
3830     register const unsigned char *p
3831         = (unsigned char *) PyString_AS_STRING(self);
3832     register const unsigned char *e;
3833     int cased, previous_is_cased;
3834
3835     /* Shortcut for single character strings */
3836     if (PyString_GET_SIZE(self) == 1)
3837         return PyBool_FromLong(isupper(*p) != 0);
3838
3839     /* Special case for empty strings */
3840     if (PyString_GET_SIZE(self) == 0)
3841         return PyBool_FromLong(0);
3842
3843     e = p + PyString_GET_SIZE(self);
3844     cased = 0;
3845     previous_is_cased = 0;
3846     for (; p < e; p++) {
3847         register const unsigned char ch = *p;
3848
3849         if (isupper(ch)) {
3850             if (previous_is_cased)
3851                 return PyBool_FromLong(0);
3852             previous_is_cased = 1;
3853             cased = 1;
3854         }
3855         else if (islower(ch)) {
3856             if (!previous_is_cased)
3857                 return PyBool_FromLong(0);
3858             previous_is_cased = 1;
3859             cased = 1;
3860         }
3861         else
3862             previous_is_cased = 0;
3863     }
3864     return PyBool_FromLong(cased);
3865 }
3866
3867
3868 PyDoc_STRVAR(splitlines__doc__,
3869 "S.splitlines([keepends]) -> list of strings\n\
3870 \n\
3871 Return a list of the lines in S, breaking at line boundaries.\n\
3872 Line breaks are not included in the resulting list unless keepends\n\
3873 is given and true.");
3874
3875 static PyObject*
3876 string_splitlines(PyStringObject *self, PyObject *args)
3877 {
3878     register Py_ssize_t i;
3879     register Py_ssize_t j;
3880     Py_ssize_t len;
3881     int keepends = 0;
3882     PyObject *list;
3883     PyObject *str;
3884     char *data;
3885
3886     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3887         return NULL;
3888
3889     data = PyString_AS_STRING(self);
3890     len = PyString_GET_SIZE(self);
3891
3892     /* This does not use the preallocated list because splitlines is
3893        usually run with hundreds of newlines.  The overhead of
3894        switching between PyList_SET_ITEM and append causes about a
3895        2-3% slowdown for that common case.  A smarter implementation
3896        could move the if check out, so the SET_ITEMs are done first
3897        and the appends only done when the prealloc buffer is full.
3898        That's too much work for little gain.*/
3899
3900     list = PyList_New(0);
3901     if (!list)
3902         goto onError;
3903
3904     for (i = j = 0; i < len; ) {
3905         Py_ssize_t eol;
3906
3907         /* Find a line and append it */
3908         while (i < len && data[i] != '\n' && data[i] != '\r')
3909             i++;
3910
3911         /* Skip the line break reading CRLF as one line break */
3912         eol = i;
3913         if (i < len) {
3914             if (data[i] == '\r' && i + 1 < len &&
3915                 data[i+1] == '\n')
3916                 i += 2;
3917             else
3918                 i++;
3919             if (keepends)
3920                 eol = i;
3921         }
3922         SPLIT_APPEND(data, j, eol);
3923         j = i;
3924     }
3925     if (j < len) {
3926         SPLIT_APPEND(data, j, len);
3927     }
3928
3929     return list;
3930
3931  onError:
3932     Py_XDECREF(list);
3933     return NULL;
3934 }
3935
3936 PyDoc_STRVAR(sizeof__doc__,
3937 "S.__sizeof__() -> size of S in memory, in bytes");
3938
3939 static PyObject *
3940 string_sizeof(PyStringObject *v)
3941 {
3942         Py_ssize_t res;
3943         res = sizeof(PyStringObject) + v->ob_size * v->ob_type->tp_itemsize;
3944         return PyInt_FromSsize_t(res);
3945 }
3946
3947 #undef SPLIT_APPEND
3948 #undef SPLIT_ADD
3949 #undef MAX_PREALLOC
3950 #undef PREALLOC_SIZE
3951
3952 static PyObject *
3953 string_getnewargs(PyStringObject *v)
3954 {
3955         return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3956 }
3957
3958
3959 #include "stringlib/string_format.h"
3960
3961 PyDoc_STRVAR(format__doc__,
3962 "S.format(*args, **kwargs) -> unicode\n\
3963 \n\
3964 ");
3965
3966 static PyObject *
3967 string__format__(PyObject* self, PyObject* args)
3968 {
3969     PyObject *format_spec;
3970     PyObject *result = NULL;
3971     PyObject *tmp = NULL;
3972
3973     /* If 2.x, convert format_spec to the same type as value */
3974     /* This is to allow things like u''.format('') */
3975     if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3976         goto done;
3977     if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3978         PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3979                      "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3980         goto done;
3981     }
3982     tmp = PyObject_Str(format_spec);
3983     if (tmp == NULL)
3984         goto done;
3985     format_spec = tmp;
3986
3987     result = _PyBytes_FormatAdvanced(self,
3988                                      PyString_AS_STRING(format_spec),
3989                                      PyString_GET_SIZE(format_spec));
3990 done:
3991     Py_XDECREF(tmp);
3992     return result;
3993 }
3994
3995 PyDoc_STRVAR(p_format__doc__,
3996 "S.__format__(format_spec) -> unicode\n\
3997 \n\
3998 ");
3999
4000
4001 static PyMethodDef
4002 string_methods[] = {
4003         /* Counterparts of the obsolete stropmodule functions; except
4004            string.maketrans(). */
4005         {"join", (PyCFunction)string_join, METH_O, join__doc__},
4006         {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
4007         {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
4008         {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
4009         {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
4010         {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
4011         {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
4012         {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
4013         {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
4014         {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
4015         {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
4016         {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
4017         {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
4018          capitalize__doc__},
4019         {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
4020         {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
4021          endswith__doc__},
4022         {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
4023         {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
4024         {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
4025         {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
4026         {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
4027         {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
4028         {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
4029         {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
4030         {"rpartition", (PyCFunction)string_rpartition, METH_O,
4031          rpartition__doc__},
4032         {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
4033          startswith__doc__},
4034         {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
4035         {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
4036          swapcase__doc__},
4037         {"translate", (PyCFunction)string_translate, METH_VARARGS,
4038          translate__doc__},
4039         {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
4040         {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
4041         {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
4042         {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
4043         {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
4044         {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
4045         {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
4046         {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
4047         {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
4048         {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
4049         {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
4050         {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
4051          expandtabs__doc__},
4052         {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4053          splitlines__doc__},
4054         {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
4055          sizeof__doc__},
4056         {"__getnewargs__",      (PyCFunction)string_getnewargs, METH_NOARGS},
4057         {NULL,     NULL}                     /* sentinel */
4058 };
4059
4060 static PyObject *
4061 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
4062
4063 static PyObject *
4064 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4065 {
4066         PyObject *x = NULL;
4067         static char *kwlist[] = {"object", 0};
4068
4069         if (type != &PyString_Type)
4070                 return str_subtype_new(type, args, kwds);
4071         if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4072                 return NULL;
4073         if (x == NULL)
4074                 return PyString_FromString("");
4075         return PyObject_Str(x);
4076 }
4077
4078 static PyObject *
4079 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4080 {
4081         PyObject *tmp, *pnew;
4082         Py_ssize_t n;
4083
4084         assert(PyType_IsSubtype(type, &PyString_Type));
4085         tmp = string_new(&PyString_Type, args, kwds);
4086         if (tmp == NULL)
4087                 return NULL;
4088         assert(PyString_CheckExact(tmp));
4089         n = PyString_GET_SIZE(tmp);
4090         pnew = type->tp_alloc(type, n);
4091         if (pnew != NULL) {
4092                 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4093                 ((PyStringObject *)pnew)->ob_shash =
4094                         ((PyStringObject *)tmp)->ob_shash;
4095                 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
4096         }
4097         Py_DECREF(tmp);
4098         return pnew;
4099 }
4100
4101 static PyObject *
4102 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4103 {
4104         PyErr_SetString(PyExc_TypeError,
4105                         "The basestring type cannot be instantiated");
4106         return NULL;
4107 }
4108
4109 static PyObject *
4110 string_mod(PyObject *v, PyObject *w)
4111 {
4112         if (!PyString_Check(v)) {
4113                 Py_INCREF(Py_NotImplemented);
4114                 return Py_NotImplemented;
4115         }
4116         return PyString_Format(v, w);
4117 }
4118
4119 PyDoc_STRVAR(basestring_doc,
4120 "Type basestring cannot be instantiated; it is the base for str and unicode.");
4121
4122 static PyNumberMethods string_as_number = {
4123         0,                      /*nb_add*/
4124         0,                      /*nb_subtract*/
4125         0,                      /*nb_multiply*/
4126         0,                      /*nb_divide*/
4127         string_mod,             /*nb_remainder*/
4128 };
4129
4130
4131 PyTypeObject PyBaseString_Type = {
4132         PyVarObject_HEAD_INIT(&PyType_Type, 0)
4133         "basestring",
4134         0,
4135         0,
4136         0,                                      /* tp_dealloc */
4137         0,                                      /* tp_print */
4138         0,                                      /* tp_getattr */
4139         0,                                      /* tp_setattr */
4140         0,                                      /* tp_compare */
4141         0,                                      /* tp_repr */
4142         0,                                      /* tp_as_number */
4143         0,                                      /* tp_as_sequence */
4144         0,                                      /* tp_as_mapping */
4145         0,                                      /* tp_hash */
4146         0,                                      /* tp_call */
4147         0,                                      /* tp_str */
4148         0,                                      /* tp_getattro */
4149         0,                                      /* tp_setattro */
4150         0,                                      /* tp_as_buffer */
4151         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4152         basestring_doc,                         /* tp_doc */
4153         0,                                      /* tp_traverse */
4154         0,                                      /* tp_clear */
4155         0,                                      /* tp_richcompare */
4156         0,                                      /* tp_weaklistoffset */
4157         0,                                      /* tp_iter */
4158         0,                                      /* tp_iternext */
4159         0,                                      /* tp_methods */
4160         0,                                      /* tp_members */
4161         0,                                      /* tp_getset */
4162         &PyBaseObject_Type,                     /* tp_base */
4163         0,                                      /* tp_dict */
4164         0,                                      /* tp_descr_get */
4165         0,                                      /* tp_descr_set */
4166         0,                                      /* tp_dictoffset */
4167         0,                                      /* tp_init */
4168         0,                                      /* tp_alloc */
4169         basestring_new,                         /* tp_new */
4170         0,                                      /* tp_free */
4171 };
4172
4173 PyDoc_STRVAR(string_doc,
4174 "str(object) -> string\n\
4175 \n\
4176 Return a nice string representation of the object.\n\
4177 If the argument is a string, the return value is the same object.");
4178
/* Type object for the built-in "str" type.
 *
 * The initializer is positional: every entry must stay in the slot order
 * of PyTypeObject, so the trailing comment on each line names the slot it
 * fills.  A 0 entry leaves that slot empty.
 */
PyTypeObject PyString_Type = {
        PyVarObject_HEAD_INIT(&PyType_Type, 0)
        "str",                                  /* tp_name */
        sizeof(PyStringObject),                 /* tp_basicsize */
        sizeof(char),                           /* tp_itemsize */
        string_dealloc,                         /* tp_dealloc */
        (printfunc)string_print,                /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        string_repr,                            /* tp_repr */
        &string_as_number,                      /* tp_as_number */
        &string_as_sequence,                    /* tp_as_sequence */
        &string_as_mapping,                     /* tp_as_mapping */
        (hashfunc)string_hash,                  /* tp_hash */
        0,                                      /* tp_call */
        string_str,                             /* tp_str */
        PyObject_GenericGetAttr,                /* tp_getattro */
        0,                                      /* tp_setattro */
        &string_as_buffer,                      /* tp_as_buffer */
        /* The type may be subclassed (BASETYPE) and is flagged as a
           string subclass (STRING_SUBCLASS). */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
                Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
                Py_TPFLAGS_HAVE_NEWBUFFER,      /* tp_flags */
        string_doc,                             /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        (richcmpfunc)string_richcompare,        /* tp_richcompare */
        0,                                      /* tp_weaklistoffset */
        0,                                      /* tp_iter */
        0,                                      /* tp_iternext */
        string_methods,                         /* tp_methods */
        0,                                      /* tp_members */
        0,                                      /* tp_getset */
        &PyBaseString_Type,                     /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        0,                                      /* tp_init */
        0,                                      /* tp_alloc */
        string_new,                             /* tp_new */
        PyObject_Del,                           /* tp_free */
};
4222
4223 void
4224 PyString_Concat(register PyObject **pv, register PyObject *w)
4225 {
4226         register PyObject *v;
4227         if (*pv == NULL)
4228                 return;
4229         if (w == NULL || !PyString_Check(*pv)) {
4230                 Py_DECREF(*pv);
4231                 *pv = NULL;
4232                 return;
4233         }
4234         v = string_concat((PyStringObject *) *pv, w);
4235         Py_DECREF(*pv);
4236         *pv = v;
4237 }
4238
4239 void
4240 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
4241 {
4242         PyString_Concat(pv, w);
4243         Py_XDECREF(w);
4244 }
4245
4246
4247 /* The following function breaks the notion that strings are immutable:
4248    it changes the size of a string.  We get away with this only if there
4249    is only one module referencing the object.  You can also think of it
4250    as creating a new string object and destroying the old one, only
4251    more efficiently.  In any case, don't use this if the string may
4252    already be known to some other part of the code...
4253    Note that if there's not enough memory to resize the string, the original
4254    string object at *pv is deallocated, *pv is set to NULL, an "out of
4255    memory" exception is set, and -1 is returned.  Else (on success) 0 is
4256    returned, and the value in *pv may or may not be the same as on input.
4257    As always, an extra byte is allocated for a trailing \0 byte (newsize
4258    does *not* include that), and a trailing \0 byte is stored.
4259 */
4260
4261 int
4262 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
4263 {
4264         register PyObject *v;
4265         register PyStringObject *sv;
4266         v = *pv;
4267         if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4268             PyString_CHECK_INTERNED(v)) {
4269                 *pv = 0;
4270                 Py_DECREF(v);
4271                 PyErr_BadInternalCall();
4272                 return -1;
4273         }
4274         /* XXX UNREF/NEWREF interface should be more symmetrical */
4275         _Py_DEC_REFTOTAL;
4276         _Py_ForgetReference(v);
4277         *pv = (PyObject *)
4278                 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
4279         if (*pv == NULL) {
4280                 PyObject_Del(v);
4281                 PyErr_NoMemory();
4282                 return -1;
4283         }
4284         _Py_NewReference(*pv);
4285         sv = (PyStringObject *) *pv;
4286         Py_SIZE(sv) = newsize;
4287         sv->ob_sval[newsize] = '\0';
4288         sv->ob_shash = -1;      /* invalidate cached hash value */
4289         return 0;
4290 }
4291
4292 /* Helpers for formatstring */
4293
4294 Py_LOCAL_INLINE(PyObject *)
4295 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4296 {
4297         Py_ssize_t argidx = *p_argidx;
4298         if (argidx < arglen) {
4299                 (*p_argidx)++;
4300                 if (arglen < 0)
4301                         return args;
4302                 else
4303                         return PyTuple_GetItem(args, argidx);
4304         }
4305         PyErr_SetString(PyExc_TypeError,
4306                         "not enough arguments for format string");
4307         return NULL;
4308 }
4309
/* Flag bits collected while a '%' conversion specification is parsed.
 * Each bit stands for the flag character noted next to it.
 */
#define F_LJUST (1 << 0)        /* '-' */
#define F_SIGN  (1 << 1)        /* '+' */
#define F_BLANK (1 << 2)        /* ' ' */
#define F_ALT   (1 << 3)        /* '#' */
#define F_ZERO  (1 << 4)        /* '0' */
4322
4323 Py_LOCAL_INLINE(int)
4324 formatfloat(char *buf, size_t buflen, int flags,
4325             int prec, int type, PyObject *v)
4326 {
4327         /* fmt = '%#.' + `prec` + `type`
4328            worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4329         char fmt[20];
4330         double x;
4331         x = PyFloat_AsDouble(v);
4332         if (x == -1.0 && PyErr_Occurred()) {
4333                 PyErr_Format(PyExc_TypeError, "float argument required, "
4334                              "not %.200s", Py_TYPE(v)->tp_name);
4335                 return -1;
4336         }
4337         if (prec < 0)
4338                 prec = 6;
4339         if (type == 'f' && fabs(x)/1e25 >= 1e25)
4340                 type = 'g';
4341         /* Worst case length calc to ensure no buffer overrun:
4342
4343            'g' formats:
4344              fmt = %#.<prec>g
4345              buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4346                 for any double rep.)
4347              len = 1 + prec + 1 + 2 + 5 = 9 + prec
4348
4349            'f' formats:
4350              buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4351              len = 1 + 50 + 1 + prec = 52 + prec
4352
4353            If prec=0 the effective precision is 1 (the leading digit is
4354            always given), therefore increase the length by one.
4355
4356         */
4357         if (((type == 'g' || type == 'G') &&
4358               buflen <= (size_t)10 + (size_t)prec) ||
4359             (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4360                 PyErr_SetString(PyExc_OverflowError,
4361                         "formatted float is too long (precision too large?)");
4362                 return -1;
4363         }
4364         PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4365                       (flags&F_ALT) ? "#" : "",
4366                       prec, type);
4367         PyOS_ascii_formatd(buf, buflen, fmt, x);
4368         return (int)strlen(buf);
4369 }
4370
4371 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4372  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
4373  * Python's regular ints.
4374  * Return value:  a new PyString*, or NULL if error.
4375  *  .  *pbuf is set to point into it,
4376  *     *plen set to the # of chars following that.
4377  *     Caller must decref it when done using pbuf.
4378  *     The string starting at *pbuf is of the form
4379  *         "-"? ("0x" | "0X")? digit+
4380  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
4381  *         set in flags.  The case of hex digits will be correct,
4382  *     There will be at least prec digits, zero-filled on the left if
4383  *         necessary to get that many.
4384  * val          object to be converted
4385  * flags        bitmask of format flags; only F_ALT is looked at
4386  * prec         minimum number of digits; 0-fill on left if needed
4387  * type         a character in [duoxX]; u acts the same as d
4388  *
4389  * CAUTION:  o, x and X conversions on regular ints can never
4390  * produce a '-' sign, but can for Python's unbounded ints.
4391  */
4392 PyObject*
4393 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4394                      char **pbuf, int *plen)
4395 {
4396         PyObject *result = NULL;
4397         char *buf;
4398         Py_ssize_t i;
4399         int sign;       /* 1 if '-', else 0 */
4400         int len;        /* number of characters */
4401         Py_ssize_t llen;
4402         int numdigits;  /* len == numnondigits + numdigits */
4403         int numnondigits = 0;
4404
4405         switch (type) {
4406         case 'd':
4407         case 'u':
4408                 result = Py_TYPE(val)->tp_str(val);
4409                 break;
4410         case 'o':
4411                 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4412                 break;
4413         case 'x':
4414         case 'X':
4415                 numnondigits = 2;
4416                 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4417                 break;
4418         default:
4419                 assert(!"'type' not in [duoxX]");
4420         }
4421         if (!result)
4422                 return NULL;
4423
4424         buf = PyString_AsString(result);
4425         if (!buf) {
4426                 Py_DECREF(result);
4427                 return NULL;
4428         }
4429
4430         /* To modify the string in-place, there can only be one reference. */
4431         if (Py_REFCNT(result) != 1) {
4432                 PyErr_BadInternalCall();
4433                 return NULL;
4434         }
4435         llen = PyString_Size(result);
4436         if (llen > INT_MAX) {
4437                 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4438                 return NULL;
4439         }
4440         len = (int)llen;
4441         if (buf[len-1] == 'L') {
4442                 --len;
4443                 buf[len] = '\0';
4444         }
4445         sign = buf[0] == '-';
4446         numnondigits += sign;
4447         numdigits = len - numnondigits;
4448         assert(numdigits > 0);
4449
4450         /* Get rid of base marker unless F_ALT */
4451         if ((flags & F_ALT) == 0) {
4452                 /* Need to skip 0x, 0X or 0. */
4453                 int skipped = 0;
4454                 switch (type) {
4455                 case 'o':
4456                         assert(buf[sign] == '0');
4457                         /* If 0 is only digit, leave it alone. */
4458                         if (numdigits > 1) {
4459                                 skipped = 1;
4460                                 --numdigits;
4461                         }
4462                         break;
4463                 case 'x':
4464                 case 'X':
4465                         assert(buf[sign] == '0');
4466                         assert(buf[sign + 1] == 'x');
4467                         skipped = 2;
4468                         numnondigits -= 2;
4469                         break;
4470                 }
4471                 if (skipped) {
4472                         buf += skipped;
4473                         len -= skipped;
4474                         if (sign)
4475                                 buf[0] = '-';
4476                 }
4477                 assert(len == numnondigits + numdigits);
4478                 assert(numdigits > 0);
4479         }
4480
4481         /* Fill with leading zeroes to meet minimum width. */
4482         if (prec > numdigits) {
4483                 PyObject *r1 = PyString_FromStringAndSize(NULL,
4484                                         numnondigits + prec);
4485                 char *b1;
4486                 if (!r1) {
4487                         Py_DECREF(result);
4488                         return NULL;
4489                 }
4490                 b1 = PyString_AS_STRING(r1);
4491                 for (i = 0; i < numnondigits; ++i)
4492                         *b1++ = *buf++;
4493                 for (i = 0; i < prec - numdigits; i++)
4494                         *b1++ = '0';
4495                 for (i = 0; i < numdigits; i++)
4496                         *b1++ = *buf++;
4497                 *b1 = '\0';
4498                 Py_DECREF(result);
4499                 result = r1;
4500                 buf = PyString_AS_STRING(result);
4501                 len = numnondigits + prec;
4502         }
4503
4504         /* Fix up case for hex conversions. */
4505         if (type == 'X') {
4506                 /* Need to convert all lower case letters to upper case.
4507                    and need to convert 0x to 0X (and -0x to -0X). */
4508                 for (i = 0; i < len; i++)
4509                         if (buf[i] >= 'a' && buf[i] <= 'x')
4510                                 buf[i] -= 'a'-'A';
4511         }
4512         *pbuf = buf;
4513         *plen = len;
4514         return result;
4515 }
4516
4517 Py_LOCAL_INLINE(int)
4518 formatint(char *buf, size_t buflen, int flags,
4519           int prec, int type, PyObject *v)
4520 {
4521         /* fmt = '%#.' + `prec` + 'l' + `type`
4522            worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4523            + 1 + 1 = 24 */
4524         char fmt[64];   /* plenty big enough! */
4525         char *sign;
4526         long x;
4527
4528         x = PyInt_AsLong(v);
4529         if (x == -1 && PyErr_Occurred()) {
4530                 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4531                              Py_TYPE(v)->tp_name);
4532                 return -1;
4533         }
4534         if (x < 0 && type == 'u') {
4535                 type = 'd';
4536         }
4537         if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4538                 sign = "-";
4539         else
4540                 sign = "";
4541         if (prec < 0)
4542                 prec = 1;
4543
4544         if ((flags & F_ALT) &&
4545             (type == 'x' || type == 'X')) {
4546                 /* When converting under %#x or %#X, there are a number
4547                  * of issues that cause pain:
4548                  * - when 0 is being converted, the C standard leaves off
4549                  *   the '0x' or '0X', which is inconsistent with other
4550                  *   %#x/%#X conversions and inconsistent with Python's
4551                  *   hex() function
4552                  * - there are platforms that violate the standard and
4553                  *   convert 0 with the '0x' or '0X'
4554                  *   (Metrowerks, Compaq Tru64)
4555                  * - there are platforms that give '0x' when converting
4556                  *   under %#X, but convert 0 in accordance with the
4557                  *   standard (OS/2 EMX)
4558                  *
4559                  * We can achieve the desired consistency by inserting our
4560                  * own '0x' or '0X' prefix, and substituting %x/%X in place
4561                  * of %#x/%#X.
4562                  *
4563                  * Note that this is the same approach as used in
4564                  * formatint() in unicodeobject.c
4565                  */
4566                 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4567                               sign, type, prec, type);
4568         }
4569         else {
4570                 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4571                               sign, (flags&F_ALT) ? "#" : "",
4572                               prec, type);
4573         }
4574
4575         /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4576          * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4577          */
4578         if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4579                 PyErr_SetString(PyExc_OverflowError,
4580                     "formatted integer is too long (precision too large?)");
4581                 return -1;
4582         }
4583         if (sign[0])
4584                 PyOS_snprintf(buf, buflen, fmt, -x);
4585         else
4586                 PyOS_snprintf(buf, buflen, fmt, x);
4587         return (int)strlen(buf);
4588 }
4589
4590 Py_LOCAL_INLINE(int)
4591 formatchar(char *buf, size_t buflen, PyObject *v)
4592 {
4593         /* presume that the buffer is at least 2 characters long */
4594         if (PyString_Check(v)) {
4595                 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4596                         return -1;
4597         }
4598         else {
4599                 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4600                         return -1;
4601         }
4602         buf[1] = '\0';
4603         return 1;
4604 }
4605
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer in which individual
   floats, ints and chars are formatted.  XXX This is a magic number.
   The formatting routines are expected to bounds-check against it so
   that nothing overflows; malloc'ing a buffer of the appropriate size
   for each conversion may be a better solution, but the fixed buffer is
   sufficient for now.
*/
#define FORMATBUFLEN ((size_t)120)
4615
4616 PyObject *
4617 PyString_Format(PyObject *format, PyObject *args)
4618 {
4619         char *fmt, *res;
4620         Py_ssize_t arglen, argidx;
4621         Py_ssize_t reslen, rescnt, fmtcnt;
4622         int args_owned = 0;
4623         PyObject *result, *orig_args;
4624 #ifdef Py_USING_UNICODE
4625         PyObject *v, *w;
4626 #endif
4627         PyObject *dict = NULL;
4628         if (format == NULL || !PyString_Check(format) || args == NULL) {
4629                 PyErr_BadInternalCall();
4630                 return NULL;
4631         }
4632         orig_args = args;
4633         fmt = PyString_AS_STRING(format);
4634         fmtcnt = PyString_GET_SIZE(format);
4635         reslen = rescnt = fmtcnt + 100;
4636         result = PyString_FromStringAndSize((char *)NULL, reslen);
4637         if (result == NULL)
4638                 return NULL;
4639         res = PyString_AsString(result);
4640         if (PyTuple_Check(args)) {
4641                 arglen = PyTuple_GET_SIZE(args);
4642                 argidx = 0;
4643         }
4644         else {
4645                 arglen = -1;
4646                 argidx = -2;
4647         }
4648         if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4649             !PyObject_TypeCheck(args, &PyBaseString_Type))
4650                 dict = args;
4651         while (--fmtcnt >= 0) {
4652                 if (*fmt != '%') {
4653                         if (--rescnt < 0) {
4654                                 rescnt = fmtcnt + 100;
4655                                 reslen += rescnt;
4656                                 if (_PyString_Resize(&result, reslen) < 0)
4657                                         return NULL;
4658                                 res = PyString_AS_STRING(result)
4659                                         + reslen - rescnt;
4660                                 --rescnt;
4661                         }
4662                         *res++ = *fmt++;
4663                 }
4664                 else {
4665                         /* Got a format specifier */
4666                         int flags = 0;
4667                         Py_ssize_t width = -1;
4668                         int prec = -1;
4669                         int c = '\0';
4670                         int fill;
4671                         int isnumok;
4672                         PyObject *v = NULL;
4673                         PyObject *temp = NULL;
4674                         char *pbuf;
4675                         int sign;
4676                         Py_ssize_t len;
4677                         char formatbuf[FORMATBUFLEN];
4678                              /* For format{float,int,char}() */
4679 #ifdef Py_USING_UNICODE
4680                         char *fmt_start = fmt;
4681                         Py_ssize_t argidx_start = argidx;
4682 #endif
4683
4684                         fmt++;
4685                         if (*fmt == '(') {
4686                                 char *keystart;
4687                                 Py_ssize_t keylen;
4688                                 PyObject *key;
4689                                 int pcount = 1;
4690
4691                                 if (dict == NULL) {
4692                                         PyErr_SetString(PyExc_TypeError,
4693                                                  "format requires a mapping");
4694                                         goto error;
4695                                 }
4696                                 ++fmt;
4697                                 --fmtcnt;
4698                                 keystart = fmt;
4699                                 /* Skip over balanced parentheses */
4700                                 while (pcount > 0 && --fmtcnt >= 0) {
4701                                         if (*fmt == ')')
4702                                                 --pcount;
4703                                         else if (*fmt == '(')
4704                                                 ++pcount;
4705                                         fmt++;
4706                                 }
4707                                 keylen = fmt - keystart - 1;
4708                                 if (fmtcnt < 0 || pcount > 0) {
4709                                         PyErr_SetString(PyExc_ValueError,
4710                                                    "incomplete format key");
4711                                         goto error;
4712                                 }
4713                                 key = PyString_FromStringAndSize(keystart,
4714                                                                  keylen);
4715                                 if (key == NULL)
4716                                         goto error;
4717                                 if (args_owned) {
4718                                         Py_DECREF(args);
4719                                         args_owned = 0;
4720                                 }
4721                                 args = PyObject_GetItem(dict, key);
4722                                 Py_DECREF(key);
4723                                 if (args == NULL) {
4724                                         goto error;
4725                                 }
4726                                 args_owned = 1;
4727                                 arglen = -1;
4728                                 argidx = -2;
4729                         }
4730                         while (--fmtcnt >= 0) {
4731                                 switch (c = *fmt++) {
4732                                 case '-': flags |= F_LJUST; continue;
4733                                 case '+': flags |= F_SIGN; continue;
4734                                 case ' ': flags |= F_BLANK; continue;
4735                                 case '#': flags |= F_ALT; continue;
4736                                 case '0': flags |= F_ZERO; continue;
4737                                 }
4738                                 break;
4739                         }
4740                         if (c == '*') {
4741                                 v = getnextarg(args, arglen, &argidx);
4742                                 if (v == NULL)
4743                                         goto error;
4744                                 if (!PyInt_Check(v)) {
4745                                         PyErr_SetString(PyExc_TypeError,
4746                                                         "* wants int");
4747                                         goto error;
4748                                 }
4749                                 width = PyInt_AsLong(v);
4750                                 if (width < 0) {
4751                                         flags |= F_LJUST;
4752                                         width = -width;
4753                                 }
4754                                 if (--fmtcnt >= 0)
4755                                         c = *fmt++;
4756                         }
4757                         else if (c >= 0 && isdigit(c)) {
4758                                 width = c - '0';
4759                                 while (--fmtcnt >= 0) {
4760                                         c = Py_CHARMASK(*fmt++);
4761                                         if (!isdigit(c))
4762                                                 break;
4763                                         if ((width*10) / 10 != width) {
4764                                                 PyErr_SetString(
4765                                                         PyExc_ValueError,
4766                                                         "width too big");
4767                                                 goto error;
4768                                         }
4769                                         width = width*10 + (c - '0');
4770                                 }
4771                         }
4772                         if (c == '.') {
4773                                 prec = 0;
4774                                 if (--fmtcnt >= 0)
4775                                         c = *fmt++;
4776                                 if (c == '*') {
4777                                         v = getnextarg(args, arglen, &argidx);
4778                                         if (v == NULL)
4779                                                 goto error;
4780                                         if (!PyInt_Check(v)) {
4781                                                 PyErr_SetString(
4782                                                         PyExc_TypeError,
4783                                                         "* wants int");
4784                                                 goto error;
4785                                         }
4786                                         prec = PyInt_AsLong(v);
4787                                         if (prec < 0)
4788                                                 prec = 0;
4789                                         if (--fmtcnt >= 0)
4790                                                 c = *fmt++;
4791                                 }
4792                                 else if (c >= 0 && isdigit(c)) {
4793                                         prec = c - '0';
4794                                         while (--fmtcnt >= 0) {
4795                                                 c = Py_CHARMASK(*fmt++);
4796                                                 if (!isdigit(c))
4797                                                         break;
4798                                                 if ((prec*10) / 10 != prec) {
4799                                                         PyErr_SetString(
4800                                                             PyExc_ValueError,
4801                                                             "prec too big");
4802                                                         goto error;
4803                                                 }
4804                                                 prec = prec*10 + (c - '0');
4805                                         }
4806                                 }
4807                         } /* prec */
4808                         if (fmtcnt >= 0) {
4809                                 if (c == 'h' || c == 'l' || c == 'L') {
4810                                         if (--fmtcnt >= 0)
4811                                                 c = *fmt++;
4812                                 }
4813                         }
4814                         if (fmtcnt < 0) {
4815                                 PyErr_SetString(PyExc_ValueError,
4816                                                 "incomplete format");
4817                                 goto error;
4818                         }
4819                         if (c != '%') {
4820                                 v = getnextarg(args, arglen, &argidx);
4821                                 if (v == NULL)
4822                                         goto error;
4823                         }
4824                         sign = 0;
4825                         fill = ' ';
4826                         switch (c) {
4827                         case '%':
4828                                 pbuf = "%";
4829                                 len = 1;
4830                                 break;
4831                         case 's':
4832 #ifdef Py_USING_UNICODE
4833                                 if (PyUnicode_Check(v)) {
4834                                         fmt = fmt_start;
4835                                         argidx = argidx_start;
4836                                         goto unicode;
4837                                 }
4838 #endif
4839                                 temp = _PyObject_Str(v);
4840 #ifdef Py_USING_UNICODE
4841                                 if (temp != NULL && PyUnicode_Check(temp)) {
4842                                         Py_DECREF(temp);
4843                                         fmt = fmt_start;
4844                                         argidx = argidx_start;
4845                                         goto unicode;
4846                                 }
4847 #endif
4848                                 /* Fall through */
4849                         case 'r':
4850                                 if (c == 'r')
4851                                         temp = PyObject_Repr(v);
4852                                 if (temp == NULL)
4853                                         goto error;
4854                                 if (!PyString_Check(temp)) {
4855                                         PyErr_SetString(PyExc_TypeError,
4856                                           "%s argument has non-string str()");
4857                                         Py_DECREF(temp);
4858                                         goto error;
4859                                 }
4860                                 pbuf = PyString_AS_STRING(temp);
4861                                 len = PyString_GET_SIZE(temp);
4862                                 if (prec >= 0 && len > prec)
4863                                         len = prec;
4864                                 break;
4865                         case 'i':
4866                         case 'd':
4867                         case 'u':
4868                         case 'o':
4869                         case 'x':
4870                         case 'X':
4871                                 if (c == 'i')
4872                                         c = 'd';
4873                                 isnumok = 0;
4874                                 if (PyNumber_Check(v)) {
4875                                         PyObject *iobj=NULL;
4876
4877                                         if (PyInt_Check(v) || (PyLong_Check(v))) {
4878                                                 iobj = v;
4879                                                 Py_INCREF(iobj);
4880                                         }
4881                                         else {
4882                                                 iobj = PyNumber_Int(v);
4883                                                 if (iobj==NULL) iobj = PyNumber_Long(v);
4884                                         }
4885                                         if (iobj!=NULL) {
4886                                                 if (PyInt_Check(iobj)) {
4887                                                         isnumok = 1;
4888                                                         pbuf = formatbuf;
4889                                                         len = formatint(pbuf,
4890                                                                         sizeof(formatbuf),
4891                                                                         flags, prec, c, iobj);
4892                                                         Py_DECREF(iobj);
4893                                                         if (len < 0)
4894                                                                 goto error;
4895                                                         sign = 1;
4896                                                 }
4897                                                 else if (PyLong_Check(iobj)) {
4898                                                         int ilen;
4899
4900                                                         isnumok = 1;
4901                                                         temp = _PyString_FormatLong(iobj, flags,
4902                                                                 prec, c, &pbuf, &ilen);
4903                                                         Py_DECREF(iobj);
4904                                                         len = ilen;
4905                                                         if (!temp)
4906                                                                 goto error;
4907                                                         sign = 1;
4908                                                 }
4909                                                 else {
4910                                                         Py_DECREF(iobj);
4911                                                 }
4912                                         }
4913                                 }
4914                                 if (!isnumok) {
4915                                         PyErr_Format(PyExc_TypeError,
4916                                             "%%%c format: a number is required, "
4917                                             "not %.200s", c, Py_TYPE(v)->tp_name);
4918                                         goto error;
4919                                 }
4920                                 if (flags & F_ZERO)
4921                                         fill = '0';
4922                                 break;
4923                         case 'e':
4924                         case 'E':
4925                         case 'f':
4926                         case 'F':
4927                         case 'g':
4928                         case 'G':
4929                                 if (c == 'F')
4930                                         c = 'f';
4931                                 pbuf = formatbuf;
4932                                 len = formatfloat(pbuf, sizeof(formatbuf),
4933                                                   flags, prec, c, v);
4934                                 if (len < 0)
4935                                         goto error;
4936                                 sign = 1;
4937                                 if (flags & F_ZERO)
4938                                         fill = '0';
4939                                 break;
4940                         case 'c':
4941 #ifdef Py_USING_UNICODE
4942                                 if (PyUnicode_Check(v)) {
4943                                         fmt = fmt_start;
4944                                         argidx = argidx_start;
4945                                         goto unicode;
4946                                 }
4947 #endif
4948                                 pbuf = formatbuf;
4949                                 len = formatchar(pbuf, sizeof(formatbuf), v);
4950                                 if (len < 0)
4951                                         goto error;
4952                                 break;
4953                         default:
4954                                 PyErr_Format(PyExc_ValueError,
4955                                   "unsupported format character '%c' (0x%x) "
4956                                   "at index %zd",
4957                                   c, c,
4958                                   (Py_ssize_t)(fmt - 1 -
4959                                                PyString_AsString(format)));
4960                                 goto error;
4961                         }
4962                         if (sign) {
4963                                 if (*pbuf == '-' || *pbuf == '+') {
4964                                         sign = *pbuf++;
4965                                         len--;
4966                                 }
4967                                 else if (flags & F_SIGN)
4968                                         sign = '+';
4969                                 else if (flags & F_BLANK)
4970                                         sign = ' ';
4971                                 else
4972                                         sign = 0;
4973                         }
4974                         if (width < len)
4975                                 width = len;
4976                         if (rescnt - (sign != 0) < width) {
4977                                 reslen -= rescnt;
4978                                 rescnt = width + fmtcnt + 100;
4979                                 reslen += rescnt;
4980                                 if (reslen < 0) {
4981                                         Py_DECREF(result);
4982                                         Py_XDECREF(temp);
4983                                         return PyErr_NoMemory();
4984                                 }
4985                                 if (_PyString_Resize(&result, reslen) < 0) {
4986                                         Py_XDECREF(temp);
4987                                         return NULL;
4988                                 }
4989                                 res = PyString_AS_STRING(result)
4990                                         + reslen - rescnt;
4991                         }
4992                         if (sign) {
4993                                 if (fill != ' ')
4994                                         *res++ = sign;
4995                                 rescnt--;
4996                                 if (width > len)
4997                                         width--;
4998                         }
4999                         if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
5000                                 assert(pbuf[0] == '0');
5001                                 assert(pbuf[1] == c);
5002                                 if (fill != ' ') {
5003                                         *res++ = *pbuf++;
5004                                         *res++ = *pbuf++;
5005                                 }
5006                                 rescnt -= 2;
5007                                 width -= 2;
5008                                 if (width < 0)
5009                                         width = 0;
5010                                 len -= 2;
5011                         }
5012                         if (width > len && !(flags & F_LJUST)) {
5013                                 do {
5014                                         --rescnt;
5015                                         *res++ = fill;
5016                                 } while (--width > len);
5017                         }
5018                         if (fill == ' ') {
5019                                 if (sign)
5020                                         *res++ = sign;
5021                                 if ((flags & F_ALT) &&
5022                                     (c == 'x' || c == 'X')) {
5023                                         assert(pbuf[0] == '0');
5024                                         assert(pbuf[1] == c);
5025                                         *res++ = *pbuf++;
5026                                         *res++ = *pbuf++;
5027                                 }
5028                         }
5029                         Py_MEMCPY(res, pbuf, len);
5030                         res += len;
5031                         rescnt -= len;
5032                         while (--width >= len) {
5033                                 --rescnt;
5034                                 *res++ = ' ';
5035                         }
5036                         if (dict && (argidx < arglen) && c != '%') {
5037                                 PyErr_SetString(PyExc_TypeError,
5038                                            "not all arguments converted during string formatting");
5039                                 Py_XDECREF(temp);
5040                                 goto error;
5041                         }
5042                         Py_XDECREF(temp);
5043                 } /* '%' */
5044         } /* until end */
5045         if (argidx < arglen && !dict) {
5046                 PyErr_SetString(PyExc_TypeError,
5047                                 "not all arguments converted during string formatting");
5048                 goto error;
5049         }
5050         if (args_owned) {
5051                 Py_DECREF(args);
5052         }
5053         _PyString_Resize(&result, reslen - rescnt);
5054         return result;
5055
5056 #ifdef Py_USING_UNICODE
5057  unicode:
5058         if (args_owned) {
5059                 Py_DECREF(args);
5060                 args_owned = 0;
5061         }
5062         /* Fiddle args right (remove the first argidx arguments) */
5063         if (PyTuple_Check(orig_args) && argidx > 0) {
5064                 PyObject *v;
5065                 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5066                 v = PyTuple_New(n);
5067                 if (v == NULL)
5068                         goto error;
5069                 while (--n >= 0) {
5070                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5071                         Py_INCREF(w);
5072                         PyTuple_SET_ITEM(v, n, w);
5073                 }
5074                 args = v;
5075         } else {
5076                 Py_INCREF(orig_args);
5077                 args = orig_args;
5078         }
5079         args_owned = 1;
5080         /* Take what we have of the result and let the Unicode formatting
5081            function format the rest of the input. */
5082         rescnt = res - PyString_AS_STRING(result);
5083         if (_PyString_Resize(&result, rescnt))
5084                 goto error;
5085         fmtcnt = PyString_GET_SIZE(format) - \
5086                  (fmt - PyString_AS_STRING(format));
5087         format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5088         if (format == NULL)
5089                 goto error;
5090         v = PyUnicode_Format(format, args);
5091         Py_DECREF(format);
5092         if (v == NULL)
5093                 goto error;
5094         /* Paste what we have (result) to what the Unicode formatting
5095            function returned (v) and return the result (or error) */
5096         w = PyUnicode_Concat(result, v);
5097         Py_DECREF(result);
5098         Py_DECREF(v);
5099         Py_DECREF(args);
5100         return w;
5101 #endif /* Py_USING_UNICODE */
5102
5103  error:
5104         Py_DECREF(result);
5105         if (args_owned) {
5106                 Py_DECREF(args);
5107         }
5108         return NULL;
5109 }
5110
5111 void
5112 PyString_InternInPlace(PyObject **p)
5113 {
5114         register PyStringObject *s = (PyStringObject *)(*p);
5115         PyObject *t;
5116         if (s == NULL || !PyString_Check(s))
5117                 Py_FatalError("PyString_InternInPlace: strings only please!");
5118         /* If it's a string subclass, we don't really know what putting
5119            it in the interned dict might do. */
5120         if (!PyString_CheckExact(s))
5121                 return;
5122         if (PyString_CHECK_INTERNED(s))
5123                 return;
5124         if (interned == NULL) {
5125                 interned = PyDict_New();
5126                 if (interned == NULL) {
5127                         PyErr_Clear(); /* Don't leave an exception */
5128                         return;
5129                 }
5130         }
5131         t = PyDict_GetItem(interned, (PyObject *)s);
5132         if (t) {
5133                 Py_INCREF(t);
5134                 Py_DECREF(*p);
5135                 *p = t;
5136                 return;
5137         }
5138
5139         if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5140                 PyErr_Clear();
5141                 return;
5142         }
5143         /* The two references in interned are not counted by refcnt.
5144            The string deallocator will take care of this */
5145         Py_REFCNT(s) -= 2;
5146         PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
5147 }
5148
5149 void
5150 PyString_InternImmortal(PyObject **p)
5151 {
5152         PyString_InternInPlace(p);
5153         if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5154                 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5155                 Py_INCREF(*p);
5156         }
5157 }
5158
5159
5160 PyObject *
5161 PyString_InternFromString(const char *cp)
5162 {
5163         PyObject *s = PyString_FromString(cp);
5164         if (s == NULL)
5165                 return NULL;
5166         PyString_InternInPlace(&s);
5167         return s;
5168 }
5169
5170 void
5171 PyString_Fini(void)
5172 {
5173         int i;
5174         for (i = 0; i < UCHAR_MAX + 1; i++) {
5175                 Py_XDECREF(characters[i]);
5176                 characters[i] = NULL;
5177         }
5178         Py_XDECREF(nullstring);
5179         nullstring = NULL;
5180 }
5181
5182 void _Py_ReleaseInternedStrings(void)
5183 {
5184         PyObject *keys;
5185         PyStringObject *s;
5186         Py_ssize_t i, n;
5187         Py_ssize_t immortal_size = 0, mortal_size = 0;
5188
5189         if (interned == NULL || !PyDict_Check(interned))
5190                 return;
5191         keys = PyDict_Keys(interned);
5192         if (keys == NULL || !PyList_Check(keys)) {
5193                 PyErr_Clear();
5194                 return;
5195         }
5196
5197         /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5198            detector, interned strings are not forcibly deallocated; rather, we
5199            give them their stolen references back, and then clear and DECREF
5200            the interned dict. */
5201
5202         n = PyList_GET_SIZE(keys);
5203         fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5204                 n);
5205         for (i = 0; i < n; i++) {
5206                 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5207                 switch (s->ob_sstate) {
5208                 case SSTATE_NOT_INTERNED:
5209                         /* XXX Shouldn't happen */
5210                         break;
5211                 case SSTATE_INTERNED_IMMORTAL:
5212                         Py_REFCNT(s) += 1;
5213                         immortal_size += Py_SIZE(s);
5214                         break;
5215                 case SSTATE_INTERNED_MORTAL:
5216                         Py_REFCNT(s) += 2;
5217                         mortal_size += Py_SIZE(s);
5218                         break;
5219                 default:
5220                         Py_FatalError("Inconsistent interned string state.");
5221                 }
5222                 s->ob_sstate = SSTATE_NOT_INTERNED;
5223         }
5224         fprintf(stderr, "total size of all interned strings: "
5225                         "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5226                         "mortal/immortal\n", mortal_size, immortal_size);
5227         Py_DECREF(keys);
5228         PyDict_Clear(interned);
5229         Py_DECREF(interned);
5230         interned = NULL;
5231 }